duckdb 0.8.2-dev37.0 → 0.8.2-dev3989.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1087)
  1. package/README.md +7 -0
  2. package/binding.gyp +29 -13
  3. package/binding.gyp.in +1 -1
  4. package/configure.py +11 -3
  5. package/duckdb_extension_config.cmake +10 -0
  6. package/package.json +1 -1
  7. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  8. package/src/duckdb/extension/icu/icu-datefunc.cpp +10 -1
  9. package/src/duckdb/extension/icu/icu-datepart.cpp +162 -41
  10. package/src/duckdb/extension/icu/icu-datesub.cpp +3 -2
  11. package/src/duckdb/extension/icu/icu-datetrunc.cpp +2 -1
  12. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-makedate.cpp +19 -6
  14. package/src/duckdb/extension/icu/icu-strptime.cpp +5 -24
  15. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  16. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  17. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  18. package/src/duckdb/extension/icu/icu_extension.cpp +10 -12
  19. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
  20. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
  21. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  22. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  23. package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
  24. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  25. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  26. package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
  27. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  28. package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
  29. package/src/duckdb/extension/json/json_common.cpp +272 -40
  30. package/src/duckdb/extension/json/json_deserializer.cpp +37 -73
  31. package/src/duckdb/extension/json/json_enums.cpp +105 -0
  32. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  33. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  34. package/src/duckdb/extension/json/json_functions/json_transform.cpp +93 -38
  35. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  36. package/src/duckdb/extension/json/json_functions.cpp +26 -25
  37. package/src/duckdb/extension/json/json_scan.cpp +47 -6
  38. package/src/duckdb/extension/json/json_serializer.cpp +29 -72
  39. package/src/duckdb/extension/json/serialize_json.cpp +92 -0
  40. package/src/duckdb/extension/parquet/column_reader.cpp +37 -25
  41. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  42. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  43. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  44. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  45. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  46. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  47. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  48. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  49. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +4 -0
  50. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  51. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  52. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +1 -0
  53. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +28 -5
  54. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  55. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  56. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  57. package/src/duckdb/extension/parquet/parquet_extension.cpp +258 -40
  58. package/src/duckdb/extension/parquet/parquet_reader.cpp +10 -10
  59. package/src/duckdb/extension/parquet/parquet_statistics.cpp +25 -8
  60. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +6 -0
  61. package/src/duckdb/extension/parquet/parquet_writer.cpp +149 -31
  62. package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
  63. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  64. package/src/duckdb/src/catalog/catalog.cpp +147 -70
  65. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -11
  66. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +17 -41
  67. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +2 -10
  68. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +4 -14
  69. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +11 -28
  70. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +11 -42
  71. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -26
  72. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +11 -27
  73. package/src/duckdb/src/catalog/catalog_entry.cpp +25 -1
  74. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  75. package/src/duckdb/src/catalog/catalog_set.cpp +0 -63
  76. package/src/duckdb/src/catalog/default/default_functions.cpp +21 -0
  77. package/src/duckdb/src/catalog/dependency_manager.cpp +0 -36
  78. package/src/duckdb/src/common/adbc/adbc.cpp +541 -171
  79. package/src/duckdb/src/common/adbc/driver_manager.cpp +92 -39
  80. package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
  81. package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
  82. package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
  83. package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
  84. package/src/duckdb/src/common/allocator.cpp +14 -2
  85. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
  86. package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
  87. package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
  88. package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
  89. package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
  90. package/src/duckdb/src/common/arrow/arrow_appender.cpp +95 -666
  91. package/src/duckdb/src/common/arrow/arrow_converter.cpp +65 -37
  92. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -42
  93. package/src/duckdb/src/common/assert.cpp +3 -0
  94. package/src/duckdb/src/common/constants.cpp +2 -1
  95. package/src/duckdb/src/common/enum_util.cpp +4979 -4458
  96. package/src/duckdb/src/common/enums/date_part_specifier.cpp +2 -0
  97. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  98. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  99. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  100. package/src/duckdb/src/common/exception.cpp +15 -2
  101. package/src/duckdb/src/common/extra_type_info.cpp +487 -0
  102. package/src/duckdb/src/common/field_writer.cpp +1 -1
  103. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  104. package/src/duckdb/src/common/file_system.cpp +46 -12
  105. package/src/duckdb/src/common/filename_pattern.cpp +1 -1
  106. package/src/duckdb/src/common/gzip_file_system.cpp +7 -12
  107. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  108. package/src/duckdb/src/common/http_state.cpp +78 -0
  109. package/src/duckdb/src/common/local_file_system.cpp +36 -28
  110. package/src/duckdb/src/common/multi_file_reader.cpp +193 -20
  111. package/src/duckdb/src/common/operator/cast_operators.cpp +92 -1
  112. package/src/duckdb/src/common/operator/string_cast.cpp +45 -8
  113. package/src/duckdb/src/common/radix_partitioning.cpp +34 -39
  114. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  115. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  116. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  117. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +63 -73
  118. package/src/duckdb/src/common/serializer/binary_serializer.cpp +85 -80
  119. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +0 -9
  120. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  121. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  122. package/src/duckdb/src/common/sort/partition_state.cpp +102 -74
  123. package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
  124. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  125. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  126. package/src/duckdb/src/common/types/bit.cpp +51 -0
  127. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  128. package/src/duckdb/src/common/types/column/column_data_collection.cpp +68 -2
  129. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +20 -6
  130. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  131. package/src/duckdb/src/common/types/data_chunk.cpp +46 -10
  132. package/src/duckdb/src/common/types/date.cpp +15 -0
  133. package/src/duckdb/src/common/types/hugeint.cpp +40 -0
  134. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  135. package/src/duckdb/src/common/types/interval.cpp +6 -0
  136. package/src/duckdb/src/common/types/list_segment.cpp +56 -198
  137. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +251 -131
  138. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  139. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  140. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  141. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +41 -26
  142. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  143. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  144. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  145. package/src/duckdb/src/common/types/string_heap.cpp +4 -0
  146. package/src/duckdb/src/common/types/time.cpp +105 -0
  147. package/src/duckdb/src/common/types/timestamp.cpp +7 -0
  148. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  149. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  150. package/src/duckdb/src/common/types/value.cpp +99 -60
  151. package/src/duckdb/src/common/types/vector.cpp +73 -80
  152. package/src/duckdb/src/common/types.cpp +38 -724
  153. package/src/duckdb/src/common/virtual_file_system.cpp +142 -1
  154. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
  155. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
  156. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +64 -19
  157. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +30 -0
  158. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +1 -0
  159. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +83 -59
  160. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  161. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  162. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  163. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  164. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  165. package/src/duckdb/src/core_functions/function_list.cpp +10 -4
  166. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -0
  167. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +380 -89
  168. package/src/duckdb/src/core_functions/scalar/date/date_sub.cpp +2 -0
  169. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +4 -0
  170. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  171. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  172. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
  173. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  174. package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +16 -12
  175. package/src/duckdb/src/core_functions/scalar/generic/current_setting.cpp +3 -1
  176. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
  177. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  178. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +23 -3
  179. package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
  180. package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
  181. package/src/duckdb/src/core_functions/scalar/union/union_tag.cpp +1 -1
  182. package/src/duckdb/src/execution/aggregate_hashtable.cpp +226 -346
  183. package/src/duckdb/src/execution/column_binding_resolver.cpp +10 -7
  184. package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
  185. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  186. package/src/duckdb/src/execution/index/art/art.cpp +219 -259
  187. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  188. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +11 -15
  189. package/src/duckdb/src/execution/index/art/iterator.cpp +130 -214
  190. package/src/duckdb/src/execution/index/art/leaf.cpp +300 -266
  191. package/src/duckdb/src/execution/index/art/node.cpp +211 -205
  192. package/src/duckdb/src/execution/index/art/node16.cpp +10 -19
  193. package/src/duckdb/src/execution/index/art/node256.cpp +10 -18
  194. package/src/duckdb/src/execution/index/art/node4.cpp +21 -23
  195. package/src/duckdb/src/execution/index/art/node48.cpp +10 -20
  196. package/src/duckdb/src/execution/index/art/prefix.cpp +308 -338
  197. package/src/duckdb/src/execution/join_hashtable.cpp +9 -10
  198. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  199. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +250 -317
  200. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
  201. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  202. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +231 -190
  203. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +367 -1068
  204. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +157 -174
  205. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  206. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  207. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  208. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  209. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +67 -28
  210. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  211. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  212. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +46 -47
  213. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  214. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  215. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  216. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  217. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  218. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  219. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  220. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +12 -9
  221. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
  222. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +10 -8
  223. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +2 -1
  224. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
  225. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +3 -1
  226. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +3 -1
  227. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
  228. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +449 -288
  229. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  230. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  231. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
  232. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +28 -15
  233. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +35 -17
  234. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  235. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
  236. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +31 -10
  237. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  238. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
  239. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
  240. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  241. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +14 -10
  242. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +11 -9
  243. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +9 -7
  244. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +14 -12
  245. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +11 -11
  246. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
  247. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  248. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +24 -27
  249. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  250. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -12
  251. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +2 -1
  252. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +198 -0
  253. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -6
  254. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  255. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +16 -7
  256. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +37 -6
  257. package/src/duckdb/src/execution/physical_operator.cpp +20 -16
  258. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  259. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +57 -35
  260. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +32 -15
  261. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +45 -34
  262. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  263. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +2 -5
  264. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  265. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  266. package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -11
  267. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +636 -349
  268. package/src/duckdb/src/execution/window_executor.cpp +1285 -0
  269. package/src/duckdb/src/execution/window_segment_tree.cpp +408 -144
  270. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -13
  271. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +6 -12
  272. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  273. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  274. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  275. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  276. package/src/duckdb/src/function/cast/string_cast.cpp +2 -2
  277. package/src/duckdb/src/function/cast/time_casts.cpp +7 -6
  278. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +7 -2
  279. package/src/duckdb/src/function/function.cpp +3 -1
  280. package/src/duckdb/src/function/pragma/pragma_queries.cpp +7 -1
  281. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  282. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  283. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  284. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  285. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  286. package/src/duckdb/src/function/scalar/operators/add.cpp +9 -0
  287. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +6 -3
  288. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  289. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  290. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +39 -5
  291. package/src/duckdb/src/function/scalar_function.cpp +5 -20
  292. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
  293. package/src/duckdb/src/function/table/arrow.cpp +110 -88
  294. package/src/duckdb/src/function/table/arrow_conversion.cpp +86 -73
  295. package/src/duckdb/src/function/table/copy_csv.cpp +102 -97
  296. package/src/duckdb/src/function/table/read_csv.cpp +263 -141
  297. package/src/duckdb/src/function/table/system/test_all_types.cpp +48 -21
  298. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  299. package/src/duckdb/src/function/table/table_scan.cpp +42 -0
  300. package/src/duckdb/src/function/table/version/pragma_version.cpp +49 -2
  301. package/src/duckdb/src/function/table_function.cpp +4 -3
  302. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +20 -5
  303. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +3 -3
  304. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +1 -4
  305. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -5
  306. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -6
  307. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +2 -13
  308. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -4
  309. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -5
  310. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +14 -0
  311. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +0 -6
  312. package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
  313. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +4 -1
  314. package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
  315. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  316. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
  317. package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
  318. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
  319. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
  320. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
  321. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
  322. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
  323. package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
  324. package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
  325. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
  326. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +9 -4
  327. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -5
  328. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
  329. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
  330. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
  331. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
  332. package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
  333. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
  334. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  335. package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
  336. package/src/duckdb/src/include/duckdb/common/constants.hpp +4 -0
  337. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  338. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +681 -577
  339. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  340. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +9 -1
  341. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +4 -3
  342. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  343. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  344. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
  345. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  346. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  347. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  348. package/src/duckdb/src/include/duckdb/common/exception.hpp +15 -1
  349. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  350. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +215 -0
  351. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +0 -4
  352. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  353. package/src/duckdb/src/include/duckdb/common/file_system.hpp +10 -8
  354. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +1 -1
  355. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  356. package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
  357. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  358. package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
  359. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
  360. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +12 -0
  361. package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
  362. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +11 -5
  363. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +12 -42
  364. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  365. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
  366. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +6 -2
  367. package/src/duckdb/src/include/duckdb/common/operator/add.hpp +5 -2
  368. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +65 -4
  369. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +3 -2
  370. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  371. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  372. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +3 -2
  373. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  374. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  375. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  376. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  377. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  378. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  379. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +71 -30
  380. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +48 -39
  381. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +0 -4
  382. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +128 -0
  383. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  384. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +186 -133
  385. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +166 -121
  386. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +27 -4
  387. package/src/duckdb/src/include/duckdb/common/serializer.hpp +0 -7
  388. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  389. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +34 -13
  390. package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
  391. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  392. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  393. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  394. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  395. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  396. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +11 -1
  397. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -1
  398. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -1
  399. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  400. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +6 -3
  401. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -5
  402. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +46 -3
  403. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  404. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  405. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +11 -15
  406. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +46 -11
  407. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +10 -1
  408. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  409. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  410. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +7 -3
  411. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  412. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  413. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +6 -2
  414. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
  415. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  416. package/src/duckdb/src/include/duckdb/common/types/time.hpp +5 -0
  417. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +16 -10
  418. package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -2
  419. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +10 -3
  420. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -25
  421. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  422. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
  423. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +40 -97
  424. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  425. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  426. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  427. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  428. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  429. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  430. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  431. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  432. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  433. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  434. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  435. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -11
  436. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  437. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  438. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  439. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +7 -5
  440. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  441. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +6 -4
  442. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +4 -2
  443. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  444. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +12 -1
  445. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  446. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  447. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +128 -131
  448. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  449. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -12
  450. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  451. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  452. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +32 -28
  453. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +46 -51
  454. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +134 -53
  455. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +5 -7
  456. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +5 -7
  457. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +7 -9
  458. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +5 -7
  459. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  460. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -7
  461. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  462. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
  463. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +6 -5
  464. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  465. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
  466. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
  467. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
  468. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
  469. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +5 -12
  470. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  471. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
  472. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
  473. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +3 -3
  474. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
  475. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +3 -3
  476. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  477. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
  478. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
  479. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  480. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  481. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  482. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
  483. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
  484. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
  485. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +19 -19
  487. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  488. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  489. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  490. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  491. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +5 -4
  492. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +61 -28
  493. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  494. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  495. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  496. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +22 -28
  497. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  498. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  499. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -5
  500. package/src/duckdb/src/include/duckdb/execution/operator/schema/{physical_create_index.hpp → physical_create_art_index.hpp} +14 -7
  501. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  502. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  503. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
  504. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +6 -5
  505. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
  506. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
  507. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +19 -21
  508. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  509. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +79 -63
  510. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -4
  511. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  512. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  513. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +6 -1
  514. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +80 -0
  515. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -0
  516. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  517. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  518. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  519. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +12 -4
  520. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  521. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -3
  522. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
  523. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
  524. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
  525. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +24 -12
  526. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +5 -1
  527. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  528. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
  529. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +2 -1
  530. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
  531. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +4 -3
  532. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
  533. package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +43 -0
  534. package/src/duckdb/src/include/duckdb/main/client_config.hpp +7 -2
  535. package/src/duckdb/src/include/duckdb/main/client_context.hpp +16 -14
  536. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  537. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  538. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
  539. package/src/duckdb/src/include/duckdb/main/config.hpp +16 -1
  540. package/src/duckdb/src/include/duckdb/main/connection.hpp +3 -4
  541. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +27 -0
  542. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +210 -144
  543. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +41 -6
  544. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +4 -0
  545. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  546. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
  547. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
  548. package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
  549. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  550. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  551. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  552. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  553. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  554. package/src/duckdb/src/include/duckdb/main/settings.hpp +71 -11
  555. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  556. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  557. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  558. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  559. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +38 -64
  560. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  561. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  562. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  563. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +8 -22
  564. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -12
  565. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  566. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +19 -30
  567. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  568. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  569. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  570. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  571. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  572. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  573. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  574. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  575. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  576. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -3
  577. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
  578. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +9 -1
  579. package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +6 -5
  580. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -0
  581. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  582. package/src/duckdb/src/include/duckdb/parser/constraint.hpp +5 -0
  583. package/src/duckdb/src/include/duckdb/parser/constraints/check_constraint.hpp +3 -0
  584. package/src/duckdb/src/include/duckdb/parser/constraints/foreign_key_constraint.hpp +6 -0
  585. package/src/duckdb/src/include/duckdb/parser/constraints/not_null_constraint.hpp +3 -0
  586. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +6 -0
  587. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +4 -1
  588. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +1 -1
  589. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +4 -1
  590. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +4 -1
  591. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +4 -1
  592. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +4 -1
  593. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  594. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +4 -1
  595. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  596. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  597. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +4 -1
  598. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +21 -4
  599. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +18 -2
  600. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +4 -1
  601. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
  602. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +1 -1
  603. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -1
  604. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  605. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +12 -1
  606. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +66 -2
  607. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +8 -1
  608. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +8 -1
  609. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +9 -1
  610. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +9 -2
  611. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +3 -0
  612. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +3 -0
  613. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +3 -0
  614. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -0
  615. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +3 -0
  616. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +3 -0
  617. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +7 -0
  618. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +7 -0
  619. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +7 -0
  620. package/src/duckdb/src/include/duckdb/parser/parsed_data/load_info.hpp +17 -3
  621. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +22 -0
  622. package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +10 -0
  623. package/src/duckdb/src/include/duckdb/parser/parsed_data/show_select_info.hpp +7 -0
  624. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +10 -0
  625. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -0
  626. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  627. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  628. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  629. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  630. package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
  631. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  632. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  633. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  634. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  635. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +23 -26
  636. package/src/duckdb/src/include/duckdb/planner/binder.hpp +16 -5
  637. package/src/duckdb/src/include/duckdb/planner/bound_constraint.hpp +0 -8
  638. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
  639. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +6 -0
  640. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  641. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +9 -0
  642. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  643. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  644. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +6 -0
  645. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +6 -0
  646. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +6 -0
  647. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +3 -0
  648. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +3 -0
  649. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +3 -0
  650. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +3 -0
  651. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +3 -0
  652. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
  653. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +3 -1
  654. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +3 -0
  655. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +3 -0
  656. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +24 -6
  657. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +9 -2
  658. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +3 -0
  659. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +3 -0
  660. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
  661. package/src/duckdb/src/include/duckdb/planner/expression/list.hpp +1 -0
  662. package/src/duckdb/src/include/duckdb/planner/expression.hpp +3 -0
  663. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  664. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
  665. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  666. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  667. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  668. package/src/duckdb/src/include/duckdb/planner/joinside.hpp +3 -0
  669. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +3 -2
  670. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -2
  671. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +3 -3
  672. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +3 -0
  673. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +3 -0
  674. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +4 -0
  675. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +12 -7
  676. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
  677. package/src/duckdb/src/include/duckdb/planner/operator/logical_create.hpp +9 -6
  678. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +12 -23
  679. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +10 -6
  680. package/src/duckdb/src/include/duckdb/planner/operator/logical_cross_product.hpp +3 -0
  681. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +9 -2
  682. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +7 -0
  683. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +3 -0
  684. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  685. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +6 -10
  686. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +2 -0
  687. package/src/duckdb/src/include/duckdb/planner/operator/logical_empty_result.hpp +2 -0
  688. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -0
  689. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +3 -0
  690. package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +8 -0
  691. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +3 -0
  692. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +11 -1
  693. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +6 -0
  694. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +3 -0
  695. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +3 -0
  696. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +52 -0
  697. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +7 -35
  698. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +6 -0
  699. package/src/duckdb/src/include/duckdb/planner/operator/logical_positional_join.hpp +3 -0
  700. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +3 -0
  701. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +10 -7
  702. package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +4 -0
  703. package/src/duckdb/src/include/duckdb/planner/operator/logical_sample.hpp +6 -0
  704. package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +4 -0
  705. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +4 -0
  706. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +3 -0
  707. package/src/duckdb/src/include/duckdb/planner/operator/logical_simple.hpp +3 -0
  708. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +4 -0
  709. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +2 -0
  710. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +6 -0
  711. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +3 -0
  712. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -0
  713. package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
  714. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  715. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  716. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  717. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  718. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  719. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  720. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
  721. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  722. package/src/duckdb/src/include/duckdb/planner/tableref/bound_pivotref.hpp +3 -0
  723. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  724. package/src/duckdb/src/include/duckdb/storage/block.hpp +33 -4
  725. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +11 -11
  726. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  727. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
  728. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
  729. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
  730. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
  731. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  732. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
  733. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
  734. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
  735. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
  736. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
  737. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
  738. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  739. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
  740. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +8 -5
  741. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  742. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  743. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  744. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  745. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  746. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +7 -0
  747. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  748. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
  749. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
  750. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +18 -3
  751. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
  752. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +8 -3
  753. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -3
  754. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
  755. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  756. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  757. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  758. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
  759. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  760. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  761. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
  762. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  763. package/src/duckdb/src/include/duckdb.h +98 -1
  764. package/src/duckdb/src/main/appender.cpp +3 -1
  765. package/src/duckdb/src/main/attached_database.cpp +2 -2
  766. package/src/duckdb/src/main/capi/arrow-c.cpp +196 -8
  767. package/src/duckdb/src/main/capi/duckdb-c.cpp +16 -0
  768. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  769. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  770. package/src/duckdb/src/main/capi/pending-c.cpp +23 -0
  771. package/src/duckdb/src/main/capi/prepared-c.cpp +106 -28
  772. package/src/duckdb/src/main/capi/result-c.cpp +3 -1
  773. package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
  774. package/src/duckdb/src/main/chunk_scan_state.cpp +48 -0
  775. package/src/duckdb/src/main/client_context.cpp +42 -19
  776. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  777. package/src/duckdb/src/main/client_verify.cpp +18 -0
  778. package/src/duckdb/src/main/config.cpp +9 -3
  779. package/src/duckdb/src/main/connection.cpp +3 -3
  780. package/src/duckdb/src/main/database.cpp +3 -12
  781. package/src/duckdb/src/main/db_instance_cache.cpp +14 -6
  782. package/src/duckdb/src/main/extension/extension_helper.cpp +164 -88
  783. package/src/duckdb/src/main/extension/extension_install.cpp +76 -15
  784. package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
  785. package/src/duckdb/src/main/extension/extension_util.cpp +16 -0
  786. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  787. package/src/duckdb/src/main/prepared_statement.cpp +38 -11
  788. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
  789. package/src/duckdb/src/main/query_result.cpp +0 -21
  790. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  791. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  792. package/src/duckdb/src/main/relation/join_relation.cpp +6 -6
  793. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  794. package/src/duckdb/src/main/relation.cpp +10 -9
  795. package/src/duckdb/src/main/settings/settings.cpp +125 -33
  796. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  797. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +2 -4
  798. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  799. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  800. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  801. package/src/duckdb/src/optimizer/compressed_materialization.cpp +477 -0
  802. package/src/duckdb/src/optimizer/deliminator.cpp +180 -323
  803. package/src/duckdb/src/optimizer/filter_pushdown.cpp +23 -6
  804. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  805. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  806. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  807. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  808. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1047
  809. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  810. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  811. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +52 -41
  812. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  813. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  814. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  815. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  816. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  817. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  818. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +34 -7
  819. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  820. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  821. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  822. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  823. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  824. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  825. package/src/duckdb/src/optimizer/topn_optimizer.cpp +27 -10
  826. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +3 -5
  827. package/src/duckdb/src/parallel/executor.cpp +25 -1
  828. package/src/duckdb/src/parallel/pipeline.cpp +0 -17
  829. package/src/duckdb/src/parallel/pipeline_executor.cpp +33 -13
  830. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
  831. package/src/duckdb/src/parallel/task_scheduler.cpp +18 -2
  832. package/src/duckdb/src/parser/column_definition.cpp +20 -32
  833. package/src/duckdb/src/parser/column_list.cpp +8 -0
  834. package/src/duckdb/src/parser/constraints/foreign_key_constraint.cpp +3 -0
  835. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +3 -0
  836. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  837. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -25
  838. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  839. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  840. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  841. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  842. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  843. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  844. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  845. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  846. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  847. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  848. package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -19
  849. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  850. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  851. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  852. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  853. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +5 -2
  854. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +38 -0
  855. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +17 -1
  856. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +2 -0
  857. package/src/duckdb/src/parser/parsed_data/detach_info.cpp +1 -1
  858. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +1 -1
  859. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +0 -18
  860. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +4 -1
  861. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -1
  862. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  863. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  864. package/src/duckdb/src/parser/parser.cpp +62 -36
  865. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  866. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  867. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  868. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  869. package/src/duckdb/src/parser/query_node.cpp +15 -47
  870. package/src/duckdb/src/parser/result_modifier.cpp +0 -87
  871. package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
  872. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -10
  873. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  874. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  875. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  876. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  877. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -55
  878. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  879. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  880. package/src/duckdb/src/parser/tableref.cpp +0 -44
  881. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
  882. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
  883. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  884. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  885. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
  886. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  887. package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
  888. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  889. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +16 -1
  890. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  891. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +32 -17
  892. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  893. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  894. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  895. package/src/duckdb/src/parser/transform/statement/transform_load.cpp +1 -0
  896. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  897. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  898. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
  899. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  900. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  901. package/src/duckdb/src/parser/transformer.cpp +44 -25
  902. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
  903. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
  904. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  905. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  906. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  907. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  908. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +36 -33
  909. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  910. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +14 -52
  911. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +0 -23
  912. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
  913. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +70 -29
  914. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +93 -28
  915. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  916. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  917. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  918. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +67 -31
  919. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  920. package/src/duckdb/src/planner/binder.cpp +44 -31
  921. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +24 -1
  922. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +4 -0
  923. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +13 -8
  924. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
  925. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +28 -20
  926. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +48 -4
  927. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  928. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +5 -4
  929. package/src/duckdb/src/planner/expression_binder.cpp +23 -0
  930. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  931. package/src/duckdb/src/planner/logical_operator.cpp +19 -7
  932. package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -6
  933. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +4 -2
  934. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
  935. package/src/duckdb/src/planner/operator/logical_create.cpp +14 -0
  936. package/src/duckdb/src/planner/operator/logical_create_index.cpp +36 -7
  937. package/src/duckdb/src/planner/operator/logical_create_table.cpp +16 -0
  938. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  939. package/src/duckdb/src/planner/operator/logical_delete.cpp +9 -2
  940. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  941. package/src/duckdb/src/planner/operator/logical_distinct.cpp +13 -0
  942. package/src/duckdb/src/planner/operator/logical_explain.cpp +1 -1
  943. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +39 -0
  944. package/src/duckdb/src/planner/operator/logical_get.cpp +82 -4
  945. package/src/duckdb/src/planner/operator/logical_insert.cpp +8 -2
  946. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +22 -0
  947. package/src/duckdb/src/planner/operator/logical_order.cpp +39 -0
  948. package/src/duckdb/src/planner/operator/logical_pivot.cpp +3 -0
  949. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +5 -5
  950. package/src/duckdb/src/planner/operator/logical_sample.cpp +3 -0
  951. package/src/duckdb/src/planner/operator/logical_update.cpp +8 -2
  952. package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +4 -2
  953. package/src/duckdb/src/planner/planner.cpp +18 -7
  954. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  955. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  956. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  957. package/src/duckdb/src/storage/arena_allocator.cpp +13 -2
  958. package/src/duckdb/src/storage/buffer/block_manager.cpp +13 -9
  959. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  960. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
  961. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
  962. package/src/duckdb/src/storage/checkpoint_manager.cpp +78 -72
  963. package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
  964. package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
  965. package/src/duckdb/src/storage/compression/fsst.cpp +1 -1
  966. package/src/duckdb/src/storage/compression/rle.cpp +52 -13
  967. package/src/duckdb/src/storage/data_table.cpp +36 -25
  968. package/src/duckdb/src/storage/index.cpp +4 -26
  969. package/src/duckdb/src/storage/local_storage.cpp +3 -4
  970. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
  971. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
  972. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
  973. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +98 -0
  974. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +194 -0
  975. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +283 -0
  976. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +762 -0
  977. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +62 -0
  978. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +461 -0
  979. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +421 -0
  980. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +342 -0
  981. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  982. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +97 -0
  983. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +22 -0
  984. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  985. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
  986. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +164 -0
  987. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  988. package/src/duckdb/src/storage/single_file_block_manager.cpp +69 -51
  989. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  990. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  991. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  992. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  993. package/src/duckdb/src/storage/statistics/string_stats.cpp +44 -2
  994. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  995. package/src/duckdb/src/storage/storage_info.cpp +3 -2
  996. package/src/duckdb/src/storage/storage_manager.cpp +11 -5
  997. package/src/duckdb/src/storage/table/chunk_info.cpp +99 -3
  998. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
  999. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -3
  1000. package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
  1001. package/src/duckdb/src/storage/table/row_group.cpp +102 -20
  1002. package/src/duckdb/src/storage/table/row_group_collection.cpp +23 -19
  1003. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  1004. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  1005. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  1006. package/src/duckdb/src/storage/wal_replay.cpp +26 -26
  1007. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
  1008. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  1009. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
  1010. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  1011. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  1012. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +5 -2
  1013. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  1014. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  1015. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +11 -0
  1016. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  1017. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
  1018. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11019 -10364
  1019. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
  1020. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +10 -0
  1021. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +31 -1
  1022. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  1023. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  1024. package/src/duckdb/ub_src_common.cpp +4 -0
  1025. package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
  1026. package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
  1027. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  1028. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  1029. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  1030. package/src/duckdb/ub_src_execution.cpp +2 -2
  1031. package/src/duckdb/ub_src_execution_index_art.cpp +0 -6
  1032. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  1033. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  1034. package/src/duckdb/ub_src_execution_operator_persistent.cpp +1 -11
  1035. package/src/duckdb/ub_src_execution_operator_schema.cpp +1 -1
  1036. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  1037. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  1038. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  1039. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  1040. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  1041. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  1042. package/src/duckdb/ub_src_main.cpp +2 -0
  1043. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  1044. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  1045. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  1046. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
  1047. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  1048. package/src/duckdb/ub_src_parser.cpp +0 -2
  1049. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  1050. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  1051. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  1052. package/src/duckdb/ub_src_planner_operator.cpp +3 -3
  1053. package/src/duckdb/ub_src_storage.cpp +0 -4
  1054. package/src/duckdb/ub_src_storage_compression.cpp +2 -0
  1055. package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
  1056. package/src/duckdb/ub_src_storage_serialization.cpp +30 -0
  1057. package/src/duckdb_node.hpp +1 -0
  1058. package/src/statement.cpp +10 -5
  1059. package/test/columns.test.ts +25 -3
  1060. package/test/extension.test.ts +1 -1
  1061. package/test/test_all_types.test.ts +234 -0
  1062. package/tsconfig.json +1 -0
  1063. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  1064. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  1065. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  1066. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  1067. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  1068. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  1069. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -193
  1070. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -172
  1071. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
  1072. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  1073. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  1074. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
  1075. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  1076. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  1077. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -69
  1078. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +0 -27
  1079. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_join.hpp +0 -32
  1080. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -49
  1081. package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
  1082. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
  1083. package/src/duckdb/src/parser/common_table_expression_info.cpp +0 -19
  1084. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +0 -14
  1085. package/src/duckdb/src/planner/operator/logical_delim_join.cpp +0 -27
  1086. package/src/duckdb/src/storage/meta_block_reader.cpp +0 -78
  1087. package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -1,46 +1,28 @@
1
1
  #include "duckdb/execution/radix_partitioned_hashtable.hpp"
2
2
 
3
+ #include "duckdb/common/radix_partitioning.hpp"
4
+ #include "duckdb/common/row_operations/row_operations.hpp"
5
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
6
+ #include "duckdb/common/types/row/tuple_data_iterator.hpp"
7
+ #include "duckdb/execution/aggregate_hashtable.hpp"
8
+ #include "duckdb/execution/executor.hpp"
3
9
  #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
10
+ #include "duckdb/main/config.hpp"
4
11
  #include "duckdb/parallel/event.hpp"
5
- #include "duckdb/parallel/task_scheduler.hpp"
6
12
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
7
13
 
8
14
  namespace duckdb {
9
15
 
10
- // compute the GROUPING values
11
- // for each parameter to the GROUPING clause, we check if the hash table groups on this particular group
12
- // if it does, we return 0, otherwise we return 1
13
- // we then use bitshifts to combine these values
14
- void RadixPartitionedHashTable::SetGroupingValues() {
15
- auto &grouping_functions = op.GetGroupingFunctions();
16
- for (auto &grouping : grouping_functions) {
17
- int64_t grouping_value = 0;
18
- D_ASSERT(grouping.size() < sizeof(int64_t) * 8);
19
- for (idx_t i = 0; i < grouping.size(); i++) {
20
- if (grouping_set.find(grouping[i]) == grouping_set.end()) {
21
- // we don't group on this value!
22
- grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
23
- }
24
- }
25
- grouping_values.push_back(Value::BIGINT(grouping_value));
26
- }
27
- }
28
-
29
16
  RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p, const GroupedAggregateData &op_p)
30
17
  : grouping_set(grouping_set_p), op(op_p) {
31
-
32
18
  auto groups_count = op.GroupCount();
33
19
  for (idx_t i = 0; i < groups_count; i++) {
34
20
  if (grouping_set.find(i) == grouping_set.end()) {
35
21
  null_groups.push_back(i);
36
22
  }
37
23
  }
38
-
39
- // 10000 seems like a good compromise here
40
- radix_limit = 10000;
41
-
42
24
  if (grouping_set.empty()) {
43
- // fake a single group with a constant value for aggregation without groups
25
+ // Fake a single group with a constant value for aggregation without groups
44
26
  group_types.emplace_back(LogicalType::TINYINT);
45
27
  }
46
28
  for (auto &entry : grouping_set) {
@@ -48,71 +30,279 @@ RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p
48
30
  group_types.push_back(op.group_types[entry]);
49
31
  }
50
32
  SetGroupingValues();
33
+
34
+ auto group_types_copy = group_types;
35
+ group_types_copy.emplace_back(LogicalType::HASH);
36
+ layout.Initialize(std::move(group_types_copy), AggregateObject::CreateAggregateObjects(op.bindings));
37
+ }
38
+
39
+ void RadixPartitionedHashTable::SetGroupingValues() {
40
+ // Compute the GROUPING values:
41
+ // For each parameter to the GROUPING clause, we check if the hash table groups on this particular group
42
+ // If it does, we return 0, otherwise we return 1
43
+ // We then use bitshifts to combine these values
44
+ auto &grouping_functions = op.GetGroupingFunctions();
45
+ for (auto &grouping : grouping_functions) {
46
+ int64_t grouping_value = 0;
47
+ D_ASSERT(grouping.size() < sizeof(int64_t) * 8);
48
+ for (idx_t i = 0; i < grouping.size(); i++) {
49
+ if (grouping_set.find(grouping[i]) == grouping_set.end()) {
50
+ // We don't group on this value!
51
+ grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
52
+ }
53
+ }
54
+ grouping_values.push_back(Value::BIGINT(grouping_value));
55
+ }
56
+ }
57
+
58
+ const TupleDataLayout &RadixPartitionedHashTable::GetLayout() const {
59
+ return layout;
60
+ }
61
+
62
+ unique_ptr<GroupedAggregateHashTable> RadixPartitionedHashTable::CreateHT(ClientContext &context, const idx_t capacity,
63
+ const idx_t radix_bits) const {
64
+ return make_uniq<GroupedAggregateHashTable>(context, BufferAllocator::Get(context), group_types, op.payload_types,
65
+ op.bindings, capacity, radix_bits);
51
66
  }
52
67
 
53
68
  //===--------------------------------------------------------------------===//
54
69
  // Sink
55
70
  //===--------------------------------------------------------------------===//
56
- class RadixHTGlobalState : public GlobalSinkState {
57
- constexpr const static idx_t MAX_RADIX_PARTITIONS = 32;
71
+ struct AggregatePartition {
72
+ explicit AggregatePartition(unique_ptr<TupleDataCollection> data_p) : data(std::move(data_p)), finalized(false) {
73
+ }
74
+ unique_ptr<TupleDataCollection> data;
75
+ atomic<bool> finalized;
76
+ };
77
+
78
+ class RadixHTGlobalSinkState;
58
79
 
80
+ struct RadixHTConfig {
59
81
  public:
60
- explicit RadixHTGlobalState(ClientContext &context)
61
- : is_empty(true), multi_scan(true), partitioned(false),
62
- partition_info(
63
- MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
64
- }
82
+ explicit RadixHTConfig(ClientContext &context, RadixHTGlobalSinkState &sink);
65
83
 
66
- vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
67
- vector<shared_ptr<GroupedAggregateHashTable>> finalized_hts;
84
+ void SetRadixBits(idx_t radix_bits_p);
85
+ bool SetRadixBitsToExternal();
86
+ idx_t GetRadixBits() const;
68
87
 
69
- //! Whether or not any tuples were added to the HT
70
- bool is_empty;
71
- //! Whether or not the hash table should be scannable multiple times
72
- bool multi_scan;
73
- //! The lock for updating the global aggregate state
74
- mutex lock;
75
- //! Whether or not any thread has crossed the partitioning threshold
76
- atomic<bool> partitioned;
88
+ private:
89
+ void SetRadixBitsInternal(const idx_t radix_bits_p, bool external);
90
+ static idx_t InitialSinkRadixBits(ClientContext &context);
91
+ static idx_t MaximumSinkRadixBits(ClientContext &context);
92
+ static idx_t ExternalRadixBits(const idx_t &maximum_sink_radix_bits_p);
93
+ static idx_t SinkCapacity(ClientContext &context);
94
+
95
+ private:
96
+ //! Assume (1 << 15) = 32KB L1 cache per core, divided by two because hyperthreading
97
+ static constexpr const idx_t L1_CACHE_SIZE = 32768 / 2;
98
+ //! Assume (1 << 20) = 1MB L2 cache per core, divided by two because hyperthreading
99
+ static constexpr const idx_t L2_CACHE_SIZE = 1048576 / 2;
100
+ //! Assume (1 << 20) + (1 << 19) = 1.5MB L3 cache per core (shared), divided by two because hyperthreading
101
+ static constexpr const idx_t L3_CACHE_SIZE = 1572864 / 2;
102
+
103
+ //! Sink radix bits to initialize with
104
+ static constexpr const idx_t MAXIMUM_INITIAL_SINK_RADIX_BITS = 3;
105
+ //! Maximum Sink radix bits (independent of threads)
106
+ static constexpr const idx_t MAXIMUM_FINAL_SINK_RADIX_BITS = 7;
107
+ //! By how many radix bits to increment if we go external
108
+ static constexpr const idx_t EXTERNAL_RADIX_BITS_INCREMENT = 3;
109
+
110
+ //! The global sink state
111
+ RadixHTGlobalSinkState &sink;
112
+ //! Current thread-global sink radix bits
113
+ atomic<idx_t> sink_radix_bits;
114
+ //! Maximum Sink radix bits (set based on number of threads)
115
+ const idx_t maximum_sink_radix_bits;
116
+ //! Radix bits if we go external
117
+ const idx_t external_radix_bits;
77
118
 
78
- bool is_finalized = false;
79
- bool is_partitioned = false;
119
+ public:
120
+ //! Capacity of HTs during the Sink
121
+ const idx_t sink_capacity;
80
122
 
81
- RadixPartitionInfo partition_info;
82
- AggregateHTAppendState append_state;
123
+ //! If we fill this many blocks per partition, we trigger a repartition
124
+ static constexpr const double BLOCK_FILL_FACTOR = 1.8;
125
+ //! By how many bits to repartition if a repartition is triggered
126
+ static constexpr const idx_t REPARTITION_RADIX_BITS = 2;
83
127
  };
84
128
 
85
- class RadixHTLocalState : public LocalSinkState {
129
+ class RadixHTGlobalSinkState : public GlobalSinkState {
86
130
  public:
87
- explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : total_groups(0), is_empty(true) {
88
- // if there are no groups we create a fake group so everything has the same group
89
- group_chunk.InitializeEmpty(ht.group_types);
90
- if (ht.grouping_set.empty()) {
91
- group_chunk.data[0].Reference(Value::TINYINT(42));
131
+ RadixHTGlobalSinkState(ClientContext &context, const RadixPartitionedHashTable &radix_ht);
132
+
133
+ //! Destroys aggregate states (if multi-scan)
134
+ ~RadixHTGlobalSinkState() override;
135
+ void Destroy();
136
+
137
+ public:
138
+ //! The radix HT
139
+ const RadixPartitionedHashTable &radix_ht;
140
+ //! Config for partitioning
141
+ RadixHTConfig config;
142
+
143
+ //! Whether we've called Finalize
144
+ bool finalized;
145
+ //! Whether we are doing an external aggregation
146
+ atomic<bool> external;
147
+ //! Threads that have called Sink
148
+ atomic<idx_t> active_threads;
149
+ //! If any thread has called combine
150
+ atomic<bool> any_combined;
151
+
152
+ //! Lock for uncombined_data/stored_allocators
153
+ mutex lock;
154
+ //! Uncombined partitioned data that will be put into the AggregatePartitions
155
+ unique_ptr<PartitionedTupleData> uncombined_data;
156
+ //! Allocators used during the Sink/Finalize
157
+ vector<shared_ptr<ArenaAllocator>> stored_allocators;
158
+
159
+ //! Partitions that are finalized during GetData
160
+ vector<unique_ptr<AggregatePartition>> partitions;
161
+
162
+ //! For synchronizing finalize tasks
163
+ atomic<idx_t> finalize_idx;
164
+
165
+ //! Pin properties when scanning
166
+ TupleDataPinProperties scan_pin_properties;
167
+ //! Total count before combining
168
+ idx_t count_before_combining;
169
+ };
170
+
171
+ RadixHTGlobalSinkState::RadixHTGlobalSinkState(ClientContext &context, const RadixPartitionedHashTable &radix_ht_p)
172
+ : radix_ht(radix_ht_p), config(context, *this), finalized(false), external(false), active_threads(0),
173
+ any_combined(false), finalize_idx(0), scan_pin_properties(TupleDataPinProperties::DESTROY_AFTER_DONE),
174
+ count_before_combining(0) {
175
+ }
176
+
177
+ RadixHTGlobalSinkState::~RadixHTGlobalSinkState() {
178
+ Destroy();
179
+ }
180
+
181
+ // LCOV_EXCL_START
182
+ void RadixHTGlobalSinkState::Destroy() {
183
+ if (scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE || count_before_combining == 0 ||
184
+ partitions.empty()) {
185
+ // Already destroyed / empty
186
+ return;
187
+ }
188
+
189
+ TupleDataLayout layout = partitions[0]->data->GetLayout().Copy();
190
+ if (!layout.HasDestructor()) {
191
+ return; // No destructors, exit
192
+ }
193
+
194
+ // There are aggregates with destructors: Call the destructor for each of the aggregates
195
+ RowOperationsState row_state(*stored_allocators.back());
196
+ for (auto &partition : partitions) {
197
+ auto &data_collection = *partition->data;
198
+ if (data_collection.Count() == 0) {
199
+ continue;
92
200
  }
201
+ TupleDataChunkIterator iterator(data_collection, TupleDataPinProperties::DESTROY_AFTER_DONE, false);
202
+ auto &row_locations = iterator.GetChunkState().row_locations;
203
+ do {
204
+ RowOperations::DestroyStates(row_state, layout, row_locations, iterator.GetCurrentChunkCount());
205
+ } while (iterator.Next());
206
+ data_collection.Reset();
207
+ }
208
+ }
209
+ // LCOV_EXCL_STOP
210
+
211
+ RadixHTConfig::RadixHTConfig(ClientContext &context, RadixHTGlobalSinkState &sink_p)
212
+ : sink(sink_p), sink_radix_bits(InitialSinkRadixBits(context)),
213
+ maximum_sink_radix_bits(MaximumSinkRadixBits(context)),
214
+ external_radix_bits(ExternalRadixBits(maximum_sink_radix_bits)), sink_capacity(SinkCapacity(context)) {
215
+ }
216
+
217
+ void RadixHTConfig::SetRadixBits(idx_t radix_bits_p) {
218
+ SetRadixBitsInternal(MinValue(radix_bits_p, maximum_sink_radix_bits), false);
219
+ }
220
+
221
+ bool RadixHTConfig::SetRadixBitsToExternal() {
222
+ SetRadixBitsInternal(external_radix_bits, true);
223
+ return sink.external;
224
+ }
225
+
226
+ idx_t RadixHTConfig::GetRadixBits() const {
227
+ return sink_radix_bits;
228
+ }
229
+
230
+ void RadixHTConfig::SetRadixBitsInternal(const idx_t radix_bits_p, bool external) {
231
+ if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
232
+ return;
233
+ }
234
+
235
+ lock_guard<mutex> guard(sink.lock);
236
+ if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
237
+ return;
93
238
  }
94
239
 
240
+ if (external) {
241
+ sink.external = true;
242
+ }
243
+ sink_radix_bits = radix_bits_p;
244
+ return;
245
+ }
246
+
247
+ idx_t RadixHTConfig::InitialSinkRadixBits(ClientContext &context) {
248
+ const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
249
+ return MinValue(RadixPartitioning::RadixBits(NextPowerOfTwo(active_threads)), MAXIMUM_INITIAL_SINK_RADIX_BITS);
250
+ }
251
+
252
+ idx_t RadixHTConfig::MaximumSinkRadixBits(ClientContext &context) {
253
+ const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
254
+ return MinValue(RadixPartitioning::RadixBits(NextPowerOfTwo(active_threads)), MAXIMUM_FINAL_SINK_RADIX_BITS);
255
+ }
256
+
257
+ idx_t RadixHTConfig::ExternalRadixBits(const idx_t &maximum_sink_radix_bits_p) {
258
+ return MinValue(maximum_sink_radix_bits_p + EXTERNAL_RADIX_BITS_INCREMENT, MAXIMUM_FINAL_SINK_RADIX_BITS);
259
+ }
260
+
261
+ idx_t RadixHTConfig::SinkCapacity(ClientContext &context) {
262
+ // Get active and maximum number of threads
263
+ const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
264
+ const auto max_threads = DBConfig::GetSystemMaxThreads(FileSystem::GetFileSystem(context));
265
+
266
+ // Compute cache size per active thread (assuming cache is shared)
267
+ const auto total_shared_cache_size = max_threads * L3_CACHE_SIZE;
268
+ const auto cache_per_active_thread = L1_CACHE_SIZE + L2_CACHE_SIZE + total_shared_cache_size / active_threads;
269
+
270
+ // Divide cache per active thread by entry size, round up to next power of two, to get capacity
271
+ const auto size_per_entry = sizeof(aggr_ht_entry_t) * GroupedAggregateHashTable::LOAD_FACTOR;
272
+ const auto capacity = NextPowerOfTwo(cache_per_active_thread / size_per_entry);
273
+
274
+ // Capacity must be at least the minimum capacity
275
+ return MaxValue<idx_t>(capacity, GroupedAggregateHashTable::InitialCapacity());
276
+ }
277
+
278
+ class RadixHTLocalSinkState : public LocalSinkState {
279
+ public:
280
+ RadixHTLocalSinkState(ClientContext &context, const RadixPartitionedHashTable &radix_ht);
281
+
282
+ public:
283
+ //! Thread-local HT that is re-used after abandoning
284
+ unique_ptr<GroupedAggregateHashTable> ht;
285
+ //! Chunk with group columns
95
286
  DataChunk group_chunk;
96
- //! The aggregate HT
97
- unique_ptr<PartitionableHashTable> ht;
98
- //! The total number of groups found by this thread
99
- idx_t total_groups;
100
287
 
101
- //! Whether or not any tuples were added to the HT
102
- bool is_empty;
288
+ //! Data that is abandoned ends up here (only if we're doing external aggregation)
289
+ unique_ptr<PartitionedTupleData> abandoned_data;
103
290
  };
104
291
 
105
- void RadixPartitionedHashTable::SetMultiScan(GlobalSinkState &state) {
106
- auto &gstate = state.Cast<RadixHTGlobalState>();
107
- gstate.multi_scan = true;
292
+ RadixHTLocalSinkState::RadixHTLocalSinkState(ClientContext &, const RadixPartitionedHashTable &radix_ht) {
293
+ // If there are no groups we create a fake group so everything has the same group
294
+ group_chunk.InitializeEmpty(radix_ht.group_types);
295
+ if (radix_ht.grouping_set.empty()) {
296
+ group_chunk.data[0].Reference(Value::TINYINT(42));
297
+ }
108
298
  }
109
299
 
110
300
  unique_ptr<GlobalSinkState> RadixPartitionedHashTable::GetGlobalSinkState(ClientContext &context) const {
111
- return make_uniq<RadixHTGlobalState>(context);
301
+ return make_uniq<RadixHTGlobalSinkState>(context, *this);
112
302
  }
113
303
 
114
304
  unique_ptr<LocalSinkState> RadixPartitionedHashTable::GetLocalSinkState(ExecutionContext &context) const {
115
- return make_uniq<RadixHTLocalState>(*this);
305
+ return make_uniq<RadixHTLocalSinkState>(context.client, *this);
116
306
  }
117
307
 
118
308
  void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataChunk &input_chunk) const {
@@ -130,284 +320,461 @@ void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataC
130
320
  group_chunk.Verify();
131
321
  }
132
322
 
133
- void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
134
- DataChunk &payload_input, const unsafe_vector<idx_t> &filter) const {
135
- auto &llstate = input.local_state.Cast<RadixHTLocalState>();
136
- auto &gstate = input.global_state.Cast<RadixHTGlobalState>();
137
- D_ASSERT(!gstate.is_finalized);
138
-
139
- DataChunk &group_chunk = llstate.group_chunk;
140
- PopulateGroupChunk(group_chunk, chunk);
323
+ bool MaybeRepartition(ClientContext &context, RadixHTGlobalSinkState &gstate, RadixHTLocalSinkState &lstate) {
324
+ auto &config = gstate.config;
325
+ auto &ht = *lstate.ht;
326
+ auto &partitioned_data = ht.GetPartitionedData();
327
+
328
+ // Check if we're approaching the memory limit
329
+ const idx_t n_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
330
+ const idx_t limit = BufferManager::GetBufferManager(context).GetMaxMemory();
331
+ const idx_t thread_limit = 0.6 * limit / n_threads;
332
+ if (ht.GetPartitionedData()->SizeInBytes() > thread_limit || context.config.force_external) {
333
+ if (gstate.config.SetRadixBitsToExternal()) {
334
+ // We're approaching the memory limit, unpin the data
335
+ if (!lstate.abandoned_data) {
336
+ lstate.abandoned_data = make_uniq<RadixPartitionedTupleData>(
337
+ BufferManager::GetBufferManager(context), gstate.radix_ht.GetLayout(), config.GetRadixBits(),
338
+ gstate.radix_ht.GetLayout().ColumnCount() - 1);
339
+ }
141
340
 
142
- // if we have non-combinable aggregates (e.g. string_agg) we cannot keep parallel hash
143
- // tables
144
- if (ForceSingleHT(input.global_state)) {
145
- lock_guard<mutex> glock(gstate.lock);
146
- gstate.is_empty = gstate.is_empty && group_chunk.size() == 0;
147
- if (gstate.finalized_hts.empty()) {
148
- // Create a finalized ht in the global state, that we can populate
149
- gstate.finalized_hts.push_back(
150
- make_shared<GroupedAggregateHashTable>(context.client, Allocator::Get(context.client), group_types,
151
- op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64));
341
+ ht.UnpinData();
342
+ partitioned_data->Repartition(*lstate.abandoned_data);
343
+ ht.SetRadixBits(gstate.config.GetRadixBits());
344
+ ht.InitializePartitionedData();
345
+ return true;
152
346
  }
153
- D_ASSERT(gstate.finalized_hts.size() == 1);
154
- D_ASSERT(gstate.finalized_hts[0]);
155
- llstate.total_groups +=
156
- gstate.finalized_hts[0]->AddChunk(gstate.append_state, group_chunk, payload_input, filter);
157
- return;
158
347
  }
159
348
 
160
- if (group_chunk.size() > 0) {
161
- llstate.is_empty = false;
162
- }
349
+ const auto partition_count = partitioned_data->PartitionCount();
350
+ const auto current_radix_bits = RadixPartitioning::RadixBits(partition_count);
351
+ D_ASSERT(current_radix_bits <= config.GetRadixBits());
163
352
 
164
- if (!llstate.ht) {
165
- llstate.ht =
166
- make_uniq<PartitionableHashTable>(context.client, Allocator::Get(context.client), gstate.partition_info,
167
- group_types, op.payload_types, op.bindings);
353
+ const auto row_size_per_partition =
354
+ partitioned_data->Count() * partitioned_data->GetLayout().GetRowWidth() / partition_count;
355
+ if (row_size_per_partition > config.BLOCK_FILL_FACTOR * Storage::BLOCK_SIZE) {
356
+ // We crossed our block filling threshold, try to increment radix bits
357
+ config.SetRadixBits(current_radix_bits + config.REPARTITION_RADIX_BITS);
168
358
  }
169
359
 
170
- llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
171
- gstate.partitioned && gstate.partition_info.n_partitions > 1, filter);
172
- if (llstate.total_groups >= radix_limit) {
173
- gstate.partitioned = true;
360
+ const auto global_radix_bits = config.GetRadixBits();
361
+ if (current_radix_bits == global_radix_bits) {
362
+ return false; // We're already on the right number of radix bits
174
363
  }
364
+
365
+ // We're out-of-sync with the global radix bits, repartition
366
+ ht.UnpinData();
367
+ auto old_partitioned_data = std::move(partitioned_data);
368
+ ht.SetRadixBits(global_radix_bits);
369
+ ht.InitializePartitionedData();
370
+ old_partitioned_data->Repartition(*ht.GetPartitionedData());
371
+ return true;
175
372
  }
176
373
 
177
- void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &state,
178
- LocalSinkState &lstate) const {
179
- auto &llstate = lstate.Cast<RadixHTLocalState>();
180
- auto &gstate = state.Cast<RadixHTGlobalState>();
181
- D_ASSERT(!gstate.is_finalized);
374
+ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
375
+ DataChunk &payload_input, const unsafe_vector<idx_t> &filter) const {
376
+ auto &gstate = input.global_state.Cast<RadixHTGlobalSinkState>();
377
+ auto &lstate = input.local_state.Cast<RadixHTLocalSinkState>();
378
+ if (!lstate.ht) {
379
+ lstate.ht = CreateHT(context.client, gstate.config.sink_capacity, gstate.config.GetRadixBits());
380
+ gstate.active_threads++;
381
+ }
382
+
383
+ auto &group_chunk = lstate.group_chunk;
384
+ PopulateGroupChunk(group_chunk, chunk);
182
385
 
183
- // this actually does not do a lot but just pushes the local HTs into the global state so we can later combine them
184
- // in parallel
386
+ auto &ht = *lstate.ht;
387
+ ht.AddChunk(group_chunk, payload_input, filter);
185
388
 
186
- if (ForceSingleHT(state)) {
187
- D_ASSERT(gstate.finalized_hts.size() <= 1);
188
- return;
389
+ if (ht.Count() + STANDARD_VECTOR_SIZE < ht.ResizeThreshold()) {
390
+ return; // We can fit another chunk
189
391
  }
190
392
 
191
- if (!llstate.ht) {
192
- return; // no data
393
+ if (gstate.active_threads > 2) {
394
+ // 'Reset' the HT without taking its data, we can just keep appending to the same collection
395
+ // This only works because we never resize the HT
396
+ ht.ClearPointerTable();
397
+ ht.ResetCount();
398
+ // We don't do this when running with 1 or 2 threads, it only makes sense when there's many threads
193
399
  }
194
400
 
195
- if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.partitioned) {
196
- llstate.ht->Partition();
401
+ // Check if we need to repartition
402
+ auto repartitioned = MaybeRepartition(context.client, gstate, lstate);
403
+
404
+ if (repartitioned && ht.Count() != 0) {
405
+ // We repartitioned, but we didn't clear the pointer table / reset the count because we're on 1 or 2 threads
406
+ ht.ClearPointerTable();
407
+ ht.ResetCount();
197
408
  }
198
409
 
199
- // we will never add new values to these HTs so we can drop the first part of the HT
200
- llstate.ht->Finalize();
410
+ // TODO: combine early and often
411
+ }
201
412
 
202
- lock_guard<mutex> glock(gstate.lock);
203
- if (!llstate.is_empty) {
204
- gstate.is_empty = false;
413
+ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
414
+ LocalSinkState &lstate_p) const {
415
+ auto &gstate = gstate_p.Cast<RadixHTGlobalSinkState>();
416
+ auto &lstate = lstate_p.Cast<RadixHTLocalSinkState>();
417
+ if (!lstate.ht) {
418
+ return;
205
419
  }
206
- // at this point we just collect them the PhysicalHashAggregateFinalizeTask (below) will merge them in parallel
207
- gstate.intermediate_hts.push_back(std::move(llstate.ht));
208
- }
209
420
 
210
- bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState &gstate_p) const {
211
- auto &gstate = gstate_p.Cast<RadixHTGlobalState>();
212
- D_ASSERT(!gstate.is_finalized);
213
- gstate.is_finalized = true;
421
+ // Set any_combined, then check one last time whether we need to repartition
422
+ gstate.any_combined = true;
423
+ MaybeRepartition(context.client, gstate, lstate);
214
424
 
215
- // special case if we have non-combinable aggregates
216
- // we have already aggreagted into a global shared HT that does not require any additional finalization steps
217
- if (ForceSingleHT(gstate)) {
218
- D_ASSERT(gstate.finalized_hts.size() <= 1);
219
- D_ASSERT(gstate.finalized_hts.empty() || gstate.finalized_hts[0]);
220
- return false;
221
- }
425
+ auto &ht = *lstate.ht;
426
+ ht.UnpinData();
222
427
 
223
- // we can have two cases now, non-partitioned for few groups and radix-partitioned for very many groups.
224
- // go through all of the child hts and see if we ever called partition() on any of them
225
- // if we did, its the latter case.
226
- bool any_partitioned = false;
227
- for (auto &pht : gstate.intermediate_hts) {
228
- if (pht->IsPartitioned()) {
229
- any_partitioned = true;
230
- break;
231
- }
428
+ if (lstate.abandoned_data) {
429
+ D_ASSERT(gstate.external);
430
+ D_ASSERT(lstate.abandoned_data->PartitionCount() == lstate.ht->GetPartitionedData()->PartitionCount());
431
+ D_ASSERT(lstate.abandoned_data->PartitionCount() ==
432
+ RadixPartitioning::NumberOfPartitions(gstate.config.GetRadixBits()));
433
+ lstate.abandoned_data->Combine(*lstate.ht->GetPartitionedData());
434
+ } else {
435
+ lstate.abandoned_data = std::move(ht.GetPartitionedData());
232
436
  }
233
437
 
234
- auto &allocator = Allocator::Get(context);
235
- if (any_partitioned) {
236
- // if one is partitioned, all have to be
237
- // this should mostly have already happened in Combine, but if not we do it here
238
- for (auto &pht : gstate.intermediate_hts) {
239
- if (!pht->IsPartitioned()) {
240
- pht->Partition();
241
- }
242
- }
243
- // schedule additional tasks to combine the partial HTs
244
- gstate.finalized_hts.resize(gstate.partition_info.n_partitions);
245
- for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
246
- gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
247
- context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
248
- }
249
- gstate.is_partitioned = true;
250
- return true;
251
- } else { // in the non-partitioned case we immediately combine all the unpartitioned hts created by the threads.
252
- // TODO possible optimization, if total count < limit for 32 bit ht, use that one
253
- // create this ht here so finalize needs no lock on gstate
254
-
255
- gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
256
- context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64));
257
- for (auto &pht : gstate.intermediate_hts) {
258
- auto unpartitioned = pht->GetUnpartitioned();
259
- for (auto &unpartitioned_ht : unpartitioned) {
260
- D_ASSERT(unpartitioned_ht);
261
- gstate.finalized_hts[0]->Combine(*unpartitioned_ht);
262
- unpartitioned_ht.reset();
263
- }
264
- unpartitioned.clear();
265
- }
266
- D_ASSERT(gstate.finalized_hts[0]);
267
- gstate.finalized_hts[0]->Finalize();
268
- return false;
438
+ lock_guard<mutex> guard(gstate.lock);
439
+ if (gstate.uncombined_data) {
440
+ gstate.uncombined_data->Combine(*lstate.abandoned_data);
441
+ } else {
442
+ gstate.uncombined_data = std::move(lstate.abandoned_data);
269
443
  }
444
+ gstate.stored_allocators.emplace_back(ht.GetAggregateAllocator());
270
445
  }
271
446
 
272
- // this task is run in multiple threads and combines the radix-partitioned hash tables into a single onen and then
273
- // folds them into the global ht finally.
274
- class RadixAggregateFinalizeTask : public ExecutorTask {
275
- public:
276
- RadixAggregateFinalizeTask(Executor &executor, shared_ptr<Event> event_p, RadixHTGlobalState &state_p,
277
- idx_t radix_p)
278
- : ExecutorTask(executor), event(std::move(event_p)), state(state_p), radix(radix_p) {
279
- }
280
-
281
- static void FinalizeHT(RadixHTGlobalState &gstate, idx_t radix) {
282
- D_ASSERT(gstate.partition_info.n_partitions <= gstate.finalized_hts.size());
283
- D_ASSERT(gstate.finalized_hts[radix]);
284
- for (auto &pht : gstate.intermediate_hts) {
285
- for (auto &ht : pht->GetPartition(radix)) {
286
- gstate.finalized_hts[radix]->Combine(*ht);
287
- ht.reset();
288
- }
289
- }
290
- gstate.finalized_hts[radix]->Finalize();
291
- }
447
+ void RadixPartitionedHashTable::Finalize(ClientContext &, GlobalSinkState &gstate_p) const {
448
+ auto &gstate = gstate_p.Cast<RadixHTGlobalSinkState>();
292
449
 
293
- TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
294
- FinalizeHT(state, radix);
295
- event->FinishTask();
296
- return TaskExecutionResult::TASK_FINISHED;
297
- }
450
+ if (gstate.uncombined_data) {
451
+ auto &uncombined_data = *gstate.uncombined_data;
452
+ gstate.count_before_combining = uncombined_data.Count();
298
453
 
299
- private:
300
- shared_ptr<Event> event;
301
- RadixHTGlobalState &state;
302
- idx_t radix;
303
- };
454
+ // If true there is no need to combine, it was all done by a single thread in a single HT
455
+ const auto single_ht = !gstate.external && gstate.active_threads == 1;
304
456
 
305
- void RadixPartitionedHashTable::ScheduleTasks(Executor &executor, const shared_ptr<Event> &event,
306
- GlobalSinkState &state, vector<shared_ptr<Task>> &tasks) const {
307
- auto &gstate = state.Cast<RadixHTGlobalState>();
308
- if (!gstate.is_partitioned) {
309
- return;
310
- }
311
- for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
312
- D_ASSERT(gstate.partition_info.n_partitions <= gstate.finalized_hts.size());
313
- D_ASSERT(gstate.finalized_hts[r]);
314
- tasks.push_back(make_uniq<RadixAggregateFinalizeTask>(executor, event, gstate, r));
457
+ auto &uncombined_partition_data = uncombined_data.GetPartitions();
458
+ const auto n_partitions = uncombined_partition_data.size();
459
+ gstate.partitions.reserve(n_partitions);
460
+ for (idx_t i = 0; i < n_partitions; i++) {
461
+ gstate.partitions.emplace_back(make_uniq<AggregatePartition>(std::move(uncombined_partition_data[i])));
462
+ if (single_ht) {
463
+ gstate.finalize_idx++;
464
+ gstate.partitions.back()->finalized = true;
465
+ }
466
+ }
467
+ } else {
468
+ gstate.count_before_combining = 0;
315
469
  }
316
- }
317
470
 
318
- bool RadixPartitionedHashTable::ForceSingleHT(GlobalSinkState &state) const {
319
- auto &gstate = state.Cast<RadixHTGlobalState>();
320
- return gstate.partition_info.n_partitions < 2;
471
+ gstate.finalized = true;
321
472
  }
322
473
 
323
474
  //===--------------------------------------------------------------------===//
324
475
  // Source
325
476
  //===--------------------------------------------------------------------===//
477
+ idx_t RadixPartitionedHashTable::Count(GlobalSinkState &sink_p) const {
478
+ const auto count = CountInternal(sink_p);
479
+ return count == 0 && grouping_set.empty() ? 1 : count;
480
+ }
481
+
482
+ idx_t RadixPartitionedHashTable::CountInternal(GlobalSinkState &sink_p) const {
483
+ auto &sink = sink_p.Cast<RadixHTGlobalSinkState>();
484
+ return sink.count_before_combining;
485
+ }
486
+
487
+ void RadixPartitionedHashTable::SetMultiScan(GlobalSinkState &sink_p) {
488
+ auto &sink = sink_p.Cast<RadixHTGlobalSinkState>();
489
+ sink.scan_pin_properties = TupleDataPinProperties::UNPIN_AFTER_DONE;
490
+ }
491
+
492
+ enum class RadixHTSourceTaskType : uint8_t { NO_TASK, FINALIZE, SCAN };
493
+
494
+ class RadixHTLocalSourceState;
495
+
326
496
  class RadixHTGlobalSourceState : public GlobalSourceState {
327
497
  public:
328
- explicit RadixHTGlobalSourceState(Allocator &allocator, const RadixPartitionedHashTable &ht)
329
- : ht_index(0), initialized(false), finished(false) {
330
- }
498
+ RadixHTGlobalSourceState(ClientContext &context, const RadixPartitionedHashTable &radix_ht);
331
499
 
332
- //! Heavy handed for now.
333
- mutex lock;
334
- //! The current position to scan the HT for output tuples
335
- idx_t ht_index;
336
- //! The set of aggregate scan states
337
- unsafe_unique_array<TupleDataParallelScanState> ht_scan_states;
338
- atomic<bool> initialized;
500
+ //! Assigns a task to a local source state
501
+ bool AssignTask(RadixHTGlobalSinkState &sink, RadixHTLocalSourceState &lstate);
502
+
503
+ public:
504
+ //! The client context
505
+ ClientContext &context;
506
+ //! For synchronizing the source phase
339
507
  atomic<bool> finished;
508
+
509
+ //! Column ids for scanning
510
+ vector<column_t> column_ids;
511
+
512
+ //! For synchronizing scan tasks
513
+ atomic<idx_t> scan_idx;
514
+ atomic<idx_t> scan_done;
340
515
  };
341
516
 
517
+ enum class RadixHTScanStatus : uint8_t { INIT, IN_PROGRESS, DONE };
518
+
342
519
  class RadixHTLocalSourceState : public LocalSourceState {
343
520
  public:
344
- explicit RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &ht) {
345
- auto &allocator = Allocator::Get(context.client);
346
- auto scan_chunk_types = ht.group_types;
347
- for (auto &aggr_type : ht.op.aggregate_return_types) {
348
- scan_chunk_types.push_back(aggr_type);
349
- }
350
- scan_chunk.Initialize(allocator, scan_chunk_types);
351
- }
521
+ explicit RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &radix_ht);
352
522
 
353
- //! Materialized GROUP BY expressions & aggregates
523
+ public:
524
+ //! Do the work this thread has been assigned
525
+ void ExecuteTask(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate, DataChunk &chunk);
526
+ //! Whether this thread has finished the work it has been assigned
527
+ bool TaskFinished();
528
+
529
+ private:
530
+ //! Execute the finalize or scan task
531
+ void Finalize(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate);
532
+ void Scan(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate, DataChunk &chunk);
533
+
534
+ public:
535
+ //! Current task and index
536
+ RadixHTSourceTaskType task;
537
+ idx_t task_idx;
538
+
539
+ //! Thread-local HT that is re-used to Finalize
540
+ unique_ptr<GroupedAggregateHashTable> ht;
541
+ //! Current status of a Scan
542
+ RadixHTScanStatus scan_status;
543
+
544
+ private:
545
+ //! Allocator and layout for finalizing state
546
+ TupleDataLayout layout;
547
+ ArenaAllocator aggregate_allocator;
548
+
549
+ //! State and chunk for scanning
550
+ TupleDataScanState scan_state;
354
551
  DataChunk scan_chunk;
355
- //! HT index
356
- idx_t ht_index = DConstants::INVALID_INDEX;
357
- //! A reference to the current HT that we are scanning
358
- shared_ptr<GroupedAggregateHashTable> ht;
359
- //! Scan state for the current HT
360
- TupleDataLocalScanState scan_state;
361
552
  };
362
553
 
363
554
  unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
364
- return make_uniq<RadixHTGlobalSourceState>(Allocator::Get(context), *this);
555
+ return make_uniq<RadixHTGlobalSourceState>(context, *this);
365
556
  }
366
557
 
367
558
  unique_ptr<LocalSourceState> RadixPartitionedHashTable::GetLocalSourceState(ExecutionContext &context) const {
368
559
  return make_uniq<RadixHTLocalSourceState>(context, *this);
369
560
  }
370
561
 
371
- idx_t RadixPartitionedHashTable::Size(GlobalSinkState &sink_state) const {
372
- auto &gstate = sink_state.Cast<RadixHTGlobalState>();
373
- if (gstate.is_empty && grouping_set.empty()) {
374
- return 1;
562
+ RadixHTGlobalSourceState::RadixHTGlobalSourceState(ClientContext &context_p, const RadixPartitionedHashTable &radix_ht)
563
+ : context(context_p), finished(false), scan_idx(0), scan_done(0) {
564
+ for (column_t column_id = 0; column_id < radix_ht.group_types.size(); column_id++) {
565
+ column_ids.push_back(column_id);
566
+ }
567
+ }
568
+
569
+ bool RadixHTGlobalSourceState::AssignTask(RadixHTGlobalSinkState &sink, RadixHTLocalSourceState &lstate) {
570
+ D_ASSERT(lstate.scan_status != RadixHTScanStatus::IN_PROGRESS);
571
+
572
+ const auto n_partitions = sink.partitions.size();
573
+ if (scan_done == n_partitions) {
574
+ finished = true;
575
+ return false;
576
+ }
577
+ // We first try to assign a Scan task, then a Finalize task if that didn't work, without using any locks
578
+
579
+ // We need an atomic compare-and-swap to assign a Scan task, because we need to only increment
580
+ // the 'scan_idx' atomic if the 'finalize' of that partition is true, i.e., ready to be scanned
581
+ bool scan_assigned = true;
582
+ do {
583
+ lstate.task_idx = scan_idx.load();
584
+ if (lstate.task_idx >= n_partitions || !sink.partitions[lstate.task_idx]->finalized) {
585
+ scan_assigned = false;
586
+ break;
587
+ }
588
+ } while (!std::atomic_compare_exchange_weak(&scan_idx, &lstate.task_idx, lstate.task_idx + 1));
589
+
590
+ if (scan_assigned) {
591
+ // We successfully assigned a Scan task
592
+ D_ASSERT(lstate.task_idx < n_partitions && sink.partitions[lstate.task_idx]->finalized);
593
+ lstate.task = RadixHTSourceTaskType::SCAN;
594
+ lstate.scan_status = RadixHTScanStatus::INIT;
595
+ return true;
596
+ }
597
+
598
+ // We can just increment the atomic here, much simpler than assigning the scan task
599
+ lstate.task_idx = sink.finalize_idx++;
600
+ if (lstate.task_idx < n_partitions) {
601
+ // We successfully assigned a Finalize task
602
+ lstate.task = RadixHTSourceTaskType::FINALIZE;
603
+ return true;
604
+ }
605
+
606
+ // We didn't manage to assign a finalize task
607
+ return false;
608
+ }
609
+
610
+ RadixHTLocalSourceState::RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &radix_ht)
611
+ : task(RadixHTSourceTaskType::NO_TASK), scan_status(RadixHTScanStatus::DONE), layout(radix_ht.GetLayout().Copy()),
612
+ aggregate_allocator(BufferAllocator::Get(context.client)) {
613
+ auto &allocator = BufferAllocator::Get(context.client);
614
+ auto scan_chunk_types = radix_ht.group_types;
615
+ for (auto &aggr_type : radix_ht.op.aggregate_return_types) {
616
+ scan_chunk_types.push_back(aggr_type);
617
+ }
618
+ scan_chunk.Initialize(allocator, scan_chunk_types);
619
+ }
620
+
621
+ void RadixHTLocalSourceState::ExecuteTask(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate,
622
+ DataChunk &chunk) {
623
+ switch (task) {
624
+ case RadixHTSourceTaskType::FINALIZE:
625
+ Finalize(sink, gstate);
626
+ break;
627
+ case RadixHTSourceTaskType::SCAN:
628
+ Scan(sink, gstate, chunk);
629
+ break;
630
+ default:
631
+ throw InternalException("Unexpected RadixHTSourceTaskType in ExecuteTask!");
632
+ }
633
+ }
634
+
635
+ void RadixHTLocalSourceState::Finalize(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate) {
636
+ D_ASSERT(task == RadixHTSourceTaskType::FINALIZE);
637
+ D_ASSERT(scan_status != RadixHTScanStatus::IN_PROGRESS);
638
+
639
+ auto &partition = *sink.partitions[task_idx];
640
+ if (partition.data->Count() == 0) {
641
+ partition.finalized = true;
642
+ return;
375
643
  }
376
644
 
377
- idx_t count = 0;
378
- for (const auto &ht : gstate.finalized_hts) {
379
- count += ht->Count();
645
+ if (!ht) {
646
+ // Create a HT with sufficient capacity
647
+ const auto capacity = GroupedAggregateHashTable::GetCapacityForCount(partition.data->Count());
648
+ ht = sink.radix_ht.CreateHT(gstate.context, capacity, 0);
649
+ } else {
650
+ // We may want to resize here to the size of this partition, but for now we just assume uniform partition sizes
651
+ ht->InitializePartitionedData();
652
+ ht->ClearPointerTable();
653
+ ht->ResetCount();
654
+ }
655
+
656
+ // Now combine the uncombined data using this thread's HT
657
+ ht->Combine(*partition.data);
658
+ ht->UnpinData();
659
+
660
+ // Move the combined data back to the partition
661
+ partition.data =
662
+ make_uniq<TupleDataCollection>(BufferManager::GetBufferManager(gstate.context), sink.radix_ht.GetLayout());
663
+ partition.data->Combine(*ht->GetPartitionedData()->GetPartitions()[0]);
664
+
665
+ // Mark partition as ready to scan
666
+ partition.finalized = true;
667
+
668
+ // Make sure this thread's aggregate allocator does not get lost
669
+ lock_guard<mutex> guard(sink.lock);
670
+ sink.stored_allocators.emplace_back(ht->GetAggregateAllocator());
671
+ }
672
+
673
+ void RadixHTLocalSourceState::Scan(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate, DataChunk &chunk) {
674
+ D_ASSERT(task == RadixHTSourceTaskType::SCAN);
675
+ D_ASSERT(scan_status != RadixHTScanStatus::DONE);
676
+
677
+ auto &partition = *sink.partitions[task_idx];
678
+ D_ASSERT(partition.finalized);
679
+ auto &data_collection = *partition.data;
680
+
681
+ if (data_collection.Count() == 0) {
682
+ scan_status = RadixHTScanStatus::DONE;
683
+ if (++gstate.scan_done == sink.partitions.size()) {
684
+ gstate.finished = true;
685
+ }
686
+ return;
687
+ }
688
+
689
+ if (scan_status == RadixHTScanStatus::INIT) {
690
+ data_collection.InitializeScan(scan_state, gstate.column_ids, sink.scan_pin_properties);
691
+ scan_status = RadixHTScanStatus::IN_PROGRESS;
692
+ }
693
+
694
+ if (!data_collection.Scan(scan_state, scan_chunk)) {
695
+ scan_status = RadixHTScanStatus::DONE;
696
+ if (++gstate.scan_done == sink.partitions.size()) {
697
+ gstate.finished = true;
698
+ }
699
+ if (sink.scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE) {
700
+ data_collection.Reset();
701
+ }
702
+ return;
703
+ }
704
+
705
+ RowOperationsState row_state(aggregate_allocator);
706
+ const auto group_cols = layout.ColumnCount() - 1;
707
+ RowOperations::FinalizeStates(row_state, layout, scan_state.chunk_state.row_locations, scan_chunk, group_cols);
708
+
709
+ if (sink.scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE && layout.HasDestructor()) {
710
+ RowOperations::DestroyStates(row_state, layout, scan_state.chunk_state.row_locations, scan_chunk.size());
711
+ }
712
+
713
+ auto &radix_ht = sink.radix_ht;
714
+ idx_t chunk_index = 0;
715
+ for (auto &entry : radix_ht.grouping_set) {
716
+ chunk.data[entry].Reference(scan_chunk.data[chunk_index++]);
717
+ }
718
+ for (auto null_group : radix_ht.null_groups) {
719
+ chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
720
+ ConstantVector::SetNull(chunk.data[null_group], true);
721
+ }
722
+ D_ASSERT(radix_ht.grouping_set.size() + radix_ht.null_groups.size() == radix_ht.op.GroupCount());
723
+ for (idx_t col_idx = 0; col_idx < radix_ht.op.aggregates.size(); col_idx++) {
724
+ chunk.data[radix_ht.op.GroupCount() + col_idx].Reference(
725
+ scan_chunk.data[radix_ht.group_types.size() + col_idx]);
726
+ }
727
+ D_ASSERT(radix_ht.op.grouping_functions.size() == radix_ht.grouping_values.size());
728
+ for (idx_t i = 0; i < radix_ht.op.grouping_functions.size(); i++) {
729
+ chunk.data[radix_ht.op.GroupCount() + radix_ht.op.aggregates.size() + i].Reference(radix_ht.grouping_values[i]);
730
+ }
731
+ chunk.SetCardinality(scan_chunk);
732
+ D_ASSERT(chunk.size() != 0);
733
+ }
734
+
735
+ bool RadixHTLocalSourceState::TaskFinished() {
736
+ switch (task) {
737
+ case RadixHTSourceTaskType::FINALIZE:
738
+ return true;
739
+ case RadixHTSourceTaskType::SCAN:
740
+ return scan_status == RadixHTScanStatus::DONE;
741
+ default:
742
+ D_ASSERT(task == RadixHTSourceTaskType::NO_TASK);
743
+ return true;
380
744
  }
381
- return count;
382
745
  }
383
746
 
384
747
  SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &chunk,
385
- GlobalSinkState &sink_state, OperatorSourceInput &input) const {
386
- auto &gstate = sink_state.Cast<RadixHTGlobalState>();
387
- auto &state = input.global_state.Cast<RadixHTGlobalSourceState>();
748
+ GlobalSinkState &sink_p, OperatorSourceInput &input) const {
749
+ auto &sink = sink_p.Cast<RadixHTGlobalSinkState>();
750
+ D_ASSERT(sink.finalized);
751
+
752
+ auto &gstate = input.global_state.Cast<RadixHTGlobalSourceState>();
388
753
  auto &lstate = input.local_state.Cast<RadixHTLocalSourceState>();
389
- D_ASSERT(gstate.is_finalized);
390
- if (state.finished) {
754
+ D_ASSERT(sink.scan_pin_properties == TupleDataPinProperties::UNPIN_AFTER_DONE ||
755
+ sink.scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE);
756
+
757
+ if (gstate.finished) {
391
758
  return SourceResultType::FINISHED;
392
759
  }
393
760
 
394
- // special case hack to sort out aggregating from empty intermediates
395
- // for aggregations without groups
396
- if (gstate.is_empty && grouping_set.empty()) {
761
+ // Special case hack to sort out aggregating from empty intermediates for aggregations without groups
762
+ if (CountInternal(sink_p) == 0 && grouping_set.empty()) {
397
763
  D_ASSERT(chunk.ColumnCount() == null_groups.size() + op.aggregates.size() + op.grouping_functions.size());
398
- // for each column in the aggregates, set to initial state
764
+ // For each column in the aggregates, set to initial state
399
765
  chunk.SetCardinality(1);
400
766
  for (auto null_group : null_groups) {
401
767
  chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
402
768
  ConstantVector::SetNull(chunk.data[null_group], true);
403
769
  }
770
+ ArenaAllocator allocator(BufferAllocator::Get(context.client));
404
771
  for (idx_t i = 0; i < op.aggregates.size(); i++) {
405
772
  D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
406
773
  auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
407
774
  auto aggr_state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
408
775
  aggr.function.initialize(aggr_state.get());
409
776
 
410
- AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
777
+ AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
411
778
  Vector state_vector(Value::POINTER(CastPointerToValue(aggr_state.get())));
412
779
  aggr.function.finalize(state_vector, aggr_input_data, chunk.data[null_groups.size() + i], 1, 0);
413
780
  if (aggr.function.destructor) {
@@ -419,97 +786,17 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
419
786
  for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
420
787
  chunk.data[null_groups.size() + op.aggregates.size() + i].Reference(grouping_values[i]);
421
788
  }
422
- state.finished = true;
423
- return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
424
- }
425
- if (gstate.is_empty) {
426
- state.finished = true;
427
- return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
428
- }
429
- idx_t elements_found = 0;
430
-
431
- lstate.scan_chunk.Reset();
432
- if (!state.initialized) {
433
- lock_guard<mutex> l(state.lock);
434
- if (!state.initialized) {
435
- auto &finalized_hts = gstate.finalized_hts;
436
- state.ht_scan_states = make_unsafe_uniq_array<TupleDataParallelScanState>(finalized_hts.size());
437
-
438
- const auto &layout = gstate.finalized_hts[0]->GetDataCollection().GetLayout();
439
- vector<column_t> column_ids;
440
- column_ids.reserve(layout.ColumnCount() - 1);
441
- for (idx_t col_idx = 0; col_idx < layout.ColumnCount() - 1; col_idx++) {
442
- column_ids.emplace_back(col_idx);
443
- }
444
-
445
- for (idx_t ht_idx = 0; ht_idx < finalized_hts.size(); ht_idx++) {
446
- gstate.finalized_hts[ht_idx]->GetDataCollection().InitializeScan(
447
- state.ht_scan_states.get()[ht_idx].scan_state, column_ids);
448
- }
449
- state.initialized = true;
450
- }
789
+ gstate.finished = true;
790
+ return SourceResultType::HAVE_MORE_OUTPUT;
451
791
  }
452
792
 
453
- auto &local_scan_state = lstate.scan_state;
454
- while (true) {
455
- D_ASSERT(state.ht_scan_states);
456
- idx_t ht_index;
457
- {
458
- lock_guard<mutex> l(state.lock);
459
- ht_index = state.ht_index;
460
- if (ht_index >= gstate.finalized_hts.size()) {
461
- state.finished = true;
462
- return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
463
- }
464
- }
465
- D_ASSERT(ht_index < gstate.finalized_hts.size());
466
- if (lstate.ht_index != DConstants::INVALID_INDEX && ht_index != lstate.ht_index) {
467
- lstate.ht->GetDataCollection().FinalizePinState(local_scan_state.pin_state);
468
- }
469
- lstate.ht_index = ht_index;
470
- lstate.ht = gstate.finalized_hts[ht_index];
471
- D_ASSERT(lstate.ht);
472
-
473
- auto &global_scan_state = state.ht_scan_states[ht_index];
474
- elements_found = lstate.ht->Scan(global_scan_state, local_scan_state, lstate.scan_chunk);
475
- if (elements_found > 0) {
476
- break;
477
- }
478
- lstate.ht->GetDataCollection().FinalizePinState(local_scan_state.pin_state);
479
-
480
- // move to the next hash table
481
- lock_guard<mutex> l(state.lock);
482
- ht_index++;
483
- if (ht_index > state.ht_index) {
484
- // we have not yet worked on the table
485
- // move the global index forwards
486
- if (!gstate.multi_scan) {
487
- gstate.finalized_hts[state.ht_index].reset();
488
- }
489
- state.ht_index = ht_index;
793
+ while (!gstate.finished && chunk.size() == 0) {
794
+ if (!lstate.TaskFinished() || gstate.AssignTask(sink, lstate)) {
795
+ lstate.ExecuteTask(sink, gstate, chunk);
490
796
  }
491
797
  }
492
798
 
493
- // compute the final projection list
494
- chunk.SetCardinality(elements_found);
495
-
496
- idx_t chunk_index = 0;
497
- for (auto &entry : grouping_set) {
498
- chunk.data[entry].Reference(lstate.scan_chunk.data[chunk_index++]);
499
- }
500
- for (auto null_group : null_groups) {
501
- chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
502
- ConstantVector::SetNull(chunk.data[null_group], true);
503
- }
504
- D_ASSERT(grouping_set.size() + null_groups.size() == op.GroupCount());
505
- for (idx_t col_idx = 0; col_idx < op.aggregates.size(); col_idx++) {
506
- chunk.data[op.GroupCount() + col_idx].Reference(lstate.scan_chunk.data[group_types.size() + col_idx]);
507
- }
508
- D_ASSERT(op.grouping_functions.size() == grouping_values.size());
509
- for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
510
- chunk.data[op.GroupCount() + op.aggregates.size() + i].Reference(grouping_values[i]);
511
- }
512
- return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
799
+ return SourceResultType::HAVE_MORE_OUTPUT;
513
800
  }
514
801
 
515
802
  } // namespace duckdb