duckdb 0.8.2-dev157.0 → 0.8.2-dev1573.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (493)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  61. package/src/duckdb/src/common/file_system.cpp +19 -0
  62. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  63. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  64. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  65. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  66. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  67. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/bit.cpp +51 -0
  73. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  74. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  75. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  76. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  77. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  78. package/src/duckdb/src/common/types/date.cpp +9 -0
  79. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  80. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  83. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  84. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  85. package/src/duckdb/src/common/types.cpp +8 -655
  86. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  87. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  88. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  94. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  95. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  96. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  97. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  98. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  99. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  100. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  102. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  103. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  104. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  105. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  106. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  107. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  108. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  109. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  110. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  111. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  112. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  113. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  114. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  115. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  116. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  117. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  118. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  119. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  121. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  122. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  124. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  125. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  126. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  127. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  128. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  129. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  131. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  132. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  133. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  134. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  135. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  136. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  137. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  138. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  139. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  140. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  141. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  142. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  143. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  144. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  145. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  146. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  147. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  148. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  149. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  150. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  151. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  152. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  153. package/src/duckdb/src/function/function.cpp +3 -1
  154. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  155. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  156. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  157. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  158. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  159. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  160. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  161. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  162. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  163. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  164. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  165. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  166. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  167. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  168. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  169. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  170. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  171. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  172. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  173. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  174. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  176. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  177. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  178. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  179. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  180. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  181. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  182. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  183. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  184. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  185. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  186. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  187. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  188. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  189. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  190. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  191. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  192. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  193. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  194. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  195. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  196. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  197. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  198. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  199. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  200. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  201. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  203. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  204. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  205. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  206. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  207. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  210. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  211. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  213. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  218. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  221. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  222. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  229. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  230. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  231. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  232. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  233. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  234. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  235. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  236. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  237. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  238. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  239. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  240. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  241. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  242. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  245. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  246. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  247. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  248. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  249. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  251. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  252. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  253. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  254. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  255. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  256. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  257. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  258. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  260. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  262. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  263. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  264. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  265. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  266. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  267. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  268. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  269. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  270. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  271. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  272. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  273. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  274. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  275. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  276. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  277. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  278. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  279. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  280. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  281. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  284. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  285. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  286. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  287. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  291. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  295. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  296. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  297. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  300. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  301. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  302. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  303. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  304. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  305. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  306. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  307. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  308. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  309. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  310. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  311. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  312. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  314. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  315. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  316. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  317. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  318. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  319. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  320. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  321. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  322. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  323. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  324. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  325. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  326. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  327. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  328. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  329. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  330. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  331. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  332. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  333. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  334. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  335. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  336. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  337. package/src/duckdb/src/include/duckdb.h +28 -0
  338. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  339. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  340. package/src/duckdb/src/main/config.cpp +4 -0
  341. package/src/duckdb/src/main/database.cpp +1 -1
  342. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  343. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  344. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  345. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  346. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  347. package/src/duckdb/src/main/relation.cpp +6 -5
  348. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  349. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  350. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  351. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  352. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  353. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  354. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  355. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  356. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  357. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  358. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  359. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  360. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  361. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  362. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  363. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  364. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  365. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  366. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  367. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  368. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  369. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  370. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  371. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  372. package/src/duckdb/src/parallel/executor.cpp +15 -0
  373. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  374. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  375. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  376. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  377. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  378. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  379. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  380. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  381. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  382. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  383. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  384. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  385. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  386. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  387. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  388. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  389. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  390. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  391. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  392. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  393. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  394. package/src/duckdb/src/parser/parser.cpp +8 -2
  395. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  396. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  397. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  398. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  399. package/src/duckdb/src/parser/query_node.cpp +15 -37
  400. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  401. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  402. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  403. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  404. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  405. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  406. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  407. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  408. package/src/duckdb/src/parser/tableref.cpp +0 -44
  409. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  410. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  411. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  412. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  413. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  414. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  415. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  416. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  417. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  418. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  419. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  420. package/src/duckdb/src/parser/transformer.cpp +15 -0
  421. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  422. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  423. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  424. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  425. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  426. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  427. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  428. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  429. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  430. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  431. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  432. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  433. package/src/duckdb/src/planner/binder.cpp +44 -31
  434. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  435. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  436. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  437. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  438. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  439. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  440. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  441. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  442. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  443. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  444. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  445. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  446. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  447. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  448. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  449. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  450. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  451. package/src/duckdb/src/storage/data_table.cpp +1 -1
  452. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  453. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  454. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  455. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  456. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  457. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  458. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  459. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  460. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  461. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  462. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  463. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  464. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  465. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  466. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  467. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  468. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  469. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  470. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  471. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  472. package/src/duckdb/ub_src_common.cpp +2 -0
  473. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  474. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  475. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  476. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  477. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  478. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  479. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  480. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  481. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  482. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  483. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  484. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  485. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  486. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  487. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  488. package/src/statement.cpp +10 -3
  489. package/test/test_all_types.test.ts +233 -0
  490. package/tsconfig.json +1 -0
  491. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  492. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  493. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,9 +1,11 @@
1
1
  #include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
2
+
3
+ #include "duckdb/common/allocator.hpp"
4
+ #include "duckdb/common/types/batched_data_collection.hpp"
5
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
2
6
  #include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
3
7
  #include "duckdb/parallel/base_pipeline_event.hpp"
4
- #include "duckdb/common/vector_operations/vector_operations.hpp"
5
- #include "duckdb/common/types/batched_data_collection.hpp"
6
- #include "duckdb/common/allocator.hpp"
8
+
7
9
  #include <algorithm>
8
10
 
9
11
  namespace duckdb {
@@ -67,7 +69,7 @@ public:
67
69
  optional_idx batch_index;
68
70
 
69
71
  void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
70
- collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
72
+ collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
71
73
  collection->InitializeAppend(append_state);
72
74
  }
73
75
  };
@@ -116,7 +116,7 @@ public:
116
116
  optional_idx batch_index;
117
117
 
118
118
  void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
119
- collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
119
+ collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
120
120
  collection->InitializeAppend(append_state);
121
121
  }
122
122
  };
@@ -353,7 +353,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
353
353
  } else {
354
354
  // the collection is too large for a batch - we need to repartition
355
355
  // create an empty collection
356
- current_collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), children[0]->types);
356
+ current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
357
357
  }
358
358
  if (current_collection) {
359
359
  current_collection->InitializeAppend(append_state);
@@ -373,7 +373,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
373
373
  }
374
374
  // the collection is full - move it to the result and create a new one
375
375
  gstate.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(current_collection)));
376
- current_collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), children[0]->types);
376
+ current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
377
377
  current_collection->InitializeAppend(append_state);
378
378
  }
379
379
  }
@@ -41,7 +41,7 @@ PhysicalInsert::PhysicalInsert(vector<LogicalType> types_p, TableCatalogEntry &t
41
41
  return;
42
42
  }
43
43
 
44
- D_ASSERT(set_expressions.size() == set_columns.size());
44
+ D_ASSERT(this->set_expressions.size() == this->set_columns.size());
45
45
 
46
46
  // One or more columns are referenced from the existing table,
47
47
  // we use the 'insert_types' to figure out which types these columns have
@@ -16,6 +16,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
16
16
  pivot_map[bound_pivot.pivot_values[p]] = bound_pivot.group_count + p;
17
17
  }
18
18
  // extract the empty aggregate expressions
19
+ ArenaAllocator allocator(Allocator::DefaultAllocator());
19
20
  for (auto &aggr_expr : bound_pivot.aggregates) {
20
21
  auto &aggr = aggr_expr->Cast<BoundAggregateExpression>();
21
22
  // for each aggregate, initialize an empty aggregate state and finalize it immediately
@@ -23,7 +24,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
23
24
  aggr.function.initialize(state.get());
24
25
  Vector state_vector(Value::POINTER(CastPointerToValue(state.get())));
25
26
  Vector result_vector(aggr_expr->return_type);
26
- AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
27
+ AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
27
28
  aggr.function.finalize(state_vector, aggr_input_data, result_vector, 1, 0);
28
29
  empty_aggregates.push_back(result_vector.GetValue(0));
29
30
  }
@@ -64,6 +64,9 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
64
64
  state.SetPipelineSource(current, delim_join.distinct->Cast<PhysicalOperator>());
65
65
  return;
66
66
  }
67
+ case PhysicalOperatorType::CTE_SCAN: {
68
+ break;
69
+ }
67
70
  case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
68
71
  if (!meta_pipeline.HasRecursiveCTE()) {
69
72
  throw InternalException("Recursive CTE scan found without recursive CTE node");
@@ -76,4 +79,20 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
76
79
  state.SetPipelineSource(current, *this);
77
80
  }
78
81
 
82
+ string PhysicalColumnDataScan::ParamsToString() const {
83
+ string result = "";
84
+ switch (type) {
85
+ case PhysicalOperatorType::CTE_SCAN:
86
+ case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
87
+ result += "\n[INFOSEPARATOR]\n";
88
+ result += StringUtil::Format("idx: %llu", cte_index);
89
+ break;
90
+ }
91
+ default:
92
+ break;
93
+ }
94
+
95
+ return result;
96
+ }
97
+
79
98
  } // namespace duckdb
@@ -16,17 +16,18 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
16
16
  : PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
17
17
  function(std::move(function_p)), bind_data(std::move(bind_data_p)), column_ids(std::move(column_ids_p)),
18
18
  names(std::move(names_p)), table_filters(std::move(table_filters_p)) {
19
+ extra_info.file_filters = "";
19
20
  }
20
21
 
21
22
  PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction function_p,
22
23
  unique_ptr<FunctionData> bind_data_p, vector<LogicalType> returned_types_p,
23
24
  vector<column_t> column_ids_p, vector<idx_t> projection_ids_p,
24
25
  vector<string> names_p, unique_ptr<TableFilterSet> table_filters_p,
25
- idx_t estimated_cardinality)
26
+ idx_t estimated_cardinality, ExtraOperatorInfo extra_info)
26
27
  : PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
27
28
  function(std::move(function_p)), bind_data(std::move(bind_data_p)), returned_types(std::move(returned_types_p)),
28
29
  column_ids(std::move(column_ids_p)), projection_ids(std::move(projection_ids_p)), names(std::move(names_p)),
29
- table_filters(std::move(table_filters_p)) {
30
+ table_filters(std::move(table_filters_p)), extra_info(extra_info) {
30
31
  }
31
32
 
32
33
  class TableScanGlobalSourceState : public GlobalSourceState {
@@ -149,6 +150,10 @@ string PhysicalTableScan::ParamsToString() const {
149
150
  }
150
151
  }
151
152
  }
153
+ if (!extra_info.file_filters.empty()) {
154
+ result += "\n[INFOSEPARATOR]\n";
155
+ result += "File Filters: " + extra_info.file_filters;
156
+ }
152
157
  result += "\n[INFOSEPARATOR]\n";
153
158
  result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
154
159
  return result;
@@ -0,0 +1,160 @@
1
+ #include "duckdb/execution/operator/set/physical_cte.hpp"
2
+
3
+ #include "duckdb/common/types/column/column_data_collection.hpp"
4
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
5
+ #include "duckdb/execution/aggregate_hashtable.hpp"
6
+ #include "duckdb/execution/executor.hpp"
7
+ #include "duckdb/parallel/event.hpp"
8
+ #include "duckdb/parallel/meta_pipeline.hpp"
9
+ #include "duckdb/parallel/pipeline.hpp"
10
+ #include "duckdb/parallel/task_scheduler.hpp"
11
+ #include "duckdb/storage/buffer_manager.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ PhysicalCTE::PhysicalCTE(string ctename, idx_t table_index, vector<LogicalType> types, unique_ptr<PhysicalOperator> top,
16
+ unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality)
17
+ : PhysicalOperator(PhysicalOperatorType::CTE, std::move(types), estimated_cardinality), table_index(table_index),
18
+ ctename(std::move(ctename)) {
19
+ children.push_back(std::move(top));
20
+ children.push_back(std::move(bottom));
21
+ }
22
+
23
+ PhysicalCTE::~PhysicalCTE() {
24
+ }
25
+
26
+ //===--------------------------------------------------------------------===//
27
+ // Sink
28
+ //===--------------------------------------------------------------------===//
29
+ class CTEState : public GlobalSinkState {
30
+ public:
31
+ explicit CTEState(ClientContext &context, const PhysicalCTE &op)
32
+ : intermediate_table(context, op.children[1]->GetTypes()) {
33
+ }
34
+ ColumnDataCollection intermediate_table;
35
+ ColumnDataScanState scan_state;
36
+ bool initialized = false;
37
+ bool finished_scan = false;
38
+ };
39
+
40
+ unique_ptr<GlobalSinkState> PhysicalCTE::GetGlobalSinkState(ClientContext &context) const {
41
+ working_table->Reset();
42
+ return make_uniq<CTEState>(context, *this);
43
+ }
44
+
45
+ SinkResultType PhysicalCTE::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
46
+ auto &gstate = input.global_state.Cast<CTEState>();
47
+ if (!gstate.finished_scan) {
48
+ working_table->Append(chunk);
49
+ } else {
50
+ gstate.intermediate_table.Append(chunk);
51
+ }
52
+ return SinkResultType::NEED_MORE_INPUT;
53
+ }
54
+
55
+ //===--------------------------------------------------------------------===//
56
+ // Source
57
+ //===--------------------------------------------------------------------===//
58
+ SourceResultType PhysicalCTE::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
59
+ auto &gstate = sink_state->Cast<CTEState>();
60
+ if (!gstate.initialized) {
61
+ gstate.intermediate_table.InitializeScan(gstate.scan_state);
62
+ gstate.finished_scan = false;
63
+ gstate.initialized = true;
64
+ }
65
+ if (!gstate.finished_scan) {
66
+ gstate.finished_scan = true;
67
+ ExecuteRecursivePipelines(context);
68
+ }
69
+
70
+ gstate.intermediate_table.Scan(gstate.scan_state, chunk);
71
+
72
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
73
+ }
74
+
75
+ void PhysicalCTE::ExecuteRecursivePipelines(ExecutionContext &context) const {
76
+ if (!recursive_meta_pipeline) {
77
+ throw InternalException("Missing meta pipeline for recursive CTE");
78
+ }
79
+
80
+ // get and reset pipelines
81
+ vector<shared_ptr<Pipeline>> pipelines;
82
+ recursive_meta_pipeline->GetPipelines(pipelines, true);
83
+ for (auto &pipeline : pipelines) {
84
+ auto sink = pipeline->GetSink();
85
+ if (sink.get() != this) {
86
+ sink->sink_state.reset();
87
+ }
88
+ for (auto &op_ref : pipeline->GetOperators()) {
89
+ auto &op = op_ref.get();
90
+ op.op_state.reset();
91
+ }
92
+ pipeline->ClearSource();
93
+ }
94
+
95
+ // get the MetaPipelines in the recursive_meta_pipeline and reschedule them
96
+ vector<shared_ptr<MetaPipeline>> meta_pipelines;
97
+ recursive_meta_pipeline->GetMetaPipelines(meta_pipelines, true, false);
98
+ auto &executor = recursive_meta_pipeline->GetExecutor();
99
+ vector<shared_ptr<Event>> events;
100
+ executor.ReschedulePipelines(meta_pipelines, events);
101
+
102
+ while (true) {
103
+ executor.WorkOnTasks();
104
+ if (executor.HasError()) {
105
+ executor.ThrowException();
106
+ }
107
+ bool finished = true;
108
+ for (auto &event : events) {
109
+ if (!event->IsFinished()) {
110
+ finished = false;
111
+ break;
112
+ }
113
+ }
114
+ if (finished) {
115
+ // all pipelines finished: done!
116
+ break;
117
+ }
118
+ }
119
+ }
120
+
121
+ //===--------------------------------------------------------------------===//
122
+ // Pipeline Construction
123
+ //===--------------------------------------------------------------------===//
124
+ void PhysicalCTE::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline) {
125
+ D_ASSERT(children.size() == 2);
126
+ op_state.reset();
127
+ sink_state.reset();
128
+ recursive_meta_pipeline.reset();
129
+
130
+ auto &state = meta_pipeline.GetState();
131
+ state.SetPipelineSource(current, *this);
132
+
133
+ auto &executor = meta_pipeline.GetExecutor();
134
+ executor.AddMaterializedCTE(*this);
135
+
136
+ auto &child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, *this);
137
+ child_meta_pipeline.Build(*children[0]);
138
+
139
+ // the RHS is the recursive pipeline
140
+ recursive_meta_pipeline = make_shared<MetaPipeline>(executor, state, this);
141
+ if (meta_pipeline.HasRecursiveCTE()) {
142
+ recursive_meta_pipeline->SetRecursiveCTE();
143
+ }
144
+ recursive_meta_pipeline->Build(*children[1]);
145
+ }
146
+
147
+ vector<const_reference<PhysicalOperator>> PhysicalCTE::GetSources() const {
148
+ return {*this};
149
+ }
150
+
151
+ string PhysicalCTE::ParamsToString() const {
152
+ string result = "";
153
+ result += "\n[INFOSEPARATOR]\n";
154
+ result += ctename;
155
+ result += "\n[INFOSEPARATOR]\n";
156
+ result += StringUtil::Format("idx: %llu", table_index);
157
+ return result;
158
+ }
159
+
160
+ } // namespace duckdb
@@ -12,10 +12,11 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- PhysicalRecursiveCTE::PhysicalRecursiveCTE(vector<LogicalType> types, bool union_all, unique_ptr<PhysicalOperator> top,
16
- unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality)
15
+ PhysicalRecursiveCTE::PhysicalRecursiveCTE(string ctename, idx_t table_index, vector<LogicalType> types, bool union_all,
16
+ unique_ptr<PhysicalOperator> top, unique_ptr<PhysicalOperator> bottom,
17
+ idx_t estimated_cardinality)
17
18
  : PhysicalOperator(PhysicalOperatorType::RECURSIVE_CTE, std::move(types), estimated_cardinality),
18
- union_all(union_all) {
19
+ ctename(std::move(ctename)), table_index(table_index), union_all(union_all) {
19
20
  children.push_back(std::move(top));
20
21
  children.push_back(std::move(bottom));
21
22
  }
@@ -30,8 +31,8 @@ class RecursiveCTEState : public GlobalSinkState {
30
31
  public:
31
32
  explicit RecursiveCTEState(ClientContext &context, const PhysicalRecursiveCTE &op)
32
33
  : intermediate_table(context, op.GetTypes()), new_groups(STANDARD_VECTOR_SIZE) {
33
- ht = make_uniq<GroupedAggregateHashTable>(context, Allocator::Get(context), op.types, vector<LogicalType>(),
34
- vector<BoundAggregateExpression *>());
34
+ ht = make_uniq<GroupedAggregateHashTable>(context, BufferAllocator::Get(context), op.types,
35
+ vector<LogicalType>(), vector<BoundAggregateExpression *>());
35
36
  }
36
37
 
37
38
  unique_ptr<GroupedAggregateHashTable> ht;
@@ -195,4 +196,13 @@ vector<const_reference<PhysicalOperator>> PhysicalRecursiveCTE::GetSources() con
195
196
  return {*this};
196
197
  }
197
198
 
199
+ string PhysicalRecursiveCTE::ParamsToString() const {
200
+ string result = "";
201
+ result += "\n[INFOSEPARATOR]\n";
202
+ result += ctename;
203
+ result += "\n[INFOSEPARATOR]\n";
204
+ result += StringUtil::Format("idx: %llu", table_index);
205
+ return result;
206
+ }
207
+
198
208
  } // namespace duckdb
@@ -9,10 +9,10 @@ RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
9
9
  radix_bits(RadixPartitioning::RadixBits(n_partitions)), radix_mask(RadixPartitioning::Mask(radix_bits)),
10
10
  radix_shift(RadixPartitioning::Shift(radix_bits)) {
11
11
 
12
+ D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
12
13
  D_ASSERT(n_partitions > 0);
13
- D_ASSERT(n_partitions <= 256);
14
+ D_ASSERT(n_partitions == RadixPartitioning::NumberOfPartitions(radix_bits));
14
15
  D_ASSERT(IsPowerOfTwo(n_partitions));
15
- D_ASSERT(radix_bits <= 8);
16
16
  }
17
17
 
18
18
  PartitionableHashTable::PartitionableHashTable(ClientContext &context, Allocator &allocator,
@@ -47,11 +47,21 @@ HtEntryType PartitionableHashTable::GetHTEntrySize() {
47
47
  return HtEntryType::HT_WIDTH_32;
48
48
  }
49
49
 
50
+ bool OverMemoryLimit(ClientContext &context, const bool is_partitioned, const RadixPartitionInfo &partition_info,
51
+ const GroupedAggregateHashTable &ht) {
52
+ const auto n_partitions = is_partitioned ? partition_info.n_partitions : 1;
53
+ const auto max_memory = BufferManager::GetBufferManager(context).GetMaxMemory();
54
+ const auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
55
+ const auto memory_per_partition = 0.6 * max_memory / num_threads / n_partitions;
56
+ return ht.TotalSize() > memory_per_partition;
57
+ }
58
+
50
59
  idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
51
60
  DataChunk &payload, const unsafe_vector<idx_t> &filter) {
52
61
  // If this is false, a single AddChunk would overflow the max capacity
53
62
  D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
54
- if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()) {
63
+ if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity() ||
64
+ OverMemoryLimit(context, is_partitioned, partition_info, *list.back())) {
55
65
  idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
56
66
  if (!list.empty()) {
57
67
  new_capacity = list.back()->Capacity();
@@ -70,7 +80,7 @@ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bo
70
80
 
71
81
  // we partition when we are asked to or when the unpartitioned ht runs out of space
72
82
  if (!IsPartitioned() && do_partition) {
73
- Partition();
83
+ Partition(false);
74
84
  }
75
85
 
76
86
  if (!IsPartitioned()) {
@@ -117,7 +127,7 @@ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bo
117
127
  return group_count;
118
128
  }
119
129
 
120
- void PartitionableHashTable::Partition() {
130
+ void PartitionableHashTable::Partition(bool sink_done) {
121
131
  D_ASSERT(!IsPartitioned());
122
132
  D_ASSERT(radix_partitioned_hts.empty());
123
133
  D_ASSERT(partition_info.n_partitions > 1);
@@ -130,7 +140,7 @@ void PartitionableHashTable::Partition() {
130
140
  context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
131
141
  partition_hts[r] = radix_partitioned_hts[r].back().get();
132
142
  }
133
- unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits);
143
+ unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits, sink_done);
134
144
  unpartitioned_ht.reset();
135
145
  }
136
146
  unpartitioned_hts.clear();
@@ -153,6 +163,22 @@ HashTableList PartitionableHashTable::GetUnpartitioned() {
153
163
  return std::move(unpartitioned_hts);
154
164
  }
155
165
 
166
+ idx_t PartitionableHashTable::GetPartitionCount(idx_t partition) const {
167
+ idx_t total_size = 0;
168
+ for (const auto &ht : radix_partitioned_hts[partition]) {
169
+ total_size += ht->Count();
170
+ }
171
+ return total_size;
172
+ }
173
+
174
+ idx_t PartitionableHashTable::GetPartitionSize(idx_t partition) const {
175
+ idx_t total_size = 0;
176
+ for (const auto &ht : radix_partitioned_hts[partition]) {
177
+ total_size += ht->DataSize();
178
+ }
179
+ return total_size;
180
+ }
181
+
156
182
  void PartitionableHashTable::Finalize() {
157
183
  if (IsPartitioned()) {
158
184
  for (auto &ht_list : radix_partitioned_hts) {
@@ -169,4 +195,13 @@ void PartitionableHashTable::Finalize() {
169
195
  }
170
196
  }
171
197
 
198
+ void PartitionableHashTable::Append(GroupedAggregateHashTable &ht) {
199
+ if (unpartitioned_hts.empty()) {
200
+ unpartitioned_hts.push_back(make_uniq<GroupedAggregateHashTable>(context, allocator, group_types, payload_types,
201
+ bindings, GetHTEntrySize(),
202
+ GroupedAggregateHashTable::InitialCapacity()));
203
+ }
204
+ unpartitioned_hts.back()->Append(ht);
205
+ }
206
+
172
207
  } // namespace duckdb
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/execution/perfect_aggregate_hashtable.hpp"
2
- #include "duckdb/execution/expression_executor.hpp"
2
+
3
3
  #include "duckdb/common/row_operations/row_operations.hpp"
4
+ #include "duckdb/execution/expression_executor.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -93,6 +94,18 @@ static void ComputeGroupLocation(Vector &group, Value &min, uintptr_t *address_d
93
94
  case PhysicalType::INT64:
94
95
  ComputeGroupLocationTemplated<int64_t>(vdata, min, address_data, current_shift, count);
95
96
  break;
97
+ case PhysicalType::UINT8:
98
+ ComputeGroupLocationTemplated<uint8_t>(vdata, min, address_data, current_shift, count);
99
+ break;
100
+ case PhysicalType::UINT16:
101
+ ComputeGroupLocationTemplated<uint16_t>(vdata, min, address_data, current_shift, count);
102
+ break;
103
+ case PhysicalType::UINT32:
104
+ ComputeGroupLocationTemplated<uint32_t>(vdata, min, address_data, current_shift, count);
105
+ break;
106
+ case PhysicalType::UINT64:
107
+ ComputeGroupLocationTemplated<uint64_t>(vdata, min, address_data, current_shift, count);
108
+ break;
96
109
  default:
97
110
  throw InternalException("Unsupported group type for perfect aggregate hash table");
98
111
  }
@@ -123,7 +136,7 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
123
136
  // after finding the group location we update the aggregates
124
137
  idx_t payload_idx = 0;
125
138
  auto &aggregates = layout.GetAggregates();
126
- RowOperationsState row_state(aggregate_allocator.GetAllocator());
139
+ RowOperationsState row_state(aggregate_allocator);
127
140
  for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) {
128
141
  auto &aggregate = aggregates[aggr_idx];
129
142
  auto input_count = (idx_t)aggregate.child_count;
@@ -152,7 +165,7 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
152
165
  data_ptr_t source_ptr = other.data;
153
166
  data_ptr_t target_ptr = data;
154
167
  idx_t combine_count = 0;
155
- RowOperationsState row_state(aggregate_allocator.GetAllocator());
168
+ RowOperationsState row_state(aggregate_allocator);
156
169
  for (idx_t i = 0; i < total_groups; i++) {
157
170
  auto has_entry_source = other.group_is_set[i];
158
171
  // we only have any work to do if the source has an entry for this group
@@ -208,6 +221,18 @@ static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t re
208
221
  case PhysicalType::INT64:
209
222
  ReconstructGroupVectorTemplated<int64_t>(group_values, min, mask, shift, entry_count, result);
210
223
  break;
224
+ case PhysicalType::UINT8:
225
+ ReconstructGroupVectorTemplated<uint8_t>(group_values, min, mask, shift, entry_count, result);
226
+ break;
227
+ case PhysicalType::UINT16:
228
+ ReconstructGroupVectorTemplated<uint16_t>(group_values, min, mask, shift, entry_count, result);
229
+ break;
230
+ case PhysicalType::UINT32:
231
+ ReconstructGroupVectorTemplated<uint32_t>(group_values, min, mask, shift, entry_count, result);
232
+ break;
233
+ case PhysicalType::UINT64:
234
+ ReconstructGroupVectorTemplated<uint64_t>(group_values, min, mask, shift, entry_count, result);
235
+ break;
211
236
  default:
212
237
  throw InternalException("Invalid type for perfect aggregate HT group");
213
238
  }
@@ -243,7 +268,7 @@ void PerfectAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) {
243
268
  }
244
269
  // then construct the payloads
245
270
  result.SetCardinality(entry_count);
246
- RowOperationsState row_state(aggregate_allocator.GetAllocator());
271
+ RowOperationsState row_state(aggregate_allocator);
247
272
  RowOperations::FinalizeStates(row_state, layout, addresses, result, grouping_columns);
248
273
  }
249
274
 
@@ -264,7 +289,7 @@ void PerfectAggregateHashTable::Destroy() {
264
289
  idx_t count = 0;
265
290
 
266
291
  // iterate over all initialised slots of the hash table
267
- RowOperationsState row_state(aggregate_allocator.GetAllocator());
292
+ RowOperationsState row_state(aggregate_allocator);
268
293
  data_ptr_t payload_ptr = data;
269
294
  for (idx_t i = 0; i < total_groups; i++) {
270
295
  if (group_is_set[i]) {
@@ -5,12 +5,12 @@
5
5
  #include "duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp"
6
6
  #include "duckdb/execution/operator/projection/physical_projection.hpp"
7
7
  #include "duckdb/execution/physical_plan_generator.hpp"
8
+ #include "duckdb/function/function_binder.hpp"
8
9
  #include "duckdb/main/client_context.hpp"
9
10
  #include "duckdb/parser/expression/comparison_expression.hpp"
10
11
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
11
- #include "duckdb/planner/operator/logical_aggregate.hpp"
12
- #include "duckdb/function/function_binder.hpp"
13
12
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
13
+ #include "duckdb/planner/operator/logical_aggregate.hpp"
14
14
 
15
15
  namespace duckdb {
16
16
 
@@ -23,6 +23,11 @@ static uint32_t RequiredBitsForValue(uint32_t n) {
23
23
  return required_bits;
24
24
  }
25
25
 
26
+ template <class T>
27
+ hugeint_t GetRangeHugeint(const BaseStatistics &nstats) {
28
+ return Hugeint::Convert(NumericStats::GetMax<T>(nstats)) - Hugeint::Convert(NumericStats::GetMin<T>(nstats));
29
+ }
30
+
26
31
  static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate &op, vector<idx_t> &bits_per_group) {
27
32
  if (op.grouping_sets.size() > 1 || !op.grouping_functions.empty()) {
28
33
  return false;
@@ -40,6 +45,10 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
40
45
  case PhysicalType::INT16:
41
46
  case PhysicalType::INT32:
42
47
  case PhysicalType::INT64:
48
+ case PhysicalType::UINT8:
49
+ case PhysicalType::UINT16:
50
+ case PhysicalType::UINT32:
51
+ case PhysicalType::UINT64:
43
52
  break;
44
53
  default:
45
54
  // we only support simple integer types for perfect hashing
@@ -53,6 +62,8 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
53
62
  switch (group_type.InternalType()) {
54
63
  case PhysicalType::INT8:
55
64
  case PhysicalType::INT16:
65
+ case PhysicalType::UINT8:
66
+ case PhysicalType::UINT16:
56
67
  break;
57
68
  default:
58
69
  // type is too large and there are no stats: skip perfect hashing
@@ -68,33 +79,55 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
68
79
  if (!NumericStats::HasMinMax(nstats)) {
69
80
  return false;
70
81
  }
82
+
83
+ if (NumericStats::Max(*stats) < NumericStats::Min(*stats)) {
84
+ // May result in underflow
85
+ return false;
86
+ }
87
+
71
88
  // we have a min and a max value for the stats: use that to figure out how many bits we have
72
89
  // we add two here, one for the NULL value, and one to make the computation one-indexed
73
90
  // (e.g. if min and max are the same, we still need one entry in total)
74
- int64_t range;
91
+ hugeint_t range_h;
75
92
  switch (group_type.InternalType()) {
76
93
  case PhysicalType::INT8:
77
- range = int64_t(NumericStats::GetMax<int8_t>(nstats)) - int64_t(NumericStats::GetMin<int8_t>(nstats));
94
+ range_h = GetRangeHugeint<int8_t>(nstats);
78
95
  break;
79
96
  case PhysicalType::INT16:
80
- range = int64_t(NumericStats::GetMax<int16_t>(nstats)) - int64_t(NumericStats::GetMin<int16_t>(nstats));
97
+ range_h = GetRangeHugeint<int16_t>(nstats);
81
98
  break;
82
99
  case PhysicalType::INT32:
83
- range = int64_t(NumericStats::GetMax<int32_t>(nstats)) - int64_t(NumericStats::GetMin<int32_t>(nstats));
100
+ range_h = GetRangeHugeint<int32_t>(nstats);
84
101
  break;
85
102
  case PhysicalType::INT64:
86
- if (!TrySubtractOperator::Operation(NumericStats::GetMax<int64_t>(nstats),
87
- NumericStats::GetMin<int64_t>(nstats), range)) {
88
- return false;
89
- }
103
+ range_h = GetRangeHugeint<int64_t>(nstats);
104
+ break;
105
+ case PhysicalType::UINT8:
106
+ range_h = GetRangeHugeint<uint8_t>(nstats);
107
+ break;
108
+ case PhysicalType::UINT16:
109
+ range_h = GetRangeHugeint<uint16_t>(nstats);
110
+ break;
111
+ case PhysicalType::UINT32:
112
+ range_h = GetRangeHugeint<uint32_t>(nstats);
113
+ break;
114
+ case PhysicalType::UINT64:
115
+ range_h = GetRangeHugeint<uint64_t>(nstats);
90
116
  break;
91
117
  default:
92
118
  throw InternalException("Unsupported type for perfect hash (should be caught before)");
93
119
  }
120
+
121
+ uint64_t range;
122
+ if (!Hugeint::TryCast(range_h, range)) {
123
+ return false;
124
+ }
125
+
94
126
  // bail out on any range bigger than 2^32
95
127
  if (range >= NumericLimits<int32_t>::Maximum()) {
96
128
  return false;
97
129
  }
130
+
98
131
  range += 2;
99
132
  // figure out how many bits we need
100
133
  idx_t required_bits = RequiredBitsForValue(range);