duckdb 0.7.2-dev12.0 → 0.7.2-dev1238.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (631)
  1. package/binding.gyp +12 -7
  2. package/lib/duckdb.d.ts +55 -2
  3. package/lib/duckdb.js +20 -1
  4. package/package.json +1 -1
  5. package/src/connection.cpp +1 -2
  6. package/src/database.cpp +1 -1
  7. package/src/duckdb/extension/icu/icu-extension.cpp +4 -0
  8. package/src/duckdb/extension/icu/icu-list-range.cpp +207 -0
  9. package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
  10. package/src/duckdb/extension/icu/include/icu-list-range.hpp +17 -0
  11. package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
  12. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  13. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  14. package/src/duckdb/extension/json/include/json_functions.hpp +2 -0
  15. package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
  16. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
  17. package/src/duckdb/extension/json/json_functions/read_json.cpp +6 -5
  18. package/src/duckdb/extension/json/json_functions.cpp +12 -4
  19. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  20. package/src/duckdb/extension/json/json_serializer.cpp +217 -0
  21. package/src/duckdb/extension/parquet/column_reader.cpp +94 -15
  22. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  23. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  24. package/src/duckdb/extension/parquet/include/decode_utils.hpp +5 -4
  25. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  26. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +2 -1
  27. package/src/duckdb/extension/parquet/parquet-extension.cpp +14 -3
  28. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -1
  29. package/src/duckdb/extension/parquet/parquet_statistics.cpp +49 -36
  30. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +16 -6
  31. package/src/duckdb/src/catalog/catalog.cpp +34 -5
  32. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
  33. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
  34. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
  35. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +3 -3
  36. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
  37. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
  38. package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
  39. package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
  40. package/src/duckdb/src/catalog/dependency_list.cpp +12 -0
  41. package/src/duckdb/src/catalog/duck_catalog.cpp +34 -7
  42. package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
  43. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  44. package/src/duckdb/src/common/box_renderer.cpp +109 -23
  45. package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
  46. package/src/duckdb/src/common/enums/join_type.cpp +3 -22
  47. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  48. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  49. package/src/duckdb/src/common/exception.cpp +15 -1
  50. package/src/duckdb/src/common/field_writer.cpp +1 -0
  51. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  52. package/src/duckdb/src/common/local_file_system.cpp +64 -7
  53. package/src/duckdb/src/common/operator/cast_operators.cpp +1 -1
  54. package/src/duckdb/src/common/preserved_error.cpp +7 -5
  55. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  56. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
  57. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
  58. package/src/duckdb/src/common/serializer/enum_serializer.cpp +1176 -0
  59. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  60. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  61. package/src/duckdb/src/common/sort/sorted_block.cpp +0 -1
  62. package/src/duckdb/src/common/string_util.cpp +18 -1
  63. package/src/duckdb/src/common/types/bit.cpp +166 -87
  64. package/src/duckdb/src/common/types/blob.cpp +1 -1
  65. package/src/duckdb/src/common/types/chunk_collection.cpp +2 -2
  66. package/src/duckdb/src/common/types/column_data_collection.cpp +39 -2
  67. package/src/duckdb/src/common/types/column_data_collection_segment.cpp +12 -10
  68. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  69. package/src/duckdb/src/common/types/interval.cpp +0 -41
  70. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  71. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  72. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  73. package/src/duckdb/src/common/types/time.cpp +13 -0
  74. package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
  75. package/src/duckdb/src/common/types/value.cpp +320 -154
  76. package/src/duckdb/src/common/types/vector.cpp +158 -134
  77. package/src/duckdb/src/common/types.cpp +313 -153
  78. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  79. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  80. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  81. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +2 -1
  82. package/src/duckdb/src/execution/aggregate_hashtable.cpp +98 -74
  83. package/src/duckdb/src/execution/column_binding_resolver.cpp +21 -5
  84. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  85. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  86. package/src/duckdb/src/execution/index/art/art.cpp +19 -5
  87. package/src/duckdb/src/execution/join_hashtable.cpp +3 -1
  88. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  89. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  90. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +117 -26
  91. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
  92. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +5 -3
  93. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -17
  94. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  95. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +2 -2
  96. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +13 -4
  97. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  98. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -11
  99. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +3 -1
  100. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +11 -4
  101. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +24 -19
  102. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  103. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  104. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +2 -2
  105. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  106. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  107. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
  108. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +20 -5
  109. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
  110. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +2 -5
  111. package/src/duckdb/src/execution/partitionable_hashtable.cpp +20 -5
  112. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +22 -16
  113. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
  114. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +95 -47
  115. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  116. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
  117. package/src/duckdb/src/execution/physical_plan/plan_positional_join.cpp +14 -5
  118. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
  119. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +23 -15
  120. package/src/duckdb/src/execution/window_segment_tree.cpp +173 -1
  121. package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
  122. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
  123. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +269 -0
  124. package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
  125. package/src/duckdb/src/function/aggregate/distributive/count.cpp +3 -4
  126. package/src/duckdb/src/function/aggregate/distributive/first.cpp +1 -0
  127. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
  128. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +19 -16
  129. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
  130. package/src/duckdb/src/function/aggregate/holistic/approximate_quantile.cpp +5 -2
  131. package/src/duckdb/src/function/aggregate/holistic/mode.cpp +1 -1
  132. package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +16 -1
  133. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  134. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +138 -45
  135. package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
  136. package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
  137. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -1
  138. package/src/duckdb/src/function/cast/enum_casts.cpp +25 -3
  139. package/src/duckdb/src/function/cast/list_casts.cpp +17 -4
  140. package/src/duckdb/src/function/cast/map_cast.cpp +5 -2
  141. package/src/duckdb/src/function/cast/string_cast.cpp +36 -10
  142. package/src/duckdb/src/function/cast/struct_cast.cpp +24 -4
  143. package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
  144. package/src/duckdb/src/function/cast/union_casts.cpp +33 -7
  145. package/src/duckdb/src/function/cast_rules.cpp +9 -4
  146. package/src/duckdb/src/function/function_binder.cpp +1 -8
  147. package/src/duckdb/src/function/pragma/pragma_queries.cpp +24 -1
  148. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +100 -0
  149. package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
  150. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  151. package/src/duckdb/src/function/scalar/date/date_part.cpp +18 -26
  152. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  153. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  154. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  155. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +4 -146
  156. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  157. package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
  158. package/src/duckdb/src/function/scalar/list/list_concat.cpp +8 -12
  159. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  160. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +7 -3
  161. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  162. package/src/duckdb/src/function/scalar/list/list_value.cpp +6 -10
  163. package/src/duckdb/src/function/scalar/map/map.cpp +47 -1
  164. package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
  165. package/src/duckdb/src/function/scalar/map/map_extract.cpp +68 -26
  166. package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
  167. package/src/duckdb/src/function/scalar/math/numeric.cpp +101 -17
  168. package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
  169. package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
  170. package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
  171. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
  172. package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
  173. package/src/duckdb/src/function/scalar/operators/multiply.cpp +5 -6
  174. package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
  175. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  176. package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
  177. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  178. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  179. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  180. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
  181. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
  182. package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
  183. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  184. package/src/duckdb/src/function/scalar/string_functions.cpp +2 -0
  185. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +5 -10
  186. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +11 -14
  187. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +6 -7
  188. package/src/duckdb/src/function/table/arrow.cpp +5 -2
  189. package/src/duckdb/src/function/table/arrow_conversion.cpp +25 -1
  190. package/src/duckdb/src/function/table/checkpoint.cpp +5 -1
  191. package/src/duckdb/src/function/table/read_csv.cpp +60 -0
  192. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +2 -2
  193. package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -2
  194. package/src/duckdb/src/function/table/table_scan.cpp +9 -12
  195. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  196. package/src/duckdb/src/function/table_function.cpp +30 -11
  197. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +6 -0
  198. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  199. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
  200. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +3 -0
  201. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -1
  202. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +8 -2
  203. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
  204. package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
  205. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
  206. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +7 -4
  207. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  208. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
  209. package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
  210. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
  211. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  212. package/src/duckdb/src/include/duckdb/common/exception.hpp +69 -2
  213. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
  214. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  215. package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
  216. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  217. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +2 -0
  218. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +45 -0
  219. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +6 -1
  220. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  221. package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
  222. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
  223. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
  224. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
  225. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
  226. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
  227. package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
  228. package/src/duckdb/src/include/duckdb/common/string_util.hpp +27 -0
  229. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +12 -7
  230. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  231. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  232. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  233. package/src/duckdb/src/include/duckdb/common/types/time.hpp +3 -0
  234. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  235. package/src/duckdb/src/include/duckdb/common/types/value.hpp +17 -48
  236. package/src/duckdb/src/include/duckdb/common/types/value_map.hpp +1 -1
  237. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -1
  238. package/src/duckdb/src/include/duckdb/common/types.hpp +45 -8
  239. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
  240. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +35 -20
  241. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -14
  242. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +2 -0
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +10 -0
  246. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
  247. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  248. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -3
  249. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +54 -0
  250. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
  251. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +18 -6
  252. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +84 -0
  253. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +2 -2
  254. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +28 -64
  255. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
  256. package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
  257. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +138 -0
  258. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +59 -0
  260. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
  261. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  262. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  263. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
  264. package/src/duckdb/src/include/duckdb/function/table_function.hpp +10 -0
  265. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  266. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  267. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
  268. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  269. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
  270. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  271. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  272. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +2 -0
  273. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +2 -1
  274. package/src/duckdb/src/include/duckdb/main/relation.hpp +2 -1
  275. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  276. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  277. package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
  278. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
  279. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +4 -0
  280. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  281. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
  282. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
  283. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
  284. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
  285. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
  286. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
  287. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -2
  291. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
  295. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +4 -2
  296. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
  297. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
  298. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +5 -1
  299. package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
  300. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
  301. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
  302. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
  303. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
  304. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
  305. package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
  306. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
  307. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
  308. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +13 -2
  309. package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
  310. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
  311. package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
  312. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
  313. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
  314. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
  315. package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
  316. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
  317. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  318. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +87 -0
  319. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  320. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
  321. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  322. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
  323. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +33 -0
  324. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  325. package/src/duckdb/src/include/duckdb/planner/binder.hpp +15 -4
  326. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
  327. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  328. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +64 -0
  329. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
  330. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
  331. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
  332. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +9 -38
  333. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -1
  334. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  335. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  336. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
  337. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
  338. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
  339. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +8 -2
  340. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
  341. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
  342. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  343. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  344. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  345. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  346. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +2 -1
  347. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  348. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  349. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +2 -1
  350. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +4 -3
  351. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +4 -3
  352. package/src/duckdb/src/include/duckdb/storage/index.hpp +5 -4
  353. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
  354. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
  355. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  356. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -6
  357. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  358. package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
  359. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +114 -0
  360. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
  361. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  362. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  363. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  364. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  365. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  366. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +21 -7
  367. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
  368. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +5 -6
  369. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  370. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  371. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +6 -3
  372. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +41 -45
  373. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +23 -7
  374. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +35 -0
  375. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +21 -29
  376. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +6 -6
  377. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +281 -26
  378. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  379. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  380. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -1
  381. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  382. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -3
  383. package/src/duckdb/src/include/duckdb.h +71 -2
  384. package/src/duckdb/src/include/duckdb.hpp +0 -1
  385. package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
  386. package/src/duckdb/src/main/capi/result-c.cpp +27 -1
  387. package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
  388. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  389. package/src/duckdb/src/main/client_context.cpp +38 -34
  390. package/src/duckdb/src/main/client_data.cpp +7 -6
  391. package/src/duckdb/src/main/config.cpp +70 -1
  392. package/src/duckdb/src/main/database.cpp +19 -2
  393. package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
  394. package/src/duckdb/src/main/prepared_statement.cpp +4 -0
  395. package/src/duckdb/src/main/query_profiler.cpp +17 -15
  396. package/src/duckdb/src/main/relation/explain_relation.cpp +3 -3
  397. package/src/duckdb/src/main/relation.cpp +3 -2
  398. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  399. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  400. package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
  401. package/src/duckdb/src/optimizer/filter_combiner.cpp +3 -6
  402. package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
  403. package/src/duckdb/src/optimizer/filter_pushdown.cpp +14 -8
  404. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +107 -71
  405. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +32 -12
  406. package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
  407. package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
  408. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +33 -5
  409. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
  410. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
  411. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +5 -12
  412. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +2 -2
  413. package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +1 -1
  414. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
  415. package/src/duckdb/src/optimizer/rule/move_constants.cpp +10 -4
  416. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
  417. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
  418. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  419. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  420. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  421. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  422. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  423. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  424. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  425. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  426. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  427. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  428. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +29 -32
  429. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +5 -5
  430. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  431. package/src/duckdb/src/optimizer/statistics_propagator.cpp +2 -1
  432. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +2 -2
  433. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -7
  434. package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
  435. package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
  436. package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
  437. package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
  438. package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
  439. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
  440. package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
  441. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +17 -0
  442. package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
  443. package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
  444. package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
  445. package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
  446. package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
  447. package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
  448. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
  449. package/src/duckdb/src/parser/expression/star_expression.cpp +26 -6
  450. package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
  451. package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
  452. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
  453. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
  454. package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
  455. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  456. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
  457. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
  458. package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
  459. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +15 -1
  460. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
  461. package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
  462. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
  463. package/src/duckdb/src/parser/query_node.cpp +51 -1
  464. package/src/duckdb/src/parser/result_modifier.cpp +78 -0
  465. package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
  466. package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
  467. package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
  468. package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
  469. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
  470. package/src/duckdb/src/parser/tableref/joinref.cpp +29 -0
  471. package/src/duckdb/src/parser/tableref/pivotref.cpp +373 -0
  472. package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
  473. package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
  474. package/src/duckdb/src/parser/tableref.cpp +49 -0
  475. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
  476. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
  477. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
  478. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +85 -42
  479. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  480. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  481. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
  482. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
  483. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
  484. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
  485. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +3 -2
  486. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
  487. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
  488. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +179 -0
  489. package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
  490. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
  491. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +2 -3
  492. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +12 -1
  493. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +121 -0
  494. package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
  495. package/src/duckdb/src/parser/transformer.cpp +15 -3
  496. package/src/duckdb/src/planner/bind_context.cpp +18 -25
  497. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +9 -7
  498. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +4 -3
  499. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +23 -12
  500. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
  501. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
  502. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
  503. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +163 -24
  504. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
  505. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +109 -94
  506. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
  507. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +9 -4
  508. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +5 -3
  509. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -2
  510. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +10 -1
  511. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  512. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +12 -8
  513. package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
  514. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +4 -2
  515. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +19 -3
  516. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +366 -0
  517. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
  518. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -0
  519. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -13
  520. package/src/duckdb/src/planner/binder.cpp +19 -24
  521. package/src/duckdb/src/planner/bound_result_modifier.cpp +27 -1
  522. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
  523. package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
  524. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  525. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +146 -0
  526. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +6 -3
  527. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -3
  528. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +1 -132
  529. package/src/duckdb/src/planner/expression_binder.cpp +10 -3
  530. package/src/duckdb/src/planner/expression_iterator.cpp +17 -10
  531. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  532. package/src/duckdb/src/planner/logical_operator.cpp +7 -2
  533. package/src/duckdb/src/planner/logical_operator_visitor.cpp +6 -0
  534. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
  535. package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
  536. package/src/duckdb/src/planner/planner.cpp +2 -1
  537. package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
  538. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
  539. package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
  540. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  541. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +4 -15
  542. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +10 -4
  543. package/src/duckdb/src/storage/checkpoint_manager.cpp +9 -3
  544. package/src/duckdb/src/storage/compression/bitpacking.cpp +29 -25
  545. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +45 -46
  546. package/src/duckdb/src/storage/compression/numeric_constant.cpp +10 -11
  547. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  548. package/src/duckdb/src/storage/compression/rle.cpp +20 -15
  549. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +6 -6
  550. package/src/duckdb/src/storage/data_table.cpp +23 -23
  551. package/src/duckdb/src/storage/index.cpp +12 -1
  552. package/src/duckdb/src/storage/local_storage.cpp +27 -23
  553. package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
  554. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  555. package/src/duckdb/src/storage/statistics/column_statistics.cpp +57 -3
  556. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +8 -9
  557. package/src/duckdb/src/storage/statistics/list_stats.cpp +121 -0
  558. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +591 -0
  559. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
  560. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  561. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  562. package/src/duckdb/src/storage/statistics/struct_stats.cpp +133 -0
  563. package/src/duckdb/src/storage/storage_info.cpp +2 -2
  564. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +4 -10
  565. package/src/duckdb/src/storage/table/column_data.cpp +118 -62
  566. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +10 -9
  567. package/src/duckdb/src/storage/table/column_segment.cpp +30 -45
  568. package/src/duckdb/src/storage/table/list_column_data.cpp +50 -71
  569. package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
  570. package/src/duckdb/src/storage/table/row_group.cpp +213 -143
  571. package/src/duckdb/src/storage/table/row_group_collection.cpp +151 -105
  572. package/src/duckdb/src/storage/table/scan_state.cpp +45 -33
  573. package/src/duckdb/src/storage/table/standard_column_data.cpp +11 -12
  574. package/src/duckdb/src/storage/table/struct_column_data.cpp +27 -34
  575. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  576. package/src/duckdb/src/storage/table/update_segment.cpp +23 -18
  577. package/src/duckdb/src/storage/wal_replay.cpp +8 -5
  578. package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
  579. package/src/duckdb/src/transaction/commit_state.cpp +11 -7
  580. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
  581. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +35 -0
  582. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +36 -2
  583. package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
  584. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1022 -530
  585. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +8 -0
  586. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24462 -22828
  587. package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
  588. package/src/duckdb/third_party/re2/re2/re2.h +2 -0
  589. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  590. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  591. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  592. package/src/duckdb/ub_src_common_types.cpp +2 -0
  593. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  594. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
  595. package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
  596. package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
  597. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  598. package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
  599. package/src/duckdb/ub_src_main_capi.cpp +2 -0
  600. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  601. package/src/duckdb/ub_src_parser.cpp +2 -0
  602. package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
  603. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  604. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  605. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  606. package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
  607. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
  608. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  609. package/src/duckdb/ub_src_planner_expression_binder.cpp +2 -0
  610. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
  611. package/src/duckdb/ub_src_storage_statistics.cpp +6 -6
  612. package/src/duckdb/ub_src_storage_table.cpp +0 -2
  613. package/src/duckdb_node.hpp +2 -1
  614. package/src/statement.cpp +5 -5
  615. package/src/utils.cpp +27 -2
  616. package/test/extension.test.ts +44 -26
  617. package/test/syntax_error.test.ts +3 -1
  618. package/filelist.cache +0 -0
  619. package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
  620. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  621. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  622. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  623. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  624. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  625. package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
  626. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  627. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  628. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  629. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  630. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
  631. package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -65,33 +65,33 @@ inline bool ValuePositionComparator::Final<duckdb::NotEquals>(const Value &lhs,
65
65
  // Non-strict inequalities must use strict comparisons for Definite
66
66
  template <>
67
67
  bool ValuePositionComparator::Definite<duckdb::LessThanEquals>(const Value &lhs, const Value &rhs) {
68
- return ValueOperations::DistinctLessThan(lhs, rhs);
68
+ return !ValuePositionComparator::Definite<duckdb::GreaterThan>(lhs, rhs);
69
+ }
70
+
71
+ template <>
72
+ bool ValuePositionComparator::Final<duckdb::GreaterThan>(const Value &lhs, const Value &rhs) {
73
+ return ValueOperations::DistinctGreaterThan(lhs, rhs);
69
74
  }
70
75
 
71
76
  template <>
72
77
  bool ValuePositionComparator::Final<duckdb::LessThanEquals>(const Value &lhs, const Value &rhs) {
73
- return ValueOperations::DistinctLessThanEquals(lhs, rhs);
78
+ return !ValuePositionComparator::Final<duckdb::GreaterThan>(lhs, rhs);
74
79
  }
75
80
 
76
81
  template <>
77
82
  bool ValuePositionComparator::Definite<duckdb::GreaterThanEquals>(const Value &lhs, const Value &rhs) {
78
- return ValueOperations::DistinctGreaterThan(lhs, rhs);
83
+ return !ValuePositionComparator::Definite<duckdb::GreaterThan>(rhs, lhs);
79
84
  }
80
85
 
81
86
  template <>
82
87
  bool ValuePositionComparator::Final<duckdb::GreaterThanEquals>(const Value &lhs, const Value &rhs) {
83
- return ValueOperations::DistinctGreaterThanEquals(lhs, rhs);
88
+ return !ValuePositionComparator::Final<duckdb::GreaterThan>(rhs, lhs);
84
89
  }
85
90
 
86
91
  // Strict inequalities just use strict for both Definite and Final
87
92
  template <>
88
93
  bool ValuePositionComparator::Final<duckdb::LessThan>(const Value &lhs, const Value &rhs) {
89
- return ValueOperations::DistinctLessThan(lhs, rhs);
90
- }
91
-
92
- template <>
93
- bool ValuePositionComparator::Final<duckdb::GreaterThan>(const Value &lhs, const Value &rhs) {
94
- return ValueOperations::DistinctGreaterThan(lhs, rhs);
94
+ return ValuePositionComparator::Final<duckdb::GreaterThan>(rhs, lhs);
95
95
  }
96
96
 
97
97
  template <class OP>
@@ -194,10 +194,7 @@ bool ValueOperations::GreaterThan(const Value &left, const Value &right) {
194
194
  }
195
195
 
196
196
  bool ValueOperations::GreaterThanEquals(const Value &left, const Value &right) {
197
- if (left.IsNull() || right.IsNull()) {
198
- throw InternalException("Comparison on NULL values");
199
- }
200
- return TemplatedBooleanOperation<duckdb::GreaterThanEquals>(left, right);
197
+ return !ValueOperations::GreaterThan(right, left);
201
198
  }
202
199
 
203
200
  bool ValueOperations::LessThan(const Value &left, const Value &right) {
@@ -205,7 +202,7 @@ bool ValueOperations::LessThan(const Value &left, const Value &right) {
205
202
  }
206
203
 
207
204
  bool ValueOperations::LessThanEquals(const Value &left, const Value &right) {
208
- return ValueOperations::GreaterThanEquals(right, left);
205
+ return !ValueOperations::GreaterThan(left, right);
209
206
  }
210
207
 
211
208
  bool ValueOperations::NotDistinctFrom(const Value &left, const Value &right) {
@@ -234,12 +231,7 @@ bool ValueOperations::DistinctGreaterThan(const Value &left, const Value &right)
234
231
  }
235
232
 
236
233
  bool ValueOperations::DistinctGreaterThanEquals(const Value &left, const Value &right) {
237
- if (left.IsNull()) {
238
- return true;
239
- } else if (right.IsNull()) {
240
- return false;
241
- }
242
- return TemplatedBooleanOperation<duckdb::GreaterThanEquals>(left, right);
234
+ return !ValueOperations::DistinctGreaterThan(right, left);
243
235
  }
244
236
 
245
237
  bool ValueOperations::DistinctLessThan(const Value &left, const Value &right) {
@@ -247,7 +239,7 @@ bool ValueOperations::DistinctLessThan(const Value &left, const Value &right) {
247
239
  }
248
240
 
249
241
  bool ValueOperations::DistinctLessThanEquals(const Value &left, const Value &right) {
250
- return ValueOperations::DistinctGreaterThanEquals(right, left);
242
+ return !ValueOperations::DistinctGreaterThan(left, right);
251
243
  }
252
244
 
253
245
  } // namespace duckdb
@@ -22,12 +22,12 @@ bool EqualsFloat(T left, T right) {
22
22
  }
23
23
 
24
24
  template <>
25
- bool Equals::Operation(float left, float right) {
25
+ bool Equals::Operation(const float &left, const float &right) {
26
26
  return EqualsFloat<float>(left, right);
27
27
  }
28
28
 
29
29
  template <>
30
- bool Equals::Operation(double left, double right) {
30
+ bool Equals::Operation(const double &left, const double &right) {
31
31
  return EqualsFloat<double>(left, right);
32
32
  }
33
33
 
@@ -49,12 +49,12 @@ bool GreaterThanFloat(T left, T right) {
49
49
  }
50
50
 
51
51
  template <>
52
- bool GreaterThan::Operation(float left, float right) {
52
+ bool GreaterThan::Operation(const float &left, const float &right) {
53
53
  return GreaterThanFloat<float>(left, right);
54
54
  }
55
55
 
56
56
  template <>
57
- bool GreaterThan::Operation(double left, double right) {
57
+ bool GreaterThan::Operation(const double &left, const double &right) {
58
58
  return GreaterThanFloat<double>(left, right);
59
59
  }
60
60
 
@@ -77,12 +77,12 @@ bool GreaterThanEqualsFloat(T left, T right) {
77
77
  }
78
78
 
79
79
  template <>
80
- bool GreaterThanEquals::Operation(float left, float right) {
80
+ bool GreaterThanEquals::Operation(const float &left, const float &right) {
81
81
  return GreaterThanEqualsFloat<float>(left, right);
82
82
  }
83
83
 
84
84
  template <>
85
- bool GreaterThanEquals::Operation(double left, double right) {
85
+ bool GreaterThanEquals::Operation(const double &left, const double &right) {
86
86
  return GreaterThanEqualsFloat<double>(left, right);
87
87
  }
88
88
 
@@ -127,14 +127,14 @@ template <>
127
127
  inline idx_t ComparisonSelector::Select<duckdb::LessThan>(Vector &left, Vector &right, const SelectionVector *sel,
128
128
  idx_t count, SelectionVector *true_sel,
129
129
  SelectionVector *false_sel) {
130
- return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel);
130
+ return VectorOperations::GreaterThan(right, left, sel, count, true_sel, false_sel);
131
131
  }
132
132
 
133
133
  template <>
134
134
  inline idx_t ComparisonSelector::Select<duckdb::LessThanEquals>(Vector &left, Vector &right, const SelectionVector *sel,
135
135
  idx_t count, SelectionVector *true_sel,
136
136
  SelectionVector *false_sel) {
137
- return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
137
+ return VectorOperations::GreaterThanEquals(right, left, sel, count, true_sel, false_sel);
138
138
  }
139
139
 
140
140
  static void ComparesNotNull(UnifiedVectorFormat &ldata, UnifiedVectorFormat &rdata, ValidityMask &vresult,
@@ -272,7 +272,7 @@ void VectorOperations::GreaterThanEquals(Vector &left, Vector &right, Vector &re
272
272
  }
273
273
 
274
274
  void VectorOperations::LessThanEquals(Vector &left, Vector &right, Vector &result, idx_t count) {
275
- ComparisonExecutor::Execute<duckdb::LessThanEquals>(left, right, result, count);
275
+ ComparisonExecutor::Execute<duckdb::GreaterThanEquals>(right, left, result, count);
276
276
  }
277
277
 
278
278
  void VectorOperations::GreaterThan(Vector &left, Vector &right, Vector &result, idx_t count) {
@@ -280,7 +280,7 @@ void VectorOperations::GreaterThan(Vector &left, Vector &right, Vector &result,
280
280
  }
281
281
 
282
282
  void VectorOperations::LessThan(Vector &left, Vector &right, Vector &result, idx_t count) {
283
- ComparisonExecutor::Execute<duckdb::LessThan>(left, right, result, count);
283
+ ComparisonExecutor::Execute<duckdb::GreaterThan>(right, left, result, count);
284
284
  }
285
285
 
286
286
  } // namespace duckdb
@@ -401,14 +401,14 @@ idx_t PositionComparator::Definite<duckdb::DistinctLessThanEquals>(Vector &left,
401
401
  const SelectionVector &sel, idx_t count,
402
402
  SelectionVector *true_sel,
403
403
  SelectionVector &false_sel) {
404
- return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, &false_sel);
404
+ return VectorOperations::DistinctGreaterThan(right, left, &sel, count, true_sel, &false_sel);
405
405
  }
406
406
 
407
407
  template <>
408
408
  idx_t PositionComparator::Final<duckdb::DistinctLessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel,
409
409
  idx_t count, SelectionVector *true_sel,
410
410
  SelectionVector *false_sel) {
411
- return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
411
+ return VectorOperations::DistinctGreaterThanEquals(right, left, &sel, count, true_sel, false_sel);
412
412
  }
413
413
 
414
414
  template <>
@@ -432,7 +432,7 @@ template <>
432
432
  idx_t PositionComparator::Final<duckdb::DistinctLessThan>(Vector &left, Vector &right, const SelectionVector &sel,
433
433
  idx_t count, SelectionVector *true_sel,
434
434
  SelectionVector *false_sel) {
435
- return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
435
+ return VectorOperations::DistinctGreaterThan(right, left, &sel, count, true_sel, false_sel);
436
436
  }
437
437
 
438
438
  template <>
@@ -869,7 +869,7 @@ idx_t VectorOperations::DistinctFrom(Vector &left, Vector &right, const Selectio
869
869
  // true := A == B with nulls being equal
870
870
  idx_t VectorOperations::NotDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
871
871
  SelectionVector *true_sel, SelectionVector *false_sel) {
872
- return TemplatedDistinctSelectOperation<duckdb::NotDistinctFrom>(left, right, sel, count, true_sel, false_sel);
872
+ return count - TemplatedDistinctSelectOperation<duckdb::DistinctFrom>(left, right, sel, count, false_sel, true_sel);
873
873
  }
874
874
 
875
875
  // true := A > B with nulls being maximal
@@ -888,13 +888,13 @@ idx_t VectorOperations::DistinctGreaterThanNullsFirst(Vector &left, Vector &righ
888
888
  // true := A >= B with nulls being maximal
889
889
  idx_t VectorOperations::DistinctGreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
890
890
  SelectionVector *true_sel, SelectionVector *false_sel) {
891
- return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThanEquals>(left, right, sel, count, true_sel,
892
- false_sel);
891
+ return count -
892
+ TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThan>(right, left, sel, count, false_sel, true_sel);
893
893
  }
894
894
  // true := A < B with nulls being maximal
895
895
  idx_t VectorOperations::DistinctLessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
896
896
  SelectionVector *true_sel, SelectionVector *false_sel) {
897
- return TemplatedDistinctSelectOperation<duckdb::DistinctLessThan>(left, right, sel, count, true_sel, false_sel);
897
+ return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThan>(right, left, sel, count, true_sel, false_sel);
898
898
  }
899
899
 
900
900
  // true := A < B with nulls being minimal
@@ -907,8 +907,8 @@ idx_t VectorOperations::DistinctLessThanNullsFirst(Vector &left, Vector &right,
907
907
  // true := A <= B with nulls being maximal
908
908
  idx_t VectorOperations::DistinctLessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
909
909
  SelectionVector *true_sel, SelectionVector *false_sel) {
910
- return TemplatedDistinctSelectOperation<duckdb::DistinctLessThanEquals>(left, right, sel, count, true_sel,
911
- false_sel);
910
+ return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThanEquals>(right, left, sel, count, true_sel,
911
+ false_sel);
912
912
  }
913
913
 
914
914
  // true := A != B with nulls being equal, inputs selected
@@ -919,7 +919,8 @@ idx_t VectorOperations::NestedNotEquals(Vector &left, Vector &right, const Selec
919
919
  // true := A == B with nulls being equal, inputs selected
920
920
  idx_t VectorOperations::NestedEquals(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
921
921
  SelectionVector *true_sel, SelectionVector *false_sel) {
922
- return TemplatedDistinctSelectOperation<duckdb::NotDistinctFrom>(left, right, &sel, count, true_sel, false_sel);
922
+ return count -
923
+ TemplatedDistinctSelectOperation<duckdb::DistinctFrom>(left, right, &sel, count, false_sel, true_sel);
923
924
  }
924
925
 
925
926
  } // namespace duckdb
@@ -11,7 +11,8 @@ bool VectorOperations::TryCast(CastFunctionSet &set, GetCastFunctionInput &input
11
11
  auto cast_function = set.GetCastFunction(source.GetType(), result.GetType(), input);
12
12
  unique_ptr<FunctionLocalState> local_state;
13
13
  if (cast_function.init_local_state) {
14
- local_state = cast_function.init_local_state(*input.context);
14
+ CastLocalStateParameters lparameters(input.context, cast_function.cast_data);
15
+ local_state = cast_function.init_local_state(lparameters);
15
16
  }
16
17
  CastParameters parameters(cast_function.cast_data.get(), strict, error_message, local_state.get());
17
18
  return cast_function.function(source, result, count, parameters);
@@ -21,9 +21,9 @@ using ValidityBytes = RowLayout::ValidityBytes;
21
21
  GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
22
22
  vector<LogicalType> group_types, vector<LogicalType> payload_types,
23
23
  const vector<BoundAggregateExpression *> &bindings,
24
- HtEntryType entry_type)
24
+ HtEntryType entry_type, idx_t initial_capacity)
25
25
  : GroupedAggregateHashTable(context, allocator, std::move(group_types), std::move(payload_types),
26
- AggregateObject::CreateAggregateObjects(bindings), entry_type) {
26
+ AggregateObject::CreateAggregateObjects(bindings), entry_type, initial_capacity) {
27
27
  }
28
28
 
29
29
  GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
@@ -31,17 +31,19 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
31
31
  : GroupedAggregateHashTable(context, allocator, std::move(group_types), {}, vector<AggregateObject>()) {
32
32
  }
33
33
 
34
+ AggregateHTAppendState::AggregateHTAppendState()
35
+ : ht_offsets(LogicalTypeId::BIGINT), hash_salts(LogicalTypeId::SMALLINT),
36
+ group_compare_vector(STANDARD_VECTOR_SIZE), no_match_vector(STANDARD_VECTOR_SIZE),
37
+ empty_vector(STANDARD_VECTOR_SIZE), new_groups(STANDARD_VECTOR_SIZE), addresses(LogicalType::POINTER) {
38
+ }
39
+
34
40
  GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
35
41
  vector<LogicalType> group_types_p,
36
42
  vector<LogicalType> payload_types_p,
37
43
  vector<AggregateObject> aggregate_objects_p,
38
- HtEntryType entry_type)
44
+ HtEntryType entry_type, idx_t initial_capacity)
39
45
  : BaseAggregateHashTable(context, allocator, aggregate_objects_p, std::move(payload_types_p)),
40
- entry_type(entry_type), capacity(0), entries(0), payload_page_offset(0), is_finalized(false),
41
- ht_offsets(LogicalTypeId::BIGINT), hash_salts(LogicalTypeId::SMALLINT),
42
- group_compare_vector(STANDARD_VECTOR_SIZE), no_match_vector(STANDARD_VECTOR_SIZE),
43
- empty_vector(STANDARD_VECTOR_SIZE) {
44
-
46
+ entry_type(entry_type), capacity(0), entries(0), payload_page_offset(0), is_finalized(false) {
45
47
  // Append hash column to the end and initialise the row layout
46
48
  group_types_p.emplace_back(LogicalType::HASH);
47
49
  layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
@@ -59,12 +61,12 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
59
61
  switch (entry_type) {
60
62
  case HtEntryType::HT_WIDTH_64: {
61
63
  hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_64::salt)) * 8;
62
- Resize<aggr_ht_entry_64>(STANDARD_VECTOR_SIZE * 2L);
64
+ Resize<aggr_ht_entry_64>(initial_capacity);
63
65
  break;
64
66
  }
65
67
  case HtEntryType::HT_WIDTH_32: {
66
68
  hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_32::salt)) * 8;
67
- Resize<aggr_ht_entry_32>(STANDARD_VECTOR_SIZE * 2L);
69
+ Resize<aggr_ht_entry_32>(initial_capacity);
68
70
  break;
69
71
  }
70
72
  default:
@@ -155,25 +157,34 @@ void GroupedAggregateHashTable::VerifyInternal() {
155
157
  D_ASSERT(count == entries);
156
158
  }
157
159
 
158
- idx_t GroupedAggregateHashTable::MaxCapacity() {
159
- idx_t max_pages = 0;
160
- idx_t max_tuples = 0;
160
+ idx_t GroupedAggregateHashTable::InitialCapacity() {
161
+ return STANDARD_VECTOR_SIZE * 2ULL;
162
+ }
163
+
164
+ idx_t GroupedAggregateHashTable::GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size) {
165
+ idx_t max_pages;
166
+ idx_t max_tuples;
161
167
 
162
168
  switch (entry_type) {
163
169
  case HtEntryType::HT_WIDTH_32:
164
170
  max_pages = NumericLimits<uint8_t>::Maximum();
165
171
  max_tuples = NumericLimits<uint16_t>::Maximum();
166
172
  break;
167
- default:
168
- D_ASSERT(entry_type == HtEntryType::HT_WIDTH_64);
173
+ case HtEntryType::HT_WIDTH_64:
169
174
  max_pages = NumericLimits<uint32_t>::Maximum();
170
175
  max_tuples = NumericLimits<uint16_t>::Maximum();
171
176
  break;
177
+ default:
178
+ throw InternalException("Unsupported hash table width");
172
179
  }
173
180
 
174
181
  return max_pages * MinValue(max_tuples, (idx_t)Storage::BLOCK_SIZE / tuple_size);
175
182
  }
176
183
 
184
+ idx_t GroupedAggregateHashTable::MaxCapacity() {
185
+ return GetMaxCapacity(entry_type, tuple_size);
186
+ }
187
+
177
188
  void GroupedAggregateHashTable::Verify() {
178
189
  #ifdef DEBUG
179
190
  switch (entry_type) {
@@ -208,7 +219,6 @@ void GroupedAggregateHashTable::Resize(idx_t size) {
208
219
  hashes_hdl_ptr = hashes_hdl.Ptr();
209
220
  }
210
221
  memset(hashes_hdl_ptr, 0, byte_size);
211
- hashes_end_ptr = hashes_hdl_ptr + byte_size;
212
222
  capacity = size;
213
223
 
214
224
  auto hashes_arr = (ENTRY *)hashes_hdl_ptr;
@@ -235,7 +245,8 @@ void GroupedAggregateHashTable::Resize(idx_t size) {
235
245
  Verify();
236
246
  }
237
247
 
238
- idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload, AggregateType filter) {
248
+ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
249
+ AggregateType filter) {
239
250
  vector<idx_t> aggregate_filter;
240
251
 
241
252
  auto &aggregates = layout.GetAggregates();
@@ -245,34 +256,32 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload,
245
256
  aggregate_filter.push_back(i);
246
257
  }
247
258
  }
248
- return AddChunk(groups, payload, aggregate_filter);
259
+ return AddChunk(state, groups, payload, aggregate_filter);
249
260
  }
250
261
 
251
- idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload, const vector<idx_t> &filter) {
262
+ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
263
+ const vector<idx_t> &filter) {
252
264
  Vector hashes(LogicalType::HASH);
253
265
  groups.Hash(hashes);
254
266
 
255
- return AddChunk(groups, hashes, payload, filter);
267
+ return AddChunk(state, groups, hashes, payload, filter);
256
268
  }
257
269
 
258
- idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload,
259
- const vector<idx_t> &filter) {
270
+ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
271
+ DataChunk &payload, const vector<idx_t> &filter) {
260
272
  D_ASSERT(!is_finalized);
261
273
 
262
274
  if (groups.size() == 0) {
263
275
  return 0;
264
276
  }
265
- // dummy
266
- SelectionVector new_groups(STANDARD_VECTOR_SIZE);
267
277
 
268
278
  D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount());
269
279
  for (idx_t i = 0; i < groups.ColumnCount(); i++) {
270
280
  D_ASSERT(groups.GetTypes()[i] == layout.GetTypes()[i]);
271
281
  }
272
282
 
273
- Vector addresses(LogicalType::POINTER);
274
- auto new_group_count = FindOrCreateGroups(groups, group_hashes, addresses, new_groups);
275
- VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), payload.size());
283
+ auto new_group_count = FindOrCreateGroups(state, groups, group_hashes, state.addresses, state.new_groups);
284
+ VectorOperations::AddInPlace(state.addresses, layout.GetAggrOffset(), payload.size());
276
285
 
277
286
  // now every cell has an entry
278
287
  // update the aggregates
@@ -285,20 +294,21 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe
285
294
  if (filter_idx >= filter.size() || i < filter[filter_idx]) {
286
295
  // Skip all the aggregates that are not in the filter
287
296
  payload_idx += aggr.child_count;
288
- VectorOperations::AddInPlace(addresses, aggr.payload_size, payload.size());
297
+ VectorOperations::AddInPlace(state.addresses, aggr.payload_size, payload.size());
289
298
  continue;
290
299
  }
291
300
  D_ASSERT(i == filter[filter_idx]);
292
301
 
293
302
  if (aggr.aggr_type != AggregateType::DISTINCT && aggr.filter) {
294
- RowOperations::UpdateFilteredStates(filter_set.GetFilterData(i), aggr, addresses, payload, payload_idx);
303
+ RowOperations::UpdateFilteredStates(filter_set.GetFilterData(i), aggr, state.addresses, payload,
304
+ payload_idx);
295
305
  } else {
296
- RowOperations::UpdateStates(aggr, addresses, payload, payload_idx, payload.size());
306
+ RowOperations::UpdateStates(aggr, state.addresses, payload, payload_idx, payload.size());
297
307
  }
298
308
 
299
309
  // move to the next aggregate
300
310
  payload_idx += aggr.child_count;
301
- VectorOperations::AddInPlace(addresses, aggr.payload_size, payload.size());
311
+ VectorOperations::AddInPlace(state.addresses, aggr.payload_size, payload.size());
302
312
  filter_idx++;
303
313
  }
304
314
 
@@ -316,16 +326,23 @@ void GroupedAggregateHashTable::FetchAggregates(DataChunk &groups, DataChunk &re
316
326
  if (groups.size() == 0) {
317
327
  return;
318
328
  }
329
+
319
330
  // find the groups associated with the addresses
320
331
  // FIXME: this should not use the FindOrCreateGroups, creating them is unnecessary
332
+ AggregateHTAppendState append_state;
321
333
  Vector addresses(LogicalType::POINTER);
322
- FindOrCreateGroups(groups, addresses);
334
+ FindOrCreateGroups(append_state, groups, addresses);
323
335
  // now fetch the aggregates
324
336
  RowOperations::FinalizeStates(layout, addresses, result, 0);
325
337
  }
326
338
 
339
+ idx_t GroupedAggregateHashTable::ResizeThreshold() {
340
+ return capacity / LOAD_FACTOR;
341
+ }
342
+
327
343
  template <class ENTRY>
328
- idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses,
344
+ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups,
345
+ Vector &group_hashes, Vector &addresses,
329
346
  SelectionVector &new_groups_out) {
330
347
  D_ASSERT(!is_finalized);
331
348
 
@@ -334,7 +351,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
334
351
  }
335
352
 
336
353
  // resize at 50% capacity, also need to fit the entire vector
337
- if (capacity - entries <= groups.size() || entries > capacity / LOAD_FACTOR) {
354
+ if (capacity - entries <= groups.size() || entries > ResizeThreshold()) {
338
355
  Resize<ENTRY>(capacity * 2);
339
356
  }
340
357
 
@@ -347,42 +364,47 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
347
364
  group_hashes.Flatten(groups.size());
348
365
  auto group_hashes_ptr = FlatVector::GetData<hash_t>(group_hashes);
349
366
 
350
- D_ASSERT(ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
351
- D_ASSERT(ht_offsets.GetType() == LogicalType::BIGINT);
367
+ D_ASSERT(state.ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
368
+ D_ASSERT(state.ht_offsets.GetType() == LogicalType::BIGINT);
352
369
 
353
370
  D_ASSERT(addresses.GetType() == LogicalType::POINTER);
354
371
  addresses.Flatten(groups.size());
355
372
  auto addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);
356
373
 
357
- // now compute the entry in the table based on the hash using a modulo
358
- UnaryExecutor::Execute<hash_t, uint64_t>(group_hashes, ht_offsets, groups.size(), [&](hash_t element) {
374
+ // compute the entry in the table based on the hash using a modulo
375
+ // and precompute the hash salts for faster comparison below
376
+ D_ASSERT(state.hash_salts.GetType() == LogicalType::SMALLINT);
377
+ auto ht_offsets_ptr = FlatVector::GetData<uint64_t>(state.ht_offsets);
378
+ auto hash_salts_ptr = FlatVector::GetData<uint16_t>(state.hash_salts);
379
+ for (idx_t r = 0; r < groups.size(); r++) {
380
+ auto element = group_hashes_ptr[r];
359
381
  D_ASSERT((element & bitmask) == (element % capacity));
360
- return (element & bitmask);
361
- });
362
- auto ht_offsets_ptr = FlatVector::GetData<uint64_t>(ht_offsets);
363
-
364
- // precompute the hash salts for faster comparison below
365
- D_ASSERT(hash_salts.GetType() == LogicalType::SMALLINT);
366
- UnaryExecutor::Execute<hash_t, uint16_t>(group_hashes, hash_salts, groups.size(),
367
- [&](hash_t element) { return (element >> hash_prefix_shift); });
368
- auto hash_salts_ptr = FlatVector::GetData<uint16_t>(hash_salts);
369
-
382
+ ht_offsets_ptr[r] = element & bitmask;
383
+ hash_salts_ptr[r] = element >> hash_prefix_shift;
384
+ }
370
385
  // we start out with all entries [0, 1, 2, ..., groups.size()]
371
386
  const SelectionVector *sel_vector = FlatVector::IncrementalSelectionVector();
372
387
 
373
388
  idx_t remaining_entries = groups.size();
374
389
 
375
390
  // make a chunk that references the groups and the hashes
376
- DataChunk group_chunk;
377
- group_chunk.InitializeEmpty(layout.GetTypes());
391
+ if (state.group_chunk.ColumnCount() == 0) {
392
+ state.group_chunk.InitializeEmpty(layout.GetTypes());
393
+ }
394
+ D_ASSERT(state.group_chunk.ColumnCount() == layout.GetTypes().size());
378
395
  for (idx_t grp_idx = 0; grp_idx < groups.ColumnCount(); grp_idx++) {
379
- group_chunk.data[grp_idx].Reference(groups.data[grp_idx]);
396
+ state.group_chunk.data[grp_idx].Reference(groups.data[grp_idx]);
380
397
  }
381
- group_chunk.data[groups.ColumnCount()].Reference(group_hashes);
382
- group_chunk.SetCardinality(groups);
398
+ state.group_chunk.data[groups.ColumnCount()].Reference(group_hashes);
399
+ state.group_chunk.SetCardinality(groups);
383
400
 
384
401
  // convert all vectors to unified format
385
- auto group_data = group_chunk.ToUnifiedFormat();
402
+ if (!state.group_data) {
403
+ state.group_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[state.group_chunk.ColumnCount()]);
404
+ }
405
+ for (idx_t col_idx = 0; col_idx < state.group_chunk.ColumnCount(); col_idx++) {
406
+ state.group_chunk.data[col_idx].ToUnifiedFormat(state.group_chunk.size(), state.group_data[col_idx]);
407
+ }
386
408
 
387
409
  idx_t new_group_count = 0;
388
410
  while (remaining_entries > 0) {
@@ -415,7 +437,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
415
437
  ht_entry_ptr->page_offset = payload_page_offset++;
416
438
 
417
439
  // update selection lists for outer loops
418
- empty_vector.set_index(new_entry_count++, index);
440
+ state.empty_vector.set_index(new_entry_count++, index);
419
441
  new_groups_out.set_index(new_group_count++, index);
420
442
  entries++;
421
443
 
@@ -425,37 +447,37 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
425
447
  // cell is occupied: add to check list
426
448
  // only need to check if hash salt in ptr == prefix of hash in payload
427
449
  if (ht_entry_ptr->salt == hash_salts_ptr[index]) {
428
- group_compare_vector.set_index(need_compare_count++, index);
450
+ state.group_compare_vector.set_index(need_compare_count++, index);
429
451
 
430
452
  auto page_ptr = payload_hds_ptrs[ht_entry_ptr->page_nr - 1];
431
453
  auto page_offset = ht_entry_ptr->page_offset * tuple_size;
432
454
  addresses_ptr[index] = page_ptr + page_offset;
433
455
 
434
456
  } else {
435
- no_match_vector.set_index(no_match_count++, index);
457
+ state.no_match_vector.set_index(no_match_count++, index);
436
458
  }
437
459
  }
438
460
  }
439
461
 
440
462
  // for each of the locations that are empty, serialize the group columns to the locations
441
- RowOperations::Scatter(group_chunk, group_data.get(), layout, addresses, *string_heap, empty_vector,
442
- new_entry_count);
443
- RowOperations::InitializeStates(layout, addresses, empty_vector, new_entry_count);
463
+ RowOperations::Scatter(state.group_chunk, state.group_data.get(), layout, addresses, *string_heap,
464
+ state.empty_vector, new_entry_count);
465
+ RowOperations::InitializeStates(layout, addresses, state.empty_vector, new_entry_count);
444
466
 
445
467
  // now we have only the tuples remaining that might match to an existing group
446
468
  // start performing comparisons with each of the groups
447
- RowOperations::Match(group_chunk, group_data.get(), layout, addresses, predicates, group_compare_vector,
448
- need_compare_count, &no_match_vector, no_match_count);
469
+ RowOperations::Match(state.group_chunk, state.group_data.get(), layout, addresses, predicates,
470
+ state.group_compare_vector, need_compare_count, &state.no_match_vector, no_match_count);
449
471
 
450
472
  // each of the entries that do not match we move them to the next entry in the HT
451
473
  for (idx_t i = 0; i < no_match_count; i++) {
452
- idx_t index = no_match_vector.get_index(i);
474
+ idx_t index = state.no_match_vector.get_index(i);
453
475
  ht_offsets_ptr[index]++;
454
476
  if (ht_offsets_ptr[index] >= capacity) {
455
477
  ht_offsets_ptr[index] = 0;
456
478
  }
457
479
  }
458
- sel_vector = &no_match_vector;
480
+ sel_vector = &state.no_match_vector;
459
481
  remaining_entries = no_match_count;
460
482
  }
461
483
 
@@ -464,29 +486,30 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
464
486
 
465
487
  // this is to support distinct aggregations where we need to record whether we
466
488
  // have already seen a value for a group
467
- idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out,
489
+ idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
490
+ Vector &group_hashes, Vector &addresses_out,
468
491
  SelectionVector &new_groups_out) {
469
492
  switch (entry_type) {
470
493
  case HtEntryType::HT_WIDTH_64:
471
- return FindOrCreateGroupsInternal<aggr_ht_entry_64>(groups, group_hashes, addresses_out, new_groups_out);
494
+ return FindOrCreateGroupsInternal<aggr_ht_entry_64>(state, groups, group_hashes, addresses_out, new_groups_out);
472
495
  case HtEntryType::HT_WIDTH_32:
473
- return FindOrCreateGroupsInternal<aggr_ht_entry_32>(groups, group_hashes, addresses_out, new_groups_out);
496
+ return FindOrCreateGroupsInternal<aggr_ht_entry_32>(state, groups, group_hashes, addresses_out, new_groups_out);
474
497
  default:
475
498
  throw InternalException("Unknown HT entry width");
476
499
  }
477
500
  }
478
501
 
479
- void GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses) {
502
+ void GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
503
+ Vector &addresses) {
480
504
  // create a dummy new_groups sel vector
481
- SelectionVector new_groups(STANDARD_VECTOR_SIZE);
482
- FindOrCreateGroups(groups, addresses, new_groups);
505
+ FindOrCreateGroups(state, groups, addresses, state.new_groups);
483
506
  }
484
507
 
485
- idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses_out,
486
- SelectionVector &new_groups_out) {
508
+ idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
509
+ Vector &addresses_out, SelectionVector &new_groups_out) {
487
510
  Vector hashes(LogicalType::HASH);
488
511
  groups.Hash(hashes);
489
- return FindOrCreateGroups(groups, hashes, addresses_out, new_groups_out);
512
+ return FindOrCreateGroups(state, groups, hashes, addresses_out, new_groups_out);
490
513
  }
491
514
 
492
515
  struct FlushMoveState {
@@ -516,7 +539,8 @@ void GroupedAggregateHashTable::FlushMove(FlushMoveState &state, Vector &source_
516
539
  *FlatVector::IncrementalSelectionVector(), count, layout, col_no);
517
540
  }
518
541
 
519
- FindOrCreateGroups(state.groups, source_hashes, state.group_addresses, state.new_groups_sel);
542
+ AggregateHTAppendState append_state;
543
+ FindOrCreateGroups(append_state, state.groups, source_hashes, state.group_addresses, state.new_groups_sel);
520
544
 
521
545
  RowOperations::CombineStates(layout, source_addresses, state.group_addresses, count);
522
546
  }