duckdb 0.8.2-dev157.0 → 0.8.2-dev1573.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (493)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  61. package/src/duckdb/src/common/file_system.cpp +19 -0
  62. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  63. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  64. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  65. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  66. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  67. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/bit.cpp +51 -0
  73. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  74. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  75. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  76. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  77. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  78. package/src/duckdb/src/common/types/date.cpp +9 -0
  79. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  80. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  83. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  84. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  85. package/src/duckdb/src/common/types.cpp +8 -655
  86. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  87. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  88. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  94. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  95. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  96. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  97. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  98. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  99. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  100. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  102. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  103. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  104. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  105. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  106. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  107. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  108. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  109. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  110. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  111. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  112. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  113. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  114. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  115. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  116. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  117. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  118. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  119. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  121. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  122. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  124. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  125. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  126. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  127. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  128. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  129. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  131. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  132. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  133. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  134. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  135. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  136. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  137. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  138. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  139. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  140. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  141. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  142. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  143. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  144. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  145. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  146. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  147. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  148. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  149. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  150. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  151. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  152. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  153. package/src/duckdb/src/function/function.cpp +3 -1
  154. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  155. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  156. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  157. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  158. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  159. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  160. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  161. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  162. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  163. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  164. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  165. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  166. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  167. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  168. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  169. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  170. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  171. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  172. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  173. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  174. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  176. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  177. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  178. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  179. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  180. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  181. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  182. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  183. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  184. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  185. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  186. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  187. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  188. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  189. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  190. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  191. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  192. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  193. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  194. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  195. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  196. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  197. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  198. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  199. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  200. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  201. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  203. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  204. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  205. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  206. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  207. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  210. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  211. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  213. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  218. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  221. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  222. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  229. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  230. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  231. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  232. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  233. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  234. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  235. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  236. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  237. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  238. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  239. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  240. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  241. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  242. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  245. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  246. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  247. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  248. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  249. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  251. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  252. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  253. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  254. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  255. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  256. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  257. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  258. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  260. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  262. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  263. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  264. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  265. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  266. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  267. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  268. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  269. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  270. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  271. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  272. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  273. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  274. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  275. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  276. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  277. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  278. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  279. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  280. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  281. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  284. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  285. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  286. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  287. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  291. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  295. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  296. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  297. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  300. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  301. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  302. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  303. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  304. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  305. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  306. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  307. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  308. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  309. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  310. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  311. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  312. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  314. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  315. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  316. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  317. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  318. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  319. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  320. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  321. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  322. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  323. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  324. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  325. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  326. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  327. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  328. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  329. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  330. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  331. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  332. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  333. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  334. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  335. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  336. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  337. package/src/duckdb/src/include/duckdb.h +28 -0
  338. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  339. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  340. package/src/duckdb/src/main/config.cpp +4 -0
  341. package/src/duckdb/src/main/database.cpp +1 -1
  342. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  343. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  344. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  345. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  346. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  347. package/src/duckdb/src/main/relation.cpp +6 -5
  348. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  349. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  350. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  351. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  352. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  353. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  354. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  355. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  356. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  357. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  358. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  359. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  360. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  361. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  362. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  363. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  364. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  365. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  366. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  367. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  368. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  369. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  370. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  371. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  372. package/src/duckdb/src/parallel/executor.cpp +15 -0
  373. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  374. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  375. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  376. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  377. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  378. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  379. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  380. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  381. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  382. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  383. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  384. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  385. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  386. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  387. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  388. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  389. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  390. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  391. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  392. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  393. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  394. package/src/duckdb/src/parser/parser.cpp +8 -2
  395. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  396. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  397. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  398. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  399. package/src/duckdb/src/parser/query_node.cpp +15 -37
  400. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  401. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  402. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  403. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  404. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  405. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  406. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  407. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  408. package/src/duckdb/src/parser/tableref.cpp +0 -44
  409. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  410. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  411. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  412. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  413. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  414. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  415. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  416. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  417. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  418. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  419. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  420. package/src/duckdb/src/parser/transformer.cpp +15 -0
  421. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  422. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  423. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  424. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  425. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  426. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  427. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  428. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  429. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  430. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  431. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  432. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  433. package/src/duckdb/src/planner/binder.cpp +44 -31
  434. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  435. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  436. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  437. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  438. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  439. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  440. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  441. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  442. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  443. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  444. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  445. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  446. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  447. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  448. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  449. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  450. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  451. package/src/duckdb/src/storage/data_table.cpp +1 -1
  452. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  453. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  454. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  455. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  456. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  457. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  458. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  459. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  460. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  461. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  462. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  463. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  464. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  465. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  466. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  467. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  468. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  469. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  470. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  471. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  472. package/src/duckdb/ub_src_common.cpp +2 -0
  473. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  474. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  475. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  476. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  477. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  478. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  479. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  480. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  481. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  482. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  483. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  484. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  485. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  486. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  487. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  488. package/src/statement.cpp +10 -3
  489. package/test/test_all_types.test.ts +233 -0
  490. package/tsconfig.json +1 -0
  491. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  492. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  493. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -5,6 +5,7 @@
5
5
  #include "duckdb/planner/expression/list.hpp"
6
6
  #include "duckdb/planner/expression_iterator.hpp"
7
7
  #include "duckdb/planner/operator/list.hpp"
8
+ #include "duckdb/common/queue.hpp"
8
9
 
9
10
  #include <algorithm>
10
11
  #include <cmath>
@@ -323,6 +324,65 @@ void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode &node) {
323
324
  }
324
325
  }
325
326
 
327
+ static vector<unordered_set<idx_t>> AddSuperSets(const vector<unordered_set<idx_t>> &current,
328
+ const vector<idx_t> &all_neighbors) {
329
+ vector<unordered_set<idx_t>> ret;
330
+
331
+ for (const auto &neighbor_set : current) {
332
+ auto max_val = std::max_element(neighbor_set.begin(), neighbor_set.end());
333
+ for (const auto &neighbor : all_neighbors) {
334
+ if (*max_val >= neighbor) {
335
+ continue;
336
+ }
337
+ if (neighbor_set.count(neighbor) == 0) {
338
+ unordered_set<idx_t> new_set;
339
+ for (auto &n : neighbor_set) {
340
+ new_set.insert(n);
341
+ }
342
+ new_set.insert(neighbor);
343
+ ret.push_back(new_set);
344
+ }
345
+ }
346
+ }
347
+
348
+ return ret;
349
+ }
350
+
351
+ // works by first creating all sets with cardinality 1
352
+ // then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
353
+ // is greater than all relations in the set.
354
+ static vector<unordered_set<idx_t>> GetAllNeighborSets(vector<idx_t> neighbors) {
355
+ vector<unordered_set<idx_t>> ret;
356
+ sort(neighbors.begin(), neighbors.end());
357
+ vector<unordered_set<idx_t>> added;
358
+ for (auto &neighbor : neighbors) {
359
+ added.push_back(unordered_set<idx_t>({neighbor}));
360
+ ret.push_back(unordered_set<idx_t>({neighbor}));
361
+ }
362
+ do {
363
+ added = AddSuperSets(added, neighbors);
364
+ for (auto &d : added) {
365
+ ret.push_back(d);
366
+ }
367
+ } while (!added.empty());
368
+ #if DEBUG
369
+ // drive by test to make sure we have an accurate amount of
370
+ // subsets, and that each neighbor is in a correct amount
371
+ // of those subsets.
372
+ D_ASSERT(ret.size() == pow(2, neighbors.size()) - 1);
373
+ for (auto &n : neighbors) {
374
+ idx_t count = 0;
375
+ for (auto &set : ret) {
376
+ if (set.count(n) >= 1) {
377
+ count += 1;
378
+ }
379
+ }
380
+ D_ASSERT(count == pow(2, neighbors.size() - 1));
381
+ }
382
+ #endif
383
+ return ret;
384
+ }
385
+
326
386
  JoinNode &JoinOrderOptimizer::EmitPair(JoinRelationSet &left, JoinRelationSet &right,
327
387
  const vector<reference<NeighborInfo>> &info) {
328
388
  // get the left and right join plans
@@ -405,8 +465,19 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
405
465
  //! Neighbors should be reversed when iterating over them.
406
466
  std::sort(neighbors.begin(), neighbors.end(), std::greater_equal<idx_t>());
407
467
  for (idx_t i = 0; i < neighbors.size() - 1; i++) {
408
- D_ASSERT(neighbors[i] >= neighbors[i + 1]);
468
+ D_ASSERT(neighbors[i] > neighbors[i + 1]);
469
+ }
470
+
471
+ // The DPhyp paper is missing this.
472
+ // Because we are traversing in reverse order, we need to add neighbors whose number is smaller than the current
473
+ // node to exclusion_set
474
+ // This avoids duplicated enumeration
475
+ unordered_set<idx_t> new_exclusion_set = exclusion_set;
476
+ for (idx_t i = 0; i < neighbors.size(); ++i) {
477
+ D_ASSERT(new_exclusion_set.find(neighbors[i]) == new_exclusion_set.end());
478
+ new_exclusion_set.insert(neighbors[i]);
409
479
  }
480
+
410
481
  for (auto neighbor : neighbors) {
411
482
  // since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
412
483
  // (only!) this neighbor, hence we have to do a connectedness check before we can emit it
@@ -417,27 +488,35 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
417
488
  return false;
418
489
  }
419
490
  }
420
- if (!EnumerateCmpRecursive(node, neighbor_relation, exclusion_set)) {
491
+
492
+ if (!EnumerateCmpRecursive(node, neighbor_relation, new_exclusion_set)) {
421
493
  return false;
422
494
  }
495
+
496
+ new_exclusion_set.erase(neighbor);
423
497
  }
424
498
  return true;
425
499
  }
426
500
 
427
501
  bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right,
428
- unordered_set<idx_t> exclusion_set) {
502
+ unordered_set<idx_t> &exclusion_set) {
429
503
  // get the neighbors of the second relation under the exclusion set
430
504
  auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
431
505
  if (neighbors.empty()) {
432
506
  return true;
433
507
  }
508
+
509
+ auto all_subset = GetAllNeighborSets(neighbors);
434
510
  vector<reference<JoinRelationSet>> union_sets;
435
- union_sets.reserve(neighbors.size());
436
- for (idx_t i = 0; i < neighbors.size(); i++) {
437
- auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
511
+ union_sets.reserve(all_subset.size());
512
+ for (const auto &rel_set : all_subset) {
513
+ auto &neighbor = set_manager.GetJoinRelation(rel_set);
438
514
  // emit the combinations of this node and its neighbors
439
515
  auto &combined_set = set_manager.Union(right, neighbor);
440
- if (combined_set.count > right.count && plans.find(&combined_set) != plans.end()) {
516
+ // If combined_set.count == right.count, this means we found a neighbor that has been present before.
517
+ // This means we didn't set exclusion_set correctly.
518
+ D_ASSERT(combined_set.count > right.count);
519
+ if (plans.find(&combined_set) != plans.end()) {
441
520
  auto connections = query_graph.GetConnections(left, combined_set);
442
521
  if (!connections.empty()) {
443
522
  if (!TryEmitPair(left, combined_set, connections)) {
@@ -447,11 +526,15 @@ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelati
447
526
  }
448
527
  union_sets.push_back(combined_set);
449
528
  }
450
- // recursively enumerate the sets
529
+
451
530
  unordered_set<idx_t> new_exclusion_set = exclusion_set;
452
- for (idx_t i = 0; i < neighbors.size(); i++) {
531
+ for (const auto &neighbor : neighbors) {
532
+ new_exclusion_set.insert(neighbor);
533
+ }
534
+
535
+ // recursively enumerate the sets
536
+ for (idx_t i = 0; i < union_sets.size(); i++) {
453
537
  // updated the set of excluded entries with this neighbor
454
- new_exclusion_set.insert(neighbors[i]);
455
538
  if (!EnumerateCmpRecursive(left, union_sets[i], new_exclusion_set)) {
456
539
  return false;
457
540
  }
@@ -465,26 +548,30 @@ bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet &node, unordered_
465
548
  if (neighbors.empty()) {
466
549
  return true;
467
550
  }
551
+
552
+ auto all_subset = GetAllNeighborSets(neighbors);
468
553
  vector<reference<JoinRelationSet>> union_sets;
469
- union_sets.reserve(neighbors.size());
470
- for (idx_t i = 0; i < neighbors.size(); i++) {
471
- auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
554
+ union_sets.reserve(all_subset.size());
555
+ for (const auto &rel_set : all_subset) {
556
+ auto &neighbor = set_manager.GetJoinRelation(rel_set);
472
557
  // emit the combinations of this node and its neighbors
473
558
  auto &new_set = set_manager.Union(node, neighbor);
474
- if (new_set.count > node.count && plans.find(&new_set) != plans.end()) {
559
+ D_ASSERT(new_set.count > node.count);
560
+ if (plans.find(&new_set) != plans.end()) {
475
561
  if (!EmitCSG(new_set)) {
476
562
  return false;
477
563
  }
478
564
  }
479
565
  union_sets.push_back(new_set);
480
566
  }
481
- // recursively enumerate the sets
567
+
482
568
  unordered_set<idx_t> new_exclusion_set = exclusion_set;
483
- for (idx_t i = 0; i < neighbors.size(); i++) {
484
- // Reset the exclusion set so that the algorithm considers all combinations
485
- // of the exclusion_set with a subset of neighbors.
486
- new_exclusion_set = exclusion_set;
487
- new_exclusion_set.insert(neighbors[i]);
569
+ for (const auto &neighbor : neighbors) {
570
+ new_exclusion_set.insert(neighbor);
571
+ }
572
+
573
+ // recursively enumerate the sets
574
+ for (idx_t i = 0; i < union_sets.size(); i++) {
488
575
  // updated the set of excluded entries with this neighbor
489
576
  if (!EnumerateCSGRecursive(union_sets[i], new_exclusion_set)) {
490
577
  return false;
@@ -505,7 +592,7 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
505
592
  }
506
593
  // initialize the set of exclusion_set as all the nodes with a number below this
507
594
  unordered_set<idx_t> exclusion_set;
508
- for (idx_t j = 0; j < i - 1; j++) {
595
+ for (idx_t j = 0; j < i; j++) {
509
596
  exclusion_set.insert(j);
510
597
  }
511
598
  // then we recursively search for neighbors that do not belong to the banned entries
@@ -516,63 +603,6 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
516
603
  return true;
517
604
  }
518
605
 
519
- static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> current,
520
- const vector<idx_t> &all_neighbors) {
521
- vector<unordered_set<idx_t>> ret;
522
- for (auto &neighbor : all_neighbors) {
523
- for (auto &neighbor_set : current) {
524
- auto max_val = std::max_element(neighbor_set.begin(), neighbor_set.end());
525
- if (*max_val >= neighbor) {
526
- continue;
527
- }
528
- if (neighbor_set.count(neighbor) == 0) {
529
- unordered_set<idx_t> new_set;
530
- for (auto &n : neighbor_set) {
531
- new_set.insert(n);
532
- }
533
- new_set.insert(neighbor);
534
- ret.push_back(new_set);
535
- }
536
- }
537
- }
538
- return ret;
539
- }
540
-
541
- // works by first creating all sets with cardinality 1
542
- // then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
543
- // is greater than all relations in the set.
544
- static vector<unordered_set<idx_t>> GetAllNeighborSets(unordered_set<idx_t> &exclusion_set, vector<idx_t> neighbors) {
545
- vector<unordered_set<idx_t>> ret;
546
- sort(neighbors.begin(), neighbors.end());
547
- vector<unordered_set<idx_t>> added;
548
- for (auto &neighbor : neighbors) {
549
- added.push_back(unordered_set<idx_t>({neighbor}));
550
- ret.push_back(unordered_set<idx_t>({neighbor}));
551
- }
552
- do {
553
- added = AddSuperSets(added, neighbors);
554
- for (auto &d : added) {
555
- ret.push_back(d);
556
- }
557
- } while (!added.empty());
558
- #if DEBUG
559
- // drive by test to make sure we have an accurate amount of
560
- // subsets, and that each neighbor is in a correct amount
561
- // of those subsets.
562
- D_ASSERT(ret.size() == pow(2, neighbors.size()) - 1);
563
- for (auto &n : neighbors) {
564
- idx_t count = 0;
565
- for (auto &set : ret) {
566
- if (set.count(n) >= 1) {
567
- count += 1;
568
- }
569
- }
570
- D_ASSERT(count == pow(2, neighbors.size() - 1));
571
- }
572
- #endif
573
- return ret;
574
- }
575
-
576
606
  void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
577
607
  if (!NodeInFullPlan(new_plan)) {
578
608
  // if the new node is not in the full plan, feel free to return
@@ -586,8 +616,8 @@ void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
586
616
  exclusion_set.insert(new_set.relations[i]);
587
617
  }
588
618
  auto neighbors = query_graph.GetNeighbors(new_set, exclusion_set);
589
- auto all_neighbors = GetAllNeighborSets(exclusion_set, neighbors);
590
- for (auto neighbor : all_neighbors) {
619
+ auto all_neighbors = GetAllNeighborSets(neighbors);
620
+ for (const auto &neighbor : all_neighbors) {
591
621
  auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
592
622
  auto &combined_set = set_manager.Union(new_set, neighbor_relation);
593
623
 
@@ -820,8 +850,9 @@ GenerateJoinRelation JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<Logical
820
850
  // FILTER on top of GET, add estimated properties to both
821
851
  auto &filter_props = *result_operator->estimated_props;
822
852
  auto &child_operator = *result_operator->children[0];
823
- child_operator.estimated_props = make_uniq<EstimatedProperties>(
824
- filter_props.GetCardinality<double>() / CardinalityEstimator::DEFAULT_SELECTIVITY, filter_props.GetCost());
853
+ child_operator.estimated_props = make_uniq<EstimatedProperties>(filter_props.GetCardinality<double>() /
854
+ CardinalityEstimator::DEFAULT_SELECTIVITY,
855
+ filter_props.GetCost<double>());
825
856
  child_operator.estimated_cardinality = child_operator.estimated_props->GetCardinality<idx_t>();
826
857
  child_operator.has_estimated_cardinality = true;
827
858
  }
@@ -65,7 +65,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
65
65
  return GetJoinRelation(std::move(relations), count);
66
66
  }
67
67
 
68
- JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &bindings) {
68
+ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(const unordered_set<idx_t> &bindings) {
69
69
  // create a sorted vector of the relations
70
70
  unsafe_unique_array<idx_t> relations = bindings.empty() ? nullptr : make_unsafe_uniq_array<idx_t>(bindings.size());
71
71
  idx_t count = 0;
@@ -94,16 +94,12 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati
94
94
  relations[count++] = left.relations[i];
95
95
  }
96
96
  break;
97
- } else if (left.relations[i] == right.relations[j]) {
98
- // equivalent, add only one of the two pairs
99
- relations[count++] = left.relations[i];
100
- i++;
101
- j++;
102
97
  } else if (left.relations[i] < right.relations[j]) {
103
98
  // left is smaller, progress left and add it to the set
104
99
  relations[count++] = left.relations[i];
105
100
  i++;
106
101
  } else {
102
+ D_ASSERT(left.relations[i] > right.relations[j]);
107
103
  // right is smaller, progress right and add it to the set
108
104
  relations[count++] = right.relations[j];
109
105
  j++;
@@ -76,22 +76,30 @@ void QueryGraph::CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optio
76
76
  info.neighbors.push_back(std::move(n));
77
77
  }
78
78
 
79
+ void QueryGraph::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
80
+ const std::function<bool(NeighborInfo &)> &callback) {
81
+
82
+ for (auto &neighbor : info.get().neighbors) {
83
+ if (callback(*neighbor)) {
84
+ return;
85
+ }
86
+ }
87
+
88
+ for (idx_t node_index = index; node_index < node.count; ++node_index) {
89
+ auto iter = info.get().children.find(node.relations[node_index]);
90
+ if (iter != info.get().children.end()) {
91
+ reference<QueryEdge> new_info = *iter->second;
92
+ EnumerateNeighborsDFS(node, new_info, node_index + 1, callback);
93
+ }
94
+ }
95
+ }
96
+
79
97
  void QueryGraph::EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) {
80
98
  for (idx_t j = 0; j < node.count; j++) {
81
- reference<QueryEdge> info = root;
82
- for (idx_t i = j; i < node.count; i++) {
83
- auto entry = info.get().children.find(node.relations[i]);
84
- if (entry == info.get().children.end()) {
85
- // node not found
86
- break;
87
- }
88
- // check if any subset of the other set is in this sets neighbors
89
- info = *entry->second;
90
- for (auto &neighbor : info.get().neighbors) {
91
- if (callback(*neighbor)) {
92
- return;
93
- }
94
- }
99
+ auto iter = root.children.find(node.relations[j]);
100
+ if (iter != root.children.end()) {
101
+ reference<QueryEdge> new_info = *iter->second;
102
+ EnumerateNeighborsDFS(node, new_info, j + 1, callback);
95
103
  }
96
104
  }
97
105
  }
@@ -1,27 +1,28 @@
1
1
  #include "duckdb/optimizer/optimizer.hpp"
2
2
 
3
3
  #include "duckdb/execution/column_binding_resolver.hpp"
4
- #include "duckdb/execution/expression_executor.hpp"
5
4
  #include "duckdb/main/client_context.hpp"
6
5
  #include "duckdb/main/config.hpp"
7
6
  #include "duckdb/main/query_profiler.hpp"
8
7
  #include "duckdb/optimizer/column_lifetime_optimizer.hpp"
9
8
  #include "duckdb/optimizer/common_aggregate_optimizer.hpp"
9
+ #include "duckdb/optimizer/compressed_materialization.hpp"
10
10
  #include "duckdb/optimizer/cse_optimizer.hpp"
11
11
  #include "duckdb/optimizer/deliminator.hpp"
12
- #include "duckdb/optimizer/unnest_rewriter.hpp"
13
12
  #include "duckdb/optimizer/expression_heuristics.hpp"
14
13
  #include "duckdb/optimizer/filter_pullup.hpp"
15
14
  #include "duckdb/optimizer/filter_pushdown.hpp"
16
15
  #include "duckdb/optimizer/in_clause_rewriter.hpp"
17
16
  #include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
18
17
  #include "duckdb/optimizer/regex_range_filter.hpp"
18
+ #include "duckdb/optimizer/remove_duplicate_groups.hpp"
19
19
  #include "duckdb/optimizer/remove_unused_columns.hpp"
20
20
  #include "duckdb/optimizer/rule/equal_or_null_simplification.hpp"
21
21
  #include "duckdb/optimizer/rule/in_clause_simplification.hpp"
22
22
  #include "duckdb/optimizer/rule/list.hpp"
23
23
  #include "duckdb/optimizer/statistics_propagator.hpp"
24
24
  #include "duckdb/optimizer/topn_optimizer.hpp"
25
+ #include "duckdb/optimizer/unnest_rewriter.hpp"
25
26
  #include "duckdb/planner/binder.hpp"
26
27
  #include "duckdb/planner/planner.hpp"
27
28
 
@@ -52,6 +53,10 @@ Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context),
52
53
  #endif
53
54
  }
54
55
 
56
+ ClientContext &Optimizer::GetContext() {
57
+ return context;
58
+ }
59
+
55
60
  void Optimizer::RunOptimizer(OptimizerType type, const std::function<void()> &callback) {
56
61
  auto &config = DBConfig::GetConfig(context);
57
62
  if (config.options.disabled_optimizers.find(type) != config.options.disabled_optimizers.end()) {
@@ -73,6 +78,16 @@ void Optimizer::Verify(LogicalOperator &op) {
73
78
 
74
79
  unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan_p) {
75
80
  Verify(*plan_p);
81
+
82
+ switch (plan_p->type) {
83
+ case LogicalOperatorType::LOGICAL_TRANSACTION:
84
+ case LogicalOperatorType::LOGICAL_SET:
85
+ case LogicalOperatorType::LOGICAL_PRAGMA:
86
+ return plan_p;
87
+ default:
88
+ break;
89
+ }
90
+
76
91
  this->plan = std::move(plan_p);
77
92
  // first we perform expression rewrites using the ExpressionRewriter
78
93
  // this does not change the logical plan structure, but only simplifies the expression trees
@@ -96,8 +111,14 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
96
111
  });
97
112
 
98
113
  RunOptimizer(OptimizerType::IN_CLAUSE, [&]() {
99
- InClauseRewriter rewriter(context, *this);
100
- plan = rewriter.Rewrite(std::move(plan));
114
+ InClauseRewriter ic_rewriter(context, *this);
115
+ plan = ic_rewriter.Rewrite(std::move(plan));
116
+ });
117
+
118
+ // removes any redundant DelimGets/DelimJoins
119
+ RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
120
+ Deliminator deliminator;
121
+ plan = deliminator.Optimize(std::move(plan));
101
122
  });
102
123
 
103
124
  // then we perform the join ordering optimization
@@ -107,12 +128,6 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
107
128
  plan = optimizer.Optimize(std::move(plan));
108
129
  });
109
130
 
110
- // removes any redundant DelimGets/DelimJoins
111
- RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
112
- Deliminator deliminator(context);
113
- plan = deliminator.Optimize(std::move(plan));
114
- });
115
-
116
131
  // rewrites UNNESTs in DelimJoins by moving them to the projection
117
132
  RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() {
118
133
  UnnestRewriter unnest_rewriter;
@@ -125,10 +140,10 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
125
140
  unused.VisitOperator(*plan);
126
141
  });
127
142
 
128
- // perform statistics propagation
129
- RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() {
130
- StatisticsPropagator propagator(context);
131
- propagator.PropagateStatistics(plan);
143
+ // Remove duplicate groups from aggregates
144
+ RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() {
145
+ RemoveDuplicateGroups remove;
146
+ remove.VisitOperator(*plan);
132
147
  });
133
148
 
134
149
  // then we extract common subexpressions inside the different operators
@@ -137,16 +152,38 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
137
152
  cse_optimizer.VisitOperator(*plan);
138
153
  });
139
154
 
155
+ // creates projection maps so unused columns are projected out early
156
+ RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() {
157
+ ColumnLifetimeAnalyzer column_lifetime(true);
158
+ column_lifetime.VisitOperator(*plan);
159
+ });
160
+
161
+ // perform statistics propagation
162
+ column_binding_map_t<unique_ptr<BaseStatistics>> statistics_map;
163
+ RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() {
164
+ StatisticsPropagator propagator(*this);
165
+ propagator.PropagateStatistics(plan);
166
+ statistics_map = propagator.GetStatisticsMap();
167
+ });
168
+
169
+ // remove duplicate aggregates
140
170
  RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() {
141
171
  CommonAggregateOptimizer common_aggregate;
142
172
  common_aggregate.VisitOperator(*plan);
143
173
  });
144
174
 
175
+ // creates projection maps so unused columns are projected out early
145
176
  RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() {
146
177
  ColumnLifetimeAnalyzer column_lifetime(true);
147
178
  column_lifetime.VisitOperator(*plan);
148
179
  });
149
180
 
181
+ // compress data based on statistics for materializing operators
182
+ RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() {
183
+ CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map));
184
+ compressed_materialization.Compress(plan);
185
+ });
186
+
150
187
  // transform ORDER BY + LIMIT to TopN
151
188
  RunOptimizer(OptimizerType::TOP_N, [&]() {
152
189
  TopN topn;
@@ -42,13 +42,13 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownCrossProduct(unique_ptr<Logi
42
42
  vector<JoinCondition> conditions;
43
43
  vector<unique_ptr<Expression>> arbitrary_expressions;
44
44
  auto join_type = JoinType::INNER;
45
- LogicalComparisonJoin::ExtractJoinConditions(join_type, op->children[0], op->children[1], left_bindings,
46
- right_bindings, join_expressions, conditions,
45
+ LogicalComparisonJoin::ExtractJoinConditions(GetContext(), join_type, op->children[0], op->children[1],
46
+ left_bindings, right_bindings, join_expressions, conditions,
47
47
  arbitrary_expressions);
48
48
  // create the join from the join conditions
49
- return LogicalComparisonJoin::CreateJoin(JoinType::INNER, JoinRefType::REGULAR, std::move(op->children[0]),
50
- std::move(op->children[1]), std::move(conditions),
51
- std::move(arbitrary_expressions));
49
+ return LogicalComparisonJoin::CreateJoin(GetContext(), JoinType::INNER, JoinRefType::REGULAR,
50
+ std::move(op->children[0]), std::move(op->children[1]),
51
+ std::move(conditions), std::move(arbitrary_expressions));
52
52
  } else {
53
53
  // no join conditions found: keep as cross product
54
54
  return op;
@@ -4,7 +4,6 @@
4
4
  #include "duckdb/planner/expression/bound_parameter_expression.hpp"
5
5
  #include "duckdb/planner/operator/logical_filter.hpp"
6
6
  #include "duckdb/planner/operator/logical_get.hpp"
7
- #include "duckdb/storage/data_table.hpp"
8
7
 
9
8
  namespace duckdb {
10
9
 
@@ -0,0 +1,127 @@
1
+ #include "duckdb/optimizer/remove_duplicate_groups.hpp"
2
+
3
+ #include "duckdb/common/pair.hpp"
4
+ #include "duckdb/planner/expression/bound_columnref_expression.hpp"
5
+ #include "duckdb/planner/operator/logical_aggregate.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ void RemoveDuplicateGroups::VisitOperator(LogicalOperator &op) {
10
+ switch (op.type) {
11
+ case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY:
12
+ VisitAggregate(op.Cast<LogicalAggregate>());
13
+ break;
14
+ default:
15
+ break;
16
+ }
17
+ LogicalOperatorVisitor::VisitOperatorExpressions(op);
18
+ LogicalOperatorVisitor::VisitOperatorChildren(op);
19
+ }
20
+
21
+ void RemoveDuplicateGroups::VisitAggregate(LogicalAggregate &aggr) {
22
+ if (!aggr.grouping_functions.empty()) {
23
+ return;
24
+ }
25
+
26
+ auto &groups = aggr.groups;
27
+
28
+ column_binding_map_t<idx_t> duplicate_map;
29
+ vector<pair<idx_t, idx_t>> duplicates;
30
+ for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) {
31
+ const auto &group = groups[group_idx];
32
+ if (group->type != ExpressionType::BOUND_COLUMN_REF) {
33
+ continue;
34
+ }
35
+ const auto &colref = group->Cast<BoundColumnRefExpression>();
36
+ const auto &binding = colref.binding;
37
+ const auto it = duplicate_map.find(binding);
38
+ if (it == duplicate_map.end()) {
39
+ duplicate_map.emplace(binding, group_idx);
40
+ } else {
41
+ duplicates.emplace_back(it->second, group_idx);
42
+ }
43
+ }
44
+
45
+ if (duplicates.empty()) {
46
+ return;
47
+ }
48
+
49
+ // Sort duplicates by max duplicate group idx, because we want to remove groups from the back
50
+ sort(duplicates.begin(), duplicates.end(),
51
+ [](const pair<idx_t, idx_t> &lhs, const pair<idx_t, idx_t> &rhs) { return lhs.second > rhs.second; });
52
+
53
+ // Now we want to remove the duplicates, but this alters the column bindings coming out of the aggregate,
54
+ // so we keep track of how they shift and do another round of column binding replacements
55
+ column_binding_map_t<ColumnBinding> group_binding_map;
56
+ for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) {
57
+ group_binding_map.emplace(ColumnBinding(aggr.group_index, group_idx),
58
+ ColumnBinding(aggr.group_index, group_idx));
59
+ }
60
+
61
+ for (idx_t duplicate_idx = 0; duplicate_idx < duplicates.size(); duplicate_idx++) {
62
+ const auto &duplicate = duplicates[duplicate_idx];
63
+ const auto &remaining_idx = duplicate.first;
64
+ const auto &removed_idx = duplicate.second;
65
+
66
+ // Store expression and remove it from groups
67
+ stored_expressions.emplace_back(std::move(groups[removed_idx]));
68
+ groups.erase(groups.begin() + removed_idx);
69
+
70
+ // This optimizer should run before statistics propagation, so this should be empty
71
+ // If it runs after, then group_stats should be updated too
72
+ D_ASSERT(aggr.group_stats.empty());
73
+
74
+ // Remove from grouping sets too
75
+ for (auto &grouping_set : aggr.grouping_sets) {
76
+ // Replace removed group with duplicate remaining group
77
+ if (grouping_set.erase(removed_idx) != 0) {
78
+ grouping_set.insert(remaining_idx);
79
+ }
80
+
81
+ // Indices shifted: Reinsert groups in the set with group_idx - 1
82
+ vector<idx_t> group_indices_to_reinsert;
83
+ for (auto &entry : grouping_set) {
84
+ if (entry > removed_idx) {
85
+ group_indices_to_reinsert.emplace_back(entry);
86
+ }
87
+ }
88
+ for (const auto group_idx : group_indices_to_reinsert) {
89
+ grouping_set.erase(group_idx);
90
+ }
91
+ for (const auto group_idx : group_indices_to_reinsert) {
92
+ grouping_set.insert(group_idx - 1);
93
+ }
94
+ }
95
+
96
+ // Update mapping
97
+ auto it = group_binding_map.find(ColumnBinding(aggr.group_index, removed_idx));
98
+ D_ASSERT(it != group_binding_map.end());
99
+ it->second.column_index = remaining_idx;
100
+
101
+ for (auto &map_entry : group_binding_map) {
102
+ auto &new_binding = map_entry.second;
103
+ if (new_binding.column_index > removed_idx) {
104
+ new_binding.column_index--;
105
+ }
106
+ }
107
+ }
108
+
109
+ // Replace all references to the old group binding with the new group binding
110
+ for (const auto &map_entry : group_binding_map) {
111
+ auto it = column_references.find(map_entry.first);
112
+ if (it != column_references.end()) {
113
+ for (auto expr : it->second) {
114
+ expr.get().binding = map_entry.second;
115
+ }
116
+ }
117
+ }
118
+ }
119
+
120
+ unique_ptr<Expression> RemoveDuplicateGroups::VisitReplace(BoundColumnRefExpression &expr,
121
+ unique_ptr<Expression> *expr_ptr) {
122
+ // add a column reference
123
+ column_references[expr.binding].push_back(expr);
124
+ return nullptr;
125
+ }
126
+
127
+ } // namespace duckdb
@@ -302,6 +302,10 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
302
302
  everything_referenced = true;
303
303
  break;
304
304
  }
305
+ case LogicalOperatorType::LOGICAL_MATERIALIZED_CTE: {
306
+ everything_referenced = true;
307
+ break;
308
+ }
305
309
  case LogicalOperatorType::LOGICAL_CTE_REF: {
306
310
  everything_referenced = true;
307
311
  break;