duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  23. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  24. package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
  25. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  26. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  27. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  28. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  29. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  30. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  31. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  32. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  33. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  34. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  39. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  40. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  41. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  42. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  43. package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
  44. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  45. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  46. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  47. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  48. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  49. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
  55. package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/exception.cpp +2 -2
  62. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  63. package/src/duckdb/src/common/file_system.cpp +19 -0
  64. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  65. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  66. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  67. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  68. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  69. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  70. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  71. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  72. package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
  73. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  74. package/src/duckdb/src/common/types/bit.cpp +51 -0
  75. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  76. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  77. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  78. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  79. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  80. package/src/duckdb/src/common/types/date.cpp +9 -0
  81. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  82. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  83. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  84. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  85. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  86. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  87. package/src/duckdb/src/common/types/value.cpp +11 -6
  88. package/src/duckdb/src/common/types.cpp +9 -656
  89. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  90. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  91. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  94. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  95. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  96. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  97. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  98. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  99. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  100. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  101. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  102. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  103. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  104. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  105. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  106. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  107. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  108. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  109. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  110. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  111. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  112. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  113. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  114. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  115. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  116. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  117. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  118. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  119. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  120. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  121. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  122. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  124. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  125. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  126. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  127. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  128. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  129. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  131. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  132. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  133. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  134. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  135. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  136. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  137. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  138. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  139. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  140. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  141. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  142. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  143. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  144. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  145. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  146. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  147. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  148. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  149. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  150. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  151. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  152. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  153. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  154. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  155. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  156. package/src/duckdb/src/function/function.cpp +3 -1
  157. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  158. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  159. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  160. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  161. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  162. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  163. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  164. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  165. package/src/duckdb/src/function/table/arrow.cpp +19 -0
  166. package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
  167. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  168. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  169. package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
  170. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  171. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  172. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  173. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  174. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  176. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  177. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  178. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  179. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  180. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  181. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  182. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  183. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  184. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  185. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  186. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  187. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  188. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  189. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  190. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
  191. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  192. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  193. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  194. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  195. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  196. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
  197. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  198. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  200. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  201. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  202. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  205. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  206. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  207. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
  208. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  210. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  211. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  213. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  215. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  222. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  229. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  230. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  231. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  232. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  233. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  234. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  235. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  236. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  237. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  238. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  239. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  240. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  241. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  242. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  243. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  244. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  245. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  246. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  247. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  248. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  249. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  251. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  252. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  253. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  254. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  255. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  256. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  257. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  258. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  259. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  260. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  261. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  262. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  263. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  264. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  265. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  266. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  267. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  268. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  269. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  270. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  272. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  273. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  274. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  275. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
  276. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  277. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  278. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  279. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  280. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  281. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  284. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  286. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  287. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  288. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  291. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  293. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  294. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  295. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  296. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  297. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  300. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  302. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  303. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  304. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  306. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  307. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  308. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  309. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  310. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  311. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  312. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  314. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  315. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  316. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  317. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  318. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  319. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  320. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  321. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  322. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  323. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  324. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  325. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  326. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  327. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  328. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  329. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  330. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  331. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  332. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  333. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  334. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  335. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  336. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  337. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  338. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  339. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  340. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  341. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  342. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  343. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  344. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  345. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  346. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  347. package/src/duckdb/src/include/duckdb.h +28 -0
  348. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  349. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  350. package/src/duckdb/src/main/config.cpp +4 -0
  351. package/src/duckdb/src/main/database.cpp +1 -1
  352. package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
  353. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  354. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  355. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  356. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  357. package/src/duckdb/src/main/relation.cpp +6 -5
  358. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  359. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  360. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  361. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  362. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  363. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  364. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  365. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  366. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  367. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  368. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  369. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  370. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  371. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  372. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  373. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  374. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  375. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  376. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  377. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  378. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  379. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  380. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  381. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  382. package/src/duckdb/src/parallel/executor.cpp +15 -0
  383. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  384. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  385. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  386. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  387. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  388. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  389. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  390. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  391. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  392. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  393. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  394. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  395. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  396. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  397. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  398. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  399. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  400. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  401. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  402. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  403. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  404. package/src/duckdb/src/parser/parser.cpp +8 -2
  405. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  406. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  407. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  408. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  409. package/src/duckdb/src/parser/query_node.cpp +15 -37
  410. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  411. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  412. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  413. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  414. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  415. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  416. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  417. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  418. package/src/duckdb/src/parser/tableref.cpp +0 -44
  419. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  420. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  421. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  422. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  423. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  424. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  425. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  426. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  427. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  428. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  429. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  430. package/src/duckdb/src/parser/transformer.cpp +15 -0
  431. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  432. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  433. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  434. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  435. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  436. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  437. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  438. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  439. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  440. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  441. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  442. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  443. package/src/duckdb/src/planner/binder.cpp +44 -31
  444. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  445. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  446. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  447. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  448. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  449. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  450. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  451. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  452. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  453. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  454. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  455. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  456. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  457. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  458. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  459. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  460. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  461. package/src/duckdb/src/storage/data_table.cpp +1 -1
  462. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  463. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  464. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  465. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  466. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  467. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  468. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  469. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  470. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  471. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  472. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  473. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  474. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  475. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  476. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  477. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  478. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  479. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  480. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  481. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  482. package/src/duckdb/ub_src_common.cpp +2 -0
  483. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  484. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  485. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  486. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  487. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  488. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  489. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  490. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  491. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  492. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  493. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  494. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  495. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  496. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  497. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  498. package/src/statement.cpp +10 -3
  499. package/test/columns.test.ts +24 -1
  500. package/test/test_all_types.test.ts +234 -0
  501. package/tsconfig.json +1 -0
  502. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  503. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  504. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -5,6 +5,7 @@
5
5
  #include "duckdb/planner/expression/list.hpp"
6
6
  #include "duckdb/planner/expression_iterator.hpp"
7
7
  #include "duckdb/planner/operator/list.hpp"
8
+ #include "duckdb/common/queue.hpp"
8
9
 
9
10
  #include <algorithm>
10
11
  #include <cmath>
@@ -323,6 +324,65 @@ void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode &node) {
323
324
  }
324
325
  }
325
326
 
327
+ static vector<unordered_set<idx_t>> AddSuperSets(const vector<unordered_set<idx_t>> &current,
328
+ const vector<idx_t> &all_neighbors) {
329
+ vector<unordered_set<idx_t>> ret;
330
+
331
+ for (const auto &neighbor_set : current) {
332
+ auto max_val = std::max_element(neighbor_set.begin(), neighbor_set.end());
333
+ for (const auto &neighbor : all_neighbors) {
334
+ if (*max_val >= neighbor) {
335
+ continue;
336
+ }
337
+ if (neighbor_set.count(neighbor) == 0) {
338
+ unordered_set<idx_t> new_set;
339
+ for (auto &n : neighbor_set) {
340
+ new_set.insert(n);
341
+ }
342
+ new_set.insert(neighbor);
343
+ ret.push_back(new_set);
344
+ }
345
+ }
346
+ }
347
+
348
+ return ret;
349
+ }
350
+
351
+ // works by first creating all sets with cardinality 1
352
+ // then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
353
+ // is greater than all relations in the set.
354
+ static vector<unordered_set<idx_t>> GetAllNeighborSets(vector<idx_t> neighbors) {
355
+ vector<unordered_set<idx_t>> ret;
356
+ sort(neighbors.begin(), neighbors.end());
357
+ vector<unordered_set<idx_t>> added;
358
+ for (auto &neighbor : neighbors) {
359
+ added.push_back(unordered_set<idx_t>({neighbor}));
360
+ ret.push_back(unordered_set<idx_t>({neighbor}));
361
+ }
362
+ do {
363
+ added = AddSuperSets(added, neighbors);
364
+ for (auto &d : added) {
365
+ ret.push_back(d);
366
+ }
367
+ } while (!added.empty());
368
+ #if DEBUG
369
+ // drive by test to make sure we have an accurate amount of
370
+ // subsets, and that each neighbor is in a correct amount
371
+ // of those subsets.
372
+ D_ASSERT(ret.size() == pow(2, neighbors.size()) - 1);
373
+ for (auto &n : neighbors) {
374
+ idx_t count = 0;
375
+ for (auto &set : ret) {
376
+ if (set.count(n) >= 1) {
377
+ count += 1;
378
+ }
379
+ }
380
+ D_ASSERT(count == pow(2, neighbors.size() - 1));
381
+ }
382
+ #endif
383
+ return ret;
384
+ }
385
+
326
386
  JoinNode &JoinOrderOptimizer::EmitPair(JoinRelationSet &left, JoinRelationSet &right,
327
387
  const vector<reference<NeighborInfo>> &info) {
328
388
  // get the left and right join plans
@@ -405,8 +465,19 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
405
465
  //! Neighbors should be reversed when iterating over them.
406
466
  std::sort(neighbors.begin(), neighbors.end(), std::greater_equal<idx_t>());
407
467
  for (idx_t i = 0; i < neighbors.size() - 1; i++) {
408
- D_ASSERT(neighbors[i] >= neighbors[i + 1]);
468
+ D_ASSERT(neighbors[i] > neighbors[i + 1]);
469
+ }
470
+
471
+ // The DPhyp paper is missing this.
472
+ // Because we are traversing in reverse order, we need to add neighbors whose number is smaller than the current
473
+ // node to exclusion_set
474
+ // This avoids duplicated enumeration
475
+ unordered_set<idx_t> new_exclusion_set = exclusion_set;
476
+ for (idx_t i = 0; i < neighbors.size(); ++i) {
477
+ D_ASSERT(new_exclusion_set.find(neighbors[i]) == new_exclusion_set.end());
478
+ new_exclusion_set.insert(neighbors[i]);
409
479
  }
480
+
410
481
  for (auto neighbor : neighbors) {
411
482
  // since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
412
483
  // (only!) this neighbor, hence we have to do a connectedness check before we can emit it
@@ -417,27 +488,35 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
417
488
  return false;
418
489
  }
419
490
  }
420
- if (!EnumerateCmpRecursive(node, neighbor_relation, exclusion_set)) {
491
+
492
+ if (!EnumerateCmpRecursive(node, neighbor_relation, new_exclusion_set)) {
421
493
  return false;
422
494
  }
495
+
496
+ new_exclusion_set.erase(neighbor);
423
497
  }
424
498
  return true;
425
499
  }
426
500
 
427
501
  bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right,
428
- unordered_set<idx_t> exclusion_set) {
502
+ unordered_set<idx_t> &exclusion_set) {
429
503
  // get the neighbors of the second relation under the exclusion set
430
504
  auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
431
505
  if (neighbors.empty()) {
432
506
  return true;
433
507
  }
508
+
509
+ auto all_subset = GetAllNeighborSets(neighbors);
434
510
  vector<reference<JoinRelationSet>> union_sets;
435
- union_sets.reserve(neighbors.size());
436
- for (idx_t i = 0; i < neighbors.size(); i++) {
437
- auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
511
+ union_sets.reserve(all_subset.size());
512
+ for (const auto &rel_set : all_subset) {
513
+ auto &neighbor = set_manager.GetJoinRelation(rel_set);
438
514
  // emit the combinations of this node and its neighbors
439
515
  auto &combined_set = set_manager.Union(right, neighbor);
440
- if (combined_set.count > right.count && plans.find(&combined_set) != plans.end()) {
516
+ // If combined_set.count == right.count, this means we found a neighbor that has been present before.
517
+ // This means we didn't set exclusion_set correctly.
518
+ D_ASSERT(combined_set.count > right.count);
519
+ if (plans.find(&combined_set) != plans.end()) {
441
520
  auto connections = query_graph.GetConnections(left, combined_set);
442
521
  if (!connections.empty()) {
443
522
  if (!TryEmitPair(left, combined_set, connections)) {
@@ -447,11 +526,15 @@ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelati
447
526
  }
448
527
  union_sets.push_back(combined_set);
449
528
  }
450
- // recursively enumerate the sets
529
+
451
530
  unordered_set<idx_t> new_exclusion_set = exclusion_set;
452
- for (idx_t i = 0; i < neighbors.size(); i++) {
531
+ for (const auto &neighbor : neighbors) {
532
+ new_exclusion_set.insert(neighbor);
533
+ }
534
+
535
+ // recursively enumerate the sets
536
+ for (idx_t i = 0; i < union_sets.size(); i++) {
453
537
  // updated the set of excluded entries with this neighbor
454
- new_exclusion_set.insert(neighbors[i]);
455
538
  if (!EnumerateCmpRecursive(left, union_sets[i], new_exclusion_set)) {
456
539
  return false;
457
540
  }
@@ -465,26 +548,30 @@ bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet &node, unordered_
465
548
  if (neighbors.empty()) {
466
549
  return true;
467
550
  }
551
+
552
+ auto all_subset = GetAllNeighborSets(neighbors);
468
553
  vector<reference<JoinRelationSet>> union_sets;
469
- union_sets.reserve(neighbors.size());
470
- for (idx_t i = 0; i < neighbors.size(); i++) {
471
- auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
554
+ union_sets.reserve(all_subset.size());
555
+ for (const auto &rel_set : all_subset) {
556
+ auto &neighbor = set_manager.GetJoinRelation(rel_set);
472
557
  // emit the combinations of this node and its neighbors
473
558
  auto &new_set = set_manager.Union(node, neighbor);
474
- if (new_set.count > node.count && plans.find(&new_set) != plans.end()) {
559
+ D_ASSERT(new_set.count > node.count);
560
+ if (plans.find(&new_set) != plans.end()) {
475
561
  if (!EmitCSG(new_set)) {
476
562
  return false;
477
563
  }
478
564
  }
479
565
  union_sets.push_back(new_set);
480
566
  }
481
- // recursively enumerate the sets
567
+
482
568
  unordered_set<idx_t> new_exclusion_set = exclusion_set;
483
- for (idx_t i = 0; i < neighbors.size(); i++) {
484
- // Reset the exclusion set so that the algorithm considers all combinations
485
- // of the exclusion_set with a subset of neighbors.
486
- new_exclusion_set = exclusion_set;
487
- new_exclusion_set.insert(neighbors[i]);
569
+ for (const auto &neighbor : neighbors) {
570
+ new_exclusion_set.insert(neighbor);
571
+ }
572
+
573
+ // recursively enumerate the sets
574
+ for (idx_t i = 0; i < union_sets.size(); i++) {
488
575
  // updated the set of excluded entries with this neighbor
489
576
  if (!EnumerateCSGRecursive(union_sets[i], new_exclusion_set)) {
490
577
  return false;
@@ -505,7 +592,7 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
505
592
  }
506
593
  // initialize the set of exclusion_set as all the nodes with a number below this
507
594
  unordered_set<idx_t> exclusion_set;
508
- for (idx_t j = 0; j < i - 1; j++) {
595
+ for (idx_t j = 0; j < i; j++) {
509
596
  exclusion_set.insert(j);
510
597
  }
511
598
  // then we recursively search for neighbors that do not belong to the banned entries
@@ -516,63 +603,6 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
516
603
  return true;
517
604
  }
518
605
 
519
- static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> current,
520
- const vector<idx_t> &all_neighbors) {
521
- vector<unordered_set<idx_t>> ret;
522
- for (auto &neighbor : all_neighbors) {
523
- for (auto &neighbor_set : current) {
524
- auto max_val = std::max_element(neighbor_set.begin(), neighbor_set.end());
525
- if (*max_val >= neighbor) {
526
- continue;
527
- }
528
- if (neighbor_set.count(neighbor) == 0) {
529
- unordered_set<idx_t> new_set;
530
- for (auto &n : neighbor_set) {
531
- new_set.insert(n);
532
- }
533
- new_set.insert(neighbor);
534
- ret.push_back(new_set);
535
- }
536
- }
537
- }
538
- return ret;
539
- }
540
-
541
- // works by first creating all sets with cardinality 1
542
- // then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
543
- // is greater than all relations in the set.
544
- static vector<unordered_set<idx_t>> GetAllNeighborSets(unordered_set<idx_t> &exclusion_set, vector<idx_t> neighbors) {
545
- vector<unordered_set<idx_t>> ret;
546
- sort(neighbors.begin(), neighbors.end());
547
- vector<unordered_set<idx_t>> added;
548
- for (auto &neighbor : neighbors) {
549
- added.push_back(unordered_set<idx_t>({neighbor}));
550
- ret.push_back(unordered_set<idx_t>({neighbor}));
551
- }
552
- do {
553
- added = AddSuperSets(added, neighbors);
554
- for (auto &d : added) {
555
- ret.push_back(d);
556
- }
557
- } while (!added.empty());
558
- #if DEBUG
559
- // drive by test to make sure we have an accurate amount of
560
- // subsets, and that each neighbor is in a correct amount
561
- // of those subsets.
562
- D_ASSERT(ret.size() == pow(2, neighbors.size()) - 1);
563
- for (auto &n : neighbors) {
564
- idx_t count = 0;
565
- for (auto &set : ret) {
566
- if (set.count(n) >= 1) {
567
- count += 1;
568
- }
569
- }
570
- D_ASSERT(count == pow(2, neighbors.size() - 1));
571
- }
572
- #endif
573
- return ret;
574
- }
575
-
576
606
  void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
577
607
  if (!NodeInFullPlan(new_plan)) {
578
608
  // if the new node is not in the full plan, feel free to return
@@ -586,8 +616,8 @@ void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
586
616
  exclusion_set.insert(new_set.relations[i]);
587
617
  }
588
618
  auto neighbors = query_graph.GetNeighbors(new_set, exclusion_set);
589
- auto all_neighbors = GetAllNeighborSets(exclusion_set, neighbors);
590
- for (auto neighbor : all_neighbors) {
619
+ auto all_neighbors = GetAllNeighborSets(neighbors);
620
+ for (const auto &neighbor : all_neighbors) {
591
621
  auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
592
622
  auto &combined_set = set_manager.Union(new_set, neighbor_relation);
593
623
 
@@ -820,8 +850,9 @@ GenerateJoinRelation JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<Logical
820
850
  // FILTER on top of GET, add estimated properties to both
821
851
  auto &filter_props = *result_operator->estimated_props;
822
852
  auto &child_operator = *result_operator->children[0];
823
- child_operator.estimated_props = make_uniq<EstimatedProperties>(
824
- filter_props.GetCardinality<double>() / CardinalityEstimator::DEFAULT_SELECTIVITY, filter_props.GetCost());
853
+ child_operator.estimated_props = make_uniq<EstimatedProperties>(filter_props.GetCardinality<double>() /
854
+ CardinalityEstimator::DEFAULT_SELECTIVITY,
855
+ filter_props.GetCost<double>());
825
856
  child_operator.estimated_cardinality = child_operator.estimated_props->GetCardinality<idx_t>();
826
857
  child_operator.has_estimated_cardinality = true;
827
858
  }
@@ -65,7 +65,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
65
65
  return GetJoinRelation(std::move(relations), count);
66
66
  }
67
67
 
68
- JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &bindings) {
68
+ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(const unordered_set<idx_t> &bindings) {
69
69
  // create a sorted vector of the relations
70
70
  unsafe_unique_array<idx_t> relations = bindings.empty() ? nullptr : make_unsafe_uniq_array<idx_t>(bindings.size());
71
71
  idx_t count = 0;
@@ -94,16 +94,12 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati
94
94
  relations[count++] = left.relations[i];
95
95
  }
96
96
  break;
97
- } else if (left.relations[i] == right.relations[j]) {
98
- // equivalent, add only one of the two pairs
99
- relations[count++] = left.relations[i];
100
- i++;
101
- j++;
102
97
  } else if (left.relations[i] < right.relations[j]) {
103
98
  // left is smaller, progress left and add it to the set
104
99
  relations[count++] = left.relations[i];
105
100
  i++;
106
101
  } else {
102
+ D_ASSERT(left.relations[i] > right.relations[j]);
107
103
  // right is smaller, progress right and add it to the set
108
104
  relations[count++] = right.relations[j];
109
105
  j++;
@@ -76,22 +76,30 @@ void QueryGraph::CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optio
76
76
  info.neighbors.push_back(std::move(n));
77
77
  }
78
78
 
79
+ void QueryGraph::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
80
+ const std::function<bool(NeighborInfo &)> &callback) {
81
+
82
+ for (auto &neighbor : info.get().neighbors) {
83
+ if (callback(*neighbor)) {
84
+ return;
85
+ }
86
+ }
87
+
88
+ for (idx_t node_index = index; node_index < node.count; ++node_index) {
89
+ auto iter = info.get().children.find(node.relations[node_index]);
90
+ if (iter != info.get().children.end()) {
91
+ reference<QueryEdge> new_info = *iter->second;
92
+ EnumerateNeighborsDFS(node, new_info, node_index + 1, callback);
93
+ }
94
+ }
95
+ }
96
+
79
97
  void QueryGraph::EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) {
80
98
  for (idx_t j = 0; j < node.count; j++) {
81
- reference<QueryEdge> info = root;
82
- for (idx_t i = j; i < node.count; i++) {
83
- auto entry = info.get().children.find(node.relations[i]);
84
- if (entry == info.get().children.end()) {
85
- // node not found
86
- break;
87
- }
88
- // check if any subset of the other set is in this sets neighbors
89
- info = *entry->second;
90
- for (auto &neighbor : info.get().neighbors) {
91
- if (callback(*neighbor)) {
92
- return;
93
- }
94
- }
99
+ auto iter = root.children.find(node.relations[j]);
100
+ if (iter != root.children.end()) {
101
+ reference<QueryEdge> new_info = *iter->second;
102
+ EnumerateNeighborsDFS(node, new_info, j + 1, callback);
95
103
  }
96
104
  }
97
105
  }
@@ -1,27 +1,28 @@
1
1
  #include "duckdb/optimizer/optimizer.hpp"
2
2
 
3
3
  #include "duckdb/execution/column_binding_resolver.hpp"
4
- #include "duckdb/execution/expression_executor.hpp"
5
4
  #include "duckdb/main/client_context.hpp"
6
5
  #include "duckdb/main/config.hpp"
7
6
  #include "duckdb/main/query_profiler.hpp"
8
7
  #include "duckdb/optimizer/column_lifetime_optimizer.hpp"
9
8
  #include "duckdb/optimizer/common_aggregate_optimizer.hpp"
9
+ #include "duckdb/optimizer/compressed_materialization.hpp"
10
10
  #include "duckdb/optimizer/cse_optimizer.hpp"
11
11
  #include "duckdb/optimizer/deliminator.hpp"
12
- #include "duckdb/optimizer/unnest_rewriter.hpp"
13
12
  #include "duckdb/optimizer/expression_heuristics.hpp"
14
13
  #include "duckdb/optimizer/filter_pullup.hpp"
15
14
  #include "duckdb/optimizer/filter_pushdown.hpp"
16
15
  #include "duckdb/optimizer/in_clause_rewriter.hpp"
17
16
  #include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
18
17
  #include "duckdb/optimizer/regex_range_filter.hpp"
18
+ #include "duckdb/optimizer/remove_duplicate_groups.hpp"
19
19
  #include "duckdb/optimizer/remove_unused_columns.hpp"
20
20
  #include "duckdb/optimizer/rule/equal_or_null_simplification.hpp"
21
21
  #include "duckdb/optimizer/rule/in_clause_simplification.hpp"
22
22
  #include "duckdb/optimizer/rule/list.hpp"
23
23
  #include "duckdb/optimizer/statistics_propagator.hpp"
24
24
  #include "duckdb/optimizer/topn_optimizer.hpp"
25
+ #include "duckdb/optimizer/unnest_rewriter.hpp"
25
26
  #include "duckdb/planner/binder.hpp"
26
27
  #include "duckdb/planner/planner.hpp"
27
28
 
@@ -52,6 +53,10 @@ Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context),
52
53
  #endif
53
54
  }
54
55
 
56
+ ClientContext &Optimizer::GetContext() {
57
+ return context;
58
+ }
59
+
55
60
  void Optimizer::RunOptimizer(OptimizerType type, const std::function<void()> &callback) {
56
61
  auto &config = DBConfig::GetConfig(context);
57
62
  if (config.options.disabled_optimizers.find(type) != config.options.disabled_optimizers.end()) {
@@ -73,6 +78,14 @@ void Optimizer::Verify(LogicalOperator &op) {
73
78
 
74
79
  unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan_p) {
75
80
  Verify(*plan_p);
81
+
82
+ switch (plan_p->type) {
83
+ case LogicalOperatorType::LOGICAL_TRANSACTION:
84
+ return plan_p; // skip optimizing simple & often-occurring plans unaffected by rewrites
85
+ default:
86
+ break;
87
+ }
88
+
76
89
  this->plan = std::move(plan_p);
77
90
  // first we perform expression rewrites using the ExpressionRewriter
78
91
  // this does not change the logical plan structure, but only simplifies the expression trees
@@ -96,8 +109,14 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
96
109
  });
97
110
 
98
111
  RunOptimizer(OptimizerType::IN_CLAUSE, [&]() {
99
- InClauseRewriter rewriter(context, *this);
100
- plan = rewriter.Rewrite(std::move(plan));
112
+ InClauseRewriter ic_rewriter(context, *this);
113
+ plan = ic_rewriter.Rewrite(std::move(plan));
114
+ });
115
+
116
+ // removes any redundant DelimGets/DelimJoins
117
+ RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
118
+ Deliminator deliminator;
119
+ plan = deliminator.Optimize(std::move(plan));
101
120
  });
102
121
 
103
122
  // then we perform the join ordering optimization
@@ -107,12 +126,6 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
107
126
  plan = optimizer.Optimize(std::move(plan));
108
127
  });
109
128
 
110
- // removes any redundant DelimGets/DelimJoins
111
- RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
112
- Deliminator deliminator(context);
113
- plan = deliminator.Optimize(std::move(plan));
114
- });
115
-
116
129
  // rewrites UNNESTs in DelimJoins by moving them to the projection
117
130
  RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() {
118
131
  UnnestRewriter unnest_rewriter;
@@ -125,10 +138,10 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
125
138
  unused.VisitOperator(*plan);
126
139
  });
127
140
 
128
- // perform statistics propagation
129
- RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() {
130
- StatisticsPropagator propagator(context);
131
- propagator.PropagateStatistics(plan);
141
+ // Remove duplicate groups from aggregates
142
+ RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() {
143
+ RemoveDuplicateGroups remove;
144
+ remove.VisitOperator(*plan);
132
145
  });
133
146
 
134
147
  // then we extract common subexpressions inside the different operators
@@ -137,16 +150,38 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
137
150
  cse_optimizer.VisitOperator(*plan);
138
151
  });
139
152
 
153
+ // creates projection maps so unused columns are projected out early
154
+ RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() {
155
+ ColumnLifetimeAnalyzer column_lifetime(true);
156
+ column_lifetime.VisitOperator(*plan);
157
+ });
158
+
159
+ // perform statistics propagation
160
+ column_binding_map_t<unique_ptr<BaseStatistics>> statistics_map;
161
+ RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() {
162
+ StatisticsPropagator propagator(*this);
163
+ propagator.PropagateStatistics(plan);
164
+ statistics_map = propagator.GetStatisticsMap();
165
+ });
166
+
167
+ // remove duplicate aggregates
140
168
  RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() {
141
169
  CommonAggregateOptimizer common_aggregate;
142
170
  common_aggregate.VisitOperator(*plan);
143
171
  });
144
172
 
173
+ // creates projection maps so unused columns are projected out early
145
174
  RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() {
146
175
  ColumnLifetimeAnalyzer column_lifetime(true);
147
176
  column_lifetime.VisitOperator(*plan);
148
177
  });
149
178
 
179
+ // compress data based on statistics for materializing operators
180
+ RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() {
181
+ CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map));
182
+ compressed_materialization.Compress(plan);
183
+ });
184
+
150
185
  // transform ORDER BY + LIMIT to TopN
151
186
  RunOptimizer(OptimizerType::TOP_N, [&]() {
152
187
  TopN topn;
@@ -42,13 +42,13 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownCrossProduct(unique_ptr<Logi
42
42
  vector<JoinCondition> conditions;
43
43
  vector<unique_ptr<Expression>> arbitrary_expressions;
44
44
  auto join_type = JoinType::INNER;
45
- LogicalComparisonJoin::ExtractJoinConditions(join_type, op->children[0], op->children[1], left_bindings,
46
- right_bindings, join_expressions, conditions,
45
+ LogicalComparisonJoin::ExtractJoinConditions(GetContext(), join_type, op->children[0], op->children[1],
46
+ left_bindings, right_bindings, join_expressions, conditions,
47
47
  arbitrary_expressions);
48
48
  // create the join from the join conditions
49
- return LogicalComparisonJoin::CreateJoin(JoinType::INNER, JoinRefType::REGULAR, std::move(op->children[0]),
50
- std::move(op->children[1]), std::move(conditions),
51
- std::move(arbitrary_expressions));
49
+ return LogicalComparisonJoin::CreateJoin(GetContext(), JoinType::INNER, JoinRefType::REGULAR,
50
+ std::move(op->children[0]), std::move(op->children[1]),
51
+ std::move(conditions), std::move(arbitrary_expressions));
52
52
  } else {
53
53
  // no join conditions found: keep as cross product
54
54
  return op;
@@ -4,7 +4,6 @@
4
4
  #include "duckdb/planner/expression/bound_parameter_expression.hpp"
5
5
  #include "duckdb/planner/operator/logical_filter.hpp"
6
6
  #include "duckdb/planner/operator/logical_get.hpp"
7
- #include "duckdb/storage/data_table.hpp"
8
7
 
9
8
  namespace duckdb {
10
9
 
@@ -0,0 +1,127 @@
1
+ #include "duckdb/optimizer/remove_duplicate_groups.hpp"
2
+
3
+ #include "duckdb/common/pair.hpp"
4
+ #include "duckdb/planner/expression/bound_columnref_expression.hpp"
5
+ #include "duckdb/planner/operator/logical_aggregate.hpp"
6
+
7
+ namespace duckdb {
8
+
9
// Visitor entry point for a single logical operator.
// Aggregate operators are first handed to VisitAggregate; every operator (aggregate or not)
// then has its expressions and its children visited through the LogicalOperatorVisitor helpers.
+ void RemoveDuplicateGroups::VisitOperator(LogicalOperator &op) {
10
+ switch (op.type) {
11
+ case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY:
12
+ VisitAggregate(op.Cast<LogicalAggregate>());
13
+ break;
14
+ default:
15
+ break;
16
+ }
// The aggregate is processed before its own expressions and children are visited.
// NOTE(review): presumably VisitOperatorExpressions is what ends up calling VisitReplace for
// each bound column reference - confirm against LogicalOperatorVisitor.
17
+ LogicalOperatorVisitor::VisitOperatorExpressions(op);
18
+ LogicalOperatorVisitor::VisitOperatorChildren(op);
19
+ }
20
+
21
// Removes group expressions of `aggr` that are bound column references to a binding already
// used by an earlier group, fixes up the grouping sets, and rewrites the column references
// recorded in column_references so they point at the surviving group's (shifted) binding.
// Returns without changes when the aggregate has grouping functions or no duplicates exist.
+ void RemoveDuplicateGroups::VisitAggregate(LogicalAggregate &aggr) {
// Aggregates that carry grouping functions are left untouched.
22
+ if (!aggr.grouping_functions.empty()) {
23
+ return;
24
+ }
25
+
26
+ auto &groups = aggr.groups;
27
+
// duplicate_map: binding -> index of the first group that references it.
// duplicates: (index of that first group, index of a later group with the same binding).
28
+ column_binding_map_t<idx_t> duplicate_map;
29
+ vector<pair<idx_t, idx_t>> duplicates;
30
+ for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) {
31
+ const auto &group = groups[group_idx];
// Only groups that are plain bound column references are candidates.
32
+ if (group->type != ExpressionType::BOUND_COLUMN_REF) {
33
+ continue;
34
+ }
35
+ const auto &colref = group->Cast<BoundColumnRefExpression>();
36
+ const auto &binding = colref.binding;
37
+ const auto it = duplicate_map.find(binding);
38
+ if (it == duplicate_map.end()) {
39
+ duplicate_map.emplace(binding, group_idx);
40
+ } else {
// Groups are scanned in ascending order, so pair.first < pair.second always holds.
41
+ duplicates.emplace_back(it->second, group_idx);
42
+ }
43
+ }
44
+
45
+ if (duplicates.empty()) {
46
+ return;
47
+ }
48
+
49
+ // Sort duplicates by max duplicate group idx, because we want to remove groups from the back
// (descending on pair.second: erasing a group then never moves a group that is still to be erased)
50
+ sort(duplicates.begin(), duplicates.end(),
51
+ [](const pair<idx_t, idx_t> &lhs, const pair<idx_t, idx_t> &rhs) { return lhs.second > rhs.second; });
52
+
53
+ // Now we want to remove the duplicates, but this alters the column bindings coming out of the aggregate,
54
+ // so we keep track of how they shift and do another round of column binding replacements
// The map starts as the identity: every original group binding maps to itself.
55
+ column_binding_map_t<ColumnBinding> group_binding_map;
56
+ for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) {
57
+ group_binding_map.emplace(ColumnBinding(aggr.group_index, group_idx),
58
+ ColumnBinding(aggr.group_index, group_idx));
59
+ }
60
+
61
+ for (idx_t duplicate_idx = 0; duplicate_idx < duplicates.size(); duplicate_idx++) {
62
+ const auto &duplicate = duplicates[duplicate_idx];
63
+ const auto &remaining_idx = duplicate.first;
64
+ const auto &removed_idx = duplicate.second;
65
+
66
+ // Store expression and remove it from groups
// NOTE(review): the expression is moved into stored_expressions instead of being destroyed,
// presumably so expressions recorded in column_references stay alive - confirm in the header.
67
+ stored_expressions.emplace_back(std::move(groups[removed_idx]));
68
+ groups.erase(groups.begin() + removed_idx);
69
+
70
+ // This optimizer should run before statistics propagation, so this should be empty
71
+ // If it runs after, then group_stats should be updated too
72
+ D_ASSERT(aggr.group_stats.empty());
73
+
74
+ // Remove from grouping sets too
75
+ for (auto &grouping_set : aggr.grouping_sets) {
76
+ // Replace removed group with duplicate remaining group
77
+ if (grouping_set.erase(removed_idx) != 0) {
78
+ grouping_set.insert(remaining_idx);
79
+ }
80
+
81
+ // Indices shifted: Reinsert groups in the set with group_idx - 1
// The affected entries are collected first and erased/inserted in separate passes,
// so the set is never modified while it is being iterated.
82
+ vector<idx_t> group_indices_to_reinsert;
83
+ for (auto &entry : grouping_set) {
84
+ if (entry > removed_idx) {
85
+ group_indices_to_reinsert.emplace_back(entry);
86
+ }
87
+ }
88
+ for (const auto group_idx : group_indices_to_reinsert) {
89
+ grouping_set.erase(group_idx);
90
+ }
91
+ for (const auto group_idx : group_indices_to_reinsert) {
92
+ grouping_set.insert(group_idx - 1);
93
+ }
94
+ }
95
+
96
+ // Update mapping
// The removed group's original binding now resolves to the surviving group ...
97
+ auto it = group_binding_map.find(ColumnBinding(aggr.group_index, removed_idx));
98
+ D_ASSERT(it != group_binding_map.end());
99
+ it->second.column_index = remaining_idx;
100
+
// ... and every mapped target positioned after the erased group moves down by one.
101
+ for (auto &map_entry : group_binding_map) {
102
+ auto &new_binding = map_entry.second;
103
+ if (new_binding.column_index > removed_idx) {
104
+ new_binding.column_index--;
105
+ }
106
+ }
107
+ }
108
+
109
+ // Replace all references to the old group binding with the new group binding
// column_references is keyed by the old binding; bindings with no recorded reference are skipped.
110
+ for (const auto &map_entry : group_binding_map) {
111
+ auto it = column_references.find(map_entry.first);
112
+ if (it != column_references.end()) {
// NOTE(review): `expr` is copied by value but exposes .get(), so the elements look like
// reference wrappers and the assignment reaches the original expression - confirm the type.
113
+ for (auto expr : it->second) {
114
+ expr.get().binding = map_entry.second;
115
+ }
116
+ }
117
+ }
118
+ }
119
+
120
// Records the visited bound column reference in column_references, keyed by its current
// binding; VisitAggregate later looks these entries up to overwrite their bindings.
// `expr_ptr` is not used. Always returns nullptr.
// NOTE(review): nullptr presumably tells the visitor "do not replace this expression" -
// confirm against LogicalOperatorVisitor.
+ unique_ptr<Expression> RemoveDuplicateGroups::VisitReplace(BoundColumnRefExpression &expr,
121
+ unique_ptr<Expression> *expr_ptr) {
122
+ // add a column reference
// operator[] creates the entry for a binding the first time it is seen
123
+ column_references[expr.binding].push_back(expr);
124
+ return nullptr;
125
+ }
126
+
127
+ } // namespace duckdb
@@ -302,6 +302,10 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
302
302
  everything_referenced = true;
303
303
  break;
304
304
  }
305
+ case LogicalOperatorType::LOGICAL_MATERIALIZED_CTE: {
306
+ everything_referenced = true;
307
+ break;
308
+ }
305
309
  case LogicalOperatorType::LOGICAL_CTE_REF: {
306
310
  everything_referenced = true;
307
311
  break;