duckdb 0.8.2-dev150.0 → 0.8.2-dev1559.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (489)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  61. package/src/duckdb/src/common/file_system.cpp +19 -0
  62. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  63. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  64. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  65. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  66. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  67. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/bit.cpp +51 -0
  73. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  74. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  75. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  76. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  77. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  78. package/src/duckdb/src/common/types/date.cpp +9 -0
  79. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  80. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  83. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  84. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  85. package/src/duckdb/src/common/types.cpp +8 -655
  86. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  87. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  88. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  94. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  95. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  96. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  97. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  98. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  99. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  100. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  102. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  103. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  104. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  105. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  106. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  107. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  108. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  109. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  110. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  111. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  112. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  113. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  114. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  115. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  116. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  117. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  118. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  119. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  121. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  122. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  124. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  125. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  126. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  127. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  128. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  129. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  131. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  132. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  133. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  134. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  135. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  136. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  137. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  138. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  139. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  140. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  141. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +13 -22
  142. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  143. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  144. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  145. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  146. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  147. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  148. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  149. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  150. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  151. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  152. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  153. package/src/duckdb/src/function/function.cpp +3 -1
  154. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  155. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  156. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  157. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  158. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  159. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  160. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  161. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  162. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  163. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  164. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  165. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  166. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  168. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  169. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  170. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  171. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  172. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  173. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  174. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  176. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  177. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  178. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  179. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  180. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  181. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  182. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  183. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  184. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  185. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  186. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  187. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  188. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  189. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  190. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  191. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  192. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  193. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  194. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  195. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  196. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  197. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  198. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  199. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  200. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  201. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  202. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  204. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  207. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  211. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  221. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  222. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  226. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  227. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  228. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  229. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  230. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  231. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  232. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  233. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  234. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  235. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  236. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  237. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  238. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  239. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  240. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  241. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  242. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  245. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  246. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  247. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  248. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  249. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  250. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  251. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  252. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  253. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  254. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  255. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  257. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  258. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  260. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  261. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  262. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  263. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  264. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  265. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  266. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  267. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  269. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  270. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  271. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  272. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  273. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  274. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  275. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  277. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  279. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  280. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  281. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  282. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  283. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  284. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  286. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  287. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  295. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  296. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  297. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  298. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  299. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  300. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  302. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  303. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  304. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  306. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  307. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  308. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  309. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  310. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  311. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  312. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  313. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  314. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  315. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  316. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  317. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  318. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  319. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  320. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  321. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  322. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  323. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  324. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  325. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  326. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  327. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  328. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  329. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  330. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  331. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  332. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  333. package/src/duckdb/src/include/duckdb.h +28 -0
  334. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  335. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  336. package/src/duckdb/src/main/config.cpp +4 -0
  337. package/src/duckdb/src/main/database.cpp +1 -1
  338. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  339. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  340. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  341. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  342. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  343. package/src/duckdb/src/main/relation.cpp +6 -5
  344. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  345. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  346. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  347. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  348. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  349. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  350. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  351. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  352. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  353. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  354. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  355. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  356. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  357. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  358. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  359. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  360. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  361. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  362. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  363. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  364. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  365. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  366. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  367. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  368. package/src/duckdb/src/parallel/executor.cpp +15 -0
  369. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  370. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  371. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  372. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  373. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  374. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  375. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  376. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  377. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  378. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  379. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  380. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  381. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  382. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  383. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  384. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  385. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  386. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  387. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  388. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  389. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  390. package/src/duckdb/src/parser/parser.cpp +8 -2
  391. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  392. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  393. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  394. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  395. package/src/duckdb/src/parser/query_node.cpp +15 -37
  396. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  397. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  398. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  399. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  400. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  401. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  402. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  403. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  404. package/src/duckdb/src/parser/tableref.cpp +0 -44
  405. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  406. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  407. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  408. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  409. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  410. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  411. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  412. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  413. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  414. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  415. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  416. package/src/duckdb/src/parser/transformer.cpp +15 -0
  417. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  418. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  419. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  420. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  421. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  422. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  423. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  424. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  425. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  426. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  427. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  428. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  429. package/src/duckdb/src/planner/binder.cpp +44 -31
  430. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  431. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  432. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  433. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  434. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  435. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  436. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  437. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  438. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  439. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  440. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  441. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  442. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  443. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  444. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  445. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  446. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  447. package/src/duckdb/src/storage/data_table.cpp +1 -1
  448. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  449. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  450. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  451. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  452. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  453. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  454. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  455. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  456. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  457. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  458. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  459. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  460. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  461. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  462. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  463. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  464. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  465. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  466. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  467. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  468. package/src/duckdb/ub_src_common.cpp +2 -0
  469. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  470. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  471. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  472. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  473. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  474. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  475. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  476. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  477. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  478. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  479. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  480. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  481. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  482. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  483. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  484. package/src/statement.cpp +10 -3
  485. package/test/test_all_types.test.ts +233 -0
  486. package/tsconfig.json +1 -0
  487. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  488. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  489. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -0,0 +1,478 @@
1
+ #include "duckdb/optimizer/compressed_materialization.hpp"
2
+
3
+ #include "duckdb/execution/expression_executor.hpp"
4
+ #include "duckdb/function/scalar/operators.hpp"
5
+ #include "duckdb/optimizer/column_binding_replacer.hpp"
6
+ #include "duckdb/optimizer/topn_optimizer.hpp"
7
+ #include "duckdb/planner/binder.hpp"
8
+ #include "duckdb/planner/expression/bound_constant_expression.hpp"
9
+ #include "duckdb/planner/expression/bound_function_expression.hpp"
10
+ #include "duckdb/planner/expression_iterator.hpp"
11
+ #include "duckdb/planner/operator/logical_comparison_join.hpp"
12
+ #include "duckdb/planner/operator/logical_delim_join.hpp"
13
+ #include "duckdb/planner/operator/logical_projection.hpp"
14
+
15
+ namespace duckdb {
16
+
17
+ CMChildInfo::CMChildInfo(LogicalOperator &op, const column_binding_set_t &referenced_bindings)
18
+ : bindings_before(op.GetColumnBindings()), types(op.types), can_compress(bindings_before.size(), true) {
19
+ for (const auto &binding : referenced_bindings) {
20
+ for (idx_t binding_idx = 0; binding_idx < bindings_before.size(); binding_idx++) {
21
+ if (binding == bindings_before[binding_idx]) {
22
+ can_compress[binding_idx] = false;
23
+ }
24
+ }
25
+ }
26
+ }
27
+
28
+ CMBindingInfo::CMBindingInfo(ColumnBinding binding_p, const LogicalType &type_p)
29
+ : binding(binding_p), type(type_p), needs_decompression(false) {
30
+ }
31
+
32
+ CompressedMaterializationInfo::CompressedMaterializationInfo(LogicalOperator &op, vector<idx_t> &&child_idxs_p,
33
+ const column_binding_set_t &referenced_bindings)
34
+ : child_idxs(child_idxs_p) {
35
+ child_info.reserve(child_idxs.size());
36
+ for (const auto &child_idx : child_idxs) {
37
+ child_info.emplace_back(*op.children[child_idx], referenced_bindings);
38
+ }
39
+ }
40
+
41
+ CompressExpression::CompressExpression(unique_ptr<Expression> expression_p, unique_ptr<BaseStatistics> stats_p)
42
+ : expression(std::move(expression_p)), stats(std::move(stats_p)) {
43
+ }
44
+
45
+ CompressedMaterialization::CompressedMaterialization(ClientContext &context_p, Binder &binder_p,
46
+ statistics_map_t &&statistics_map_p)
47
+ : context(context_p), binder(binder_p), statistics_map(std::move(statistics_map_p)) {
48
+ }
49
+
50
+ void CompressedMaterialization::GetReferencedBindings(const Expression &expression,
51
+ column_binding_set_t &referenced_bindings) {
52
+ if (expression.GetExpressionType() == ExpressionType::BOUND_COLUMN_REF) {
53
+ const auto &col_ref = expression.Cast<BoundColumnRefExpression>();
54
+ referenced_bindings.insert(col_ref.binding);
55
+ } else {
56
+ ExpressionIterator::EnumerateChildren(
57
+ expression, [&](const Expression &child) { GetReferencedBindings(child, referenced_bindings); });
58
+ }
59
+ }
60
+
61
+ void CompressedMaterialization::UpdateBindingInfo(CompressedMaterializationInfo &info, const ColumnBinding &binding,
62
+ bool needs_decompression) {
63
+ auto &binding_map = info.binding_map;
64
+ auto binding_it = binding_map.find(binding);
65
+ if (binding_it == binding_map.end()) {
66
+ return;
67
+ }
68
+
69
+ auto &binding_info = binding_it->second;
70
+ binding_info.needs_decompression = needs_decompression;
71
+ auto stats_it = statistics_map.find(binding);
72
+ if (stats_it != statistics_map.end()) {
73
+ binding_info.stats = statistics_map[binding]->ToUnique();
74
+ }
75
+ }
76
+
77
+ void CompressedMaterialization::Compress(unique_ptr<LogicalOperator> &op) {
78
+ root = op.get();
79
+ root->ResolveOperatorTypes();
80
+
81
+ CompressInternal(op);
82
+ }
83
+
84
+ void CompressedMaterialization::CompressInternal(unique_ptr<LogicalOperator> &op) {
85
+ if (TopN::CanOptimize(*op)) { // Let's not mess with the TopN optimizer
86
+ CompressInternal(op->children[0]->children[0]);
87
+ return;
88
+ }
89
+
90
+ for (auto &child : op->children) {
91
+ CompressInternal(child);
92
+ }
93
+
94
+ switch (op->type) {
95
+ case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY:
96
+ CompressAggregate(op);
97
+ break;
98
+ case LogicalOperatorType::LOGICAL_DISTINCT:
99
+ CompressDistinct(op);
100
+ break;
101
+ case LogicalOperatorType::LOGICAL_ORDER_BY:
102
+ CompressOrder(op);
103
+ break;
104
+ default:
105
+ return;
106
+ }
107
+ }
108
+
109
+ void CompressedMaterialization::CreateProjections(unique_ptr<LogicalOperator> &op,
110
+ CompressedMaterializationInfo &info) {
111
+ auto &materializing_op = *op;
112
+
113
+ bool compressed_anything = false;
114
+ for (idx_t i = 0; i < info.child_idxs.size(); i++) {
115
+ auto &child_info = info.child_info[i];
116
+ vector<unique_ptr<CompressExpression>> compress_exprs;
117
+ if (TryCompressChild(info, child_info, compress_exprs)) {
118
+ // We can compress: Create a projection on top of the child operator
119
+ const auto child_idx = info.child_idxs[i];
120
+ CreateCompressProjection(materializing_op.children[child_idx], std::move(compress_exprs), info, child_info);
121
+ compressed_anything = true;
122
+ }
123
+ }
124
+
125
+ if (compressed_anything) {
126
+ CreateDecompressProjection(op, info);
127
+ }
128
+ }
129
+
130
+ bool CompressedMaterialization::TryCompressChild(CompressedMaterializationInfo &info, const CMChildInfo &child_info,
131
+ vector<unique_ptr<CompressExpression>> &compress_exprs) {
132
+ // Try to compress each of the column bindings of the child
133
+ bool compressed_anything = false;
134
+ for (idx_t child_i = 0; child_i < child_info.bindings_before.size(); child_i++) {
135
+ const auto child_binding = child_info.bindings_before[child_i];
136
+ const auto &child_type = child_info.types[child_i];
137
+ const auto &can_compress = child_info.can_compress[child_i];
138
+ auto compress_expr = GetCompressExpression(child_binding, child_type, can_compress);
139
+ bool compressed = false;
140
+ if (compress_expr) { // We compressed, mark the outgoing binding in need of decompression
141
+ compress_exprs.emplace_back(std::move(compress_expr));
142
+ compressed = true;
143
+ } else { // We did not compress, just push a colref
144
+ auto colref_expr = make_uniq<BoundColumnRefExpression>(child_type, child_binding);
145
+ auto it = statistics_map.find(colref_expr->binding);
146
+ unique_ptr<BaseStatistics> colref_stats = it != statistics_map.end() ? it->second->ToUnique() : nullptr;
147
+ compress_exprs.emplace_back(make_uniq<CompressExpression>(std::move(colref_expr), std::move(colref_stats)));
148
+ }
149
+ UpdateBindingInfo(info, child_binding, compressed);
150
+ compressed_anything = compressed_anything || compressed;
151
+ }
152
+ if (!compressed_anything) {
153
+ // If we compressed anything non-generically, we still need to decompress
154
+ for (const auto &entry : info.binding_map) {
155
+ compressed_anything = compressed_anything || entry.second.needs_decompression;
156
+ }
157
+ }
158
+ return compressed_anything;
159
+ }
160
+
161
+ void CompressedMaterialization::CreateCompressProjection(unique_ptr<LogicalOperator> &child_op,
162
+ vector<unique_ptr<CompressExpression>> &&compress_exprs,
163
+ CompressedMaterializationInfo &info, CMChildInfo &child_info) {
164
+ // Replace child op with a projection
165
+ vector<unique_ptr<Expression>> projections;
166
+ projections.reserve(compress_exprs.size());
167
+ for (auto &compress_expr : compress_exprs) {
168
+ projections.emplace_back(std::move(compress_expr->expression));
169
+ }
170
+ const auto table_index = binder.GenerateTableIndex();
171
+ auto compress_projection = make_uniq<LogicalProjection>(table_index, std::move(projections));
172
+ compression_table_indices.insert(table_index);
173
+ compress_projection->ResolveOperatorTypes();
174
+
175
+ compress_projection->children.emplace_back(std::move(child_op));
176
+ child_op = std::move(compress_projection);
177
+
178
+ // Get the new bindings and types
179
+ child_info.bindings_after = child_op->GetColumnBindings();
180
+ const auto &new_types = child_op->types;
181
+
182
+ // Initialize a ColumnBindingReplacer with the new bindings and types
183
+ ColumnBindingReplacer replacer;
184
+ auto &replacement_bindings = replacer.replacement_bindings;
185
+ for (idx_t col_idx = 0; col_idx < child_info.bindings_before.size(); col_idx++) {
186
+ const auto &old_binding = child_info.bindings_before[col_idx];
187
+ const auto &new_binding = child_info.bindings_after[col_idx];
188
+ const auto &new_type = new_types[col_idx];
189
+ replacement_bindings.emplace_back(old_binding, new_binding, new_type);
190
+
191
+ // Remove the old binding from the statistics map
192
+ statistics_map.erase(old_binding);
193
+ }
194
+
195
+ // Make sure we skip the compress operator when replacing bindings
196
+ replacer.stop_operator = child_op.get();
197
+
198
+ // Make the plan consistent again
199
+ replacer.VisitOperator(*root);
200
+
201
+ // Replace in/out exprs in the binding map too
202
+ auto &binding_map = info.binding_map;
203
+ for (auto &replacement_binding : replacement_bindings) {
204
+ auto it = binding_map.find(replacement_binding.old_binding);
205
+ if (it == binding_map.end()) {
206
+ continue;
207
+ }
208
+ auto &binding_info = it->second;
209
+ if (binding_info.binding == replacement_binding.old_binding) {
210
+ binding_info.binding = replacement_binding.new_binding;
211
+ }
212
+
213
+ if (it->first == replacement_binding.old_binding) {
214
+ auto binding_info_local = std::move(binding_info);
215
+ binding_map.erase(it);
216
+ binding_map.emplace(replacement_binding.new_binding, std::move(binding_info_local));
217
+ }
218
+ }
219
+
220
+ // Add projection stats to statistics map
221
+ for (idx_t col_idx = 0; col_idx < child_info.bindings_after.size(); col_idx++) {
222
+ const auto &binding = child_info.bindings_after[col_idx];
223
+ auto &stats = compress_exprs[col_idx]->stats;
224
+ statistics_map.emplace(binding, std::move(stats));
225
+ }
226
+ }
227
+
228
+ void CompressedMaterialization::CreateDecompressProjection(unique_ptr<LogicalOperator> &op,
229
+ CompressedMaterializationInfo &info) {
230
+ const auto bindings = op->GetColumnBindings();
231
+ op->ResolveOperatorTypes();
232
+ const auto &types = op->types;
233
+
234
+ // Create decompress expressions for everything we compressed
235
+ auto &binding_map = info.binding_map;
236
+ vector<unique_ptr<Expression>> decompress_exprs;
237
+ vector<optional_ptr<BaseStatistics>> statistics;
238
+ for (idx_t col_idx = 0; col_idx < bindings.size(); col_idx++) {
239
+ const auto &binding = bindings[col_idx];
240
+ auto decompress_expr = make_uniq_base<Expression, BoundColumnRefExpression>(types[col_idx], binding);
241
+ optional_ptr<BaseStatistics> stats;
242
+ for (auto &entry : binding_map) {
243
+ auto &binding_info = entry.second;
244
+ if (binding_info.binding != binding) {
245
+ continue;
246
+ }
247
+ stats = binding_info.stats.get();
248
+ if (binding_info.needs_decompression) {
249
+ decompress_expr = GetDecompressExpression(std::move(decompress_expr), binding_info.type, *stats);
250
+ }
251
+ }
252
+ statistics.push_back(stats);
253
+ decompress_exprs.emplace_back(std::move(decompress_expr));
254
+ }
255
+
256
+ // Replace op with a projection
257
+ const auto table_index = binder.GenerateTableIndex();
258
+ auto decompress_projection = make_uniq<LogicalProjection>(table_index, std::move(decompress_exprs));
259
+ decompression_table_indices.insert(table_index);
260
+
261
+ decompress_projection->children.emplace_back(std::move(op));
262
+ op = std::move(decompress_projection);
263
+
264
+ // Check if we're placing a projection on top of the root
265
+ if (op->children[0].get() == root.get()) {
266
+ root = op.get();
267
+ return;
268
+ }
269
+
270
+ // Get the new bindings and types
271
+ auto new_bindings = op->GetColumnBindings();
272
+ op->ResolveOperatorTypes();
273
+ auto &new_types = op->types;
274
+
275
+ // Initialize a ColumnBindingReplacer with the new bindings and types
276
+ ColumnBindingReplacer replacer;
277
+ auto &replacement_bindings = replacer.replacement_bindings;
278
+ for (idx_t col_idx = 0; col_idx < bindings.size(); col_idx++) {
279
+ const auto &old_binding = bindings[col_idx];
280
+ const auto &new_binding = new_bindings[col_idx];
281
+ const auto &new_type = new_types[col_idx];
282
+ replacement_bindings.emplace_back(old_binding, new_binding, new_type);
283
+
284
+ if (statistics[col_idx]) {
285
+ statistics_map[new_binding] = statistics[col_idx]->ToUnique();
286
+ }
287
+ }
288
+
289
+ // Make sure we skip the decompress operator when replacing bindings
290
+ replacer.stop_operator = op.get();
291
+
292
+ // Make the plan consistent again
293
+ replacer.VisitOperator(*root);
294
+ }
295
+
296
+ unique_ptr<CompressExpression> CompressedMaterialization::GetCompressExpression(const ColumnBinding &binding,
297
+ const LogicalType &type,
298
+ const bool &can_compress) {
299
+ auto it = statistics_map.find(binding);
300
+ if (can_compress && it != statistics_map.end() && it->second) {
301
+ auto input = make_uniq<BoundColumnRefExpression>(type, binding);
302
+ const auto &stats = *it->second;
303
+ return GetCompressExpression(std::move(input), stats);
304
+ }
305
+ return nullptr;
306
+ }
307
+
308
+ unique_ptr<CompressExpression> CompressedMaterialization::GetCompressExpression(unique_ptr<Expression> input,
309
+ const BaseStatistics &stats) {
310
+ const auto &type = input->return_type;
311
+ if (type != stats.GetType()) { // LCOV_EXCL_START
312
+ return nullptr;
313
+ } // LCOV_EXCL_STOP
314
+ if (type.IsIntegral()) {
315
+ return GetIntegralCompress(std::move(input), stats);
316
+ } else if (type.id() == LogicalTypeId::VARCHAR) {
317
+ return GetStringCompress(std::move(input), stats);
318
+ }
319
+ return nullptr;
320
+ }
321
+
322
+ static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &type, const BaseStatistics &stats) {
323
+ auto min = NumericStats::Min(stats);
324
+ auto max = NumericStats::Max(stats);
325
+
326
+ vector<unique_ptr<Expression>> arguments;
327
+ arguments.emplace_back(make_uniq<BoundConstantExpression>(max));
328
+ arguments.emplace_back(make_uniq<BoundConstantExpression>(min));
329
+ BoundFunctionExpression sub(type, SubtractFun::GetFunction(type, type), std::move(arguments), nullptr);
330
+
331
+ Value result;
332
+ if (ExpressionExecutor::TryEvaluateScalar(context, sub, result)) {
333
+ return result;
334
+ } else {
335
+ // Couldn't evaluate: Return max hugeint as range so GetIntegralCompress will return nullptr
336
+ return Value::HUGEINT(NumericLimits<hugeint_t>::Maximum());
337
+ }
338
+ }
339
+
340
+ unique_ptr<CompressExpression> CompressedMaterialization::GetIntegralCompress(unique_ptr<Expression> input,
341
+ const BaseStatistics &stats) {
342
+ const auto &type = input->return_type;
343
+ if (GetTypeIdSize(type.InternalType()) == 1 || !NumericStats::HasMinMax(stats)) {
344
+ return nullptr;
345
+ }
346
+
347
+ // Get range and cast to UBIGINT (might fail for HUGEINT, in which case we just return)
348
+ Value range_value = GetIntegralRangeValue(context, type, stats);
349
+ if (!range_value.DefaultTryCastAs(LogicalType::UBIGINT)) {
350
+ return nullptr;
351
+ }
352
+
353
+ // Get the smallest type that the range can fit into
354
+ const auto range = UBigIntValue::Get(range_value);
355
+ LogicalType cast_type;
356
+ if (range <= NumericLimits<uint8_t>().Maximum()) {
357
+ cast_type = LogicalType::UTINYINT;
358
+ } else if (range <= NumericLimits<uint16_t>().Maximum()) {
359
+ cast_type = LogicalType::USMALLINT;
360
+ } else if (range <= NumericLimits<uint32_t>().Maximum()) {
361
+ cast_type = LogicalType::UINTEGER;
362
+ } else {
363
+ D_ASSERT(range <= NumericLimits<uint64_t>().Maximum());
364
+ cast_type = LogicalType::UBIGINT;
365
+ }
366
+
367
+ // Check if type that fits the range is smaller than the input type
368
+ if (GetTypeIdSize(cast_type.InternalType()) == GetTypeIdSize(type.InternalType())) {
369
+ return nullptr;
370
+ }
371
+ D_ASSERT(GetTypeIdSize(cast_type.InternalType()) < GetTypeIdSize(type.InternalType()));
372
+
373
+ // Compressing will yield a benefit
374
+ auto compress_function = CMIntegralCompressFun::GetFunction(type, cast_type);
375
+ vector<unique_ptr<Expression>> arguments;
376
+ arguments.emplace_back(std::move(input));
377
+ arguments.emplace_back(make_uniq<BoundConstantExpression>(NumericStats::Min(stats)));
378
+ auto compress_expr =
379
+ make_uniq<BoundFunctionExpression>(cast_type, compress_function, std::move(arguments), nullptr);
380
+
381
+ auto compress_stats = BaseStatistics::CreateEmpty(cast_type);
382
+ compress_stats.CopyBase(stats);
383
+ NumericStats::SetMin(compress_stats, Value(0).DefaultCastAs(cast_type));
384
+ NumericStats::SetMax(compress_stats, range_value.DefaultCastAs(cast_type));
385
+
386
+ return make_uniq<CompressExpression>(std::move(compress_expr), compress_stats.ToUnique());
387
+ }
388
+
389
+ unique_ptr<CompressExpression> CompressedMaterialization::GetStringCompress(unique_ptr<Expression> input,
390
+ const BaseStatistics &stats) {
391
+ if (!StringStats::HasMaxStringLength(stats)) {
392
+ return nullptr;
393
+ }
394
+
395
+ const auto max_string_length = StringStats::MaxStringLength(stats);
396
+ LogicalType cast_type = LogicalType::INVALID;
397
+ for (const auto &compressed_type : CompressedMaterializationFunctions::StringTypes()) {
398
+ if (max_string_length < GetTypeIdSize(compressed_type.InternalType())) {
399
+ cast_type = compressed_type;
400
+ break;
401
+ }
402
+ }
403
+ if (cast_type == LogicalType::INVALID) {
404
+ return nullptr;
405
+ }
406
+
407
+ auto compress_stats = BaseStatistics::CreateEmpty(cast_type);
408
+ compress_stats.CopyBase(stats);
409
+ if (cast_type.id() == LogicalTypeId::USMALLINT) {
410
+ auto min_string = StringStats::Min(stats);
411
+ auto max_string = StringStats::Max(stats);
412
+
413
+ uint8_t min_numeric = 0;
414
+ if (max_string_length != 0 && min_string.length() != 0) {
415
+ min_numeric = *reinterpret_cast<const uint8_t *>(min_string.c_str());
416
+ }
417
+ uint8_t max_numeric = 0;
418
+ if (max_string_length != 0 && max_string.length() != 0) {
419
+ max_numeric = *reinterpret_cast<const uint8_t *>(max_string.c_str());
420
+ }
421
+
422
+ Value min_val = Value::USMALLINT(min_numeric);
423
+ Value max_val = Value::USMALLINT(max_numeric + 1);
424
+ if (max_numeric < NumericLimits<uint8_t>::Maximum()) {
425
+ cast_type = LogicalType::UTINYINT;
426
+ compress_stats = BaseStatistics::CreateEmpty(cast_type);
427
+ compress_stats.CopyBase(stats);
428
+ min_val = Value::UTINYINT(min_numeric);
429
+ max_val = Value::UTINYINT(max_numeric + 1);
430
+ }
431
+
432
+ NumericStats::SetMin(compress_stats, min_val);
433
+ NumericStats::SetMax(compress_stats, max_val);
434
+ }
435
+
436
+ auto compress_function = CMStringCompressFun::GetFunction(cast_type);
437
+ vector<unique_ptr<Expression>> arguments;
438
+ arguments.emplace_back(std::move(input));
439
+ auto compress_expr =
440
+ make_uniq<BoundFunctionExpression>(cast_type, compress_function, std::move(arguments), nullptr);
441
+ return make_uniq<CompressExpression>(std::move(compress_expr), compress_stats.ToUnique());
442
+ }
443
+
444
+ unique_ptr<Expression> CompressedMaterialization::GetDecompressExpression(unique_ptr<Expression> input,
445
+ const LogicalType &result_type,
446
+ const BaseStatistics &stats) {
447
+ const auto &type = result_type;
448
+ if (TypeIsIntegral(type.InternalType())) {
449
+ return GetIntegralDecompress(std::move(input), result_type, stats);
450
+ } else if (type.id() == LogicalTypeId::VARCHAR) {
451
+ return GetStringDecompress(std::move(input), stats);
452
+ } else {
453
+ throw InternalException("Type other than integral/string marked for decompression!");
454
+ }
455
+ }
456
+
457
+ unique_ptr<Expression> CompressedMaterialization::GetIntegralDecompress(unique_ptr<Expression> input,
458
+ const LogicalType &result_type,
459
+ const BaseStatistics &stats) {
460
+ D_ASSERT(NumericStats::HasMinMax(stats));
461
+ auto decompress_function = CMIntegralDecompressFun::GetFunction(input->return_type, result_type);
462
+ vector<unique_ptr<Expression>> arguments;
463
+ arguments.emplace_back(std::move(input));
464
+ arguments.emplace_back(make_uniq<BoundConstantExpression>(NumericStats::Min(stats)));
465
+ return make_uniq<BoundFunctionExpression>(result_type, decompress_function, std::move(arguments), nullptr);
466
+ }
467
+
468
+ unique_ptr<Expression> CompressedMaterialization::GetStringDecompress(unique_ptr<Expression> input,
469
+ const BaseStatistics &stats) {
470
+ D_ASSERT(StringStats::HasMaxStringLength(stats));
471
+ auto decompress_function = CMStringDecompressFun::GetFunction(input->return_type);
472
+ vector<unique_ptr<Expression>> arguments;
473
+ arguments.emplace_back(std::move(input));
474
+ return make_uniq<BoundFunctionExpression>(decompress_function.return_type, decompress_function,
475
+ std::move(arguments), nullptr);
476
+ }
477
+
478
+ } // namespace duckdb