duckdb 0.8.2-dev1.0 → 0.8.2-dev1182.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (385)
  1. package/binding.gyp +16 -14
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/lib/duckdb.d.ts +59 -0
  6. package/lib/duckdb.js +21 -0
  7. package/package.json +1 -1
  8. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  10. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  11. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  12. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  14. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  15. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  16. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  17. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  18. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  19. package/src/duckdb/extension/icu/{icu-extension.cpp → icu_extension.cpp} +29 -34
  20. package/src/duckdb/extension/icu/include/{icu-extension.hpp → icu_extension.hpp} +2 -2
  21. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  22. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  23. package/src/duckdb/extension/json/include/{json-extension.hpp → json_extension.hpp} +2 -2
  24. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  25. package/src/duckdb/extension/json/json_common.cpp +272 -40
  26. package/src/duckdb/extension/json/{json-extension.cpp → json_extension.cpp} +4 -4
  27. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  28. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  29. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  30. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  31. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  32. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  33. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  34. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  35. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  36. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  37. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  39. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  40. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  41. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  42. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  43. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  44. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  45. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  46. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  47. package/src/duckdb/extension/parquet/{parquet-extension.cpp → parquet_extension.cpp} +190 -19
  48. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -5
  49. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  50. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  51. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  52. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  53. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  54. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  55. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  56. package/src/duckdb/src/common/allocator.cpp +16 -4
  57. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -10
  58. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
  59. package/src/duckdb/src/common/assert.cpp +3 -0
  60. package/src/duckdb/src/common/enum_util.cpp +42 -5
  61. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  62. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  63. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  64. package/src/duckdb/src/common/file_system.cpp +15 -0
  65. package/src/duckdb/src/common/local_file_system.cpp +1 -1
  66. package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
  67. package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  73. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  74. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  75. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  76. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  77. package/src/duckdb/src/common/types/date.cpp +9 -0
  78. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  81. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  82. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  83. package/src/duckdb/src/common/types/vector.cpp +15 -14
  84. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
  85. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  86. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  91. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  92. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  93. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
  94. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  95. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  96. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  97. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  98. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  100. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  101. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  102. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  103. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  104. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  105. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  106. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  107. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  108. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  109. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  110. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  111. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  112. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  113. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  114. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +413 -282
  115. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  116. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  117. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  119. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  120. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  121. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  122. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  123. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  124. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
  125. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  126. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  127. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  128. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  129. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  130. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  131. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  132. package/src/duckdb/src/execution/physical_operator.cpp +17 -14
  133. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  134. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  135. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  136. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  137. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  138. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  139. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  140. package/src/duckdb/src/function/function.cpp +2 -0
  141. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  142. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  143. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  144. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  145. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  146. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  147. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  148. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  149. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  150. package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
  151. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  152. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  153. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  154. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  155. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  156. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  157. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  158. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  159. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  160. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  161. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  162. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  163. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  164. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  165. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  166. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  167. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  168. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  169. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  170. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  171. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  172. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  173. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  174. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  175. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  176. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  177. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  178. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  179. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  180. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  181. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  182. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  183. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  184. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  185. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
  186. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  187. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  188. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  189. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  190. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  191. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  192. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  193. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  194. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  195. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  196. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  197. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  198. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -1
  199. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  200. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  201. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  202. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  204. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  205. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  206. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  207. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
  208. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  209. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  210. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  211. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  212. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  213. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  214. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  215. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  216. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  217. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  218. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
  219. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  220. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  221. package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
  222. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  223. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  224. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  225. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  226. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  227. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  228. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  229. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  230. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  231. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  232. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  233. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  234. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  235. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  236. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  237. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  238. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  239. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  240. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  241. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  242. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  243. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  244. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  246. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  247. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  248. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  249. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  250. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  251. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  252. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  253. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  254. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  255. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  257. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  258. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  259. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  260. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  261. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  262. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  263. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  265. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  266. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  267. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  268. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  269. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  270. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  271. package/src/duckdb/src/include/duckdb.h +28 -0
  272. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  273. package/src/duckdb/src/main/config.cpp +2 -0
  274. package/src/duckdb/src/main/extension/extension_helper.cpp +106 -99
  275. package/src/duckdb/src/main/settings/settings.cpp +40 -18
  276. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  277. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  278. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  279. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  280. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  281. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  282. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  283. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  284. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  285. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  286. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  287. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  288. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  289. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  290. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  291. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  292. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  293. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  294. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  295. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  296. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  297. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  298. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  299. package/src/duckdb/src/parallel/executor.cpp +15 -0
  300. package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
  301. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  302. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  303. package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
  304. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  305. package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
  306. package/src/duckdb/src/parser/query_node.cpp +18 -1
  307. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
  308. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  309. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  310. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  311. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  312. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  313. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  314. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  315. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  316. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  317. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  318. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  319. package/src/duckdb/src/parser/transformer.cpp +15 -0
  320. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  321. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  322. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  323. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  324. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  325. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  326. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  327. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  328. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  329. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  330. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  331. package/src/duckdb/src/planner/binder.cpp +5 -0
  332. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  333. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  334. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  335. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  336. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  337. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  338. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  339. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  340. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  341. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  342. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  343. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  344. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  345. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  346. package/src/duckdb/src/storage/data_table.cpp +1 -1
  347. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  348. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  349. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  350. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  351. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  352. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  353. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  354. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  355. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  356. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  357. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  358. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  359. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  360. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  361. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  362. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  363. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  364. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  365. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  366. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  367. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  368. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  369. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  370. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  371. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  372. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  373. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  374. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  375. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  376. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  377. package/src/duckdb_node.hpp +1 -0
  378. package/src/statement.cpp +103 -4
  379. package/test/columns.test.ts +243 -0
  380. package/test/test_all_types.test.ts +233 -0
  381. package/tsconfig.json +1 -0
  382. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  383. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  384. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
  385. /package/src/duckdb/extension/parquet/include/{parquet-extension.hpp → parquet_extension.hpp} +0 -0
@@ -3,6 +3,8 @@
3
3
  #include "duckdb/execution/expression_executor.hpp"
4
4
  #include "duckdb/planner/expression/bound_function_expression.hpp"
5
5
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
6
+ #include "duckdb/function/scalar/string_functions.hpp"
7
+ #include "duckdb/function/scalar/regexp.hpp"
6
8
 
7
9
  #include "re2/re2.h"
8
10
  #include "re2/regexp.h"
@@ -12,17 +14,138 @@ namespace duckdb {
12
14
  RegexOptimizationRule::RegexOptimizationRule(ExpressionRewriter &rewriter) : Rule(rewriter) {
13
15
  auto func = make_uniq<FunctionExpressionMatcher>();
14
16
  func->function = make_uniq<SpecificFunctionMatcher>("regexp_matches");
15
- func->policy = SetMatcher::Policy::ORDERED;
17
+ func->policy = SetMatcher::Policy::SOME_ORDERED;
16
18
  func->matchers.push_back(make_uniq<ExpressionMatcher>());
17
19
  func->matchers.push_back(make_uniq<ConstantExpressionMatcher>());
20
+
18
21
  root = std::move(func);
19
22
  }
20
23
 
24
+ struct LikeString {
25
+ bool exists = true;
26
+ bool escaped = false;
27
+ string like_string = "";
28
+ };
29
+
30
+ static void AddCharacter(char chr, LikeString &ret, bool contains) {
31
+ // if we are not converting into a contains, and the string has LIKE special characters
32
+ // then don't return a possible LIKE match
33
+ // same if the character is a control character
34
+ if (iscntrl(chr) || (!contains && (chr == '%' || chr == '_'))) {
35
+ ret.exists = false;
36
+ return;
37
+ }
38
+ auto run_as_str {chr};
39
+ ret.like_string += run_as_str;
40
+ }
41
+
42
+ static LikeString GetLikeStringEscaped(duckdb_re2::Regexp *regexp, bool contains = false) {
43
+ D_ASSERT(regexp->op() == duckdb_re2::kRegexpLiteralString || regexp->op() == duckdb_re2::kRegexpLiteral);
44
+ LikeString ret;
45
+
46
+ if (regexp->parse_flags() & duckdb_re2::Regexp::FoldCase ||
47
+ !(regexp->parse_flags() & duckdb_re2::Regexp::OneLine)) {
48
+ // parse flags can turn on and off within a regex match, return no optimization
49
+ // For now, we just don't optimize if these every turn on.
50
+ // TODO: logic to attempt the optimization, then if the parse flags change, then abort
51
+ ret.exists = false;
52
+ return ret;
53
+ }
54
+
55
+ // case insensitivity may be on now, but it can also turn off.
56
+ if (regexp->op() == duckdb_re2::kRegexpLiteralString) {
57
+ auto nrunes = (idx_t)regexp->nrunes();
58
+ auto runes = regexp->runes();
59
+ for (idx_t i = 0; i < nrunes; i++) {
60
+ char chr = toascii(runes[i]);
61
+ AddCharacter(chr, ret, contains);
62
+ if (!ret.exists) {
63
+ return ret;
64
+ }
65
+ }
66
+ } else {
67
+ auto rune = regexp->rune();
68
+ char chr = toascii(rune);
69
+ AddCharacter(chr, ret, contains);
70
+ }
71
+ D_ASSERT(ret.like_string.size() >= 1 || !ret.exists);
72
+ return ret;
73
+ }
74
+
75
+ static LikeString LikeMatchFromRegex(duckdb_re2::RE2 &pattern) {
76
+ LikeString ret = LikeString();
77
+ auto num_subs = pattern.Regexp()->nsub();
78
+ auto subs = pattern.Regexp()->sub();
79
+ auto cur_sub_index = 0;
80
+ while (cur_sub_index < num_subs) {
81
+ switch (subs[cur_sub_index]->op()) {
82
+ case duckdb_re2::kRegexpAnyChar:
83
+ if (cur_sub_index == 0) {
84
+ ret.like_string += "%";
85
+ }
86
+ ret.like_string += "_";
87
+ if (cur_sub_index + 1 == num_subs) {
88
+ ret.like_string += "%";
89
+ }
90
+ break;
91
+ case duckdb_re2::kRegexpStar:
92
+ // .* is a Star operator is a anyChar operator as a child.
93
+ // any other child operator would represent a pattern LIKE cannot match.
94
+ if (subs[cur_sub_index]->nsub() == 1 && subs[cur_sub_index]->sub()[0]->op() == duckdb_re2::kRegexpAnyChar) {
95
+ ret.like_string += "%";
96
+ break;
97
+ }
98
+ ret.exists = false;
99
+ return ret;
100
+ case duckdb_re2::kRegexpLiteralString:
101
+ case duckdb_re2::kRegexpLiteral: {
102
+ // if this is the only matching op, we should have directly called
103
+ // GetEscapedLikeString
104
+ D_ASSERT(!(cur_sub_index == 0 && cur_sub_index + 1 == num_subs));
105
+ if (cur_sub_index == 0) {
106
+ ret.like_string += "%";
107
+ }
108
+ // if the kRegexpLiteral or kRegexpLiteralString is the only op to match
109
+ // the string can directly be converted into a contains
110
+ LikeString escaped_like_string = GetLikeStringEscaped(subs[cur_sub_index], false);
111
+ if (!escaped_like_string.exists) {
112
+ return escaped_like_string;
113
+ }
114
+ ret.like_string += escaped_like_string.like_string;
115
+ ret.escaped = escaped_like_string.escaped;
116
+ if (cur_sub_index + 1 == num_subs) {
117
+ ret.like_string += "%";
118
+ }
119
+ break;
120
+ }
121
+ case duckdb_re2::kRegexpEndText:
122
+ case duckdb_re2::kRegexpEmptyMatch:
123
+ case duckdb_re2::kRegexpBeginText: {
124
+ break;
125
+ }
126
+ default:
127
+ // some other regexp op that doesn't have an equivalent to a like string
128
+ // return false;
129
+ ret.exists = false;
130
+ return ret;
131
+ }
132
+ cur_sub_index += 1;
133
+ }
134
+ return ret;
135
+ }
136
+
21
137
  unique_ptr<Expression> RegexOptimizationRule::Apply(LogicalOperator &op, vector<reference<Expression>> &bindings,
22
138
  bool &changes_made, bool is_root) {
23
139
  auto &root = bindings[0].get().Cast<BoundFunctionExpression>();
24
140
  auto &constant_expr = bindings[2].get().Cast<BoundConstantExpression>();
25
- D_ASSERT(root.children.size() == 2);
141
+ D_ASSERT(root.children.size() == 2 || root.children.size() == 3);
142
+ auto regexp_bind_data = root.bind_info.get()->Cast<RegexpMatchesBindData>();
143
+
144
+ auto constant_value = ExpressionExecutor::EvaluateScalar(GetContext(), constant_expr);
145
+ D_ASSERT(constant_value.type() == constant_expr.return_type);
146
+ auto patt_str = StringValue::Get(constant_value);
147
+
148
+ duckdb_re2::RE2::Options parsed_options = regexp_bind_data.options;
26
149
 
27
150
  if (constant_expr.value.IsNull()) {
28
151
  return make_uniq<BoundConstantExpression>(Value(root.return_type));
@@ -31,34 +154,50 @@ unique_ptr<Expression> RegexOptimizationRule::Apply(LogicalOperator &op, vector<
31
154
  // the constant_expr is a scalar expression that we have to fold
32
155
  if (!constant_expr.IsFoldable()) {
33
156
  return nullptr;
34
- }
157
+ };
35
158
 
36
- auto constant_value = ExpressionExecutor::EvaluateScalar(GetContext(), constant_expr);
37
- D_ASSERT(constant_value.type() == constant_expr.return_type);
38
- auto patt_str = StringValue::Get(constant_value);
39
-
40
- duckdb_re2::RE2 pattern(patt_str);
159
+ duckdb_re2::RE2 pattern(patt_str, parsed_options);
41
160
  if (!pattern.ok()) {
42
161
  return nullptr; // this should fail somewhere else
43
162
  }
44
163
 
164
+ LikeString like_string;
165
+ // check for a like string. If we can convert it to a like string, the like string
166
+ // optimizer will further optimize suffix and prefix things.
45
167
  if (pattern.Regexp()->op() == duckdb_re2::kRegexpLiteralString ||
46
168
  pattern.Regexp()->op() == duckdb_re2::kRegexpLiteral) {
47
-
48
- string min;
49
- string max;
50
- pattern.PossibleMatchRange(&min, &max, patt_str.size() + 1);
51
- if (min != max) {
169
+ // convert to contains.
170
+ LikeString escaped_like_string = GetLikeStringEscaped(pattern.Regexp(), true);
171
+ if (!escaped_like_string.exists) {
52
172
  return nullptr;
53
173
  }
54
- auto parameter = make_uniq<BoundConstantExpression>(Value(std::move(min)));
174
+ auto parameter = make_uniq<BoundConstantExpression>(Value(std::move(escaped_like_string.like_string)));
55
175
  auto contains = make_uniq<BoundFunctionExpression>(root.return_type, ContainsFun::GetFunction(),
56
176
  std::move(root.children), nullptr);
57
177
  contains->children[1] = std::move(parameter);
58
178
 
59
179
  return std::move(contains);
180
+ } else if (pattern.Regexp()->op() == duckdb_re2::kRegexpConcat) {
181
+ like_string = LikeMatchFromRegex(pattern);
182
+ } else {
183
+ like_string.exists = false;
184
+ }
185
+
186
+ if (!like_string.exists) {
187
+ return nullptr;
188
+ }
189
+
190
+ // if regexp had options, remove them so the new Like Expression can be matched for other optimizers.
191
+ if (root.children.size() == 3) {
192
+ root.children.pop_back();
193
+ D_ASSERT(root.children.size() == 2);
60
194
  }
61
- return nullptr;
195
+
196
+ auto like_expression = make_uniq<BoundFunctionExpression>(root.return_type, LikeFun::GetLikeFunction(),
197
+ std::move(root.children), nullptr);
198
+ auto parameter = make_uniq<BoundConstantExpression>(Value(std::move(like_string.like_string)));
199
+ like_expression->children[1] = std::move(parameter);
200
+ return std::move(like_expression);
62
201
  }
63
202
 
64
203
  } // namespace duckdb
@@ -1,9 +1,13 @@
1
1
  #include "duckdb/common/types/hugeint.hpp"
2
+ #include "duckdb/optimizer/filter_pushdown.hpp"
2
3
  #include "duckdb/optimizer/statistics_propagator.hpp"
3
4
  #include "duckdb/planner/expression/bound_columnref_expression.hpp"
5
+ #include "duckdb/planner/expression/bound_comparison_expression.hpp"
6
+ #include "duckdb/planner/expression/bound_constant_expression.hpp"
4
7
  #include "duckdb/planner/operator/logical_any_join.hpp"
5
8
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
6
9
  #include "duckdb/planner/operator/logical_cross_product.hpp"
10
+ #include "duckdb/planner/operator/logical_filter.hpp"
7
11
  #include "duckdb/planner/operator/logical_join.hpp"
8
12
  #include "duckdb/planner/operator/logical_limit.hpp"
9
13
  #include "duckdb/planner/operator/logical_positional_join.hpp"
@@ -13,8 +17,8 @@ namespace duckdb {
13
17
  void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, unique_ptr<LogicalOperator> *node_ptr) {
14
18
  for (idx_t i = 0; i < join.conditions.size(); i++) {
15
19
  auto &condition = join.conditions[i];
16
- auto stats_left = PropagateExpression(condition.left);
17
- auto stats_right = PropagateExpression(condition.right);
20
+ const auto stats_left = PropagateExpression(condition.left);
21
+ const auto stats_right = PropagateExpression(condition.right);
18
22
  if (stats_left && stats_right) {
19
23
  if ((condition.comparison == ExpressionType::COMPARE_DISTINCT_FROM ||
20
24
  condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) &&
@@ -25,8 +29,8 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
25
29
  }
26
30
  auto prune_result = PropagateComparison(*stats_left, *stats_right, condition.comparison);
27
31
  // Add stats to logical_join for perfect hash join
28
- join.join_stats.push_back(std::move(stats_left));
29
- join.join_stats.push_back(std::move(stats_right));
32
+ join.join_stats.push_back(stats_left->ToUnique());
33
+ join.join_stats.push_back(stats_right->ToUnique());
30
34
  switch (prune_result) {
31
35
  case FilterPropagateResult::FILTER_FALSE_OR_NULL:
32
36
  case FilterPropagateResult::FILTER_ALWAYS_FALSE:
@@ -123,12 +127,21 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
123
127
  case JoinType::INNER:
124
128
  case JoinType::SEMI: {
125
129
  UpdateFilterStatistics(*condition.left, *condition.right, condition.comparison);
126
- auto stats_left = PropagateExpression(condition.left);
127
- auto stats_right = PropagateExpression(condition.right);
130
+ auto updated_stats_left = PropagateExpression(condition.left);
131
+ auto updated_stats_right = PropagateExpression(condition.right);
132
+
133
+ // Try to push lhs stats down rhs and vice versa
134
+ if (!context.config.force_index_join && stats_left && stats_right && updated_stats_left &&
135
+ updated_stats_right && condition.left->type == ExpressionType::BOUND_COLUMN_REF &&
136
+ condition.right->type == ExpressionType::BOUND_COLUMN_REF) {
137
+ CreateFilterFromJoinStats(join.children[0], condition.left, *stats_left, *updated_stats_left);
138
+ CreateFilterFromJoinStats(join.children[1], condition.right, *stats_right, *updated_stats_right);
139
+ }
140
+
128
141
  // Update join_stats when is already part of the join
129
142
  if (join.join_stats.size() == 2) {
130
- join.join_stats[0] = std::move(stats_left);
131
- join.join_stats[1] = std::move(stats_right);
143
+ join.join_stats[0] = std::move(updated_stats_left);
144
+ join.join_stats[1] = std::move(updated_stats_right);
132
145
  }
133
146
  break;
134
147
  }
@@ -281,4 +294,48 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalPosi
281
294
  return std::move(node_stats);
282
295
  }
283
296
 
297
+ void StatisticsPropagator::CreateFilterFromJoinStats(unique_ptr<LogicalOperator> &child, unique_ptr<Expression> &expr,
298
+ const BaseStatistics &stats_before,
299
+ const BaseStatistics &stats_after) {
300
+ // Only do this for integral colref's that have stats
301
+ if (expr->type != ExpressionType::BOUND_COLUMN_REF || !expr->return_type.IsIntegral() ||
302
+ !NumericStats::HasMinMax(stats_before) || !NumericStats::HasMinMax(stats_after)) {
303
+ return;
304
+ }
305
+
306
+ // Retrieve min/max
307
+ auto min_before = NumericStats::Min(stats_before);
308
+ auto max_before = NumericStats::Max(stats_before);
309
+ auto min_after = NumericStats::Min(stats_after);
310
+ auto max_after = NumericStats::Max(stats_after);
311
+
312
+ vector<unique_ptr<Expression>> filter_exprs;
313
+ if (min_after > min_before) {
314
+ filter_exprs.emplace_back(
315
+ make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_GREATERTHANOREQUALTO, expr->Copy(),
316
+ make_uniq<BoundConstantExpression>(std::move(min_after))));
317
+ }
318
+ if (max_after < max_before) {
319
+ filter_exprs.emplace_back(
320
+ make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_LESSTHANOREQUALTO, expr->Copy(),
321
+ make_uniq<BoundConstantExpression>(std::move(max_after))));
322
+ }
323
+
324
+ if (filter_exprs.empty()) {
325
+ return;
326
+ }
327
+
328
+ auto filter = make_uniq<LogicalFilter>();
329
+ filter->children.emplace_back(std::move(child));
330
+ child = std::move(filter);
331
+
332
+ for (auto &filter_expr : filter_exprs) {
333
+ child->expressions.emplace_back(std::move(filter_expr));
334
+ }
335
+
336
+ FilterPushdown filter_pushdown(optimizer);
337
+ child = filter_pushdown.Rewrite(std::move(child));
338
+ PropagateExpression(expr);
339
+ }
340
+
284
341
  } // namespace duckdb
@@ -11,7 +11,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalOrde
11
11
 
12
12
  // then propagate to each of the order expressions
13
13
  for (auto &bound_order : order.orders) {
14
- PropagateAndCompress(bound_order.expression, bound_order.stats);
14
+ bound_order.stats = PropagateExpression(bound_order.expression);
15
15
  }
16
16
  return std::move(node_stats);
17
17
  }
@@ -1,24 +1,26 @@
1
1
  #include "duckdb/optimizer/statistics_propagator.hpp"
2
2
 
3
3
  #include "duckdb/main/client_context.hpp"
4
+ #include "duckdb/optimizer/optimizer.hpp"
5
+ #include "duckdb/planner/expression/list.hpp"
4
6
  #include "duckdb/planner/expression_iterator.hpp"
5
7
  #include "duckdb/planner/logical_operator.hpp"
6
8
  #include "duckdb/planner/operator/logical_aggregate.hpp"
7
- #include "duckdb/planner/operator/logical_empty_result.hpp"
8
9
  #include "duckdb/planner/operator/logical_cross_product.hpp"
10
+ #include "duckdb/planner/operator/logical_empty_result.hpp"
9
11
  #include "duckdb/planner/operator/logical_filter.hpp"
10
12
  #include "duckdb/planner/operator/logical_get.hpp"
11
13
  #include "duckdb/planner/operator/logical_join.hpp"
12
- #include "duckdb/planner/operator/logical_projection.hpp"
14
+ #include "duckdb/planner/operator/logical_order.hpp"
13
15
  #include "duckdb/planner/operator/logical_positional_join.hpp"
16
+ #include "duckdb/planner/operator/logical_projection.hpp"
14
17
  #include "duckdb/planner/operator/logical_set_operation.hpp"
15
- #include "duckdb/planner/operator/logical_order.hpp"
16
18
  #include "duckdb/planner/operator/logical_window.hpp"
17
- #include "duckdb/planner/expression/list.hpp"
18
19
 
19
20
  namespace duckdb {
20
21
 
21
- StatisticsPropagator::StatisticsPropagator(ClientContext &context) : context(context) {
22
+ StatisticsPropagator::StatisticsPropagator(Optimizer &optimizer_p)
23
+ : optimizer(optimizer_p), context(optimizer.context) {
22
24
  }
23
25
 
24
26
  void StatisticsPropagator::ReplaceWithEmptyResult(unique_ptr<LogicalOperator> &node) {
@@ -1,24 +1,34 @@
1
1
  #include "duckdb/optimizer/topn_optimizer.hpp"
2
- #include "duckdb/planner/operator/logical_order.hpp"
2
+
3
+ #include "duckdb/common/limits.hpp"
3
4
  #include "duckdb/planner/operator/logical_limit.hpp"
5
+ #include "duckdb/planner/operator/logical_order.hpp"
4
6
  #include "duckdb/planner/operator/logical_top_n.hpp"
5
- #include "duckdb/common/limits.hpp"
6
7
 
7
8
  namespace duckdb {
8
9
 
9
- unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
10
- if (op->type == LogicalOperatorType::LOGICAL_LIMIT &&
11
- op->children[0]->type == LogicalOperatorType::LOGICAL_ORDER_BY) {
12
- auto &limit = op->Cast<LogicalLimit>();
13
- auto &order_by = (op->children[0])->Cast<LogicalOrder>();
10
+ bool TopN::CanOptimize(LogicalOperator &op) {
11
+ if (op.type == LogicalOperatorType::LOGICAL_LIMIT &&
12
+ op.children[0]->type == LogicalOperatorType::LOGICAL_ORDER_BY) {
13
+ auto &limit = op.Cast<LogicalLimit>();
14
14
 
15
15
  // This optimization doesn't apply when OFFSET is present without LIMIT
16
16
  // Or if offset is not constant
17
17
  if (limit.limit_val != NumericLimits<int64_t>::Maximum() || limit.offset) {
18
- auto topn = make_uniq<LogicalTopN>(std::move(order_by.orders), limit.limit_val, limit.offset_val);
19
- topn->AddChild(std::move(order_by.children[0]));
20
- op = std::move(topn);
18
+ return true;
21
19
  }
20
+ }
21
+ return false;
22
+ }
23
+
24
+ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
25
+ if (CanOptimize(*op)) {
26
+ auto &limit = op->Cast<LogicalLimit>();
27
+ auto &order_by = (op->children[0])->Cast<LogicalOrder>();
28
+
29
+ auto topn = make_uniq<LogicalTopN>(std::move(order_by.orders), limit.limit_val, limit.offset_val);
30
+ topn->AddChild(std::move(order_by.children[0]));
31
+ op = std::move(topn);
22
32
  } else {
23
33
  for (auto &child : op->children) {
24
34
  child = Optimize(std::move(child));
@@ -3,6 +3,7 @@
3
3
  #include "duckdb/execution/execution_context.hpp"
4
4
  #include "duckdb/execution/operator/helper/physical_result_collector.hpp"
5
5
  #include "duckdb/execution/operator/set/physical_recursive_cte.hpp"
6
+ #include "duckdb/execution/operator/set/physical_cte.hpp"
6
7
  #include "duckdb/execution/physical_operator.hpp"
7
8
  #include "duckdb/main/client_context.hpp"
8
9
  #include "duckdb/main/client_data.hpp"
@@ -267,6 +268,10 @@ void Executor::AddRecursiveCTE(PhysicalOperator &rec_cte) {
267
268
  recursive_ctes.push_back(rec_cte);
268
269
  }
269
270
 
271
+ void Executor::AddMaterializedCTE(PhysicalOperator &mat_cte) {
272
+ materialized_ctes.push_back(mat_cte);
273
+ }
274
+
270
275
  void Executor::ReschedulePipelines(const vector<shared_ptr<MetaPipeline>> &pipelines_p,
271
276
  vector<shared_ptr<Event>> &events_p) {
272
277
  ScheduleEventData event_data(pipelines_p, events_p, false);
@@ -344,6 +349,12 @@ void Executor::InitializeInternal(PhysicalOperator &plan) {
344
349
  rec_cte.recursive_meta_pipeline->Ready();
345
350
  }
346
351
 
352
+ // ready materialized cte pipelines too
353
+ for (auto &mat_cte_ref : materialized_ctes) {
354
+ auto &mat_cte = mat_cte_ref.get().Cast<PhysicalCTE>();
355
+ mat_cte.recursive_meta_pipeline->Ready();
356
+ }
357
+
347
358
  // set root pipelines, i.e., all pipelines that end in the final sink
348
359
  root_pipeline->GetPipelines(root_pipelines, false);
349
360
  root_pipeline_idx = 0;
@@ -381,6 +392,10 @@ void Executor::CancelTasks() {
381
392
  auto &rec_cte = rec_cte_ref.get().Cast<PhysicalRecursiveCTE>();
382
393
  rec_cte.recursive_meta_pipeline.reset();
383
394
  }
395
+ for (auto &mat_cte_ref : materialized_ctes) {
396
+ auto &mat_cte = mat_cte_ref.get().Cast<PhysicalCTE>();
397
+ mat_cte.recursive_meta_pipeline.reset();
398
+ }
384
399
  pipelines.clear();
385
400
  root_pipelines.clear();
386
401
  to_be_rescheduled_tasks.clear();
@@ -17,11 +17,10 @@ PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_
17
17
  requires_batch_index = pipeline.sink->RequiresBatchIndex() && pipeline.source->SupportsBatchIndex();
18
18
  if (requires_batch_index) {
19
19
  auto &partition_info = local_sink_state->partition_info;
20
- if (!partition_info.batch_index.IsValid()) {
21
- // batch index is not set yet - initialize before fetching anything
22
- partition_info.batch_index = pipeline.RegisterNewBatchIndex();
23
- partition_info.min_batch_index = partition_info.batch_index;
24
- }
20
+ D_ASSERT(!partition_info.batch_index.IsValid());
21
+ // batch index is not set yet - initialize before fetching anything
22
+ partition_info.batch_index = pipeline.RegisterNewBatchIndex();
23
+ partition_info.min_batch_index = partition_info.batch_index;
25
24
  }
26
25
  }
27
26
  local_source_state = pipeline.source->GetLocalSourceState(context, *pipeline.source_state);
@@ -79,6 +78,7 @@ bool PipelineExecutor::TryFlushCachingOperators() {
79
78
  OperatorResultType push_result;
80
79
 
81
80
  if (in_process_operators.empty()) {
81
+ curr_chunk.Reset();
82
82
  StartOperator(current_operator);
83
83
  finalize_result = current_operator.FinalExecute(context, curr_chunk, *current_operator.op_state,
84
84
  *intermediate_states[flushing_idx]);
@@ -477,7 +477,8 @@ SourceResultType PipelineExecutor::FetchFromSource(DataChunk &result) {
477
477
  } else {
478
478
  next_batch_index =
479
479
  pipeline.source->GetBatchIndex(context, result, *pipeline.source_state, *local_source_state);
480
- next_batch_index += pipeline.base_batch_index;
480
+ // we start with the base_batch_index as a valid starting value. Make sure that next batch is called below
481
+ next_batch_index += pipeline.base_batch_index + 1;
481
482
  }
482
483
  auto &partition_info = local_sink_state->partition_info;
483
484
  if (next_batch_index != partition_info.batch_index.GetIndex()) {
@@ -1,13 +1,14 @@
1
1
  #include "duckdb/parallel/task_scheduler.hpp"
2
2
 
3
+ #include "duckdb/common/chrono.hpp"
3
4
  #include "duckdb/common/exception.hpp"
4
5
  #include "duckdb/main/client_context.hpp"
5
6
  #include "duckdb/main/database.hpp"
6
7
 
7
8
  #ifndef DUCKDB_NO_THREADS
8
9
  #include "concurrentqueue.h"
9
- #include "lightweightsemaphore.h"
10
10
  #include "duckdb/common/thread.hpp"
11
+ #include "lightweightsemaphore.h"
11
12
  #else
12
13
  #include <queue>
13
14
  #endif
@@ -93,7 +94,9 @@ ProducerToken::ProducerToken(TaskScheduler &scheduler, unique_ptr<QueueProducerT
93
94
  ProducerToken::~ProducerToken() {
94
95
  }
95
96
 
96
- TaskScheduler::TaskScheduler(DatabaseInstance &db) : db(db), queue(make_uniq<ConcurrentQueue>()) {
97
+ TaskScheduler::TaskScheduler(DatabaseInstance &db)
98
+ : db(db), queue(make_uniq<ConcurrentQueue>()),
99
+ allocator_flush_threshold(db.config.options.allocator_flush_threshold) {
97
100
  }
98
101
 
99
102
  TaskScheduler::~TaskScheduler() {
@@ -146,6 +149,9 @@ void TaskScheduler::ExecuteForever(atomic<bool> *marker) {
146
149
  task.reset();
147
150
  break;
148
151
  }
152
+
153
+ // Flushes the outstanding allocator's outstanding allocations
154
+ Allocator::ThreadFlush(allocator_flush_threshold);
149
155
  }
150
156
  }
151
157
  #else
@@ -241,6 +247,9 @@ void TaskScheduler::SetThreads(int32_t n) {
241
247
  #endif
242
248
  }
243
249
 
250
+ void TaskScheduler::SetAllocatorFlushTreshold(idx_t threshold) {
251
+ }
252
+
244
253
  void TaskScheduler::Signal(idx_t n) {
245
254
  #ifndef DUCKDB_NO_THREADS
246
255
  queue->semaphore.signal(n);
@@ -7,12 +7,14 @@ namespace duckdb {
7
7
  void CommonTableExpressionInfo::FormatSerialize(FormatSerializer &serializer) const {
8
8
  serializer.WriteProperty("aliases", aliases);
9
9
  serializer.WriteProperty("query", query);
10
+ serializer.WriteProperty("materialized", materialized);
10
11
  }
11
12
 
12
13
  unique_ptr<CommonTableExpressionInfo> CommonTableExpressionInfo::FormatDeserialize(FormatDeserializer &deserializer) {
13
14
  auto result = make_uniq<CommonTableExpressionInfo>();
14
15
  result->aliases = deserializer.ReadProperty<vector<string>>("aliases");
15
16
  result->query = deserializer.ReadProperty<unique_ptr<SelectStatement>>("query");
17
+ result->materialized = deserializer.ReadProperty<CTEMaterialize>("materialized");
16
18
  return result;
17
19
  }
18
20
 
@@ -13,7 +13,7 @@ LambdaExpression::LambdaExpression(unique_ptr<ParsedExpression> lhs, unique_ptr<
13
13
  }
14
14
 
15
15
  string LambdaExpression::ToString() const {
16
- return lhs->ToString() + " -> " + expr->ToString();
16
+ return "(" + lhs->ToString() + " -> " + expr->ToString() + ")";
17
17
  }
18
18
 
19
19
  bool LambdaExpression::Equal(const LambdaExpression &a, const LambdaExpression &b) {
@@ -3,6 +3,7 @@
3
3
  #include "duckdb/parser/expression/list.hpp"
4
4
  #include "duckdb/parser/query_node.hpp"
5
5
  #include "duckdb/parser/query_node/recursive_cte_node.hpp"
6
+ #include "duckdb/parser/query_node/cte_node.hpp"
6
7
  #include "duckdb/parser/query_node/select_node.hpp"
7
8
  #include "duckdb/parser/query_node/set_operation_node.hpp"
8
9
  #include "duckdb/parser/tableref/list.hpp"
@@ -256,6 +257,12 @@ void ParsedExpressionIterator::EnumerateQueryNodeChildren(
256
257
  EnumerateQueryNodeChildren(*rcte_node.right, callback);
257
258
  break;
258
259
  }
260
+ case QueryNodeType::CTE_NODE: {
261
+ auto &cte_node = node.Cast<CTENode>();
262
+ EnumerateQueryNodeChildren(*cte_node.query, callback);
263
+ EnumerateQueryNodeChildren(*cte_node.child, callback);
264
+ break;
265
+ }
259
266
  case QueryNodeType::SELECT_NODE: {
260
267
  auto &sel_node = node.Cast<SelectNode>();
261
268
  for (idx_t i = 0; i < sel_node.select_list.size(); i++) {
@@ -0,0 +1,75 @@
1
+ #include "duckdb/parser/query_node/cte_node.hpp"
2
+ #include "duckdb/common/field_writer.hpp"
3
+ #include "duckdb/common/serializer/format_serializer.hpp"
4
+ #include "duckdb/common/serializer/format_deserializer.hpp"
5
+
6
+ namespace duckdb {
7
+
8
+ string CTENode::ToString() const {
9
+ string result;
10
+ result += child->ToString();
11
+ return result;
12
+ }
13
+
14
+ bool CTENode::Equals(const QueryNode *other_p) const {
15
+ if (!QueryNode::Equals(other_p)) {
16
+ return false;
17
+ }
18
+ if (this == other_p) {
19
+ return true;
20
+ }
21
+ auto &other = other_p->Cast<CTENode>();
22
+
23
+ if (!query->Equals(other.query.get())) {
24
+ return false;
25
+ }
26
+ if (!child->Equals(other.child.get())) {
27
+ return false;
28
+ }
29
+ return true;
30
+ }
31
+
32
+ unique_ptr<QueryNode> CTENode::Copy() const {
33
+ auto result = make_uniq<CTENode>();
34
+ result->ctename = ctename;
35
+ result->query = query->Copy();
36
+ result->child = child->Copy();
37
+ result->aliases = aliases;
38
+ this->CopyProperties(*result);
39
+ return std::move(result);
40
+ }
41
+
42
+ void CTENode::Serialize(FieldWriter &writer) const {
43
+ writer.WriteString(ctename);
44
+ writer.WriteSerializable(*query);
45
+ writer.WriteSerializable(*child);
46
+ writer.WriteList<string>(aliases);
47
+ }
48
+
49
+ unique_ptr<QueryNode> CTENode::Deserialize(FieldReader &reader) {
50
+ auto result = make_uniq<CTENode>();
51
+ result->ctename = reader.ReadRequired<string>();
52
+ result->query = reader.ReadRequiredSerializable<QueryNode>();
53
+ result->child = reader.ReadRequiredSerializable<QueryNode>();
54
+ result->aliases = reader.ReadRequiredList<string>();
55
+ return std::move(result);
56
+ }
57
+
58
+ void CTENode::FormatSerialize(FormatSerializer &serializer) const {
59
+ QueryNode::FormatSerialize(serializer);
60
+ serializer.WriteProperty("cte_name", ctename);
61
+ serializer.WriteProperty("query", *query);
62
+ serializer.WriteProperty("child", *child);
63
+ serializer.WriteProperty("aliases", aliases);
64
+ }
65
+
66
+ unique_ptr<QueryNode> CTENode::FormatDeserialize(FormatDeserializer &deserializer) {
67
+ auto result = make_uniq<CTENode>();
68
+ deserializer.ReadProperty("cte_name", result->ctename);
69
+ deserializer.ReadProperty("query", result->query);
70
+ deserializer.ReadProperty("child", result->child);
71
+ deserializer.ReadProperty("aliases", result->aliases);
72
+ return std::move(result);
73
+ }
74
+
75
+ } // namespace duckdb