duckdb 0.8.2-dev150.0 → 0.8.2-dev1549.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (489)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  61. package/src/duckdb/src/common/file_system.cpp +19 -0
  62. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  63. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  64. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  65. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  66. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  67. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/bit.cpp +51 -0
  73. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  74. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  75. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  76. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  77. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  78. package/src/duckdb/src/common/types/date.cpp +9 -0
  79. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  80. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  83. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  84. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  85. package/src/duckdb/src/common/types.cpp +8 -655
  86. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  87. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  88. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  94. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  95. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  96. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  97. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  98. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  99. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  100. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  102. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  103. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  104. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  105. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  106. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  107. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  108. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  109. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  110. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  111. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  112. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  113. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  114. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  115. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  116. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  117. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  118. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  119. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  121. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  122. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  124. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  125. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  126. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  127. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  128. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  129. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  131. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  132. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  133. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  134. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  135. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  136. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  137. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  138. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  139. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  140. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  141. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +13 -22
  142. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  143. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  144. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  145. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  146. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  147. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  148. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  149. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  150. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  151. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  152. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  153. package/src/duckdb/src/function/function.cpp +3 -1
  154. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  155. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  156. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  157. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  158. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  159. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  160. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  161. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  162. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  163. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  164. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  165. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  166. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  168. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  169. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  170. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  171. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  172. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  173. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  174. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  176. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  177. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  178. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  179. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  180. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  181. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  182. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  183. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  184. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  185. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  186. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  187. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  188. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  189. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  190. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  191. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  192. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  193. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  194. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  195. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  196. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  197. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  198. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  199. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  200. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  201. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  202. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  204. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  207. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  211. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  221. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  222. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  226. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  227. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  228. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  229. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  230. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  231. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  232. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  233. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  234. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  235. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  236. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  237. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  238. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  239. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  240. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  241. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  242. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  245. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  246. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  247. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  248. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  249. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  250. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  251. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  252. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  253. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  254. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  255. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  257. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  258. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  260. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  261. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  262. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  263. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  264. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  265. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  266. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  267. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  269. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  270. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  271. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  272. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  273. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  274. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  275. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  277. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  279. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  280. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  281. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  282. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  283. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  284. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  286. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  287. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  295. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  296. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  297. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  298. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  299. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  300. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  302. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  303. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  304. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  306. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  307. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  308. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  309. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  310. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  311. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  312. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  313. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  314. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  315. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  316. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  317. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  318. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  319. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  320. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  321. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  322. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  323. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  324. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  325. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  326. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  327. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  328. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  329. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  330. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  331. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  332. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  333. package/src/duckdb/src/include/duckdb.h +28 -0
  334. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  335. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  336. package/src/duckdb/src/main/config.cpp +4 -0
  337. package/src/duckdb/src/main/database.cpp +1 -1
  338. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  339. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  340. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  341. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  342. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  343. package/src/duckdb/src/main/relation.cpp +6 -5
  344. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  345. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  346. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  347. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  348. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  349. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  350. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  351. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  352. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  353. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  354. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  355. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  356. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  357. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  358. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  359. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  360. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  361. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  362. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  363. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  364. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  365. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  366. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  367. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  368. package/src/duckdb/src/parallel/executor.cpp +15 -0
  369. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  370. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  371. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  372. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  373. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  374. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  375. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  376. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  377. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  378. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  379. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  380. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  381. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  382. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  383. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  384. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  385. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  386. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  387. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  388. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  389. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  390. package/src/duckdb/src/parser/parser.cpp +8 -2
  391. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  392. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  393. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  394. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  395. package/src/duckdb/src/parser/query_node.cpp +15 -37
  396. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  397. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  398. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  399. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  400. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  401. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  402. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  403. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  404. package/src/duckdb/src/parser/tableref.cpp +0 -44
  405. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  406. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  407. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  408. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  409. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  410. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  411. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  412. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  413. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  414. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  415. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  416. package/src/duckdb/src/parser/transformer.cpp +15 -0
  417. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  418. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  419. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  420. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  421. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  422. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  423. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  424. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  425. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  426. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  427. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  428. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  429. package/src/duckdb/src/planner/binder.cpp +5 -0
  430. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  431. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  432. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  433. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  434. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  435. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  436. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  437. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  438. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  439. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  440. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  441. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  442. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  443. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  444. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  445. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  446. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  447. package/src/duckdb/src/storage/data_table.cpp +1 -1
  448. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  449. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  450. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  451. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  452. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  453. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  454. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  455. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  456. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  457. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  458. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  459. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  460. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  461. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  462. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  463. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  464. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  465. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  466. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  467. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  468. package/src/duckdb/ub_src_common.cpp +2 -0
  469. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  470. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  471. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  472. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  473. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  474. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  475. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  476. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  477. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  478. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  479. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  480. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  481. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  482. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  483. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  484. package/src/statement.cpp +10 -3
  485. package/test/test_all_types.test.ts +233 -0
  486. package/tsconfig.json +1 -0
  487. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  488. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  489. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -13,6 +13,8 @@
13
13
  #include "duckdb/parallel/event.hpp"
14
14
  #include "duckdb/parallel/thread_context.hpp"
15
15
 
16
+ #include <thread>
17
+
16
18
  namespace duckdb {
17
19
 
18
20
  PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
@@ -67,21 +69,32 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
67
69
  class AsOfGlobalSinkState : public GlobalSinkState {
68
70
  public:
69
71
  AsOfGlobalSinkState(ClientContext &context, const PhysicalAsOfJoin &op)
70
- : global_partition(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {},
71
- op.estimated_cardinality),
72
+ : rhs_sink(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {}, op.estimated_cardinality),
72
73
  is_outer(IsRightOuterJoin(op.join_type)), has_null(false) {
73
74
  }
74
75
 
75
76
  idx_t Count() const {
76
- return global_partition.count;
77
+ return rhs_sink.count;
78
+ }
79
+
80
+ PartitionLocalSinkState *RegisterBuffer(ClientContext &context) {
81
+ lock_guard<mutex> guard(lock);
82
+ lhs_buffers.emplace_back(make_uniq<PartitionLocalSinkState>(context, *lhs_sink));
83
+ return lhs_buffers.back().get();
77
84
  }
78
85
 
79
- PartitionGlobalSinkState global_partition;
86
+ PartitionGlobalSinkState rhs_sink;
80
87
 
81
88
  // One per partition
82
89
  const bool is_outer;
83
90
  vector<OuterJoinMarker> right_outers;
84
91
  bool has_null;
92
+
93
+ // Left side buffering
94
+ unique_ptr<PartitionGlobalSinkState> lhs_sink;
95
+
96
+ mutex lock;
97
+ vector<unique_ptr<PartitionLocalSinkState>> lhs_buffers;
85
98
  };
86
99
 
87
100
  class AsOfLocalSinkState : public LocalSinkState {
@@ -108,7 +121,7 @@ unique_ptr<GlobalSinkState> PhysicalAsOfJoin::GetGlobalSinkState(ClientContext &
108
121
  unique_ptr<LocalSinkState> PhysicalAsOfJoin::GetLocalSinkState(ExecutionContext &context) const {
109
122
  // We only sink the RHS
110
123
  auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
111
- return make_uniq<AsOfLocalSinkState>(context.client, gsink.global_partition);
124
+ return make_uniq<AsOfLocalSinkState>(context.client, gsink.rhs_sink);
112
125
  }
113
126
 
114
127
  SinkResultType PhysicalAsOfJoin::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
@@ -131,15 +144,21 @@ SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, Cl
131
144
  GlobalSinkState &gstate_p) const {
132
145
  auto &gstate = gstate_p.Cast<AsOfGlobalSinkState>();
133
146
 
147
+ // The data is all in so we can initialise the left partitioning.
148
+ const vector<unique_ptr<BaseStatistics>> partitions_stats;
149
+ gstate.lhs_sink = make_uniq<PartitionGlobalSinkState>(context, lhs_partitions, lhs_orders, children[0]->types,
150
+ partitions_stats, 0);
151
+ gstate.lhs_sink->SyncPartitioning(gstate.rhs_sink);
152
+
134
153
  // Find the first group to sort
135
- auto &groups = gstate.global_partition.grouping_data->GetPartitions();
154
+ auto &groups = gstate.rhs_sink.grouping_data->GetPartitions();
136
155
  if (groups.empty() && EmptyResultIfRHSIsEmpty()) {
137
156
  // Empty input!
138
157
  return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
139
158
  }
140
159
 
141
160
  // Schedule all the sorts for maximum thread utilisation
142
- auto new_event = make_shared<PartitionMergeEvent>(gstate.global_partition, pipeline);
161
+ auto new_event = make_shared<PartitionMergeEvent>(gstate.rhs_sink, pipeline);
143
162
  event.InsertEvent(std::move(new_event));
144
163
 
145
164
  return SinkFinalizeType::READY;
@@ -152,10 +171,10 @@ class AsOfGlobalState : public GlobalOperatorState {
152
171
  public:
153
172
  explicit AsOfGlobalState(AsOfGlobalSinkState &gsink) {
154
173
  // for FULL/RIGHT OUTER JOIN, initialize right_outers to false for every tuple
155
- auto &global_partition = gsink.global_partition;
174
+ auto &rhs_partition = gsink.rhs_sink;
156
175
  auto &right_outers = gsink.right_outers;
157
- right_outers.reserve(global_partition.hash_groups.size());
158
- for (const auto &hash_group : global_partition.hash_groups) {
176
+ right_outers.reserve(rhs_partition.hash_groups.size());
177
+ for (const auto &hash_group : rhs_partition.hash_groups) {
159
178
  right_outers.emplace_back(OuterJoinMarker(gsink.is_outer));
160
179
  right_outers.back().Initialize(hash_group->count);
161
180
  }
@@ -169,79 +188,47 @@ unique_ptr<GlobalOperatorState> PhysicalAsOfJoin::GetGlobalOperatorState(ClientC
169
188
 
170
189
  class AsOfLocalState : public CachingOperatorState {
171
190
  public:
172
- using Orders = vector<BoundOrderByNode>;
173
- using Match = std::pair<hash_t, idx_t>;
191
+ AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op)
192
+ : context(context), allocator(Allocator::Get(context)), op(op), lhs_executor(context),
193
+ left_outer(IsLeftOuterJoin(op.join_type)), fetch_next_left(true) {
194
+ lhs_keys.Initialize(allocator, op.join_key_types);
195
+ for (const auto &cond : op.conditions) {
196
+ lhs_executor.AddExpression(*cond.left);
197
+ }
174
198
 
175
- AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external);
199
+ lhs_payload.Initialize(allocator, op.children[0]->types);
200
+ lhs_sel.Initialize();
201
+ left_outer.Initialize(STANDARD_VECTOR_SIZE);
176
202
 
177
- public:
178
- void ResolveJoin(DataChunk &input, bool *found_matches, Match *matches = nullptr);
203
+ auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
204
+ lhs_partition_sink = gsink.RegisterBuffer(context);
205
+ }
179
206
 
180
- void ResolveJoinKeys(DataChunk &input);
207
+ bool Sink(DataChunk &input);
208
+ OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk);
181
209
 
182
210
  ClientContext &context;
183
211
  Allocator &allocator;
184
212
  const PhysicalAsOfJoin &op;
185
- BufferManager &buffer_manager;
186
- const bool force_external;
187
- Orders lhs_orders;
188
213
 
189
- // LHS sorting
190
214
  ExpressionExecutor lhs_executor;
191
215
  DataChunk lhs_keys;
192
216
  ValidityMask lhs_valid_mask;
193
217
  SelectionVector lhs_sel;
194
- idx_t lhs_valid;
195
- RowLayout lhs_layout;
196
- unique_ptr<GlobalSortState> lhs_global_state;
197
- DataChunk lhs_sorted;
198
-
199
- // LHS binning
200
- Vector hash_vector;
201
- Vector bin_vector;
218
+ DataChunk lhs_payload;
202
219
 
203
- // Output
204
- idx_t lhs_match_count;
205
- SelectionVector lhs_matched;
206
220
  OuterJoinMarker left_outer;
207
221
  bool fetch_next_left;
208
- DataChunk group_payload;
209
- DataChunk rhs_payload;
210
- };
211
-
212
- AsOfLocalState::AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external)
213
- : context(context), allocator(Allocator::Get(context)), op(op),
214
- buffer_manager(BufferManager::GetBufferManager(context)), force_external(force_external), lhs_executor(context),
215
- hash_vector(LogicalType::HASH), bin_vector(LogicalType::HASH), left_outer(IsLeftOuterJoin(op.join_type)),
216
- fetch_next_left(true) {
217
- vector<unique_ptr<BaseStatistics>> partition_stats;
218
- Orders partitions; // Not used.
219
- PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
220
- partition_stats);
221
222
 
222
- // We sort the row numbers of the incoming block, not the rows
223
- lhs_layout.Initialize({LogicalType::UINTEGER});
224
- lhs_sorted.Initialize(allocator, lhs_layout.GetTypes());
225
-
226
- lhs_keys.Initialize(allocator, op.join_key_types);
227
- for (const auto &cond : op.conditions) {
228
- lhs_executor.AddExpression(*cond.left);
229
- }
230
-
231
- group_payload.Initialize(allocator, op.children[1]->types);
232
- rhs_payload.Initialize(allocator, op.children[1]->types);
233
-
234
- lhs_matched.Initialize();
235
- lhs_sel.Initialize();
236
- left_outer.Initialize(STANDARD_VECTOR_SIZE);
237
- }
223
+ optional_ptr<PartitionLocalSinkState> lhs_partition_sink;
224
+ };
238
225
 
239
- void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
226
+ bool AsOfLocalState::Sink(DataChunk &input) {
240
227
  // Compute the join keys
241
228
  lhs_keys.Reset();
242
229
  lhs_executor.Execute(input, lhs_keys);
243
230
 
244
- // Extract the NULLs
231
+ // Combine the NULLs
245
232
  const auto count = input.size();
246
233
  lhs_valid_mask.Reset();
247
234
  for (auto col_idx : op.null_sensitive) {
@@ -251,17 +238,19 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
251
238
  lhs_valid_mask.Combine(unified.validity, count);
252
239
  }
253
240
 
254
- // Convert the mask to a selection vector.
255
- // We need this anyway for sorting
256
- lhs_valid = 0;
241
+ // Convert the mask to a selection vector
242
+ // and mark all the rows that cannot match for early return.
243
+ idx_t lhs_valid = 0;
257
244
  const auto entry_count = lhs_valid_mask.EntryCount(count);
258
245
  idx_t base_idx = 0;
246
+ left_outer.Reset();
259
247
  for (idx_t entry_idx = 0; entry_idx < entry_count;) {
260
248
  const auto validity_entry = lhs_valid_mask.GetValidityEntry(entry_idx++);
261
249
  const auto next = MinValue<idx_t>(base_idx + ValidityMask::BITS_PER_VALUE, count);
262
250
  if (ValidityMask::AllValid(validity_entry)) {
263
251
  for (; base_idx < next; ++base_idx) {
264
252
  lhs_sel.set_index(lhs_valid++, base_idx);
253
+ left_outer.SetMatch(base_idx);
265
254
  }
266
255
  } else if (ValidityMask::NoneValid(validity_entry)) {
267
256
  base_idx = next;
@@ -270,120 +259,219 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
270
259
  for (; base_idx < next; ++base_idx) {
271
260
  if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) {
272
261
  lhs_sel.set_index(lhs_valid++, base_idx);
262
+ left_outer.SetMatch(base_idx);
273
263
  }
274
264
  }
275
265
  }
276
266
  }
277
267
 
278
268
  // Slice the keys to the ones we can match
279
- if (lhs_valid < count) {
280
- lhs_keys.Slice(lhs_sel, lhs_valid);
269
+ lhs_payload.Reset();
270
+ if (lhs_valid == count) {
271
+ lhs_payload.Reference(input);
272
+ lhs_payload.SetCardinality(input);
273
+ } else {
274
+ lhs_payload.Slice(input, lhs_sel, lhs_valid);
275
+ lhs_payload.SetCardinality(lhs_valid);
276
+
277
+ // Flush the ones that can't match
278
+ fetch_next_left = false;
281
279
  }
282
280
 
283
- // Hash to assign the partitions
284
- auto &global_partition = op.sink_state->Cast<AsOfGlobalSinkState>().global_partition;
285
- if (op.lhs_partitions.empty()) {
286
- // Only one hash group
287
- bin_vector.Reference(Value::HASH(0));
288
- } else {
289
- // Hash to determine the partitions.
290
- VectorOperations::Hash(lhs_keys.data[0], hash_vector, lhs_sel, lhs_valid);
291
- for (size_t prt_idx = 1; prt_idx < op.lhs_partitions.size(); ++prt_idx) {
292
- VectorOperations::CombineHash(hash_vector, lhs_keys.data[prt_idx], lhs_sel, lhs_valid);
281
+ lhs_partition_sink->Sink(lhs_payload);
282
+
283
+ return false;
284
+ }
285
+
286
+ OperatorResultType AsOfLocalState::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk) {
287
+ input.Verify();
288
+ Sink(input);
289
+
290
+ // If there were any unmatchable rows, return them now so we can forget about them.
291
+ if (!fetch_next_left) {
292
+ fetch_next_left = true;
293
+ left_outer.ConstructLeftJoinResult(input, chunk);
294
+ left_outer.Reset();
295
+ }
296
+
297
+ // Just keep asking for data and buffering it
298
+ return OperatorResultType::NEED_MORE_INPUT;
299
+ }
300
+
301
+ OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
302
+ GlobalOperatorState &gstate, OperatorState &lstate_p) const {
303
+ auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
304
+ auto &lstate = lstate_p.Cast<AsOfLocalState>();
305
+
306
+ if (gsink.rhs_sink.count == 0) {
307
+ // empty RHS
308
+ if (!EmptyResultIfRHSIsEmpty()) {
309
+ ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
310
+ return OperatorResultType::NEED_MORE_INPUT;
311
+ } else {
312
+ return OperatorResultType::FINISHED;
293
313
  }
314
+ }
294
315
 
295
- // Convert hashes to hash groups
296
- const auto radix_bits = global_partition.grouping_data->GetRadixBits();
297
- RadixPartitioning::HashesToBins(hash_vector, radix_bits, bin_vector, count);
316
+ return lstate.ExecuteInternal(context, input, chunk);
317
+ }
318
+
319
+ //===--------------------------------------------------------------------===//
320
+ // Source
321
+ //===--------------------------------------------------------------------===//
322
+ class AsOfProbeBuffer {
323
+ public:
324
+ using Orders = vector<BoundOrderByNode>;
325
+
326
+ static bool IsExternal(ClientContext &context) {
327
+ return ClientConfig::GetConfig(context).force_external;
328
+ }
329
+
330
+ AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op);
331
+
332
+ public:
333
+ void ResolveJoin(bool *found_matches, idx_t *matches = nullptr);
334
+ bool Scanning() const {
335
+ return lhs_scanner.get();
298
336
  }
337
+ void BeginLeftScan(hash_t scan_bin);
338
+ bool NextLeft();
339
+ void EndScan();
340
+
341
+ // resolve joins that output max N elements (SEMI, ANTI, MARK)
342
+ void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk);
343
+ // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
344
+ void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk);
345
+ // Chunk may be empty
346
+ void GetData(ExecutionContext &context, DataChunk &chunk);
347
+ bool HasMoreData() const {
348
+ return !fetch_next_left || (lhs_scanner && lhs_scanner->Remaining());
349
+ }
350
+
351
+ ClientContext &context;
352
+ Allocator &allocator;
353
+ const PhysicalAsOfJoin &op;
354
+ BufferManager &buffer_manager;
355
+ const bool force_external;
356
+ const idx_t memory_per_thread;
357
+ Orders lhs_orders;
358
+
359
+ // LHS scanning
360
+ SelectionVector lhs_sel;
361
+ optional_ptr<PartitionGlobalHashGroup> left_hash;
362
+ OuterJoinMarker left_outer;
363
+ unique_ptr<SBIterator> left_itr;
364
+ unique_ptr<PayloadScanner> lhs_scanner;
365
+ DataChunk lhs_payload;
366
+
367
+ // RHS scanning
368
+ optional_ptr<PartitionGlobalHashGroup> right_hash;
369
+ optional_ptr<OuterJoinMarker> right_outer;
370
+ unique_ptr<SBIterator> right_itr;
371
+ unique_ptr<PayloadScanner> rhs_scanner;
372
+ DataChunk rhs_payload;
373
+
374
+ idx_t lhs_match_count;
375
+ bool fetch_next_left;
376
+ };
377
+
378
+ AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op)
379
+ : context(context), allocator(Allocator::Get(context)), op(op),
380
+ buffer_manager(BufferManager::GetBufferManager(context)), force_external(IsExternal(context)),
381
+ memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)),
382
+ fetch_next_left(true) {
383
+ vector<unique_ptr<BaseStatistics>> partition_stats;
384
+ Orders partitions; // Not used.
385
+ PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
386
+ partition_stats);
387
+
388
+ // We sort the row numbers of the incoming block, not the rows
389
+ lhs_payload.Initialize(allocator, op.children[0]->types);
390
+ rhs_payload.Initialize(allocator, op.children[1]->types);
391
+
392
+ lhs_sel.Initialize();
393
+ left_outer.Initialize(STANDARD_VECTOR_SIZE);
394
+ }
299
395
 
300
- // Sort the selection vector on the valid keys
301
- lhs_global_state = make_uniq<GlobalSortState>(buffer_manager, lhs_orders, lhs_layout);
302
- auto &global_state = *lhs_global_state;
303
- LocalSortState local_sort;
304
- local_sort.Initialize(*lhs_global_state, buffer_manager);
396
+ void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
397
+ auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
398
+ auto &lhs_sink = *gsink.lhs_sink;
399
+ const auto left_group = lhs_sink.bin_groups[scan_bin];
400
+ if (left_group >= lhs_sink.bin_groups.size()) {
401
+ return;
402
+ }
305
403
 
306
- DataChunk payload_chunk;
307
- payload_chunk.InitializeEmpty({LogicalType::UINTEGER});
308
- FlatVector::SetData(payload_chunk.data[0], data_ptr_cast(lhs_sel.data()));
309
- payload_chunk.SetCardinality(lhs_valid);
310
- local_sort.SinkChunk(lhs_keys, payload_chunk);
404
+ left_hash = lhs_sink.hash_groups[left_group].get();
405
+ auto &left_sort = *(left_hash->global_sort);
406
+ lhs_scanner = make_uniq<PayloadScanner>(left_sort, false);
407
+ left_itr = make_uniq<SBIterator>(left_sort, ExpressionType::COMPARE_LESSTHANOREQUALTO);
408
+
409
+ // We are only probing the corresponding right side bin, which may be empty
410
+ // If they are empty, we leave the iterator as null so we can emit left matches
411
+ auto &rhs_sink = gsink.rhs_sink;
412
+ const auto right_group = rhs_sink.bin_groups[scan_bin];
413
+ if (right_group < rhs_sink.bin_groups.size()) {
414
+ right_hash = rhs_sink.hash_groups[right_group].get();
415
+ right_outer = gsink.right_outers.data() + right_group;
416
+ auto &right_sort = *(right_hash->global_sort);
417
+ right_itr = make_uniq<SBIterator>(right_sort, ExpressionType::COMPARE_LESSTHANOREQUALTO);
418
+ rhs_scanner = make_uniq<PayloadScanner>(right_sort, false);
419
+ }
420
+ }
311
421
 
312
- // Set external (can be forced with the PRAGMA)
313
- global_state.external = force_external;
314
- global_state.AddLocalState(local_sort);
315
- global_state.PrepareMergePhase();
316
- while (global_state.sorted_blocks.size() > 1) {
317
- MergeSorter merge_sorter(*lhs_global_state, buffer_manager);
318
- merge_sorter.PerformInMergeRound();
319
- global_state.CompleteMergeRound();
422
+ bool AsOfProbeBuffer::NextLeft() {
423
+ if (!HasMoreData()) {
424
+ return false;
320
425
  }
321
426
 
322
- // Scan the sorted selection
323
- D_ASSERT(global_state.sorted_blocks.size() == 1);
427
+ // Scan the next sorted chunk
428
+ lhs_payload.Reset();
429
+ left_itr->SetIndex(lhs_scanner->Scanned());
430
+ lhs_scanner->Scan(lhs_payload);
324
431
 
325
- auto scanner = make_uniq<PayloadScanner>(*global_state.sorted_blocks[0]->payload_data, global_state, false);
326
- lhs_sorted.Reset();
327
- scanner->Scan(lhs_sorted);
432
+ return true;
328
433
  }
329
434
 
330
- void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<hash_t, idx_t> *matches) {
331
- // Sort the input into lhs_payload, radix keys in lhs_global_state
332
- ResolveJoinKeys(input);
435
+ void AsOfProbeBuffer::EndScan() {
436
+ right_hash = nullptr;
437
+ right_itr.reset();
438
+ rhs_scanner.reset();
439
+ right_outer = nullptr;
333
440
 
334
- auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
335
- auto &global_partition = gsink.global_partition;
441
+ left_hash = nullptr;
442
+ left_itr.reset();
443
+ lhs_scanner.reset();
444
+ }
336
445
 
337
- // The bins are contiguous from sorting, so load them one at a time
338
- // But they may be constant, so unify.
339
- UnifiedVectorFormat bin_unified;
340
- bin_vector.ToUnifiedFormat(lhs_valid, bin_unified);
341
- const auto bins = UnifiedVectorFormat::GetData<hash_t>(bin_unified);
446
+ void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
447
+ // If there was no right partition, there are no matches
448
+ lhs_match_count = 0;
449
+ left_outer.Reset();
450
+ if (!right_itr) {
451
+ return;
452
+ }
342
453
 
343
- hash_t prev_bin = global_partition.bin_groups.size();
344
- optional_ptr<PartitionGlobalHashGroup> hash_group;
345
- optional_ptr<OuterJoinMarker> right_outer;
454
+ const auto count = lhs_payload.size();
455
+ const auto left_base = left_itr->GetIndex();
346
456
  // Searching for right <= left
347
- SBIterator left(*lhs_global_state, ExpressionType::COMPARE_LESSTHANOREQUALTO);
348
- unique_ptr<SBIterator> right;
349
- lhs_match_count = 0;
350
- const auto sorted_sel = FlatVector::GetData<sel_t>(lhs_sorted.data[0]);
351
- for (idx_t i = 0; i < lhs_valid; ++i) {
352
- // idx is the index in the input; i is the index in the sorted keys
353
- const auto idx = sorted_sel[i];
354
- const auto curr_bin = bins[bin_unified.sel->get_index(idx)];
355
- if (!hash_group || curr_bin != prev_bin) {
356
- // Grab the next group
357
- prev_bin = curr_bin;
358
- const auto group_idx = global_partition.bin_groups[curr_bin];
359
- if (group_idx >= global_partition.hash_groups.size()) {
360
- // No matching partition
361
- hash_group = nullptr;
362
- right_outer = nullptr;
363
- right.reset();
364
- continue;
365
- }
366
- hash_group = global_partition.hash_groups[group_idx].get();
367
- right_outer = gsink.right_outers.data() + group_idx;
368
- right = make_uniq<SBIterator>(*(hash_group->global_sort), ExpressionType::COMPARE_LESSTHANOREQUALTO);
369
- }
370
- left.SetIndex(i);
457
+ for (idx_t i = 0; i < count; ++i) {
458
+ left_itr->SetIndex(left_base + i);
371
459
 
372
460
  // If right > left, then there is no match
373
- if (!right->Compare(left)) {
461
+ if (!right_itr->Compare(*left_itr)) {
374
462
  continue;
375
463
  }
376
464
 
377
465
  // Exponential search forward for a non-matching value using radix iterators
378
466
  // (We use exponential search to avoid thrashing the block manager on large probes)
379
467
  idx_t bound = 1;
380
- idx_t begin = right->GetIndex();
381
- right->SetIndex(begin + bound);
382
- while (right->GetIndex() < hash_group->count) {
383
- if (right->Compare(left)) {
468
+ idx_t begin = right_itr->GetIndex();
469
+ right_itr->SetIndex(begin + bound);
470
+ while (right_itr->GetIndex() < right_hash->count) {
471
+ if (right_itr->Compare(*left_itr)) {
384
472
  // If right <= left, jump ahead
385
473
  bound *= 2;
386
- right->SetIndex(begin + bound);
474
+ right_itr->SetIndex(begin + bound);
387
475
  } else {
388
476
  break;
389
477
  }
@@ -392,255 +480,298 @@ void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<
392
480
  // Binary search for the first non-matching value using radix iterators
393
481
  // The previous value (which we know exists) is the match
394
482
  auto first = begin + bound / 2;
395
- auto last = MinValue<idx_t>(begin + bound, hash_group->count);
483
+ auto last = MinValue<idx_t>(begin + bound, right_hash->count);
396
484
  while (first < last) {
397
485
  const auto mid = first + (last - first) / 2;
398
- right->SetIndex(mid);
399
- if (right->Compare(left)) {
486
+ right_itr->SetIndex(mid);
487
+ if (right_itr->Compare(*left_itr)) {
400
488
  // If right <= left, new lower bound
401
489
  first = mid + 1;
402
490
  } else {
403
491
  last = mid;
404
492
  }
405
493
  }
406
- right->SetIndex(--first);
494
+ right_itr->SetIndex(--first);
407
495
 
408
496
  // Check partitions for strict equality
409
- if (!op.lhs_partitions.empty() && hash_group->ComparePartitions(left, *right)) {
497
+ if (right_hash->ComparePartitions(*left_itr, *right_itr)) {
410
498
  continue;
411
499
  }
412
500
 
413
501
  // Emit match data
414
502
  right_outer->SetMatch(first);
415
- left_outer.SetMatch(idx);
503
+ left_outer.SetMatch(i);
416
504
  if (found_match) {
417
- found_match[idx] = true;
505
+ found_match[i] = true;
418
506
  }
419
507
  if (matches) {
420
- matches[idx] = Match(curr_bin, first);
508
+ matches[i] = first;
421
509
  }
422
- lhs_matched.set_index(lhs_match_count++, idx);
510
+ lhs_sel.set_index(lhs_match_count++, i);
423
511
  }
424
512
  }
425
513
 
426
514
  unique_ptr<OperatorState> PhysicalAsOfJoin::GetOperatorState(ExecutionContext &context) const {
427
- auto &config = ClientConfig::GetConfig(context.client);
428
- return make_uniq<AsOfLocalState>(context.client, *this, config.force_external);
515
+ return make_uniq<AsOfLocalState>(context.client, *this);
429
516
  }
430
517
 
431
- void PhysicalAsOfJoin::ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
432
- OperatorState &lstate_p) const {
433
- auto &lstate = lstate_p.Cast<AsOfLocalState>();
434
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
435
-
518
+ void AsOfProbeBuffer::ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk) {
436
519
  // perform the actual join
437
520
  bool found_match[STANDARD_VECTOR_SIZE] = {false};
438
- lstate.ResolveJoin(input, found_match);
521
+ ResolveJoin(found_match);
439
522
 
440
523
  // now construct the result based on the join result
441
- switch (join_type) {
442
- case JoinType::MARK: {
443
- PhysicalJoin::ConstructMarkJoinResult(lstate.lhs_keys, input, chunk, found_match, gsink.has_null);
444
- break;
445
- }
524
+ switch (op.join_type) {
446
525
  case JoinType::SEMI:
447
- PhysicalJoin::ConstructSemiJoinResult(input, chunk, found_match);
526
+ PhysicalJoin::ConstructSemiJoinResult(lhs_payload, chunk, found_match);
448
527
  break;
449
528
  case JoinType::ANTI:
450
- PhysicalJoin::ConstructAntiJoinResult(input, chunk, found_match);
529
+ PhysicalJoin::ConstructAntiJoinResult(lhs_payload, chunk, found_match);
451
530
  break;
452
531
  default:
453
532
  throw NotImplementedException("Unimplemented join type for AsOf join");
454
533
  }
455
534
  }
456
535
 
457
- OperatorResultType PhysicalAsOfJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
458
- OperatorState &lstate_p) const {
459
- auto &lstate = lstate_p.Cast<AsOfLocalState>();
460
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
461
-
462
- if (!lstate.fetch_next_left) {
463
- lstate.fetch_next_left = true;
464
- if (lstate.left_outer.Enabled()) {
465
- // left join: before we move to the next chunk, see if we need to output any vectors that didn't
466
- // have a match found
467
- lstate.left_outer.ConstructLeftJoinResult(input, chunk);
468
- lstate.left_outer.Reset();
469
- }
470
- return OperatorResultType::NEED_MORE_INPUT;
471
- }
472
-
536
+ void AsOfProbeBuffer::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk) {
473
537
  // perform the actual join
474
- AsOfLocalState::Match matches[STANDARD_VECTOR_SIZE];
475
- lstate.ResolveJoin(input, nullptr, matches);
476
- lstate.group_payload.Reset();
477
- lstate.rhs_payload.Reset();
478
-
479
- auto &global_partition = gsink.global_partition;
480
- hash_t scan_bin = global_partition.bin_groups.size();
481
- optional_ptr<PartitionGlobalHashGroup> hash_group;
482
- unique_ptr<PayloadScanner> scanner;
483
- for (idx_t i = 0; i < lstate.lhs_match_count; ++i) {
484
- const auto idx = lstate.lhs_matched[i];
485
- const auto match_bin = matches[idx].first;
486
- const auto match_pos = matches[idx].second;
487
- if (match_bin != scan_bin) {
488
- // Grab the next group
489
- const auto group_idx = global_partition.bin_groups[match_bin];
490
- hash_group = global_partition.hash_groups[group_idx].get();
491
- scan_bin = match_bin;
492
- scanner = make_uniq<PayloadScanner>(*hash_group->global_sort, false);
493
- lstate.group_payload.Reset();
494
- }
538
+ idx_t matches[STANDARD_VECTOR_SIZE];
539
+ ResolveJoin(nullptr, matches);
540
+
541
+ for (idx_t i = 0; i < lhs_match_count; ++i) {
542
+ const auto idx = lhs_sel[i];
543
+ const auto match_pos = matches[idx];
495
544
  // Skip to the range containing the match
496
- while (match_pos >= scanner->Scanned()) {
497
- lstate.group_payload.Reset();
498
- scanner->Scan(lstate.group_payload);
545
+ while (match_pos >= rhs_scanner->Scanned()) {
546
+ rhs_payload.Reset();
547
+ rhs_scanner->Scan(rhs_payload);
499
548
  }
500
549
  // Append the individual values
501
550
  // TODO: Batch the copies
502
- const auto source_offset = match_pos - (scanner->Scanned() - lstate.group_payload.size());
503
- for (idx_t col_idx = 0; col_idx < right_projection_map.size(); ++col_idx) {
504
- const auto rhs_idx = right_projection_map[col_idx];
505
- auto &source = lstate.group_payload.data[rhs_idx];
506
- auto &target = chunk.data[input.ColumnCount() + col_idx];
551
+ const auto source_offset = match_pos - (rhs_scanner->Scanned() - rhs_payload.size());
552
+ for (column_t col_idx = 0; col_idx < op.right_projection_map.size(); ++col_idx) {
553
+ const auto rhs_idx = op.right_projection_map[col_idx];
554
+ auto &source = rhs_payload.data[rhs_idx];
555
+ auto &target = chunk.data[lhs_payload.ColumnCount() + col_idx];
507
556
  VectorOperations::Copy(source, target, source_offset + 1, source_offset, i);
508
557
  }
509
558
  }
510
559
 
511
- // Slice the input into the left side
512
- chunk.Slice(input, lstate.lhs_matched, lstate.lhs_match_count);
513
-
514
- // If we are doing a left join, come back for the NULLs
515
- if (lstate.left_outer.Enabled()) {
516
- lstate.fetch_next_left = false;
517
- return OperatorResultType::HAVE_MORE_OUTPUT;
560
+ // Slice the left payload into the result
561
+ for (column_t i = 0; i < lhs_payload.ColumnCount(); ++i) {
562
+ chunk.data[i].Slice(lhs_payload.data[i], lhs_sel, lhs_match_count);
518
563
  }
564
+ chunk.SetCardinality(lhs_match_count);
519
565
 
520
- return OperatorResultType::NEED_MORE_INPUT;
566
+ // If we are doing a left join, come back for the NULLs
567
+ fetch_next_left = !left_outer.Enabled();
521
568
  }
522
569
 
523
- OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
524
- GlobalOperatorState &gstate, OperatorState &lstate) const {
525
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
526
-
527
- if (gsink.global_partition.count == 0) {
528
- // empty RHS
529
- if (!EmptyResultIfRHSIsEmpty()) {
530
- ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
531
- return OperatorResultType::NEED_MORE_INPUT;
532
- } else {
533
- return OperatorResultType::FINISHED;
570
+ void AsOfProbeBuffer::GetData(ExecutionContext &context, DataChunk &chunk) {
571
+ // Handle dangling left join results from current chunk
572
+ if (!fetch_next_left) {
573
+ fetch_next_left = true;
574
+ if (left_outer.Enabled()) {
575
+ // left join: before we move to the next chunk, see if we need to output any vectors that didn't
576
+ // have a match found
577
+ left_outer.ConstructLeftJoinResult(lhs_payload, chunk);
578
+ left_outer.Reset();
534
579
  }
580
+ return;
535
581
  }
536
582
 
537
- input.Verify();
538
- switch (join_type) {
583
+ // Stop if there is no more data
584
+ if (!NextLeft()) {
585
+ return;
586
+ }
587
+
588
+ switch (op.join_type) {
539
589
  case JoinType::SEMI:
540
590
  case JoinType::ANTI:
541
591
  case JoinType::MARK:
542
592
  // simple joins can have max STANDARD_VECTOR_SIZE matches per chunk
543
- ResolveSimpleJoin(context, input, chunk, lstate);
544
- return OperatorResultType::NEED_MORE_INPUT;
593
+ ResolveSimpleJoin(context, chunk);
594
+ break;
545
595
  case JoinType::LEFT:
546
596
  case JoinType::INNER:
547
597
  case JoinType::RIGHT:
548
598
  case JoinType::OUTER:
549
- return ResolveComplexJoin(context, input, chunk, lstate);
599
+ ResolveComplexJoin(context, chunk);
600
+ break;
550
601
  default:
551
602
  throw NotImplementedException("Unimplemented type for as-of join!");
552
603
  }
553
604
  }
554
605
 
555
- //===--------------------------------------------------------------------===//
556
- // Source
557
- //===--------------------------------------------------------------------===//
558
606
  class AsOfGlobalSourceState : public GlobalSourceState {
559
607
  public:
560
- explicit AsOfGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
608
+ explicit AsOfGlobalSourceState(AsOfGlobalSinkState &gsink_p)
609
+ : gsink(gsink_p), next_combine(0), combined(0), merged(0), mergers(0), next_left(0), flushed(0), next_right(0) {
561
610
  }
562
611
 
563
- PartitionGlobalSinkState &gsink;
564
- //! The output read position.
565
- atomic<idx_t> next_bin;
566
-
567
- public:
568
- idx_t MaxThreads() override {
569
- // If there is only one partition, we have to process it on one thread.
570
- if (!gsink.grouping_data) {
571
- return 1;
612
+ PartitionGlobalMergeStates &GetMergeStates() {
613
+ lock_guard<mutex> guard(lock);
614
+ if (!merge_states) {
615
+ merge_states = make_uniq<PartitionGlobalMergeStates>(*gsink.lhs_sink);
572
616
  }
617
+ return *merge_states;
618
+ }
573
619
 
574
- // If there is not a lot of data, process serially.
575
- if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
576
- return 1;
577
- }
620
+ AsOfGlobalSinkState &gsink;
621
+ //! The next buffer to combine
622
+ atomic<size_t> next_combine;
623
+ //! The number of combined buffers
624
+ atomic<size_t> combined;
625
+ //! The number of combined buffers
626
+ atomic<size_t> merged;
627
+ //! The number of combined buffers
628
+ atomic<size_t> mergers;
629
+ //! The next buffer to flush
630
+ atomic<size_t> next_left;
631
+ //! The number of flushed buffers
632
+ atomic<size_t> flushed;
633
+ //! The right outer output read position.
634
+ atomic<idx_t> next_right;
635
+ //! The merge handler
636
+ mutex lock;
637
+ unique_ptr<PartitionGlobalMergeStates> merge_states;
578
638
 
579
- return gsink.hash_groups.size();
639
+ public:
640
+ idx_t MaxThreads() override {
641
+ return gsink.lhs_buffers.size();
580
642
  }
581
643
  };
582
644
 
583
645
  unique_ptr<GlobalSourceState> PhysicalAsOfJoin::GetGlobalSourceState(ClientContext &context) const {
584
646
  auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
585
- return make_uniq<AsOfGlobalSourceState>(gsink.global_partition);
647
+ return make_uniq<AsOfGlobalSourceState>(gsink);
586
648
  }
587
649
 
588
650
  class AsOfLocalSourceState : public LocalSourceState {
589
651
  public:
590
652
  using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
591
653
 
592
- explicit AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p);
654
+ AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op);
655
+
656
+ void CombineLeftPartitions();
657
+ void MergeLeftPartitions();
658
+
659
+ idx_t BeginRightScan(const idx_t hash_bin);
593
660
 
594
- idx_t GeneratePartition(const idx_t hash_bin);
661
+ AsOfGlobalSourceState &gsource;
595
662
 
596
- AsOfGlobalSinkState &gstate;
663
+ //! The left side partition being probed
664
+ AsOfProbeBuffer probe_buffer;
597
665
 
598
666
  //! The read partition
599
667
  idx_t hash_bin;
600
668
  HashGroupPtr hash_group;
601
-
602
669
  //! The read cursor
603
670
  unique_ptr<PayloadScanner> scanner;
604
- //! Buffer for the inputs
605
- DataChunk input_chunk;
606
671
  //! Pointer to the matches
607
- const bool *found_match;
672
+ const bool *found_match = {};
608
673
  };
609
674
 
610
- AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p) : gstate(gstate_p) {
611
- input_chunk.Initialize(gstate.global_partition.allocator, gstate.global_partition.payload_types);
675
+ AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op)
676
+ : gsource(gsource), probe_buffer(gsource.gsink.lhs_sink->context, op) {
677
+ gsource.mergers++;
678
+ }
679
+
680
+ void AsOfLocalSourceState::CombineLeftPartitions() {
681
+ const auto buffer_count = gsource.gsink.lhs_buffers.size();
682
+ while (gsource.combined < buffer_count) {
683
+ const auto next_combine = gsource.next_combine++;
684
+ if (next_combine < buffer_count) {
685
+ gsource.gsink.lhs_buffers[next_combine]->Combine();
686
+ ++gsource.combined;
687
+ } else {
688
+ std::this_thread::yield();
689
+ }
690
+ }
691
+ }
692
+
693
+ void AsOfLocalSourceState::MergeLeftPartitions() {
694
+ PartitionGlobalMergeStates::Callback local_callback;
695
+ PartitionLocalMergeState local_merge;
696
+ gsource.GetMergeStates().ExecuteTask(local_merge, local_callback);
697
+ gsource.merged++;
698
+ while (gsource.merged < gsource.mergers) {
699
+ std::this_thread::yield();
700
+ }
612
701
  }
613
702
 
614
- idx_t AsOfLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
615
- // Get rid of any stale data
703
+ idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
616
704
  hash_bin = hash_bin_p;
617
705
 
618
- hash_group = std::move(gstate.global_partition.hash_groups[hash_bin]);
706
+ hash_group = std::move(gsource.gsink.rhs_sink.hash_groups[hash_bin]);
619
707
  scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);
620
- found_match = gstate.right_outers[hash_bin].GetMatches();
708
+ found_match = gsource.gsink.right_outers[hash_bin].GetMatches();
621
709
 
622
710
  return scanner->Remaining();
623
711
  }
624
712
 
625
713
  unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
626
714
  GlobalSourceState &gstate) const {
627
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
628
- return make_uniq<AsOfLocalSourceState>(gsink);
715
+ auto &gsource = gstate.Cast<AsOfGlobalSourceState>();
716
+ return make_uniq<AsOfLocalSourceState>(gsource, *this);
629
717
  }
630
718
 
631
719
  SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &chunk,
632
720
  OperatorSourceInput &input) const {
633
- D_ASSERT(IsRightOuterJoin(join_type));
634
-
635
721
  auto &gsource = input.global_state.Cast<AsOfGlobalSourceState>();
636
722
  auto &lsource = input.local_state.Cast<AsOfLocalSourceState>();
637
- auto &gsink = gsource.gsink;
723
+ auto &rhs_sink = gsource.gsink.rhs_sink;
724
+
725
+ // Step 1: Combine the partitions
726
+ lsource.CombineLeftPartitions();
727
+
728
+ // Step 2: Sort on all threads
729
+ lsource.MergeLeftPartitions();
730
+
731
+ // Step 3: Join the partitions
732
+ auto &lhs_sink = *gsource.gsink.lhs_sink;
733
+ auto &partitions = lhs_sink.grouping_data->GetPartitions();
734
+ const auto left_bins = partitions.size();
735
+ while (gsource.flushed < left_bins) {
736
+ // Make sure we have something to flush
737
+ if (!lsource.probe_buffer.Scanning()) {
738
+ const auto left_bin = gsource.next_left++;
739
+ if (left_bin < left_bins) {
740
+ // More to flush
741
+ lsource.probe_buffer.BeginLeftScan(left_bin);
742
+ } else if (!IsRightOuterJoin(join_type)) {
743
+ return SourceResultType::FINISHED;
744
+ } else {
745
+ // Wait for all threads to finish
746
+ // TODO: How to implement a spin wait correctly?
747
+ // Returning BLOCKED seems to hang the system.
748
+ std::this_thread::yield();
749
+ continue;
750
+ }
751
+ }
752
+
753
+ lsource.probe_buffer.GetData(context, chunk);
754
+ if (chunk.size()) {
755
+ return SourceResultType::HAVE_MORE_OUTPUT;
756
+ } else if (lsource.probe_buffer.HasMoreData()) {
757
+ // Join the next partition
758
+ continue;
759
+ } else {
760
+ lsource.probe_buffer.EndScan();
761
+ gsource.flushed++;
762
+ }
763
+ }
764
+
765
+ // Step 4: Emit right join matches
766
+ if (!IsRightOuterJoin(join_type)) {
767
+ return SourceResultType::FINISHED;
768
+ }
638
769
 
639
- auto &hash_groups = gsink.hash_groups;
640
- const auto bin_count = hash_groups.size();
770
+ auto &hash_groups = rhs_sink.hash_groups;
771
+ const auto right_groups = hash_groups.size();
641
772
 
642
773
  DataChunk rhs_chunk;
643
- rhs_chunk.Initialize(Allocator::Get(context.client), gsink.payload_types);
774
+ rhs_chunk.Initialize(Allocator::Get(context.client), rhs_sink.payload_types);
644
775
  SelectionVector rsel(STANDARD_VECTOR_SIZE);
645
776
 
646
777
  while (chunk.size() == 0) {
@@ -648,17 +779,17 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
648
779
  while (!lsource.scanner || !lsource.scanner->Remaining()) {
649
780
  lsource.scanner.reset();
650
781
  lsource.hash_group.reset();
651
- auto hash_bin = gsource.next_bin++;
652
- if (hash_bin >= bin_count) {
782
+ auto hash_bin = gsource.next_right++;
783
+ if (hash_bin >= right_groups) {
653
784
  return SourceResultType::FINISHED;
654
785
  }
655
786
 
656
- for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
787
+ for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_right++) {
657
788
  if (hash_groups[hash_bin]) {
658
789
  break;
659
790
  }
660
791
  }
661
- lsource.GeneratePartition(hash_bin);
792
+ lsource.BeginRightScan(hash_bin);
662
793
  }
663
794
  const auto rhs_position = lsource.scanner->Scanned();
664
795
  lsource.scanner->Scan(rhs_chunk);