duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504):
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  23. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  24. package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
  25. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  26. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  27. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  28. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  29. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  30. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  31. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  32. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  33. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  34. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  39. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  40. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  41. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  42. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  43. package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
  44. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  45. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  46. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  47. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  48. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  49. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
  55. package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/exception.cpp +2 -2
  62. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  63. package/src/duckdb/src/common/file_system.cpp +19 -0
  64. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  65. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  66. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  67. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  68. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  69. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  70. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  71. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  72. package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
  73. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  74. package/src/duckdb/src/common/types/bit.cpp +51 -0
  75. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  76. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  77. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  78. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  79. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  80. package/src/duckdb/src/common/types/date.cpp +9 -0
  81. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  82. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  83. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  84. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  85. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  86. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  87. package/src/duckdb/src/common/types/value.cpp +11 -6
  88. package/src/duckdb/src/common/types.cpp +9 -656
  89. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  90. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  91. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  94. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  95. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  96. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  97. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  98. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  99. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  100. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  101. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  102. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  103. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  104. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  105. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  106. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  107. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  108. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  109. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  110. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  111. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  112. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  113. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  114. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  115. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  116. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  117. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  118. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  119. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  120. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  121. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  122. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  124. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  125. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  126. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  127. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  128. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  129. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  131. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  132. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  133. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  134. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  135. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  136. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  137. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  138. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  139. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  140. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  141. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  142. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  143. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  144. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  145. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  146. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  147. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  148. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  149. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  150. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  151. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  152. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  153. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  154. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  155. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  156. package/src/duckdb/src/function/function.cpp +3 -1
  157. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  158. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  159. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  160. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  161. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  162. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  163. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  164. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  165. package/src/duckdb/src/function/table/arrow.cpp +19 -0
  166. package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
  167. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  168. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  169. package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
  170. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  171. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  172. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  173. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  174. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  176. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  177. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  178. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  179. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  180. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  181. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  182. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  183. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  184. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  185. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  186. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  187. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  188. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  189. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  190. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
  191. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  192. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  193. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  194. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  195. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  196. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
  197. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  198. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  200. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  201. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  202. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  205. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  206. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  207. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
  208. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  210. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  211. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  213. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  215. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  222. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  229. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  230. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  231. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  232. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  233. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  234. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  235. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  236. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  237. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  238. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  239. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  240. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  241. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  242. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  243. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  244. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  245. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  246. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  247. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  248. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  249. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  251. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  252. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  253. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  254. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  255. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  256. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  257. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  258. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  259. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  260. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  261. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  262. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  263. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  264. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  265. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  266. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  267. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  268. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  269. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  270. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  272. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  273. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  274. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  275. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
  276. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  277. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  278. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  279. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  280. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  281. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  284. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  286. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  287. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  288. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  291. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  293. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  294. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  295. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  296. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  297. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  300. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  302. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  303. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  304. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  306. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  307. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  308. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  309. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  310. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  311. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  312. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  314. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  315. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  316. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  317. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  318. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  319. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  320. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  321. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  322. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  323. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  324. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  325. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  326. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  327. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  328. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  329. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  330. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  331. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  332. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  333. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  334. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  335. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  336. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  337. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  338. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  339. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  340. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  341. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  342. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  343. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  344. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  345. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  346. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  347. package/src/duckdb/src/include/duckdb.h +28 -0
  348. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  349. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  350. package/src/duckdb/src/main/config.cpp +4 -0
  351. package/src/duckdb/src/main/database.cpp +1 -1
  352. package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
  353. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  354. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  355. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  356. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  357. package/src/duckdb/src/main/relation.cpp +6 -5
  358. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  359. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  360. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  361. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  362. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  363. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  364. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  365. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  366. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  367. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  368. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  369. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  370. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  371. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  372. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  373. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  374. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  375. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  376. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  377. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  378. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  379. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  380. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  381. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  382. package/src/duckdb/src/parallel/executor.cpp +15 -0
  383. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  384. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  385. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  386. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  387. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  388. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  389. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  390. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  391. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  392. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  393. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  394. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  395. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  396. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  397. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  398. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  399. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  400. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  401. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  402. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  403. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  404. package/src/duckdb/src/parser/parser.cpp +8 -2
  405. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  406. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  407. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  408. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  409. package/src/duckdb/src/parser/query_node.cpp +15 -37
  410. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  411. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  412. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  413. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  414. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  415. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  416. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  417. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  418. package/src/duckdb/src/parser/tableref.cpp +0 -44
  419. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  420. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  421. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  422. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  423. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  424. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  425. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  426. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  427. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  428. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  429. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  430. package/src/duckdb/src/parser/transformer.cpp +15 -0
  431. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  432. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  433. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  434. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  435. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  436. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  437. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  438. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  439. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  440. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  441. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  442. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  443. package/src/duckdb/src/planner/binder.cpp +44 -31
  444. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  445. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  446. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  447. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  448. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  449. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  450. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  451. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  452. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  453. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  454. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  455. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  456. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  457. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  458. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  459. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  460. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  461. package/src/duckdb/src/storage/data_table.cpp +1 -1
  462. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  463. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  464. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  465. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  466. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  467. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  468. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  469. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  470. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  471. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  472. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  473. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  474. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  475. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  476. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  477. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  478. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  479. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  480. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  481. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  482. package/src/duckdb/ub_src_common.cpp +2 -0
  483. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  484. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  485. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  486. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  487. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  488. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  489. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  490. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  491. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  492. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  493. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  494. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  495. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  496. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  497. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  498. package/src/statement.cpp +10 -3
  499. package/test/columns.test.ts +24 -1
  500. package/test/test_all_types.test.ts +234 -0
  501. package/tsconfig.json +1 -0
  502. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  503. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  504. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -13,12 +13,15 @@
13
13
  #include "duckdb/parallel/event.hpp"
14
14
  #include "duckdb/parallel/thread_context.hpp"
15
15
 
16
+ #include <thread>
17
+
16
18
  namespace duckdb {
17
19
 
18
20
  PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
19
21
  unique_ptr<PhysicalOperator> right)
20
22
  : PhysicalComparisonJoin(op, PhysicalOperatorType::ASOF_JOIN, std::move(op.conditions), op.join_type,
21
- op.estimated_cardinality) {
23
+ op.estimated_cardinality),
24
+ comparison_type(ExpressionType::INVALID) {
22
25
 
23
26
  // Convert the conditions partitions and sorts
24
27
  for (auto &cond : conditions) {
@@ -29,9 +32,19 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
29
32
  auto right = cond.right->Copy();
30
33
  switch (cond.comparison) {
31
34
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
35
+ case ExpressionType::COMPARE_GREATERTHAN:
32
36
  null_sensitive.emplace_back(lhs_orders.size());
33
37
  lhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(left));
34
38
  rhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(right));
39
+ comparison_type = cond.comparison;
40
+ break;
41
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
42
+ case ExpressionType::COMPARE_LESSTHAN:
43
+ // Always put NULLS LAST so they can be ignored.
44
+ null_sensitive.emplace_back(lhs_orders.size());
45
+ lhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(left));
46
+ rhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(right));
47
+ comparison_type = cond.comparison;
35
48
  break;
36
49
  case ExpressionType::COMPARE_EQUAL:
37
50
  null_sensitive.emplace_back(lhs_orders.size());
@@ -67,21 +80,32 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
67
80
  class AsOfGlobalSinkState : public GlobalSinkState {
68
81
  public:
69
82
  AsOfGlobalSinkState(ClientContext &context, const PhysicalAsOfJoin &op)
70
- : global_partition(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {},
71
- op.estimated_cardinality),
83
+ : rhs_sink(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {}, op.estimated_cardinality),
72
84
  is_outer(IsRightOuterJoin(op.join_type)), has_null(false) {
73
85
  }
74
86
 
75
87
  idx_t Count() const {
76
- return global_partition.count;
88
+ return rhs_sink.count;
77
89
  }
78
90
 
79
- PartitionGlobalSinkState global_partition;
91
+ PartitionLocalSinkState *RegisterBuffer(ClientContext &context) {
92
+ lock_guard<mutex> guard(lock);
93
+ lhs_buffers.emplace_back(make_uniq<PartitionLocalSinkState>(context, *lhs_sink));
94
+ return lhs_buffers.back().get();
95
+ }
96
+
97
+ PartitionGlobalSinkState rhs_sink;
80
98
 
81
99
  // One per partition
82
100
  const bool is_outer;
83
101
  vector<OuterJoinMarker> right_outers;
84
102
  bool has_null;
103
+
104
+ // Left side buffering
105
+ unique_ptr<PartitionGlobalSinkState> lhs_sink;
106
+
107
+ mutex lock;
108
+ vector<unique_ptr<PartitionLocalSinkState>> lhs_buffers;
85
109
  };
86
110
 
87
111
  class AsOfLocalSinkState : public LocalSinkState {
@@ -108,7 +132,7 @@ unique_ptr<GlobalSinkState> PhysicalAsOfJoin::GetGlobalSinkState(ClientContext &
108
132
  unique_ptr<LocalSinkState> PhysicalAsOfJoin::GetLocalSinkState(ExecutionContext &context) const {
109
133
  // We only sink the RHS
110
134
  auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
111
- return make_uniq<AsOfLocalSinkState>(context.client, gsink.global_partition);
135
+ return make_uniq<AsOfLocalSinkState>(context.client, gsink.rhs_sink);
112
136
  }
113
137
 
114
138
  SinkResultType PhysicalAsOfJoin::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
@@ -131,15 +155,21 @@ SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, Cl
131
155
  GlobalSinkState &gstate_p) const {
132
156
  auto &gstate = gstate_p.Cast<AsOfGlobalSinkState>();
133
157
 
158
+ // The data is all in so we can initialise the left partitioning.
159
+ const vector<unique_ptr<BaseStatistics>> partitions_stats;
160
+ gstate.lhs_sink = make_uniq<PartitionGlobalSinkState>(context, lhs_partitions, lhs_orders, children[0]->types,
161
+ partitions_stats, 0);
162
+ gstate.lhs_sink->SyncPartitioning(gstate.rhs_sink);
163
+
134
164
  // Find the first group to sort
135
- auto &groups = gstate.global_partition.grouping_data->GetPartitions();
165
+ auto &groups = gstate.rhs_sink.grouping_data->GetPartitions();
136
166
  if (groups.empty() && EmptyResultIfRHSIsEmpty()) {
137
167
  // Empty input!
138
168
  return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
139
169
  }
140
170
 
141
171
  // Schedule all the sorts for maximum thread utilisation
142
- auto new_event = make_shared<PartitionMergeEvent>(gstate.global_partition, pipeline);
172
+ auto new_event = make_shared<PartitionMergeEvent>(gstate.rhs_sink, pipeline);
143
173
  event.InsertEvent(std::move(new_event));
144
174
 
145
175
  return SinkFinalizeType::READY;
@@ -152,10 +182,10 @@ class AsOfGlobalState : public GlobalOperatorState {
152
182
  public:
153
183
  explicit AsOfGlobalState(AsOfGlobalSinkState &gsink) {
154
184
  // for FULL/RIGHT OUTER JOIN, initialize right_outers to false for every tuple
155
- auto &global_partition = gsink.global_partition;
185
+ auto &rhs_partition = gsink.rhs_sink;
156
186
  auto &right_outers = gsink.right_outers;
157
- right_outers.reserve(global_partition.hash_groups.size());
158
- for (const auto &hash_group : global_partition.hash_groups) {
187
+ right_outers.reserve(rhs_partition.hash_groups.size());
188
+ for (const auto &hash_group : rhs_partition.hash_groups) {
159
189
  right_outers.emplace_back(OuterJoinMarker(gsink.is_outer));
160
190
  right_outers.back().Initialize(hash_group->count);
161
191
  }
@@ -169,79 +199,47 @@ unique_ptr<GlobalOperatorState> PhysicalAsOfJoin::GetGlobalOperatorState(ClientC
169
199
 
170
200
  class AsOfLocalState : public CachingOperatorState {
171
201
  public:
172
- using Orders = vector<BoundOrderByNode>;
173
- using Match = std::pair<hash_t, idx_t>;
202
+ AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op)
203
+ : context(context), allocator(Allocator::Get(context)), op(op), lhs_executor(context),
204
+ left_outer(IsLeftOuterJoin(op.join_type)), fetch_next_left(true) {
205
+ lhs_keys.Initialize(allocator, op.join_key_types);
206
+ for (const auto &cond : op.conditions) {
207
+ lhs_executor.AddExpression(*cond.left);
208
+ }
174
209
 
175
- AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external);
210
+ lhs_payload.Initialize(allocator, op.children[0]->types);
211
+ lhs_sel.Initialize();
212
+ left_outer.Initialize(STANDARD_VECTOR_SIZE);
176
213
 
177
- public:
178
- void ResolveJoin(DataChunk &input, bool *found_matches, Match *matches = nullptr);
214
+ auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
215
+ lhs_partition_sink = gsink.RegisterBuffer(context);
216
+ }
179
217
 
180
- void ResolveJoinKeys(DataChunk &input);
218
+ bool Sink(DataChunk &input);
219
+ OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk);
181
220
 
182
221
  ClientContext &context;
183
222
  Allocator &allocator;
184
223
  const PhysicalAsOfJoin &op;
185
- BufferManager &buffer_manager;
186
- const bool force_external;
187
- Orders lhs_orders;
188
224
 
189
- // LHS sorting
190
225
  ExpressionExecutor lhs_executor;
191
226
  DataChunk lhs_keys;
192
227
  ValidityMask lhs_valid_mask;
193
228
  SelectionVector lhs_sel;
194
- idx_t lhs_valid;
195
- RowLayout lhs_layout;
196
- unique_ptr<GlobalSortState> lhs_global_state;
197
- DataChunk lhs_sorted;
198
-
199
- // LHS binning
200
- Vector hash_vector;
201
- Vector bin_vector;
229
+ DataChunk lhs_payload;
202
230
 
203
- // Output
204
- idx_t lhs_match_count;
205
- SelectionVector lhs_matched;
206
231
  OuterJoinMarker left_outer;
207
232
  bool fetch_next_left;
208
- DataChunk group_payload;
209
- DataChunk rhs_payload;
210
- };
211
233
 
212
- AsOfLocalState::AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external)
213
- : context(context), allocator(Allocator::Get(context)), op(op),
214
- buffer_manager(BufferManager::GetBufferManager(context)), force_external(force_external), lhs_executor(context),
215
- hash_vector(LogicalType::HASH), bin_vector(LogicalType::HASH), left_outer(IsLeftOuterJoin(op.join_type)),
216
- fetch_next_left(true) {
217
- vector<unique_ptr<BaseStatistics>> partition_stats;
218
- Orders partitions; // Not used.
219
- PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
220
- partition_stats);
221
-
222
- // We sort the row numbers of the incoming block, not the rows
223
- lhs_layout.Initialize({LogicalType::UINTEGER});
224
- lhs_sorted.Initialize(allocator, lhs_layout.GetTypes());
225
-
226
- lhs_keys.Initialize(allocator, op.join_key_types);
227
- for (const auto &cond : op.conditions) {
228
- lhs_executor.AddExpression(*cond.left);
229
- }
230
-
231
- group_payload.Initialize(allocator, op.children[1]->types);
232
- rhs_payload.Initialize(allocator, op.children[1]->types);
233
-
234
- lhs_matched.Initialize();
235
- lhs_sel.Initialize();
236
- left_outer.Initialize(STANDARD_VECTOR_SIZE);
237
- }
234
+ optional_ptr<PartitionLocalSinkState> lhs_partition_sink;
235
+ };
238
236
 
239
- void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
237
+ bool AsOfLocalState::Sink(DataChunk &input) {
240
238
  // Compute the join keys
241
239
  lhs_keys.Reset();
242
240
  lhs_executor.Execute(input, lhs_keys);
243
241
 
244
- // Extract the NULLs
242
+ // Combine the NULLs
245
243
  const auto count = input.size();
246
244
  lhs_valid_mask.Reset();
247
245
  for (auto col_idx : op.null_sensitive) {
@@ -251,17 +249,19 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
251
249
  lhs_valid_mask.Combine(unified.validity, count);
252
250
  }
253
251
 
254
- // Convert the mask to a selection vector.
255
- // We need this anyway for sorting
256
- lhs_valid = 0;
252
+ // Convert the mask to a selection vector
253
+ // and mark all the rows that cannot match for early return.
254
+ idx_t lhs_valid = 0;
257
255
  const auto entry_count = lhs_valid_mask.EntryCount(count);
258
256
  idx_t base_idx = 0;
257
+ left_outer.Reset();
259
258
  for (idx_t entry_idx = 0; entry_idx < entry_count;) {
260
259
  const auto validity_entry = lhs_valid_mask.GetValidityEntry(entry_idx++);
261
260
  const auto next = MinValue<idx_t>(base_idx + ValidityMask::BITS_PER_VALUE, count);
262
261
  if (ValidityMask::AllValid(validity_entry)) {
263
262
  for (; base_idx < next; ++base_idx) {
264
263
  lhs_sel.set_index(lhs_valid++, base_idx);
264
+ left_outer.SetMatch(base_idx);
265
265
  }
266
266
  } else if (ValidityMask::NoneValid(validity_entry)) {
267
267
  base_idx = next;
@@ -270,120 +270,237 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
270
270
  for (; base_idx < next; ++base_idx) {
271
271
  if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) {
272
272
  lhs_sel.set_index(lhs_valid++, base_idx);
273
+ left_outer.SetMatch(base_idx);
273
274
  }
274
275
  }
275
276
  }
276
277
  }
277
278
 
278
279
  // Slice the keys to the ones we can match
279
- if (lhs_valid < count) {
280
- lhs_keys.Slice(lhs_sel, lhs_valid);
280
+ lhs_payload.Reset();
281
+ if (lhs_valid == count) {
282
+ lhs_payload.Reference(input);
283
+ lhs_payload.SetCardinality(input);
284
+ } else {
285
+ lhs_payload.Slice(input, lhs_sel, lhs_valid);
286
+ lhs_payload.SetCardinality(lhs_valid);
287
+
288
+ // Flush the ones that can't match
289
+ fetch_next_left = false;
281
290
  }
282
291
 
283
- // Hash to assign the partitions
284
- auto &global_partition = op.sink_state->Cast<AsOfGlobalSinkState>().global_partition;
285
- if (op.lhs_partitions.empty()) {
286
- // Only one hash group
287
- bin_vector.Reference(Value::HASH(0));
288
- } else {
289
- // Hash to determine the partitions.
290
- VectorOperations::Hash(lhs_keys.data[0], hash_vector, lhs_sel, lhs_valid);
291
- for (size_t prt_idx = 1; prt_idx < op.lhs_partitions.size(); ++prt_idx) {
292
- VectorOperations::CombineHash(hash_vector, lhs_keys.data[prt_idx], lhs_sel, lhs_valid);
292
+ lhs_partition_sink->Sink(lhs_payload);
293
+
294
+ return false;
295
+ }
296
+
297
+ OperatorResultType AsOfLocalState::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk) {
298
+ input.Verify();
299
+ Sink(input);
300
+
301
+ // If there were any unmatchable rows, return them now so we can forget about them.
302
+ if (!fetch_next_left) {
303
+ fetch_next_left = true;
304
+ left_outer.ConstructLeftJoinResult(input, chunk);
305
+ left_outer.Reset();
306
+ }
307
+
308
+ // Just keep asking for data and buffering it
309
+ return OperatorResultType::NEED_MORE_INPUT;
310
+ }
311
+
312
+ OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
313
+ GlobalOperatorState &gstate, OperatorState &lstate_p) const {
314
+ auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
315
+ auto &lstate = lstate_p.Cast<AsOfLocalState>();
316
+
317
+ if (gsink.rhs_sink.count == 0) {
318
+ // empty RHS
319
+ if (!EmptyResultIfRHSIsEmpty()) {
320
+ ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
321
+ return OperatorResultType::NEED_MORE_INPUT;
322
+ } else {
323
+ return OperatorResultType::FINISHED;
293
324
  }
325
+ }
326
+
327
+ return lstate.ExecuteInternal(context, input, chunk);
328
+ }
294
329
 
295
- // Convert hashes to hash groups
296
- const auto radix_bits = global_partition.grouping_data->GetRadixBits();
297
- RadixPartitioning::HashesToBins(hash_vector, radix_bits, bin_vector, count);
330
+ //===--------------------------------------------------------------------===//
331
+ // Source
332
+ //===--------------------------------------------------------------------===//
333
+ class AsOfProbeBuffer {
334
+ public:
335
+ using Orders = vector<BoundOrderByNode>;
336
+
337
+ static bool IsExternal(ClientContext &context) {
338
+ return ClientConfig::GetConfig(context).force_external;
298
339
  }
299
340
 
300
- // Sort the selection vector on the valid keys
301
- lhs_global_state = make_uniq<GlobalSortState>(buffer_manager, lhs_orders, lhs_layout);
302
- auto &global_state = *lhs_global_state;
303
- LocalSortState local_sort;
304
- local_sort.Initialize(*lhs_global_state, buffer_manager);
341
+ AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op);
305
342
 
306
- DataChunk payload_chunk;
307
- payload_chunk.InitializeEmpty({LogicalType::UINTEGER});
308
- FlatVector::SetData(payload_chunk.data[0], data_ptr_cast(lhs_sel.data()));
309
- payload_chunk.SetCardinality(lhs_valid);
310
- local_sort.SinkChunk(lhs_keys, payload_chunk);
343
+ public:
344
+ void ResolveJoin(bool *found_matches, idx_t *matches = nullptr);
345
+ bool Scanning() const {
346
+ return lhs_scanner.get();
347
+ }
348
+ void BeginLeftScan(hash_t scan_bin);
349
+ bool NextLeft();
350
+ void EndScan();
351
+
352
+ // resolve joins that output max N elements (SEMI, ANTI, MARK)
353
+ void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk);
354
+ // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
355
+ void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk);
356
+ // Chunk may be empty
357
+ void GetData(ExecutionContext &context, DataChunk &chunk);
358
+ bool HasMoreData() const {
359
+ return !fetch_next_left || (lhs_scanner && lhs_scanner->Remaining());
360
+ }
361
+
362
+ ClientContext &context;
363
+ Allocator &allocator;
364
+ const PhysicalAsOfJoin &op;
365
+ BufferManager &buffer_manager;
366
+ const bool force_external;
367
+ const idx_t memory_per_thread;
368
+ Orders lhs_orders;
369
+
370
+ // LHS scanning
371
+ SelectionVector lhs_sel;
372
+ optional_ptr<PartitionGlobalHashGroup> left_hash;
373
+ OuterJoinMarker left_outer;
374
+ unique_ptr<SBIterator> left_itr;
375
+ unique_ptr<PayloadScanner> lhs_scanner;
376
+ DataChunk lhs_payload;
377
+
378
+ // RHS scanning
379
+ optional_ptr<PartitionGlobalHashGroup> right_hash;
380
+ optional_ptr<OuterJoinMarker> right_outer;
381
+ unique_ptr<SBIterator> right_itr;
382
+ unique_ptr<PayloadScanner> rhs_scanner;
383
+ DataChunk rhs_payload;
384
+
385
+ idx_t lhs_match_count;
386
+ bool fetch_next_left;
387
+ };
388
+
389
+ AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op)
390
+ : context(context), allocator(Allocator::Get(context)), op(op),
391
+ buffer_manager(BufferManager::GetBufferManager(context)), force_external(IsExternal(context)),
392
+ memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)),
393
+ fetch_next_left(true) {
394
+ vector<unique_ptr<BaseStatistics>> partition_stats;
395
+ Orders partitions; // Not used.
396
+ PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
397
+ partition_stats);
398
+
399
+ // We sort the row numbers of the incoming block, not the rows
400
+ lhs_payload.Initialize(allocator, op.children[0]->types);
401
+ rhs_payload.Initialize(allocator, op.children[1]->types);
311
402
 
312
- // Set external (can be forced with the PRAGMA)
313
- global_state.external = force_external;
314
- global_state.AddLocalState(local_sort);
315
- global_state.PrepareMergePhase();
316
- while (global_state.sorted_blocks.size() > 1) {
317
- MergeSorter merge_sorter(*lhs_global_state, buffer_manager);
318
- merge_sorter.PerformInMergeRound();
319
- global_state.CompleteMergeRound();
403
+ lhs_sel.Initialize();
404
+ left_outer.Initialize(STANDARD_VECTOR_SIZE);
405
+ }
406
+
407
+ void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
408
+ auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
409
+ auto &lhs_sink = *gsink.lhs_sink;
410
+ const auto left_group = lhs_sink.bin_groups[scan_bin];
411
+ if (left_group >= lhs_sink.bin_groups.size()) {
412
+ return;
320
413
  }
321
414
 
322
- // Scan the sorted selection
323
- D_ASSERT(global_state.sorted_blocks.size() == 1);
415
+ auto iterator_comp = ExpressionType::INVALID;
416
+ switch (op.comparison_type) {
417
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
418
+ iterator_comp = ExpressionType::COMPARE_LESSTHANOREQUALTO;
419
+ break;
420
+ case ExpressionType::COMPARE_GREATERTHAN:
421
+ iterator_comp = ExpressionType::COMPARE_LESSTHAN;
422
+ break;
423
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
424
+ iterator_comp = ExpressionType::COMPARE_GREATERTHANOREQUALTO;
425
+ break;
426
+ case ExpressionType::COMPARE_LESSTHAN:
427
+ iterator_comp = ExpressionType::COMPARE_GREATERTHAN;
428
+ break;
429
+ default:
430
+ throw NotImplementedException("Unsupported comparison type for ASOF join");
431
+ }
324
432
 
325
- auto scanner = make_uniq<PayloadScanner>(*global_state.sorted_blocks[0]->payload_data, global_state, false);
326
- lhs_sorted.Reset();
327
- scanner->Scan(lhs_sorted);
433
+ left_hash = lhs_sink.hash_groups[left_group].get();
434
+ auto &left_sort = *(left_hash->global_sort);
435
+ lhs_scanner = make_uniq<PayloadScanner>(left_sort, false);
436
+ left_itr = make_uniq<SBIterator>(left_sort, iterator_comp);
437
+
438
+ // We are only probing the corresponding right side bin, which may be empty
439
+ // If they are empty, we leave the iterator as null so we can emit left matches
440
+ auto &rhs_sink = gsink.rhs_sink;
441
+ const auto right_group = rhs_sink.bin_groups[scan_bin];
442
+ if (right_group < rhs_sink.bin_groups.size()) {
443
+ right_hash = rhs_sink.hash_groups[right_group].get();
444
+ right_outer = gsink.right_outers.data() + right_group;
445
+ auto &right_sort = *(right_hash->global_sort);
446
+ right_itr = make_uniq<SBIterator>(right_sort, iterator_comp);
447
+ rhs_scanner = make_uniq<PayloadScanner>(right_sort, false);
448
+ }
328
449
  }
329
450
 
330
- void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<hash_t, idx_t> *matches) {
331
- // Sort the input into lhs_payload, radix keys in lhs_global_state
332
- ResolveJoinKeys(input);
451
+ bool AsOfProbeBuffer::NextLeft() {
452
+ if (!HasMoreData()) {
453
+ return false;
454
+ }
333
455
 
334
- auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
335
- auto &global_partition = gsink.global_partition;
456
+ // Scan the next sorted chunk
457
+ lhs_payload.Reset();
458
+ left_itr->SetIndex(lhs_scanner->Scanned());
459
+ lhs_scanner->Scan(lhs_payload);
336
460
 
337
- // The bins are contiguous from sorting, so load them one at a time
338
- // But they may be constant, so unify.
339
- UnifiedVectorFormat bin_unified;
340
- bin_vector.ToUnifiedFormat(lhs_valid, bin_unified);
341
- const auto bins = UnifiedVectorFormat::GetData<hash_t>(bin_unified);
461
+ return true;
462
+ }
342
463
 
343
- hash_t prev_bin = global_partition.bin_groups.size();
344
- optional_ptr<PartitionGlobalHashGroup> hash_group;
345
- optional_ptr<OuterJoinMarker> right_outer;
346
- // Searching for right <= left
347
- SBIterator left(*lhs_global_state, ExpressionType::COMPARE_LESSTHANOREQUALTO);
348
- unique_ptr<SBIterator> right;
464
+ void AsOfProbeBuffer::EndScan() {
465
+ right_hash = nullptr;
466
+ right_itr.reset();
467
+ rhs_scanner.reset();
468
+ right_outer = nullptr;
469
+
470
+ left_hash = nullptr;
471
+ left_itr.reset();
472
+ lhs_scanner.reset();
473
+ }
474
+
475
+ void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
476
+ // If there was no right partition, there are no matches
349
477
  lhs_match_count = 0;
350
- const auto sorted_sel = FlatVector::GetData<sel_t>(lhs_sorted.data[0]);
351
- for (idx_t i = 0; i < lhs_valid; ++i) {
352
- // idx is the index in the input; i is the index in the sorted keys
353
- const auto idx = sorted_sel[i];
354
- const auto curr_bin = bins[bin_unified.sel->get_index(idx)];
355
- if (!hash_group || curr_bin != prev_bin) {
356
- // Grab the next group
357
- prev_bin = curr_bin;
358
- const auto group_idx = global_partition.bin_groups[curr_bin];
359
- if (group_idx >= global_partition.hash_groups.size()) {
360
- // No matching partition
361
- hash_group = nullptr;
362
- right_outer = nullptr;
363
- right.reset();
364
- continue;
365
- }
366
- hash_group = global_partition.hash_groups[group_idx].get();
367
- right_outer = gsink.right_outers.data() + group_idx;
368
- right = make_uniq<SBIterator>(*(hash_group->global_sort), ExpressionType::COMPARE_LESSTHANOREQUALTO);
369
- }
370
- left.SetIndex(i);
478
+ left_outer.Reset();
479
+ if (!right_itr) {
480
+ return;
481
+ }
482
+
483
+ const auto count = lhs_payload.size();
484
+ const auto left_base = left_itr->GetIndex();
485
+ // Searching for right <= left
486
+ for (idx_t i = 0; i < count; ++i) {
487
+ left_itr->SetIndex(left_base + i);
371
488
 
372
489
  // If right > left, then there is no match
373
- if (!right->Compare(left)) {
490
+ if (!right_itr->Compare(*left_itr)) {
374
491
  continue;
375
492
  }
376
493
 
377
494
  // Exponential search forward for a non-matching value using radix iterators
378
495
  // (We use exponential search to avoid thrashing the block manager on large probes)
379
496
  idx_t bound = 1;
380
- idx_t begin = right->GetIndex();
381
- right->SetIndex(begin + bound);
382
- while (right->GetIndex() < hash_group->count) {
383
- if (right->Compare(left)) {
497
+ idx_t begin = right_itr->GetIndex();
498
+ right_itr->SetIndex(begin + bound);
499
+ while (right_itr->GetIndex() < right_hash->count) {
500
+ if (right_itr->Compare(*left_itr)) {
384
501
  // If right <= left, jump ahead
385
502
  bound *= 2;
386
- right->SetIndex(begin + bound);
503
+ right_itr->SetIndex(begin + bound);
387
504
  } else {
388
505
  break;
389
506
  }
@@ -392,255 +509,298 @@ void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<
392
509
  // Binary search for the first non-matching value using radix iterators
393
510
  // The previous value (which we know exists) is the match
394
511
  auto first = begin + bound / 2;
395
- auto last = MinValue<idx_t>(begin + bound, hash_group->count);
512
+ auto last = MinValue<idx_t>(begin + bound, right_hash->count);
396
513
  while (first < last) {
397
514
  const auto mid = first + (last - first) / 2;
398
- right->SetIndex(mid);
399
- if (right->Compare(left)) {
515
+ right_itr->SetIndex(mid);
516
+ if (right_itr->Compare(*left_itr)) {
400
517
  // If right <= left, new lower bound
401
518
  first = mid + 1;
402
519
  } else {
403
520
  last = mid;
404
521
  }
405
522
  }
406
- right->SetIndex(--first);
523
+ right_itr->SetIndex(--first);
407
524
 
408
525
  // Check partitions for strict equality
409
- if (!op.lhs_partitions.empty() && hash_group->ComparePartitions(left, *right)) {
526
+ if (right_hash->ComparePartitions(*left_itr, *right_itr)) {
410
527
  continue;
411
528
  }
412
529
 
413
530
  // Emit match data
414
531
  right_outer->SetMatch(first);
415
- left_outer.SetMatch(idx);
532
+ left_outer.SetMatch(i);
416
533
  if (found_match) {
417
- found_match[idx] = true;
534
+ found_match[i] = true;
418
535
  }
419
536
  if (matches) {
420
- matches[idx] = Match(curr_bin, first);
537
+ matches[i] = first;
421
538
  }
422
- lhs_matched.set_index(lhs_match_count++, idx);
539
+ lhs_sel.set_index(lhs_match_count++, i);
423
540
  }
424
541
  }
425
542
 
426
543
  unique_ptr<OperatorState> PhysicalAsOfJoin::GetOperatorState(ExecutionContext &context) const {
427
- auto &config = ClientConfig::GetConfig(context.client);
428
- return make_uniq<AsOfLocalState>(context.client, *this, config.force_external);
544
+ return make_uniq<AsOfLocalState>(context.client, *this);
429
545
  }
430
546
 
431
- void PhysicalAsOfJoin::ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
432
- OperatorState &lstate_p) const {
433
- auto &lstate = lstate_p.Cast<AsOfLocalState>();
434
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
435
-
547
+ void AsOfProbeBuffer::ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk) {
436
548
  // perform the actual join
437
549
  bool found_match[STANDARD_VECTOR_SIZE] = {false};
438
- lstate.ResolveJoin(input, found_match);
550
+ ResolveJoin(found_match);
439
551
 
440
552
  // now construct the result based on the join result
441
- switch (join_type) {
442
- case JoinType::MARK: {
443
- PhysicalJoin::ConstructMarkJoinResult(lstate.lhs_keys, input, chunk, found_match, gsink.has_null);
444
- break;
445
- }
553
+ switch (op.join_type) {
446
554
  case JoinType::SEMI:
447
- PhysicalJoin::ConstructSemiJoinResult(input, chunk, found_match);
555
+ PhysicalJoin::ConstructSemiJoinResult(lhs_payload, chunk, found_match);
448
556
  break;
449
557
  case JoinType::ANTI:
450
- PhysicalJoin::ConstructAntiJoinResult(input, chunk, found_match);
558
+ PhysicalJoin::ConstructAntiJoinResult(lhs_payload, chunk, found_match);
451
559
  break;
452
560
  default:
453
561
  throw NotImplementedException("Unimplemented join type for AsOf join");
454
562
  }
455
563
  }
456
564
 
457
- OperatorResultType PhysicalAsOfJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
458
- OperatorState &lstate_p) const {
459
- auto &lstate = lstate_p.Cast<AsOfLocalState>();
460
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
461
-
462
- if (!lstate.fetch_next_left) {
463
- lstate.fetch_next_left = true;
464
- if (lstate.left_outer.Enabled()) {
465
- // left join: before we move to the next chunk, see if we need to output any vectors that didn't
466
- // have a match found
467
- lstate.left_outer.ConstructLeftJoinResult(input, chunk);
468
- lstate.left_outer.Reset();
469
- }
470
- return OperatorResultType::NEED_MORE_INPUT;
471
- }
472
-
565
+ void AsOfProbeBuffer::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk) {
473
566
  // perform the actual join
474
- AsOfLocalState::Match matches[STANDARD_VECTOR_SIZE];
475
- lstate.ResolveJoin(input, nullptr, matches);
476
- lstate.group_payload.Reset();
477
- lstate.rhs_payload.Reset();
478
-
479
- auto &global_partition = gsink.global_partition;
480
- hash_t scan_bin = global_partition.bin_groups.size();
481
- optional_ptr<PartitionGlobalHashGroup> hash_group;
482
- unique_ptr<PayloadScanner> scanner;
483
- for (idx_t i = 0; i < lstate.lhs_match_count; ++i) {
484
- const auto idx = lstate.lhs_matched[i];
485
- const auto match_bin = matches[idx].first;
486
- const auto match_pos = matches[idx].second;
487
- if (match_bin != scan_bin) {
488
- // Grab the next group
489
- const auto group_idx = global_partition.bin_groups[match_bin];
490
- hash_group = global_partition.hash_groups[group_idx].get();
491
- scan_bin = match_bin;
492
- scanner = make_uniq<PayloadScanner>(*hash_group->global_sort, false);
493
- lstate.group_payload.Reset();
494
- }
567
+ idx_t matches[STANDARD_VECTOR_SIZE];
568
+ ResolveJoin(nullptr, matches);
569
+
570
+ for (idx_t i = 0; i < lhs_match_count; ++i) {
571
+ const auto idx = lhs_sel[i];
572
+ const auto match_pos = matches[idx];
495
573
  // Skip to the range containing the match
496
- while (match_pos >= scanner->Scanned()) {
497
- lstate.group_payload.Reset();
498
- scanner->Scan(lstate.group_payload);
574
+ while (match_pos >= rhs_scanner->Scanned()) {
575
+ rhs_payload.Reset();
576
+ rhs_scanner->Scan(rhs_payload);
499
577
  }
500
578
  // Append the individual values
501
579
  // TODO: Batch the copies
502
- const auto source_offset = match_pos - (scanner->Scanned() - lstate.group_payload.size());
503
- for (idx_t col_idx = 0; col_idx < right_projection_map.size(); ++col_idx) {
504
- const auto rhs_idx = right_projection_map[col_idx];
505
- auto &source = lstate.group_payload.data[rhs_idx];
506
- auto &target = chunk.data[input.ColumnCount() + col_idx];
580
+ const auto source_offset = match_pos - (rhs_scanner->Scanned() - rhs_payload.size());
581
+ for (column_t col_idx = 0; col_idx < op.right_projection_map.size(); ++col_idx) {
582
+ const auto rhs_idx = op.right_projection_map[col_idx];
583
+ auto &source = rhs_payload.data[rhs_idx];
584
+ auto &target = chunk.data[lhs_payload.ColumnCount() + col_idx];
507
585
  VectorOperations::Copy(source, target, source_offset + 1, source_offset, i);
508
586
  }
509
587
  }
510
588
 
511
- // Slice the input into the left side
512
- chunk.Slice(input, lstate.lhs_matched, lstate.lhs_match_count);
513
-
514
- // If we are doing a left join, come back for the NULLs
515
- if (lstate.left_outer.Enabled()) {
516
- lstate.fetch_next_left = false;
517
- return OperatorResultType::HAVE_MORE_OUTPUT;
589
+ // Slice the left payload into the result
590
+ for (column_t i = 0; i < lhs_payload.ColumnCount(); ++i) {
591
+ chunk.data[i].Slice(lhs_payload.data[i], lhs_sel, lhs_match_count);
518
592
  }
593
+ chunk.SetCardinality(lhs_match_count);
519
594
 
520
- return OperatorResultType::NEED_MORE_INPUT;
595
+ // If we are doing a left join, come back for the NULLs
596
+ fetch_next_left = !left_outer.Enabled();
521
597
  }
522
598
 
523
- OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
524
- GlobalOperatorState &gstate, OperatorState &lstate) const {
525
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
526
-
527
- if (gsink.global_partition.count == 0) {
528
- // empty RHS
529
- if (!EmptyResultIfRHSIsEmpty()) {
530
- ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
531
- return OperatorResultType::NEED_MORE_INPUT;
532
- } else {
533
- return OperatorResultType::FINISHED;
599
+ void AsOfProbeBuffer::GetData(ExecutionContext &context, DataChunk &chunk) {
600
+ // Handle dangling left join results from current chunk
601
+ if (!fetch_next_left) {
602
+ fetch_next_left = true;
603
+ if (left_outer.Enabled()) {
604
+ // left join: before we move to the next chunk, see if we need to output any vectors that didn't
605
+ // have a match found
606
+ left_outer.ConstructLeftJoinResult(lhs_payload, chunk);
607
+ left_outer.Reset();
534
608
  }
609
+ return;
535
610
  }
536
611
 
537
- input.Verify();
538
- switch (join_type) {
612
+ // Stop if there is no more data
613
+ if (!NextLeft()) {
614
+ return;
615
+ }
616
+
617
+ switch (op.join_type) {
539
618
  case JoinType::SEMI:
540
619
  case JoinType::ANTI:
541
620
  case JoinType::MARK:
542
621
  // simple joins can have max STANDARD_VECTOR_SIZE matches per chunk
543
- ResolveSimpleJoin(context, input, chunk, lstate);
544
- return OperatorResultType::NEED_MORE_INPUT;
622
+ ResolveSimpleJoin(context, chunk);
623
+ break;
545
624
  case JoinType::LEFT:
546
625
  case JoinType::INNER:
547
626
  case JoinType::RIGHT:
548
627
  case JoinType::OUTER:
549
- return ResolveComplexJoin(context, input, chunk, lstate);
628
+ ResolveComplexJoin(context, chunk);
629
+ break;
550
630
  default:
551
631
  throw NotImplementedException("Unimplemented type for as-of join!");
552
632
  }
553
633
  }
554
634
 
555
- //===--------------------------------------------------------------------===//
556
- // Source
557
- //===--------------------------------------------------------------------===//
558
635
  class AsOfGlobalSourceState : public GlobalSourceState {
559
636
  public:
560
- explicit AsOfGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
637
+ explicit AsOfGlobalSourceState(AsOfGlobalSinkState &gsink_p)
638
+ : gsink(gsink_p), next_combine(0), combined(0), merged(0), mergers(0), next_left(0), flushed(0), next_right(0) {
561
639
  }
562
640
 
563
- PartitionGlobalSinkState &gsink;
564
- //! The output read position.
565
- atomic<idx_t> next_bin;
566
-
567
- public:
568
- idx_t MaxThreads() override {
569
- // If there is only one partition, we have to process it on one thread.
570
- if (!gsink.grouping_data) {
571
- return 1;
641
+ PartitionGlobalMergeStates &GetMergeStates() {
642
+ lock_guard<mutex> guard(lock);
643
+ if (!merge_states) {
644
+ merge_states = make_uniq<PartitionGlobalMergeStates>(*gsink.lhs_sink);
572
645
  }
646
+ return *merge_states;
647
+ }
573
648
 
574
- // If there is not a lot of data, process serially.
575
- if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
576
- return 1;
577
- }
649
+ AsOfGlobalSinkState &gsink;
650
+ //! The next buffer to combine
651
+ atomic<size_t> next_combine;
652
+ //! The number of combined buffers
653
+ atomic<size_t> combined;
654
+ //! The number of combined buffers
655
+ atomic<size_t> merged;
656
+ //! The number of combined buffers
657
+ atomic<size_t> mergers;
658
+ //! The next buffer to flush
659
+ atomic<size_t> next_left;
660
+ //! The number of flushed buffers
661
+ atomic<size_t> flushed;
662
+ //! The right outer output read position.
663
+ atomic<idx_t> next_right;
664
+ //! The merge handler
665
+ mutex lock;
666
+ unique_ptr<PartitionGlobalMergeStates> merge_states;
578
667
 
579
- return gsink.hash_groups.size();
668
+ public:
669
+ idx_t MaxThreads() override {
670
+ return gsink.lhs_buffers.size();
580
671
  }
581
672
  };
582
673
 
583
674
  unique_ptr<GlobalSourceState> PhysicalAsOfJoin::GetGlobalSourceState(ClientContext &context) const {
584
675
  auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
585
- return make_uniq<AsOfGlobalSourceState>(gsink.global_partition);
676
+ return make_uniq<AsOfGlobalSourceState>(gsink);
586
677
  }
587
678
 
588
679
  class AsOfLocalSourceState : public LocalSourceState {
589
680
  public:
590
681
  using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
591
682
 
592
- explicit AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p);
683
+ AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op);
684
+
685
+ void CombineLeftPartitions();
686
+ void MergeLeftPartitions();
593
687
 
594
- idx_t GeneratePartition(const idx_t hash_bin);
688
+ idx_t BeginRightScan(const idx_t hash_bin);
595
689
 
596
- AsOfGlobalSinkState &gstate;
690
+ AsOfGlobalSourceState &gsource;
691
+
692
+ //! The left side partition being probed
693
+ AsOfProbeBuffer probe_buffer;
597
694
 
598
695
  //! The read partition
599
696
  idx_t hash_bin;
600
697
  HashGroupPtr hash_group;
601
-
602
698
  //! The read cursor
603
699
  unique_ptr<PayloadScanner> scanner;
604
- //! Buffer for the inputs
605
- DataChunk input_chunk;
606
700
  //! Pointer to the matches
607
- const bool *found_match;
701
+ const bool *found_match = {};
608
702
  };
609
703
 
610
- AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p) : gstate(gstate_p) {
611
- input_chunk.Initialize(gstate.global_partition.allocator, gstate.global_partition.payload_types);
704
+ AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op)
705
+ : gsource(gsource), probe_buffer(gsource.gsink.lhs_sink->context, op) {
706
+ gsource.mergers++;
707
+ }
708
+
709
+ void AsOfLocalSourceState::CombineLeftPartitions() {
710
+ const auto buffer_count = gsource.gsink.lhs_buffers.size();
711
+ while (gsource.combined < buffer_count) {
712
+ const auto next_combine = gsource.next_combine++;
713
+ if (next_combine < buffer_count) {
714
+ gsource.gsink.lhs_buffers[next_combine]->Combine();
715
+ ++gsource.combined;
716
+ } else {
717
+ std::this_thread::yield();
718
+ }
719
+ }
720
+ }
721
+
722
+ void AsOfLocalSourceState::MergeLeftPartitions() {
723
+ PartitionGlobalMergeStates::Callback local_callback;
724
+ PartitionLocalMergeState local_merge;
725
+ gsource.GetMergeStates().ExecuteTask(local_merge, local_callback);
726
+ gsource.merged++;
727
+ while (gsource.merged < gsource.mergers) {
728
+ std::this_thread::yield();
729
+ }
612
730
  }
613
731
 
614
- idx_t AsOfLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
615
- // Get rid of any stale data
732
+ idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
616
733
  hash_bin = hash_bin_p;
617
734
 
618
- hash_group = std::move(gstate.global_partition.hash_groups[hash_bin]);
735
+ hash_group = std::move(gsource.gsink.rhs_sink.hash_groups[hash_bin]);
619
736
  scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);
620
- found_match = gstate.right_outers[hash_bin].GetMatches();
737
+ found_match = gsource.gsink.right_outers[hash_bin].GetMatches();
621
738
 
622
739
  return scanner->Remaining();
623
740
  }
624
741
 
625
742
  unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
626
743
  GlobalSourceState &gstate) const {
627
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
628
- return make_uniq<AsOfLocalSourceState>(gsink);
744
+ auto &gsource = gstate.Cast<AsOfGlobalSourceState>();
745
+ return make_uniq<AsOfLocalSourceState>(gsource, *this);
629
746
  }
630
747
 
631
748
  SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &chunk,
632
749
  OperatorSourceInput &input) const {
633
- D_ASSERT(IsRightOuterJoin(join_type));
634
-
635
750
  auto &gsource = input.global_state.Cast<AsOfGlobalSourceState>();
636
751
  auto &lsource = input.local_state.Cast<AsOfLocalSourceState>();
637
- auto &gsink = gsource.gsink;
752
+ auto &rhs_sink = gsource.gsink.rhs_sink;
753
+
754
+ // Step 1: Combine the partitions
755
+ lsource.CombineLeftPartitions();
756
+
757
+ // Step 2: Sort on all threads
758
+ lsource.MergeLeftPartitions();
759
+
760
+ // Step 3: Join the partitions
761
+ auto &lhs_sink = *gsource.gsink.lhs_sink;
762
+ auto &partitions = lhs_sink.grouping_data->GetPartitions();
763
+ const auto left_bins = partitions.size();
764
+ while (gsource.flushed < left_bins) {
765
+ // Make sure we have something to flush
766
+ if (!lsource.probe_buffer.Scanning()) {
767
+ const auto left_bin = gsource.next_left++;
768
+ if (left_bin < left_bins) {
769
+ // More to flush
770
+ lsource.probe_buffer.BeginLeftScan(left_bin);
771
+ } else if (!IsRightOuterJoin(join_type)) {
772
+ return SourceResultType::FINISHED;
773
+ } else {
774
+ // Wait for all threads to finish
775
+ // TODO: How to implement a spin wait correctly?
776
+ // Returning BLOCKED seems to hang the system.
777
+ std::this_thread::yield();
778
+ continue;
779
+ }
780
+ }
781
+
782
+ lsource.probe_buffer.GetData(context, chunk);
783
+ if (chunk.size()) {
784
+ return SourceResultType::HAVE_MORE_OUTPUT;
785
+ } else if (lsource.probe_buffer.HasMoreData()) {
786
+ // Join the next partition
787
+ continue;
788
+ } else {
789
+ lsource.probe_buffer.EndScan();
790
+ gsource.flushed++;
791
+ }
792
+ }
793
+
794
+ // Step 4: Emit right join matches
795
+ if (!IsRightOuterJoin(join_type)) {
796
+ return SourceResultType::FINISHED;
797
+ }
638
798
 
639
- auto &hash_groups = gsink.hash_groups;
640
- const auto bin_count = hash_groups.size();
799
+ auto &hash_groups = rhs_sink.hash_groups;
800
+ const auto right_groups = hash_groups.size();
641
801
 
642
802
  DataChunk rhs_chunk;
643
- rhs_chunk.Initialize(Allocator::Get(context.client), gsink.payload_types);
803
+ rhs_chunk.Initialize(Allocator::Get(context.client), rhs_sink.payload_types);
644
804
  SelectionVector rsel(STANDARD_VECTOR_SIZE);
645
805
 
646
806
  while (chunk.size() == 0) {
@@ -648,17 +808,17 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
648
808
  while (!lsource.scanner || !lsource.scanner->Remaining()) {
649
809
  lsource.scanner.reset();
650
810
  lsource.hash_group.reset();
651
- auto hash_bin = gsource.next_bin++;
652
- if (hash_bin >= bin_count) {
811
+ auto hash_bin = gsource.next_right++;
812
+ if (hash_bin >= right_groups) {
653
813
  return SourceResultType::FINISHED;
654
814
  }
655
815
 
656
- for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
816
+ for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_right++) {
657
817
  if (hash_groups[hash_bin]) {
658
818
  break;
659
819
  }
660
820
  }
661
- lsource.GeneratePartition(hash_bin);
821
+ lsource.BeginRightScan(hash_bin);
662
822
  }
663
823
  const auto rhs_position = lsource.scanner->Scanned();
664
824
  lsource.scanner->Scan(rhs_chunk);