duckdb 0.8.2-dev37.0 → 0.8.2-dev3989.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1087)
  1. package/README.md +7 -0
  2. package/binding.gyp +29 -13
  3. package/binding.gyp.in +1 -1
  4. package/configure.py +11 -3
  5. package/duckdb_extension_config.cmake +10 -0
  6. package/package.json +1 -1
  7. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  8. package/src/duckdb/extension/icu/icu-datefunc.cpp +10 -1
  9. package/src/duckdb/extension/icu/icu-datepart.cpp +162 -41
  10. package/src/duckdb/extension/icu/icu-datesub.cpp +3 -2
  11. package/src/duckdb/extension/icu/icu-datetrunc.cpp +2 -1
  12. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-makedate.cpp +19 -6
  14. package/src/duckdb/extension/icu/icu-strptime.cpp +5 -24
  15. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  16. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  17. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  18. package/src/duckdb/extension/icu/icu_extension.cpp +10 -12
  19. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
  20. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
  21. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  22. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  23. package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
  24. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  25. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  26. package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
  27. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  28. package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
  29. package/src/duckdb/extension/json/json_common.cpp +272 -40
  30. package/src/duckdb/extension/json/json_deserializer.cpp +37 -73
  31. package/src/duckdb/extension/json/json_enums.cpp +105 -0
  32. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  33. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  34. package/src/duckdb/extension/json/json_functions/json_transform.cpp +93 -38
  35. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  36. package/src/duckdb/extension/json/json_functions.cpp +26 -25
  37. package/src/duckdb/extension/json/json_scan.cpp +47 -6
  38. package/src/duckdb/extension/json/json_serializer.cpp +29 -72
  39. package/src/duckdb/extension/json/serialize_json.cpp +92 -0
  40. package/src/duckdb/extension/parquet/column_reader.cpp +37 -25
  41. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  42. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  43. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  44. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  45. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  46. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  47. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  48. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  49. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +4 -0
  50. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  51. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  52. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +1 -0
  53. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +28 -5
  54. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  55. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  56. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  57. package/src/duckdb/extension/parquet/parquet_extension.cpp +258 -40
  58. package/src/duckdb/extension/parquet/parquet_reader.cpp +10 -10
  59. package/src/duckdb/extension/parquet/parquet_statistics.cpp +25 -8
  60. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +6 -0
  61. package/src/duckdb/extension/parquet/parquet_writer.cpp +149 -31
  62. package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
  63. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  64. package/src/duckdb/src/catalog/catalog.cpp +147 -70
  65. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -11
  66. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +17 -41
  67. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +2 -10
  68. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +4 -14
  69. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +11 -28
  70. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +11 -42
  71. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -26
  72. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +11 -27
  73. package/src/duckdb/src/catalog/catalog_entry.cpp +25 -1
  74. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  75. package/src/duckdb/src/catalog/catalog_set.cpp +0 -63
  76. package/src/duckdb/src/catalog/default/default_functions.cpp +21 -0
  77. package/src/duckdb/src/catalog/dependency_manager.cpp +0 -36
  78. package/src/duckdb/src/common/adbc/adbc.cpp +541 -171
  79. package/src/duckdb/src/common/adbc/driver_manager.cpp +92 -39
  80. package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
  81. package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
  82. package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
  83. package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
  84. package/src/duckdb/src/common/allocator.cpp +14 -2
  85. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
  86. package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
  87. package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
  88. package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
  89. package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
  90. package/src/duckdb/src/common/arrow/arrow_appender.cpp +95 -666
  91. package/src/duckdb/src/common/arrow/arrow_converter.cpp +65 -37
  92. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -42
  93. package/src/duckdb/src/common/assert.cpp +3 -0
  94. package/src/duckdb/src/common/constants.cpp +2 -1
  95. package/src/duckdb/src/common/enum_util.cpp +4979 -4458
  96. package/src/duckdb/src/common/enums/date_part_specifier.cpp +2 -0
  97. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  98. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  99. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  100. package/src/duckdb/src/common/exception.cpp +15 -2
  101. package/src/duckdb/src/common/extra_type_info.cpp +487 -0
  102. package/src/duckdb/src/common/field_writer.cpp +1 -1
  103. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  104. package/src/duckdb/src/common/file_system.cpp +46 -12
  105. package/src/duckdb/src/common/filename_pattern.cpp +1 -1
  106. package/src/duckdb/src/common/gzip_file_system.cpp +7 -12
  107. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  108. package/src/duckdb/src/common/http_state.cpp +78 -0
  109. package/src/duckdb/src/common/local_file_system.cpp +36 -28
  110. package/src/duckdb/src/common/multi_file_reader.cpp +193 -20
  111. package/src/duckdb/src/common/operator/cast_operators.cpp +92 -1
  112. package/src/duckdb/src/common/operator/string_cast.cpp +45 -8
  113. package/src/duckdb/src/common/radix_partitioning.cpp +34 -39
  114. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  115. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  116. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  117. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +63 -73
  118. package/src/duckdb/src/common/serializer/binary_serializer.cpp +85 -80
  119. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +0 -9
  120. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  121. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  122. package/src/duckdb/src/common/sort/partition_state.cpp +102 -74
  123. package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
  124. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  125. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  126. package/src/duckdb/src/common/types/bit.cpp +51 -0
  127. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  128. package/src/duckdb/src/common/types/column/column_data_collection.cpp +68 -2
  129. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +20 -6
  130. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  131. package/src/duckdb/src/common/types/data_chunk.cpp +46 -10
  132. package/src/duckdb/src/common/types/date.cpp +15 -0
  133. package/src/duckdb/src/common/types/hugeint.cpp +40 -0
  134. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  135. package/src/duckdb/src/common/types/interval.cpp +6 -0
  136. package/src/duckdb/src/common/types/list_segment.cpp +56 -198
  137. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +251 -131
  138. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  139. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  140. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  141. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +41 -26
  142. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  143. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  144. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  145. package/src/duckdb/src/common/types/string_heap.cpp +4 -0
  146. package/src/duckdb/src/common/types/time.cpp +105 -0
  147. package/src/duckdb/src/common/types/timestamp.cpp +7 -0
  148. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  149. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  150. package/src/duckdb/src/common/types/value.cpp +99 -60
  151. package/src/duckdb/src/common/types/vector.cpp +73 -80
  152. package/src/duckdb/src/common/types.cpp +38 -724
  153. package/src/duckdb/src/common/virtual_file_system.cpp +142 -1
  154. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
  155. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
  156. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +64 -19
  157. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +30 -0
  158. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +1 -0
  159. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +83 -59
  160. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  161. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  162. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  163. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  164. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  165. package/src/duckdb/src/core_functions/function_list.cpp +10 -4
  166. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -0
  167. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +380 -89
  168. package/src/duckdb/src/core_functions/scalar/date/date_sub.cpp +2 -0
  169. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +4 -0
  170. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  171. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  172. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
  173. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  174. package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +16 -12
  175. package/src/duckdb/src/core_functions/scalar/generic/current_setting.cpp +3 -1
  176. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
  177. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  178. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +23 -3
  179. package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
  180. package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
  181. package/src/duckdb/src/core_functions/scalar/union/union_tag.cpp +1 -1
  182. package/src/duckdb/src/execution/aggregate_hashtable.cpp +226 -346
  183. package/src/duckdb/src/execution/column_binding_resolver.cpp +10 -7
  184. package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
  185. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  186. package/src/duckdb/src/execution/index/art/art.cpp +219 -259
  187. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  188. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +11 -15
  189. package/src/duckdb/src/execution/index/art/iterator.cpp +130 -214
  190. package/src/duckdb/src/execution/index/art/leaf.cpp +300 -266
  191. package/src/duckdb/src/execution/index/art/node.cpp +211 -205
  192. package/src/duckdb/src/execution/index/art/node16.cpp +10 -19
  193. package/src/duckdb/src/execution/index/art/node256.cpp +10 -18
  194. package/src/duckdb/src/execution/index/art/node4.cpp +21 -23
  195. package/src/duckdb/src/execution/index/art/node48.cpp +10 -20
  196. package/src/duckdb/src/execution/index/art/prefix.cpp +308 -338
  197. package/src/duckdb/src/execution/join_hashtable.cpp +9 -10
  198. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  199. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +250 -317
  200. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
  201. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  202. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +231 -190
  203. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +367 -1068
  204. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +157 -174
  205. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  206. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  207. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  208. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  209. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +67 -28
  210. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  211. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  212. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +46 -47
  213. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  214. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  215. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  216. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  217. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  218. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  219. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  220. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +12 -9
  221. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
  222. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +10 -8
  223. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +2 -1
  224. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
  225. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +3 -1
  226. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +3 -1
  227. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
  228. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +449 -288
  229. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  230. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  231. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
  232. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +28 -15
  233. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +35 -17
  234. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  235. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
  236. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +31 -10
  237. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  238. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
  239. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
  240. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  241. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +14 -10
  242. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +11 -9
  243. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +9 -7
  244. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +14 -12
  245. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +11 -11
  246. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
  247. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  248. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +24 -27
  249. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  250. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -12
  251. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +2 -1
  252. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +198 -0
  253. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -6
  254. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  255. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +16 -7
  256. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +37 -6
  257. package/src/duckdb/src/execution/physical_operator.cpp +20 -16
  258. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  259. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +57 -35
  260. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +32 -15
  261. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +45 -34
  262. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  263. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +2 -5
  264. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  265. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  266. package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -11
  267. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +636 -349
  268. package/src/duckdb/src/execution/window_executor.cpp +1285 -0
  269. package/src/duckdb/src/execution/window_segment_tree.cpp +408 -144
  270. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -13
  271. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +6 -12
  272. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  273. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  274. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  275. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  276. package/src/duckdb/src/function/cast/string_cast.cpp +2 -2
  277. package/src/duckdb/src/function/cast/time_casts.cpp +7 -6
  278. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +7 -2
  279. package/src/duckdb/src/function/function.cpp +3 -1
  280. package/src/duckdb/src/function/pragma/pragma_queries.cpp +7 -1
  281. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  282. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  283. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  284. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  285. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  286. package/src/duckdb/src/function/scalar/operators/add.cpp +9 -0
  287. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +6 -3
  288. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  289. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  290. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +39 -5
  291. package/src/duckdb/src/function/scalar_function.cpp +5 -20
  292. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
  293. package/src/duckdb/src/function/table/arrow.cpp +110 -88
  294. package/src/duckdb/src/function/table/arrow_conversion.cpp +86 -73
  295. package/src/duckdb/src/function/table/copy_csv.cpp +102 -97
  296. package/src/duckdb/src/function/table/read_csv.cpp +263 -141
  297. package/src/duckdb/src/function/table/system/test_all_types.cpp +48 -21
  298. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  299. package/src/duckdb/src/function/table/table_scan.cpp +42 -0
  300. package/src/duckdb/src/function/table/version/pragma_version.cpp +49 -2
  301. package/src/duckdb/src/function/table_function.cpp +4 -3
  302. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +20 -5
  303. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +3 -3
  304. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +1 -4
  305. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -5
  306. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -6
  307. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +2 -13
  308. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -4
  309. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -5
  310. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +14 -0
  311. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +0 -6
  312. package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
  313. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +4 -1
  314. package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
  315. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  316. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
  317. package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
  318. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
  319. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
  320. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
  321. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
  322. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
  323. package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
  324. package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
  325. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
  326. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +9 -4
  327. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -5
  328. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
  329. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
  330. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
  331. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
  332. package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
  333. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
  334. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  335. package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
  336. package/src/duckdb/src/include/duckdb/common/constants.hpp +4 -0
  337. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  338. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +681 -577
  339. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  340. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +9 -1
  341. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +4 -3
  342. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  343. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  344. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
  345. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  346. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  347. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  348. package/src/duckdb/src/include/duckdb/common/exception.hpp +15 -1
  349. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  350. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +215 -0
  351. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +0 -4
  352. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  353. package/src/duckdb/src/include/duckdb/common/file_system.hpp +10 -8
  354. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +1 -1
  355. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  356. package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
  357. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  358. package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
  359. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
  360. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +12 -0
  361. package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
  362. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +11 -5
  363. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +12 -42
  364. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  365. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
  366. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +6 -2
  367. package/src/duckdb/src/include/duckdb/common/operator/add.hpp +5 -2
  368. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +65 -4
  369. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +3 -2
  370. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  371. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  372. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +3 -2
  373. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  374. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  375. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  376. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  377. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  378. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  379. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +71 -30
  380. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +48 -39
  381. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +0 -4
  382. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +128 -0
  383. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  384. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +186 -133
  385. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +166 -121
  386. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +27 -4
  387. package/src/duckdb/src/include/duckdb/common/serializer.hpp +0 -7
  388. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  389. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +34 -13
  390. package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
  391. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  392. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  393. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  394. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  395. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  396. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +11 -1
  397. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -1
  398. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -1
  399. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  400. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +6 -3
  401. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -5
  402. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +46 -3
  403. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  404. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  405. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +11 -15
  406. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +46 -11
  407. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +10 -1
  408. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  409. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  410. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +7 -3
  411. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  412. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  413. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +6 -2
  414. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
  415. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  416. package/src/duckdb/src/include/duckdb/common/types/time.hpp +5 -0
  417. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +16 -10
  418. package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -2
  419. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +10 -3
  420. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -25
  421. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  422. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
  423. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +40 -97
  424. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  425. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  426. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  427. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  428. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  429. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  430. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  431. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  432. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  433. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  434. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  435. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -11
  436. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  437. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  438. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  439. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +7 -5
  440. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  441. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +6 -4
  442. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +4 -2
  443. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  444. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +12 -1
  445. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  446. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  447. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +128 -131
  448. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  449. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -12
  450. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  451. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  452. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +32 -28
  453. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +46 -51
  454. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +134 -53
  455. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +5 -7
  456. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +5 -7
  457. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +7 -9
  458. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +5 -7
  459. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  460. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -7
  461. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  462. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
  463. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +6 -5
  464. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  465. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
  466. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
  467. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
  468. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
  469. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +5 -12
  470. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  471. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
  472. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
  473. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +3 -3
  474. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
  475. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +3 -3
  476. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  477. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
  478. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
  479. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  480. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  481. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  482. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
  483. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
  484. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
  485. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +19 -19
  487. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  488. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  489. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  490. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  491. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +5 -4
  492. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +61 -28
  493. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  494. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  495. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  496. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +22 -28
  497. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  498. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  499. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -5
  500. package/src/duckdb/src/include/duckdb/execution/operator/schema/{physical_create_index.hpp → physical_create_art_index.hpp} +14 -7
  501. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  502. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  503. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
  504. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +6 -5
  505. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
  506. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
  507. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +19 -21
  508. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  509. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +79 -63
  510. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -4
  511. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  512. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  513. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +6 -1
  514. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +80 -0
  515. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -0
  516. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  517. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  518. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  519. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +12 -4
  520. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  521. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -3
  522. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
  523. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
  524. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
  525. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +24 -12
  526. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +5 -1
  527. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  528. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
  529. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +2 -1
  530. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
  531. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +4 -3
  532. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
  533. package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +43 -0
  534. package/src/duckdb/src/include/duckdb/main/client_config.hpp +7 -2
  535. package/src/duckdb/src/include/duckdb/main/client_context.hpp +16 -14
  536. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  537. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  538. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
  539. package/src/duckdb/src/include/duckdb/main/config.hpp +16 -1
  540. package/src/duckdb/src/include/duckdb/main/connection.hpp +3 -4
  541. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +27 -0
  542. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +210 -144
  543. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +41 -6
  544. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +4 -0
  545. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  546. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
  547. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
  548. package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
  549. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  550. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  551. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  552. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  553. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  554. package/src/duckdb/src/include/duckdb/main/settings.hpp +71 -11
  555. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  556. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  557. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  558. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  559. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +38 -64
  560. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  561. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  562. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  563. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +8 -22
  564. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -12
  565. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  566. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +19 -30
  567. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  568. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  569. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  570. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  571. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  572. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  573. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  574. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  575. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  576. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -3
  577. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
  578. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +9 -1
  579. package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +6 -5
  580. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -0
  581. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  582. package/src/duckdb/src/include/duckdb/parser/constraint.hpp +5 -0
  583. package/src/duckdb/src/include/duckdb/parser/constraints/check_constraint.hpp +3 -0
  584. package/src/duckdb/src/include/duckdb/parser/constraints/foreign_key_constraint.hpp +6 -0
  585. package/src/duckdb/src/include/duckdb/parser/constraints/not_null_constraint.hpp +3 -0
  586. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +6 -0
  587. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +4 -1
  588. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +1 -1
  589. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +4 -1
  590. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +4 -1
  591. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +4 -1
  592. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +4 -1
  593. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  594. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +4 -1
  595. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  596. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  597. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +4 -1
  598. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +21 -4
  599. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +18 -2
  600. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +4 -1
  601. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
  602. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +1 -1
  603. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -1
  604. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  605. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +12 -1
  606. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +66 -2
  607. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +8 -1
  608. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +8 -1
  609. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +9 -1
  610. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +9 -2
  611. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +3 -0
  612. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +3 -0
  613. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +3 -0
  614. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -0
  615. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +3 -0
  616. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +3 -0
  617. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +7 -0
  618. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +7 -0
  619. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +7 -0
  620. package/src/duckdb/src/include/duckdb/parser/parsed_data/load_info.hpp +17 -3
  621. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +22 -0
  622. package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +10 -0
  623. package/src/duckdb/src/include/duckdb/parser/parsed_data/show_select_info.hpp +7 -0
  624. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +10 -0
  625. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -0
  626. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  627. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  628. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  629. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  630. package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
  631. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  632. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  633. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  634. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  635. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +23 -26
  636. package/src/duckdb/src/include/duckdb/planner/binder.hpp +16 -5
  637. package/src/duckdb/src/include/duckdb/planner/bound_constraint.hpp +0 -8
  638. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
  639. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +6 -0
  640. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  641. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +9 -0
  642. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  643. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  644. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +6 -0
  645. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +6 -0
  646. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +6 -0
  647. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +3 -0
  648. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +3 -0
  649. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +3 -0
  650. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +3 -0
  651. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +3 -0
  652. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
  653. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +3 -1
  654. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +3 -0
  655. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +3 -0
  656. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +24 -6
  657. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +9 -2
  658. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +3 -0
  659. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +3 -0
  660. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
  661. package/src/duckdb/src/include/duckdb/planner/expression/list.hpp +1 -0
  662. package/src/duckdb/src/include/duckdb/planner/expression.hpp +3 -0
  663. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  664. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
  665. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  666. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  667. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  668. package/src/duckdb/src/include/duckdb/planner/joinside.hpp +3 -0
  669. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +3 -2
  670. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -2
  671. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +3 -3
  672. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +3 -0
  673. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +3 -0
  674. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +4 -0
  675. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +12 -7
  676. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
  677. package/src/duckdb/src/include/duckdb/planner/operator/logical_create.hpp +9 -6
  678. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +12 -23
  679. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +10 -6
  680. package/src/duckdb/src/include/duckdb/planner/operator/logical_cross_product.hpp +3 -0
  681. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +9 -2
  682. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +7 -0
  683. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +3 -0
  684. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  685. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +6 -10
  686. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +2 -0
  687. package/src/duckdb/src/include/duckdb/planner/operator/logical_empty_result.hpp +2 -0
  688. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -0
  689. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +3 -0
  690. package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +8 -0
  691. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +3 -0
  692. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +11 -1
  693. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +6 -0
  694. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +3 -0
  695. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +3 -0
  696. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +52 -0
  697. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +7 -35
  698. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +6 -0
  699. package/src/duckdb/src/include/duckdb/planner/operator/logical_positional_join.hpp +3 -0
  700. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +3 -0
  701. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +10 -7
  702. package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +4 -0
  703. package/src/duckdb/src/include/duckdb/planner/operator/logical_sample.hpp +6 -0
  704. package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +4 -0
  705. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +4 -0
  706. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +3 -0
  707. package/src/duckdb/src/include/duckdb/planner/operator/logical_simple.hpp +3 -0
  708. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +4 -0
  709. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +2 -0
  710. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +6 -0
  711. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +3 -0
  712. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -0
  713. package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
  714. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  715. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  716. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  717. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  718. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  719. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  720. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
  721. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  722. package/src/duckdb/src/include/duckdb/planner/tableref/bound_pivotref.hpp +3 -0
  723. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  724. package/src/duckdb/src/include/duckdb/storage/block.hpp +33 -4
  725. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +11 -11
  726. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  727. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
  728. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
  729. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
  730. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
  731. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  732. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
  733. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
  734. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
  735. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
  736. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
  737. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
  738. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  739. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
  740. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +8 -5
  741. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  742. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  743. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  744. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  745. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  746. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +7 -0
  747. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  748. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
  749. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
  750. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +18 -3
  751. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
  752. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +8 -3
  753. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -3
  754. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
  755. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  756. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  757. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  758. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
  759. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  760. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  761. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
  762. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  763. package/src/duckdb/src/include/duckdb.h +98 -1
  764. package/src/duckdb/src/main/appender.cpp +3 -1
  765. package/src/duckdb/src/main/attached_database.cpp +2 -2
  766. package/src/duckdb/src/main/capi/arrow-c.cpp +196 -8
  767. package/src/duckdb/src/main/capi/duckdb-c.cpp +16 -0
  768. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  769. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  770. package/src/duckdb/src/main/capi/pending-c.cpp +23 -0
  771. package/src/duckdb/src/main/capi/prepared-c.cpp +106 -28
  772. package/src/duckdb/src/main/capi/result-c.cpp +3 -1
  773. package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
  774. package/src/duckdb/src/main/chunk_scan_state.cpp +48 -0
  775. package/src/duckdb/src/main/client_context.cpp +42 -19
  776. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  777. package/src/duckdb/src/main/client_verify.cpp +18 -0
  778. package/src/duckdb/src/main/config.cpp +9 -3
  779. package/src/duckdb/src/main/connection.cpp +3 -3
  780. package/src/duckdb/src/main/database.cpp +3 -12
  781. package/src/duckdb/src/main/db_instance_cache.cpp +14 -6
  782. package/src/duckdb/src/main/extension/extension_helper.cpp +164 -88
  783. package/src/duckdb/src/main/extension/extension_install.cpp +76 -15
  784. package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
  785. package/src/duckdb/src/main/extension/extension_util.cpp +16 -0
  786. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  787. package/src/duckdb/src/main/prepared_statement.cpp +38 -11
  788. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
  789. package/src/duckdb/src/main/query_result.cpp +0 -21
  790. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  791. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  792. package/src/duckdb/src/main/relation/join_relation.cpp +6 -6
  793. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  794. package/src/duckdb/src/main/relation.cpp +10 -9
  795. package/src/duckdb/src/main/settings/settings.cpp +125 -33
  796. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  797. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +2 -4
  798. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  799. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  800. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  801. package/src/duckdb/src/optimizer/compressed_materialization.cpp +477 -0
  802. package/src/duckdb/src/optimizer/deliminator.cpp +180 -323
  803. package/src/duckdb/src/optimizer/filter_pushdown.cpp +23 -6
  804. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  805. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  806. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  807. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  808. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1047
  809. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  810. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  811. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +52 -41
  812. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  813. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  814. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  815. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  816. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  817. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  818. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +34 -7
  819. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  820. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  821. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  822. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  823. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  824. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  825. package/src/duckdb/src/optimizer/topn_optimizer.cpp +27 -10
  826. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +3 -5
  827. package/src/duckdb/src/parallel/executor.cpp +25 -1
  828. package/src/duckdb/src/parallel/pipeline.cpp +0 -17
  829. package/src/duckdb/src/parallel/pipeline_executor.cpp +33 -13
  830. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
  831. package/src/duckdb/src/parallel/task_scheduler.cpp +18 -2
  832. package/src/duckdb/src/parser/column_definition.cpp +20 -32
  833. package/src/duckdb/src/parser/column_list.cpp +8 -0
  834. package/src/duckdb/src/parser/constraints/foreign_key_constraint.cpp +3 -0
  835. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +3 -0
  836. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  837. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -25
  838. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  839. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  840. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  841. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  842. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  843. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  844. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  845. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  846. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  847. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  848. package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -19
  849. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  850. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  851. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  852. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  853. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +5 -2
  854. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +38 -0
  855. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +17 -1
  856. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +2 -0
  857. package/src/duckdb/src/parser/parsed_data/detach_info.cpp +1 -1
  858. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +1 -1
  859. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +0 -18
  860. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +4 -1
  861. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -1
  862. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  863. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  864. package/src/duckdb/src/parser/parser.cpp +62 -36
  865. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  866. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  867. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  868. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  869. package/src/duckdb/src/parser/query_node.cpp +15 -47
  870. package/src/duckdb/src/parser/result_modifier.cpp +0 -87
  871. package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
  872. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -10
  873. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  874. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  875. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  876. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  877. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -55
  878. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  879. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  880. package/src/duckdb/src/parser/tableref.cpp +0 -44
  881. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
  882. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
  883. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  884. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  885. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
  886. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  887. package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
  888. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  889. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +16 -1
  890. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  891. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +32 -17
  892. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  893. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  894. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  895. package/src/duckdb/src/parser/transform/statement/transform_load.cpp +1 -0
  896. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  897. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  898. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
  899. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  900. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  901. package/src/duckdb/src/parser/transformer.cpp +44 -25
  902. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
  903. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
  904. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  905. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  906. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  907. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  908. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +36 -33
  909. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  910. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +14 -52
  911. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +0 -23
  912. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
  913. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +70 -29
  914. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +93 -28
  915. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  916. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  917. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  918. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +67 -31
  919. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  920. package/src/duckdb/src/planner/binder.cpp +44 -31
  921. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +24 -1
  922. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +4 -0
  923. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +13 -8
  924. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
  925. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +28 -20
  926. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +48 -4
  927. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  928. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +5 -4
  929. package/src/duckdb/src/planner/expression_binder.cpp +23 -0
  930. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  931. package/src/duckdb/src/planner/logical_operator.cpp +19 -7
  932. package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -6
  933. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +4 -2
  934. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
  935. package/src/duckdb/src/planner/operator/logical_create.cpp +14 -0
  936. package/src/duckdb/src/planner/operator/logical_create_index.cpp +36 -7
  937. package/src/duckdb/src/planner/operator/logical_create_table.cpp +16 -0
  938. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  939. package/src/duckdb/src/planner/operator/logical_delete.cpp +9 -2
  940. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  941. package/src/duckdb/src/planner/operator/logical_distinct.cpp +13 -0
  942. package/src/duckdb/src/planner/operator/logical_explain.cpp +1 -1
  943. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +39 -0
  944. package/src/duckdb/src/planner/operator/logical_get.cpp +82 -4
  945. package/src/duckdb/src/planner/operator/logical_insert.cpp +8 -2
  946. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +22 -0
  947. package/src/duckdb/src/planner/operator/logical_order.cpp +39 -0
  948. package/src/duckdb/src/planner/operator/logical_pivot.cpp +3 -0
  949. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +5 -5
  950. package/src/duckdb/src/planner/operator/logical_sample.cpp +3 -0
  951. package/src/duckdb/src/planner/operator/logical_update.cpp +8 -2
  952. package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +4 -2
  953. package/src/duckdb/src/planner/planner.cpp +18 -7
  954. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  955. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  956. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  957. package/src/duckdb/src/storage/arena_allocator.cpp +13 -2
  958. package/src/duckdb/src/storage/buffer/block_manager.cpp +13 -9
  959. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  960. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
  961. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
  962. package/src/duckdb/src/storage/checkpoint_manager.cpp +78 -72
  963. package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
  964. package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
  965. package/src/duckdb/src/storage/compression/fsst.cpp +1 -1
  966. package/src/duckdb/src/storage/compression/rle.cpp +52 -13
  967. package/src/duckdb/src/storage/data_table.cpp +36 -25
  968. package/src/duckdb/src/storage/index.cpp +4 -26
  969. package/src/duckdb/src/storage/local_storage.cpp +3 -4
  970. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
  971. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
  972. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
  973. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +98 -0
  974. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +194 -0
  975. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +283 -0
  976. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +762 -0
  977. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +62 -0
  978. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +461 -0
  979. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +421 -0
  980. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +342 -0
  981. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  982. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +97 -0
  983. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +22 -0
  984. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  985. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
  986. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +164 -0
  987. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  988. package/src/duckdb/src/storage/single_file_block_manager.cpp +69 -51
  989. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  990. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  991. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  992. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  993. package/src/duckdb/src/storage/statistics/string_stats.cpp +44 -2
  994. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  995. package/src/duckdb/src/storage/storage_info.cpp +3 -2
  996. package/src/duckdb/src/storage/storage_manager.cpp +11 -5
  997. package/src/duckdb/src/storage/table/chunk_info.cpp +99 -3
  998. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
  999. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -3
  1000. package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
  1001. package/src/duckdb/src/storage/table/row_group.cpp +102 -20
  1002. package/src/duckdb/src/storage/table/row_group_collection.cpp +23 -19
  1003. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  1004. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  1005. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  1006. package/src/duckdb/src/storage/wal_replay.cpp +26 -26
  1007. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
  1008. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  1009. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
  1010. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  1011. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  1012. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +5 -2
  1013. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  1014. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  1015. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +11 -0
  1016. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  1017. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
  1018. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11019 -10364
  1019. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
  1020. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +10 -0
  1021. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +31 -1
  1022. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  1023. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  1024. package/src/duckdb/ub_src_common.cpp +4 -0
  1025. package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
  1026. package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
  1027. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  1028. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  1029. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  1030. package/src/duckdb/ub_src_execution.cpp +2 -2
  1031. package/src/duckdb/ub_src_execution_index_art.cpp +0 -6
  1032. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  1033. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  1034. package/src/duckdb/ub_src_execution_operator_persistent.cpp +1 -11
  1035. package/src/duckdb/ub_src_execution_operator_schema.cpp +1 -1
  1036. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  1037. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  1038. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  1039. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  1040. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  1041. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  1042. package/src/duckdb/ub_src_main.cpp +2 -0
  1043. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  1044. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  1045. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  1046. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
  1047. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  1048. package/src/duckdb/ub_src_parser.cpp +0 -2
  1049. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  1050. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  1051. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  1052. package/src/duckdb/ub_src_planner_operator.cpp +3 -3
  1053. package/src/duckdb/ub_src_storage.cpp +0 -4
  1054. package/src/duckdb/ub_src_storage_compression.cpp +2 -0
  1055. package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
  1056. package/src/duckdb/ub_src_storage_serialization.cpp +30 -0
  1057. package/src/duckdb_node.hpp +1 -0
  1058. package/src/statement.cpp +10 -5
  1059. package/test/columns.test.ts +25 -3
  1060. package/test/extension.test.ts +1 -1
  1061. package/test/test_all_types.test.ts +234 -0
  1062. package/tsconfig.json +1 -0
  1063. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  1064. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  1065. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  1066. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  1067. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  1068. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  1069. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -193
  1070. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -172
  1071. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
  1072. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  1073. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  1074. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
  1075. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  1076. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  1077. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -69
  1078. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +0 -27
  1079. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_join.hpp +0 -32
  1080. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -49
  1081. package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
  1082. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
  1083. package/src/duckdb/src/parser/common_table_expression_info.cpp +0 -19
  1084. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +0 -14
  1085. package/src/duckdb/src/planner/operator/logical_delim_join.cpp +0 -27
  1086. package/src/duckdb/src/storage/meta_block_reader.cpp +0 -78
  1087. package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -13,12 +13,15 @@
13
13
  #include "duckdb/parallel/event.hpp"
14
14
  #include "duckdb/parallel/thread_context.hpp"
15
15
 
16
+ #include <thread>
17
+
16
18
  namespace duckdb {
17
19
 
18
20
  PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
19
21
  unique_ptr<PhysicalOperator> right)
20
22
  : PhysicalComparisonJoin(op, PhysicalOperatorType::ASOF_JOIN, std::move(op.conditions), op.join_type,
21
- op.estimated_cardinality) {
23
+ op.estimated_cardinality),
24
+ comparison_type(ExpressionType::INVALID) {
22
25
 
23
26
  // Convert the conditions partitions and sorts
24
27
  for (auto &cond : conditions) {
@@ -29,9 +32,19 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
29
32
  auto right = cond.right->Copy();
30
33
  switch (cond.comparison) {
31
34
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
35
+ case ExpressionType::COMPARE_GREATERTHAN:
32
36
  null_sensitive.emplace_back(lhs_orders.size());
33
37
  lhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(left));
34
38
  rhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(right));
39
+ comparison_type = cond.comparison;
40
+ break;
41
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
42
+ case ExpressionType::COMPARE_LESSTHAN:
43
+ // Always put NULLS LAST so they can be ignored.
44
+ null_sensitive.emplace_back(lhs_orders.size());
45
+ lhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(left));
46
+ rhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(right));
47
+ comparison_type = cond.comparison;
35
48
  break;
36
49
  case ExpressionType::COMPARE_EQUAL:
37
50
  null_sensitive.emplace_back(lhs_orders.size());
@@ -67,21 +80,32 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
67
80
  class AsOfGlobalSinkState : public GlobalSinkState {
68
81
  public:
69
82
  AsOfGlobalSinkState(ClientContext &context, const PhysicalAsOfJoin &op)
70
- : global_partition(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {},
71
- op.estimated_cardinality),
83
+ : rhs_sink(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {}, op.estimated_cardinality),
72
84
  is_outer(IsRightOuterJoin(op.join_type)), has_null(false) {
73
85
  }
74
86
 
75
87
  idx_t Count() const {
76
- return global_partition.count;
88
+ return rhs_sink.count;
77
89
  }
78
90
 
79
- PartitionGlobalSinkState global_partition;
91
+ PartitionLocalSinkState *RegisterBuffer(ClientContext &context) {
92
+ lock_guard<mutex> guard(lock);
93
+ lhs_buffers.emplace_back(make_uniq<PartitionLocalSinkState>(context, *lhs_sink));
94
+ return lhs_buffers.back().get();
95
+ }
96
+
97
+ PartitionGlobalSinkState rhs_sink;
80
98
 
81
99
  // One per partition
82
100
  const bool is_outer;
83
101
  vector<OuterJoinMarker> right_outers;
84
102
  bool has_null;
103
+
104
+ // Left side buffering
105
+ unique_ptr<PartitionGlobalSinkState> lhs_sink;
106
+
107
+ mutex lock;
108
+ vector<unique_ptr<PartitionLocalSinkState>> lhs_buffers;
85
109
  };
86
110
 
87
111
  class AsOfLocalSinkState : public LocalSinkState {
@@ -108,7 +132,7 @@ unique_ptr<GlobalSinkState> PhysicalAsOfJoin::GetGlobalSinkState(ClientContext &
108
132
  unique_ptr<LocalSinkState> PhysicalAsOfJoin::GetLocalSinkState(ExecutionContext &context) const {
109
133
  // We only sink the RHS
110
134
  auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
111
- return make_uniq<AsOfLocalSinkState>(context.client, gsink.global_partition);
135
+ return make_uniq<AsOfLocalSinkState>(context.client, gsink.rhs_sink);
112
136
  }
113
137
 
114
138
  SinkResultType PhysicalAsOfJoin::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
@@ -119,27 +143,34 @@ SinkResultType PhysicalAsOfJoin::Sink(ExecutionContext &context, DataChunk &chun
119
143
  return SinkResultType::NEED_MORE_INPUT;
120
144
  }
121
145
 
122
- void PhysicalAsOfJoin::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
123
- auto &lstate = lstate_p.Cast<AsOfLocalSinkState>();
146
+ SinkCombineResultType PhysicalAsOfJoin::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
147
+ auto &lstate = input.local_state.Cast<AsOfLocalSinkState>();
124
148
  lstate.Combine();
149
+ return SinkCombineResultType::FINISHED;
125
150
  }
126
151
 
127
152
  //===--------------------------------------------------------------------===//
128
153
  // Finalize
129
154
  //===--------------------------------------------------------------------===//
130
155
  SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
131
- GlobalSinkState &gstate_p) const {
132
- auto &gstate = gstate_p.Cast<AsOfGlobalSinkState>();
156
+ OperatorSinkFinalizeInput &input) const {
157
+ auto &gstate = input.global_state.Cast<AsOfGlobalSinkState>();
158
+
159
+ // The data is all in so we can initialise the left partitioning.
160
+ const vector<unique_ptr<BaseStatistics>> partitions_stats;
161
+ gstate.lhs_sink = make_uniq<PartitionGlobalSinkState>(context, lhs_partitions, lhs_orders, children[0]->types,
162
+ partitions_stats, 0);
163
+ gstate.lhs_sink->SyncPartitioning(gstate.rhs_sink);
133
164
 
134
165
  // Find the first group to sort
135
- auto &groups = gstate.global_partition.grouping_data->GetPartitions();
166
+ auto &groups = gstate.rhs_sink.grouping_data->GetPartitions();
136
167
  if (groups.empty() && EmptyResultIfRHSIsEmpty()) {
137
168
  // Empty input!
138
169
  return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
139
170
  }
140
171
 
141
172
  // Schedule all the sorts for maximum thread utilisation
142
- auto new_event = make_shared<PartitionMergeEvent>(gstate.global_partition, pipeline);
173
+ auto new_event = make_shared<PartitionMergeEvent>(gstate.rhs_sink, pipeline);
143
174
  event.InsertEvent(std::move(new_event));
144
175
 
145
176
  return SinkFinalizeType::READY;
@@ -152,10 +183,10 @@ class AsOfGlobalState : public GlobalOperatorState {
152
183
  public:
153
184
  explicit AsOfGlobalState(AsOfGlobalSinkState &gsink) {
154
185
  // for FULL/RIGHT OUTER JOIN, initialize right_outers to false for every tuple
155
- auto &global_partition = gsink.global_partition;
186
+ auto &rhs_partition = gsink.rhs_sink;
156
187
  auto &right_outers = gsink.right_outers;
157
- right_outers.reserve(global_partition.hash_groups.size());
158
- for (const auto &hash_group : global_partition.hash_groups) {
188
+ right_outers.reserve(rhs_partition.hash_groups.size());
189
+ for (const auto &hash_group : rhs_partition.hash_groups) {
159
190
  right_outers.emplace_back(OuterJoinMarker(gsink.is_outer));
160
191
  right_outers.back().Initialize(hash_group->count);
161
192
  }
@@ -169,79 +200,47 @@ unique_ptr<GlobalOperatorState> PhysicalAsOfJoin::GetGlobalOperatorState(ClientC
169
200
 
170
201
  class AsOfLocalState : public CachingOperatorState {
171
202
  public:
172
- using Orders = vector<BoundOrderByNode>;
173
- using Match = std::pair<hash_t, idx_t>;
203
+ AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op)
204
+ : context(context), allocator(Allocator::Get(context)), op(op), lhs_executor(context),
205
+ left_outer(IsLeftOuterJoin(op.join_type)), fetch_next_left(true) {
206
+ lhs_keys.Initialize(allocator, op.join_key_types);
207
+ for (const auto &cond : op.conditions) {
208
+ lhs_executor.AddExpression(*cond.left);
209
+ }
174
210
 
175
- AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external);
211
+ lhs_payload.Initialize(allocator, op.children[0]->types);
212
+ lhs_sel.Initialize();
213
+ left_outer.Initialize(STANDARD_VECTOR_SIZE);
176
214
 
177
- public:
178
- void ResolveJoin(DataChunk &input, bool *found_matches, Match *matches = nullptr);
215
+ auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
216
+ lhs_partition_sink = gsink.RegisterBuffer(context);
217
+ }
179
218
 
180
- void ResolveJoinKeys(DataChunk &input);
219
+ bool Sink(DataChunk &input);
220
+ OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk);
181
221
 
182
222
  ClientContext &context;
183
223
  Allocator &allocator;
184
224
  const PhysicalAsOfJoin &op;
185
- BufferManager &buffer_manager;
186
- const bool force_external;
187
- Orders lhs_orders;
188
225
 
189
- // LHS sorting
190
226
  ExpressionExecutor lhs_executor;
191
227
  DataChunk lhs_keys;
192
228
  ValidityMask lhs_valid_mask;
193
229
  SelectionVector lhs_sel;
194
- idx_t lhs_valid;
195
- RowLayout lhs_layout;
196
- unique_ptr<GlobalSortState> lhs_global_state;
197
- DataChunk lhs_sorted;
230
+ DataChunk lhs_payload;
198
231
 
199
- // LHS binning
200
- Vector hash_vector;
201
- Vector bin_vector;
202
-
203
- // Output
204
- idx_t lhs_match_count;
205
- SelectionVector lhs_matched;
206
232
  OuterJoinMarker left_outer;
207
233
  bool fetch_next_left;
208
- DataChunk group_payload;
209
- DataChunk rhs_payload;
210
- };
211
234
 
212
- AsOfLocalState::AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external)
213
- : context(context), allocator(Allocator::Get(context)), op(op),
214
- buffer_manager(BufferManager::GetBufferManager(context)), force_external(force_external), lhs_executor(context),
215
- hash_vector(LogicalType::HASH), bin_vector(LogicalType::HASH), left_outer(IsLeftOuterJoin(op.join_type)),
216
- fetch_next_left(true) {
217
- vector<unique_ptr<BaseStatistics>> partition_stats;
218
- Orders partitions; // Not used.
219
- PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
220
- partition_stats);
221
-
222
- // We sort the row numbers of the incoming block, not the rows
223
- lhs_layout.Initialize({LogicalType::UINTEGER});
224
- lhs_sorted.Initialize(allocator, lhs_layout.GetTypes());
225
-
226
- lhs_keys.Initialize(allocator, op.join_key_types);
227
- for (const auto &cond : op.conditions) {
228
- lhs_executor.AddExpression(*cond.left);
229
- }
230
-
231
- group_payload.Initialize(allocator, op.children[1]->types);
232
- rhs_payload.Initialize(allocator, op.children[1]->types);
233
-
234
- lhs_matched.Initialize();
235
- lhs_sel.Initialize();
236
- left_outer.Initialize(STANDARD_VECTOR_SIZE);
237
- }
235
+ optional_ptr<PartitionLocalSinkState> lhs_partition_sink;
236
+ };
238
237
 
239
- void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
238
+ bool AsOfLocalState::Sink(DataChunk &input) {
240
239
  // Compute the join keys
241
240
  lhs_keys.Reset();
242
241
  lhs_executor.Execute(input, lhs_keys);
243
242
 
244
- // Extract the NULLs
243
+ // Combine the NULLs
245
244
  const auto count = input.size();
246
245
  lhs_valid_mask.Reset();
247
246
  for (auto col_idx : op.null_sensitive) {
@@ -251,17 +250,19 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
251
250
  lhs_valid_mask.Combine(unified.validity, count);
252
251
  }
253
252
 
254
- // Convert the mask to a selection vector.
255
- // We need this anyway for sorting
256
- lhs_valid = 0;
253
+ // Convert the mask to a selection vector
254
+ // and mark all the rows that cannot match for early return.
255
+ idx_t lhs_valid = 0;
257
256
  const auto entry_count = lhs_valid_mask.EntryCount(count);
258
257
  idx_t base_idx = 0;
258
+ left_outer.Reset();
259
259
  for (idx_t entry_idx = 0; entry_idx < entry_count;) {
260
260
  const auto validity_entry = lhs_valid_mask.GetValidityEntry(entry_idx++);
261
261
  const auto next = MinValue<idx_t>(base_idx + ValidityMask::BITS_PER_VALUE, count);
262
262
  if (ValidityMask::AllValid(validity_entry)) {
263
263
  for (; base_idx < next; ++base_idx) {
264
264
  lhs_sel.set_index(lhs_valid++, base_idx);
265
+ left_outer.SetMatch(base_idx);
265
266
  }
266
267
  } else if (ValidityMask::NoneValid(validity_entry)) {
267
268
  base_idx = next;
@@ -270,120 +271,237 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
270
271
  for (; base_idx < next; ++base_idx) {
271
272
  if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) {
272
273
  lhs_sel.set_index(lhs_valid++, base_idx);
274
+ left_outer.SetMatch(base_idx);
273
275
  }
274
276
  }
275
277
  }
276
278
  }
277
279
 
278
280
  // Slice the keys to the ones we can match
279
- if (lhs_valid < count) {
280
- lhs_keys.Slice(lhs_sel, lhs_valid);
281
+ lhs_payload.Reset();
282
+ if (lhs_valid == count) {
283
+ lhs_payload.Reference(input);
284
+ lhs_payload.SetCardinality(input);
285
+ } else {
286
+ lhs_payload.Slice(input, lhs_sel, lhs_valid);
287
+ lhs_payload.SetCardinality(lhs_valid);
288
+
289
+ // Flush the ones that can't match
290
+ fetch_next_left = false;
281
291
  }
282
292
 
283
- // Hash to assign the partitions
284
- auto &global_partition = op.sink_state->Cast<AsOfGlobalSinkState>().global_partition;
285
- if (op.lhs_partitions.empty()) {
286
- // Only one hash group
287
- bin_vector.Reference(Value::HASH(0));
288
- } else {
289
- // Hash to determine the partitions.
290
- VectorOperations::Hash(lhs_keys.data[0], hash_vector, lhs_sel, lhs_valid);
291
- for (size_t prt_idx = 1; prt_idx < op.lhs_partitions.size(); ++prt_idx) {
292
- VectorOperations::CombineHash(hash_vector, lhs_keys.data[prt_idx], lhs_sel, lhs_valid);
293
+ lhs_partition_sink->Sink(lhs_payload);
294
+
295
+ return false;
296
+ }
297
+
298
+ OperatorResultType AsOfLocalState::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk) {
299
+ input.Verify();
300
+ Sink(input);
301
+
302
+ // If there were any unmatchable rows, return them now so we can forget about them.
303
+ if (!fetch_next_left) {
304
+ fetch_next_left = true;
305
+ left_outer.ConstructLeftJoinResult(input, chunk);
306
+ left_outer.Reset();
307
+ }
308
+
309
+ // Just keep asking for data and buffering it
310
+ return OperatorResultType::NEED_MORE_INPUT;
311
+ }
312
+
313
+ OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
314
+ GlobalOperatorState &gstate, OperatorState &lstate_p) const {
315
+ auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
316
+ auto &lstate = lstate_p.Cast<AsOfLocalState>();
317
+
318
+ if (gsink.rhs_sink.count == 0) {
319
+ // empty RHS
320
+ if (!EmptyResultIfRHSIsEmpty()) {
321
+ ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
322
+ return OperatorResultType::NEED_MORE_INPUT;
323
+ } else {
324
+ return OperatorResultType::FINISHED;
293
325
  }
326
+ }
327
+
328
+ return lstate.ExecuteInternal(context, input, chunk);
329
+ }
330
+
331
+ //===--------------------------------------------------------------------===//
332
+ // Source
333
+ //===--------------------------------------------------------------------===//
334
+ class AsOfProbeBuffer {
335
+ public:
336
+ using Orders = vector<BoundOrderByNode>;
337
+
338
+ static bool IsExternal(ClientContext &context) {
339
+ return ClientConfig::GetConfig(context).force_external;
340
+ }
341
+
342
+ AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op);
343
+
344
+ public:
345
+ void ResolveJoin(bool *found_matches, idx_t *matches = nullptr);
346
+ bool Scanning() const {
347
+ return lhs_scanner.get();
348
+ }
349
+ void BeginLeftScan(hash_t scan_bin);
350
+ bool NextLeft();
351
+ void EndScan();
352
+
353
+ // resolve joins that output max N elements (SEMI, ANTI, MARK)
354
+ void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk);
355
+ // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
356
+ void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk);
357
+ // Chunk may be empty
358
+ void GetData(ExecutionContext &context, DataChunk &chunk);
359
+ bool HasMoreData() const {
360
+ return !fetch_next_left || (lhs_scanner && lhs_scanner->Remaining());
361
+ }
362
+
363
+ ClientContext &context;
364
+ Allocator &allocator;
365
+ const PhysicalAsOfJoin &op;
366
+ BufferManager &buffer_manager;
367
+ const bool force_external;
368
+ const idx_t memory_per_thread;
369
+ Orders lhs_orders;
370
+
371
+ // LHS scanning
372
+ SelectionVector lhs_sel;
373
+ optional_ptr<PartitionGlobalHashGroup> left_hash;
374
+ OuterJoinMarker left_outer;
375
+ unique_ptr<SBIterator> left_itr;
376
+ unique_ptr<PayloadScanner> lhs_scanner;
377
+ DataChunk lhs_payload;
378
+
379
+ // RHS scanning
380
+ optional_ptr<PartitionGlobalHashGroup> right_hash;
381
+ optional_ptr<OuterJoinMarker> right_outer;
382
+ unique_ptr<SBIterator> right_itr;
383
+ unique_ptr<PayloadScanner> rhs_scanner;
384
+ DataChunk rhs_payload;
385
+
386
+ idx_t lhs_match_count;
387
+ bool fetch_next_left;
388
+ };
389
+
390
+ AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op)
391
+ : context(context), allocator(Allocator::Get(context)), op(op),
392
+ buffer_manager(BufferManager::GetBufferManager(context)), force_external(IsExternal(context)),
393
+ memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)),
394
+ fetch_next_left(true) {
395
+ vector<unique_ptr<BaseStatistics>> partition_stats;
396
+ Orders partitions; // Not used.
397
+ PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
398
+ partition_stats);
294
399
 
295
- // Convert hashes to hash groups
296
- const auto radix_bits = global_partition.grouping_data->GetRadixBits();
297
- RadixPartitioning::HashesToBins(hash_vector, radix_bits, bin_vector, count);
400
+ // We sort the row numbers of the incoming block, not the rows
401
+ lhs_payload.Initialize(allocator, op.children[0]->types);
402
+ rhs_payload.Initialize(allocator, op.children[1]->types);
403
+
404
+ lhs_sel.Initialize();
405
+ left_outer.Initialize(STANDARD_VECTOR_SIZE);
406
+ }
407
+
408
+ void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
409
+ auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
410
+ auto &lhs_sink = *gsink.lhs_sink;
411
+ const auto left_group = lhs_sink.bin_groups[scan_bin];
412
+ if (left_group >= lhs_sink.bin_groups.size()) {
413
+ return;
298
414
  }
299
415
 
300
- // Sort the selection vector on the valid keys
301
- lhs_global_state = make_uniq<GlobalSortState>(buffer_manager, lhs_orders, lhs_layout);
302
- auto &global_state = *lhs_global_state;
303
- LocalSortState local_sort;
304
- local_sort.Initialize(*lhs_global_state, buffer_manager);
416
+ auto iterator_comp = ExpressionType::INVALID;
417
+ switch (op.comparison_type) {
418
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
419
+ iterator_comp = ExpressionType::COMPARE_LESSTHANOREQUALTO;
420
+ break;
421
+ case ExpressionType::COMPARE_GREATERTHAN:
422
+ iterator_comp = ExpressionType::COMPARE_LESSTHAN;
423
+ break;
424
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
425
+ iterator_comp = ExpressionType::COMPARE_GREATERTHANOREQUALTO;
426
+ break;
427
+ case ExpressionType::COMPARE_LESSTHAN:
428
+ iterator_comp = ExpressionType::COMPARE_GREATERTHAN;
429
+ break;
430
+ default:
431
+ throw NotImplementedException("Unsupported comparison type for ASOF join");
432
+ }
305
433
 
306
- DataChunk payload_chunk;
307
- payload_chunk.InitializeEmpty({LogicalType::UINTEGER});
308
- FlatVector::SetData(payload_chunk.data[0], data_ptr_cast(lhs_sel.data()));
309
- payload_chunk.SetCardinality(lhs_valid);
310
- local_sort.SinkChunk(lhs_keys, payload_chunk);
434
+ left_hash = lhs_sink.hash_groups[left_group].get();
435
+ auto &left_sort = *(left_hash->global_sort);
436
+ lhs_scanner = make_uniq<PayloadScanner>(left_sort, false);
437
+ left_itr = make_uniq<SBIterator>(left_sort, iterator_comp);
438
+
439
+ // We are only probing the corresponding right side bin, which may be empty
440
+ // If they are empty, we leave the iterator as null so we can emit left matches
441
+ auto &rhs_sink = gsink.rhs_sink;
442
+ const auto right_group = rhs_sink.bin_groups[scan_bin];
443
+ if (right_group < rhs_sink.bin_groups.size()) {
444
+ right_hash = rhs_sink.hash_groups[right_group].get();
445
+ right_outer = gsink.right_outers.data() + right_group;
446
+ auto &right_sort = *(right_hash->global_sort);
447
+ right_itr = make_uniq<SBIterator>(right_sort, iterator_comp);
448
+ rhs_scanner = make_uniq<PayloadScanner>(right_sort, false);
449
+ }
450
+ }
311
451
 
312
- // Set external (can be forced with the PRAGMA)
313
- global_state.external = force_external;
314
- global_state.AddLocalState(local_sort);
315
- global_state.PrepareMergePhase();
316
- while (global_state.sorted_blocks.size() > 1) {
317
- MergeSorter merge_sorter(*lhs_global_state, buffer_manager);
318
- merge_sorter.PerformInMergeRound();
319
- global_state.CompleteMergeRound();
452
+ bool AsOfProbeBuffer::NextLeft() {
453
+ if (!HasMoreData()) {
454
+ return false;
320
455
  }
321
456
 
322
- // Scan the sorted selection
323
- D_ASSERT(global_state.sorted_blocks.size() == 1);
457
+ // Scan the next sorted chunk
458
+ lhs_payload.Reset();
459
+ left_itr->SetIndex(lhs_scanner->Scanned());
460
+ lhs_scanner->Scan(lhs_payload);
324
461
 
325
- auto scanner = make_uniq<PayloadScanner>(*global_state.sorted_blocks[0]->payload_data, global_state, false);
326
- lhs_sorted.Reset();
327
- scanner->Scan(lhs_sorted);
462
+ return true;
328
463
  }
329
464
 
330
- void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<hash_t, idx_t> *matches) {
331
- // Sort the input into lhs_payload, radix keys in lhs_global_state
332
- ResolveJoinKeys(input);
465
+ void AsOfProbeBuffer::EndScan() {
466
+ right_hash = nullptr;
467
+ right_itr.reset();
468
+ rhs_scanner.reset();
469
+ right_outer = nullptr;
333
470
 
334
- auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
335
- auto &global_partition = gsink.global_partition;
471
+ left_hash = nullptr;
472
+ left_itr.reset();
473
+ lhs_scanner.reset();
474
+ }
336
475
 
337
- // The bins are contiguous from sorting, so load them one at a time
338
- // But they may be constant, so unify.
339
- UnifiedVectorFormat bin_unified;
340
- bin_vector.ToUnifiedFormat(lhs_valid, bin_unified);
341
- const auto bins = UnifiedVectorFormat::GetData<hash_t>(bin_unified);
476
+ void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
477
+ // If there was no right partition, there are no matches
478
+ lhs_match_count = 0;
479
+ left_outer.Reset();
480
+ if (!right_itr) {
481
+ return;
482
+ }
342
483
 
343
- hash_t prev_bin = global_partition.bin_groups.size();
344
- optional_ptr<PartitionGlobalHashGroup> hash_group;
345
- optional_ptr<OuterJoinMarker> right_outer;
484
+ const auto count = lhs_payload.size();
485
+ const auto left_base = left_itr->GetIndex();
346
486
  // Searching for right <= left
347
- SBIterator left(*lhs_global_state, ExpressionType::COMPARE_LESSTHANOREQUALTO);
348
- unique_ptr<SBIterator> right;
349
- lhs_match_count = 0;
350
- const auto sorted_sel = FlatVector::GetData<sel_t>(lhs_sorted.data[0]);
351
- for (idx_t i = 0; i < lhs_valid; ++i) {
352
- // idx is the index in the input; i is the index in the sorted keys
353
- const auto idx = sorted_sel[i];
354
- const auto curr_bin = bins[bin_unified.sel->get_index(idx)];
355
- if (!hash_group || curr_bin != prev_bin) {
356
- // Grab the next group
357
- prev_bin = curr_bin;
358
- const auto group_idx = global_partition.bin_groups[curr_bin];
359
- if (group_idx >= global_partition.hash_groups.size()) {
360
- // No matching partition
361
- hash_group = nullptr;
362
- right_outer = nullptr;
363
- right.reset();
364
- continue;
365
- }
366
- hash_group = global_partition.hash_groups[group_idx].get();
367
- right_outer = gsink.right_outers.data() + group_idx;
368
- right = make_uniq<SBIterator>(*(hash_group->global_sort), ExpressionType::COMPARE_LESSTHANOREQUALTO);
369
- }
370
- left.SetIndex(i);
487
+ for (idx_t i = 0; i < count; ++i) {
488
+ left_itr->SetIndex(left_base + i);
371
489
 
372
490
  // If right > left, then there is no match
373
- if (!right->Compare(left)) {
491
+ if (!right_itr->Compare(*left_itr)) {
374
492
  continue;
375
493
  }
376
494
 
377
495
  // Exponential search forward for a non-matching value using radix iterators
378
496
  // (We use exponential search to avoid thrashing the block manager on large probes)
379
497
  idx_t bound = 1;
380
- idx_t begin = right->GetIndex();
381
- right->SetIndex(begin + bound);
382
- while (right->GetIndex() < hash_group->count) {
383
- if (right->Compare(left)) {
498
+ idx_t begin = right_itr->GetIndex();
499
+ right_itr->SetIndex(begin + bound);
500
+ while (right_itr->GetIndex() < right_hash->count) {
501
+ if (right_itr->Compare(*left_itr)) {
384
502
  // If right <= left, jump ahead
385
503
  bound *= 2;
386
- right->SetIndex(begin + bound);
504
+ right_itr->SetIndex(begin + bound);
387
505
  } else {
388
506
  break;
389
507
  }
@@ -392,255 +510,298 @@ void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<
392
510
  // Binary search for the first non-matching value using radix iterators
393
511
  // The previous value (which we know exists) is the match
394
512
  auto first = begin + bound / 2;
395
- auto last = MinValue<idx_t>(begin + bound, hash_group->count);
513
+ auto last = MinValue<idx_t>(begin + bound, right_hash->count);
396
514
  while (first < last) {
397
515
  const auto mid = first + (last - first) / 2;
398
- right->SetIndex(mid);
399
- if (right->Compare(left)) {
516
+ right_itr->SetIndex(mid);
517
+ if (right_itr->Compare(*left_itr)) {
400
518
  // If right <= left, new lower bound
401
519
  first = mid + 1;
402
520
  } else {
403
521
  last = mid;
404
522
  }
405
523
  }
406
- right->SetIndex(--first);
524
+ right_itr->SetIndex(--first);
407
525
 
408
526
  // Check partitions for strict equality
409
- if (!op.lhs_partitions.empty() && hash_group->ComparePartitions(left, *right)) {
527
+ if (right_hash->ComparePartitions(*left_itr, *right_itr)) {
410
528
  continue;
411
529
  }
412
530
 
413
531
  // Emit match data
414
532
  right_outer->SetMatch(first);
415
- left_outer.SetMatch(idx);
533
+ left_outer.SetMatch(i);
416
534
  if (found_match) {
417
- found_match[idx] = true;
535
+ found_match[i] = true;
418
536
  }
419
537
  if (matches) {
420
- matches[idx] = Match(curr_bin, first);
538
+ matches[i] = first;
421
539
  }
422
- lhs_matched.set_index(lhs_match_count++, idx);
540
+ lhs_sel.set_index(lhs_match_count++, i);
423
541
  }
424
542
  }
425
543
 
426
544
  unique_ptr<OperatorState> PhysicalAsOfJoin::GetOperatorState(ExecutionContext &context) const {
427
- auto &config = ClientConfig::GetConfig(context.client);
428
- return make_uniq<AsOfLocalState>(context.client, *this, config.force_external);
545
+ return make_uniq<AsOfLocalState>(context.client, *this);
429
546
  }
430
547
 
431
- void PhysicalAsOfJoin::ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
432
- OperatorState &lstate_p) const {
433
- auto &lstate = lstate_p.Cast<AsOfLocalState>();
434
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
435
-
548
+ void AsOfProbeBuffer::ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk) {
436
549
  // perform the actual join
437
550
  bool found_match[STANDARD_VECTOR_SIZE] = {false};
438
- lstate.ResolveJoin(input, found_match);
551
+ ResolveJoin(found_match);
439
552
 
440
553
  // now construct the result based on the join result
441
- switch (join_type) {
442
- case JoinType::MARK: {
443
- PhysicalJoin::ConstructMarkJoinResult(lstate.lhs_keys, input, chunk, found_match, gsink.has_null);
444
- break;
445
- }
554
+ switch (op.join_type) {
446
555
  case JoinType::SEMI:
447
- PhysicalJoin::ConstructSemiJoinResult(input, chunk, found_match);
556
+ PhysicalJoin::ConstructSemiJoinResult(lhs_payload, chunk, found_match);
448
557
  break;
449
558
  case JoinType::ANTI:
450
- PhysicalJoin::ConstructAntiJoinResult(input, chunk, found_match);
559
+ PhysicalJoin::ConstructAntiJoinResult(lhs_payload, chunk, found_match);
451
560
  break;
452
561
  default:
453
562
  throw NotImplementedException("Unimplemented join type for AsOf join");
454
563
  }
455
564
  }
456
565
 
457
- OperatorResultType PhysicalAsOfJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
458
- OperatorState &lstate_p) const {
459
- auto &lstate = lstate_p.Cast<AsOfLocalState>();
460
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
461
-
462
- if (!lstate.fetch_next_left) {
463
- lstate.fetch_next_left = true;
464
- if (lstate.left_outer.Enabled()) {
465
- // left join: before we move to the next chunk, see if we need to output any vectors that didn't
466
- // have a match found
467
- lstate.left_outer.ConstructLeftJoinResult(input, chunk);
468
- lstate.left_outer.Reset();
469
- }
470
- return OperatorResultType::NEED_MORE_INPUT;
471
- }
472
-
566
+ void AsOfProbeBuffer::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk) {
473
567
  // perform the actual join
474
- AsOfLocalState::Match matches[STANDARD_VECTOR_SIZE];
475
- lstate.ResolveJoin(input, nullptr, matches);
476
- lstate.group_payload.Reset();
477
- lstate.rhs_payload.Reset();
478
-
479
- auto &global_partition = gsink.global_partition;
480
- hash_t scan_bin = global_partition.bin_groups.size();
481
- optional_ptr<PartitionGlobalHashGroup> hash_group;
482
- unique_ptr<PayloadScanner> scanner;
483
- for (idx_t i = 0; i < lstate.lhs_match_count; ++i) {
484
- const auto idx = lstate.lhs_matched[i];
485
- const auto match_bin = matches[idx].first;
486
- const auto match_pos = matches[idx].second;
487
- if (match_bin != scan_bin) {
488
- // Grab the next group
489
- const auto group_idx = global_partition.bin_groups[match_bin];
490
- hash_group = global_partition.hash_groups[group_idx].get();
491
- scan_bin = match_bin;
492
- scanner = make_uniq<PayloadScanner>(*hash_group->global_sort, false);
493
- lstate.group_payload.Reset();
494
- }
568
+ idx_t matches[STANDARD_VECTOR_SIZE];
569
+ ResolveJoin(nullptr, matches);
570
+
571
+ for (idx_t i = 0; i < lhs_match_count; ++i) {
572
+ const auto idx = lhs_sel[i];
573
+ const auto match_pos = matches[idx];
495
574
  // Skip to the range containing the match
496
- while (match_pos >= scanner->Scanned()) {
497
- lstate.group_payload.Reset();
498
- scanner->Scan(lstate.group_payload);
575
+ while (match_pos >= rhs_scanner->Scanned()) {
576
+ rhs_payload.Reset();
577
+ rhs_scanner->Scan(rhs_payload);
499
578
  }
500
579
  // Append the individual values
501
580
  // TODO: Batch the copies
502
- const auto source_offset = match_pos - (scanner->Scanned() - lstate.group_payload.size());
503
- for (idx_t col_idx = 0; col_idx < right_projection_map.size(); ++col_idx) {
504
- const auto rhs_idx = right_projection_map[col_idx];
505
- auto &source = lstate.group_payload.data[rhs_idx];
506
- auto &target = chunk.data[input.ColumnCount() + col_idx];
581
+ const auto source_offset = match_pos - (rhs_scanner->Scanned() - rhs_payload.size());
582
+ for (column_t col_idx = 0; col_idx < op.right_projection_map.size(); ++col_idx) {
583
+ const auto rhs_idx = op.right_projection_map[col_idx];
584
+ auto &source = rhs_payload.data[rhs_idx];
585
+ auto &target = chunk.data[lhs_payload.ColumnCount() + col_idx];
507
586
  VectorOperations::Copy(source, target, source_offset + 1, source_offset, i);
508
587
  }
509
588
  }
510
589
 
511
- // Slice the input into the left side
512
- chunk.Slice(input, lstate.lhs_matched, lstate.lhs_match_count);
513
-
514
- // If we are doing a left join, come back for the NULLs
515
- if (lstate.left_outer.Enabled()) {
516
- lstate.fetch_next_left = false;
517
- return OperatorResultType::HAVE_MORE_OUTPUT;
590
+ // Slice the left payload into the result
591
+ for (column_t i = 0; i < lhs_payload.ColumnCount(); ++i) {
592
+ chunk.data[i].Slice(lhs_payload.data[i], lhs_sel, lhs_match_count);
518
593
  }
594
+ chunk.SetCardinality(lhs_match_count);
519
595
 
520
- return OperatorResultType::NEED_MORE_INPUT;
596
+ // If we are doing a left join, come back for the NULLs
597
+ fetch_next_left = !left_outer.Enabled();
521
598
  }
522
599
 
523
- OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
524
- GlobalOperatorState &gstate, OperatorState &lstate) const {
525
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
526
-
527
- if (gsink.global_partition.count == 0) {
528
- // empty RHS
529
- if (!EmptyResultIfRHSIsEmpty()) {
530
- ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
531
- return OperatorResultType::NEED_MORE_INPUT;
532
- } else {
533
- return OperatorResultType::FINISHED;
600
+ void AsOfProbeBuffer::GetData(ExecutionContext &context, DataChunk &chunk) {
601
+ // Handle dangling left join results from current chunk
602
+ if (!fetch_next_left) {
603
+ fetch_next_left = true;
604
+ if (left_outer.Enabled()) {
605
+ // left join: before we move to the next chunk, see if we need to output any vectors that didn't
606
+ // have a match found
607
+ left_outer.ConstructLeftJoinResult(lhs_payload, chunk);
608
+ left_outer.Reset();
534
609
  }
610
+ return;
535
611
  }
536
612
 
537
- input.Verify();
538
- switch (join_type) {
613
+ // Stop if there is no more data
614
+ if (!NextLeft()) {
615
+ return;
616
+ }
617
+
618
+ switch (op.join_type) {
539
619
  case JoinType::SEMI:
540
620
  case JoinType::ANTI:
541
621
  case JoinType::MARK:
542
622
  // simple joins can have max STANDARD_VECTOR_SIZE matches per chunk
543
- ResolveSimpleJoin(context, input, chunk, lstate);
544
- return OperatorResultType::NEED_MORE_INPUT;
623
+ ResolveSimpleJoin(context, chunk);
624
+ break;
545
625
  case JoinType::LEFT:
546
626
  case JoinType::INNER:
547
627
  case JoinType::RIGHT:
548
628
  case JoinType::OUTER:
549
- return ResolveComplexJoin(context, input, chunk, lstate);
629
+ ResolveComplexJoin(context, chunk);
630
+ break;
550
631
  default:
551
632
  throw NotImplementedException("Unimplemented type for as-of join!");
552
633
  }
553
634
  }
554
635
 
555
- //===--------------------------------------------------------------------===//
556
- // Source
557
- //===--------------------------------------------------------------------===//
558
636
  class AsOfGlobalSourceState : public GlobalSourceState {
559
637
  public:
560
- explicit AsOfGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
638
+ explicit AsOfGlobalSourceState(AsOfGlobalSinkState &gsink_p)
639
+ : gsink(gsink_p), next_combine(0), combined(0), merged(0), mergers(0), next_left(0), flushed(0), next_right(0) {
561
640
  }
562
641
 
563
- PartitionGlobalSinkState &gsink;
564
- //! The output read position.
565
- atomic<idx_t> next_bin;
566
-
567
- public:
568
- idx_t MaxThreads() override {
569
- // If there is only one partition, we have to process it on one thread.
570
- if (!gsink.grouping_data) {
571
- return 1;
642
+ PartitionGlobalMergeStates &GetMergeStates() {
643
+ lock_guard<mutex> guard(lock);
644
+ if (!merge_states) {
645
+ merge_states = make_uniq<PartitionGlobalMergeStates>(*gsink.lhs_sink);
572
646
  }
647
+ return *merge_states;
648
+ }
573
649
 
574
- // If there is not a lot of data, process serially.
575
- if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
576
- return 1;
577
- }
650
+ AsOfGlobalSinkState &gsink;
651
+ //! The next buffer to combine
652
+ atomic<size_t> next_combine;
653
+ //! The number of combined buffers
654
+ atomic<size_t> combined;
655
+ //! The number of combined buffers
656
+ atomic<size_t> merged;
657
+ //! The number of combined buffers
658
+ atomic<size_t> mergers;
659
+ //! The next buffer to flush
660
+ atomic<size_t> next_left;
661
+ //! The number of flushed buffers
662
+ atomic<size_t> flushed;
663
+ //! The right outer output read position.
664
+ atomic<idx_t> next_right;
665
+ //! The merge handler
666
+ mutex lock;
667
+ unique_ptr<PartitionGlobalMergeStates> merge_states;
578
668
 
579
- return gsink.hash_groups.size();
669
+ public:
670
+ idx_t MaxThreads() override {
671
+ return gsink.lhs_buffers.size();
580
672
  }
581
673
  };
582
674
 
583
675
  unique_ptr<GlobalSourceState> PhysicalAsOfJoin::GetGlobalSourceState(ClientContext &context) const {
584
676
  auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
585
- return make_uniq<AsOfGlobalSourceState>(gsink.global_partition);
677
+ return make_uniq<AsOfGlobalSourceState>(gsink);
586
678
  }
587
679
 
588
680
  class AsOfLocalSourceState : public LocalSourceState {
589
681
  public:
590
682
  using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
591
683
 
592
- explicit AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p);
684
+ AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op);
685
+
686
+ void CombineLeftPartitions();
687
+ void MergeLeftPartitions();
593
688
 
594
- idx_t GeneratePartition(const idx_t hash_bin);
689
+ idx_t BeginRightScan(const idx_t hash_bin);
595
690
 
596
- AsOfGlobalSinkState &gstate;
691
+ AsOfGlobalSourceState &gsource;
692
+
693
+ //! The left side partition being probed
694
+ AsOfProbeBuffer probe_buffer;
597
695
 
598
696
  //! The read partition
599
697
  idx_t hash_bin;
600
698
  HashGroupPtr hash_group;
601
-
602
699
  //! The read cursor
603
700
  unique_ptr<PayloadScanner> scanner;
604
- //! Buffer for the inputs
605
- DataChunk input_chunk;
606
701
  //! Pointer to the matches
607
- const bool *found_match;
702
+ const bool *found_match = {};
608
703
  };
609
704
 
610
- AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p) : gstate(gstate_p) {
611
- input_chunk.Initialize(gstate.global_partition.allocator, gstate.global_partition.payload_types);
705
+ AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op)
706
+ : gsource(gsource), probe_buffer(gsource.gsink.lhs_sink->context, op) {
707
+ gsource.mergers++;
708
+ }
709
+
710
+ void AsOfLocalSourceState::CombineLeftPartitions() {
711
+ const auto buffer_count = gsource.gsink.lhs_buffers.size();
712
+ while (gsource.combined < buffer_count) {
713
+ const auto next_combine = gsource.next_combine++;
714
+ if (next_combine < buffer_count) {
715
+ gsource.gsink.lhs_buffers[next_combine]->Combine();
716
+ ++gsource.combined;
717
+ } else {
718
+ std::this_thread::yield();
719
+ }
720
+ }
721
+ }
722
+
723
+ void AsOfLocalSourceState::MergeLeftPartitions() {
724
+ PartitionGlobalMergeStates::Callback local_callback;
725
+ PartitionLocalMergeState local_merge(*gsource.gsink.lhs_sink);
726
+ gsource.GetMergeStates().ExecuteTask(local_merge, local_callback);
727
+ gsource.merged++;
728
+ while (gsource.merged < gsource.mergers) {
729
+ std::this_thread::yield();
730
+ }
612
731
  }
613
732
 
614
- idx_t AsOfLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
615
- // Get rid of any stale data
733
+ idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
616
734
  hash_bin = hash_bin_p;
617
735
 
618
- hash_group = std::move(gstate.global_partition.hash_groups[hash_bin]);
736
+ hash_group = std::move(gsource.gsink.rhs_sink.hash_groups[hash_bin]);
619
737
  scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);
620
- found_match = gstate.right_outers[hash_bin].GetMatches();
738
+ found_match = gsource.gsink.right_outers[hash_bin].GetMatches();
621
739
 
622
740
  return scanner->Remaining();
623
741
  }
624
742
 
625
743
  unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
626
744
  GlobalSourceState &gstate) const {
627
- auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
628
- return make_uniq<AsOfLocalSourceState>(gsink);
745
+ auto &gsource = gstate.Cast<AsOfGlobalSourceState>();
746
+ return make_uniq<AsOfLocalSourceState>(gsource, *this);
629
747
  }
630
748
 
631
749
  SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &chunk,
632
750
  OperatorSourceInput &input) const {
633
- D_ASSERT(IsRightOuterJoin(join_type));
634
-
635
751
  auto &gsource = input.global_state.Cast<AsOfGlobalSourceState>();
636
752
  auto &lsource = input.local_state.Cast<AsOfLocalSourceState>();
637
- auto &gsink = gsource.gsink;
753
+ auto &rhs_sink = gsource.gsink.rhs_sink;
754
+
755
+ // Step 1: Combine the partitions
756
+ lsource.CombineLeftPartitions();
757
+
758
+ // Step 2: Sort on all threads
759
+ lsource.MergeLeftPartitions();
760
+
761
+ // Step 3: Join the partitions
762
+ auto &lhs_sink = *gsource.gsink.lhs_sink;
763
+ auto &partitions = lhs_sink.grouping_data->GetPartitions();
764
+ const auto left_bins = partitions.size();
765
+ while (gsource.flushed < left_bins) {
766
+ // Make sure we have something to flush
767
+ if (!lsource.probe_buffer.Scanning()) {
768
+ const auto left_bin = gsource.next_left++;
769
+ if (left_bin < left_bins) {
770
+ // More to flush
771
+ lsource.probe_buffer.BeginLeftScan(left_bin);
772
+ } else if (!IsRightOuterJoin(join_type)) {
773
+ return SourceResultType::FINISHED;
774
+ } else {
775
+ // Wait for all threads to finish
776
+ // TODO: How to implement a spin wait correctly?
777
+ // Returning BLOCKED seems to hang the system.
778
+ std::this_thread::yield();
779
+ continue;
780
+ }
781
+ }
782
+
783
+ lsource.probe_buffer.GetData(context, chunk);
784
+ if (chunk.size()) {
785
+ return SourceResultType::HAVE_MORE_OUTPUT;
786
+ } else if (lsource.probe_buffer.HasMoreData()) {
787
+ // Join the next partition
788
+ continue;
789
+ } else {
790
+ lsource.probe_buffer.EndScan();
791
+ gsource.flushed++;
792
+ }
793
+ }
794
+
795
+ // Step 4: Emit right join matches
796
+ if (!IsRightOuterJoin(join_type)) {
797
+ return SourceResultType::FINISHED;
798
+ }
638
799
 
639
- auto &hash_groups = gsink.hash_groups;
640
- const auto bin_count = hash_groups.size();
800
+ auto &hash_groups = rhs_sink.hash_groups;
801
+ const auto right_groups = hash_groups.size();
641
802
 
642
803
  DataChunk rhs_chunk;
643
- rhs_chunk.Initialize(Allocator::Get(context.client), gsink.payload_types);
804
+ rhs_chunk.Initialize(Allocator::Get(context.client), rhs_sink.payload_types);
644
805
  SelectionVector rsel(STANDARD_VECTOR_SIZE);
645
806
 
646
807
  while (chunk.size() == 0) {
@@ -648,17 +809,17 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
648
809
  while (!lsource.scanner || !lsource.scanner->Remaining()) {
649
810
  lsource.scanner.reset();
650
811
  lsource.hash_group.reset();
651
- auto hash_bin = gsource.next_bin++;
652
- if (hash_bin >= bin_count) {
812
+ auto hash_bin = gsource.next_right++;
813
+ if (hash_bin >= right_groups) {
653
814
  return SourceResultType::FINISHED;
654
815
  }
655
816
 
656
- for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
817
+ for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_right++) {
657
818
  if (hash_groups[hash_bin]) {
658
819
  break;
659
820
  }
660
821
  }
661
- lsource.GeneratePartition(hash_bin);
822
+ lsource.BeginRightScan(hash_bin);
662
823
  }
663
824
  const auto rhs_position = lsource.scanner->Scanned();
664
825
  lsource.scanner->Scan(rhs_chunk);