duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +42 -5
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/file_system.cpp +19 -0
  61. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  62. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  63. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  64. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  65. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  66. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  67. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  68. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  69. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  70. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  71. package/src/duckdb/src/common/types/bit.cpp +51 -0
  72. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  73. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  74. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  75. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  76. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  77. package/src/duckdb/src/common/types/date.cpp +9 -0
  78. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  81. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  82. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  83. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  84. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  85. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  86. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  92. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  93. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  94. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  95. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  96. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  97. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  98. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  100. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  102. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  103. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  104. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  105. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  106. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  107. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  108. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  109. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  110. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  111. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  112. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  113. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  114. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  115. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  116. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  117. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  119. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  121. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
  122. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  123. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  124. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  125. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  126. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  127. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  128. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  129. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  131. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  132. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  133. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  134. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  135. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  136. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  137. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  138. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  139. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  140. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  141. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  142. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  143. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  144. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  145. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  146. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  147. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  148. package/src/duckdb/src/function/function.cpp +3 -1
  149. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  150. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  151. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  152. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  153. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  154. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  155. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  156. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  157. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  158. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  159. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  160. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  161. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  162. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  163. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  164. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  165. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  166. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  168. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  169. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  170. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  171. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  172. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  173. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  174. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  175. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  176. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  177. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  178. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  179. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  181. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  182. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  183. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  184. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  185. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  186. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  187. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  188. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  189. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  190. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  191. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  192. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  193. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  194. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  195. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  196. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  197. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  198. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  200. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  201. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  207. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  210. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  211. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  218. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  219. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  220. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  221. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  222. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  223. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  224. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  225. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  226. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  227. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  228. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  229. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  230. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  231. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  232. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  233. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  234. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  235. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  236. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  237. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  238. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  239. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  240. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  241. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  242. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  243. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  246. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  247. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  248. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  249. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  250. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  251. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  252. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  253. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  254. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  255. package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
  256. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  257. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  258. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  259. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  260. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  262. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  263. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  265. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  266. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  267. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  269. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  270. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  272. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  273. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  274. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  275. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  277. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  279. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  280. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  281. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  282. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  283. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  284. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  285. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  286. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  287. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  288. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  292. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  293. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  294. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  295. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  296. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  297. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  298. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  299. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  300. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  301. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  302. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  303. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  304. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  305. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  306. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  307. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  308. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  309. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  310. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  311. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  312. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  313. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  314. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  315. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  316. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  317. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  318. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  319. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  320. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  321. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  322. package/src/duckdb/src/include/duckdb.h +28 -0
  323. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  324. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  325. package/src/duckdb/src/main/config.cpp +3 -0
  326. package/src/duckdb/src/main/database.cpp +1 -1
  327. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  328. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  329. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  330. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  331. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  332. package/src/duckdb/src/main/relation.cpp +6 -5
  333. package/src/duckdb/src/main/settings/settings.cpp +64 -18
  334. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  335. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  336. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  337. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  338. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  339. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  340. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  341. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  342. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  343. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  344. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  345. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  346. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  347. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  348. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  349. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  350. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  351. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  352. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  353. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  354. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  355. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  356. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  357. package/src/duckdb/src/parallel/executor.cpp +15 -0
  358. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  359. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  360. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  361. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  362. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  363. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  364. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  365. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  366. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  367. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  368. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  369. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  370. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  371. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  372. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  373. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  374. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  375. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  376. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  377. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  378. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  379. package/src/duckdb/src/parser/parser.cpp +8 -2
  380. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  381. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  382. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  383. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  384. package/src/duckdb/src/parser/query_node.cpp +15 -37
  385. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  386. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  387. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  388. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  389. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  390. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  391. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  392. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  393. package/src/duckdb/src/parser/tableref.cpp +0 -44
  394. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  395. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  396. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  397. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  398. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  399. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  400. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  401. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  402. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  403. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  404. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  405. package/src/duckdb/src/parser/transformer.cpp +15 -0
  406. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  407. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  408. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  409. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  410. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  411. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  412. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  413. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  414. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  415. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  416. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  417. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  418. package/src/duckdb/src/planner/binder.cpp +5 -0
  419. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  420. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  421. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  422. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  423. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  424. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  425. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  426. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  427. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  428. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  429. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  430. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  431. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  432. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  433. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  434. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  435. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  436. package/src/duckdb/src/storage/data_table.cpp +1 -1
  437. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  438. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  439. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  440. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  441. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  442. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  443. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  444. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  445. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  446. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  447. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  448. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  449. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  450. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  451. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  452. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  453. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  454. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  455. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  456. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  457. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  458. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  459. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  460. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  461. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  462. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  463. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  464. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  465. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  466. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  467. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  468. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  469. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  470. package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
  471. package/src/statement.cpp +10 -3
  472. package/test/test_all_types.test.ts +233 -0
  473. package/tsconfig.json +1 -0
  474. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  475. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  476. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -32,7 +32,7 @@ string PhysicalComparisonJoin::ParamsToString() const {
32
32
  }
33
33
  extra_info += "\n[INFOSEPARATOR]\n";
34
34
  extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
35
- extra_info += StringUtil::Format("Cost: %llu", (idx_t)estimated_props->GetCost());
35
+ extra_info += StringUtil::Format("Cost: %llu", estimated_props->GetCost<idx_t>());
36
36
  return extra_info;
37
37
  }
38
38
 
@@ -96,7 +96,7 @@ public:
96
96
  class HashJoinLocalSinkState : public LocalSinkState {
97
97
  public:
98
98
  HashJoinLocalSinkState(const PhysicalHashJoin &op, ClientContext &context) : build_executor(context) {
99
- auto &allocator = Allocator::Get(context);
99
+ auto &allocator = BufferAllocator::Get(context);
100
100
  if (!op.right_projection_map.empty()) {
101
101
  build_chunk.Initialize(allocator, op.build_types);
102
102
  }
@@ -124,7 +124,7 @@ public:
124
124
  unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
125
125
  auto result =
126
126
  make_uniq<JoinHashTable>(BufferManager::GetBufferManager(context), conditions, build_types, join_type);
127
- result->max_ht_size = double(BufferManager::GetBufferManager(context).GetMaxMemory()) * 0.6;
127
+ result->max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
128
128
  if (!delim_types.empty() && join_type == JoinType::MARK) {
129
129
  // correlated MARK join
130
130
  if (delim_types.size() + 1 == conditions.size()) {
@@ -162,7 +162,7 @@ unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &c
162
162
  payload_types.push_back(aggr->return_type);
163
163
  info.correlated_aggregates.push_back(std::move(aggr));
164
164
 
165
- auto &allocator = Allocator::Get(context);
165
+ auto &allocator = BufferAllocator::Get(context);
166
166
  info.correlated_counts = make_uniq<GroupedAggregateHashTable>(context, allocator, delim_types,
167
167
  payload_types, correlated_aggregates);
168
168
  info.correlated_types = delim_types;
@@ -312,10 +312,10 @@ void HashJoinGlobalSinkState::InitializeProbeSpill() {
312
312
  }
313
313
  }
314
314
 
315
- class HashJoinPartitionTask : public ExecutorTask {
315
+ class HashJoinRepartitionTask : public ExecutorTask {
316
316
  public:
317
- HashJoinPartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
318
- JoinHashTable &local_ht)
317
+ HashJoinRepartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
318
+ JoinHashTable &local_ht)
319
319
  : ExecutorTask(context), event(std::move(event_p)), global_ht(global_ht), local_ht(local_ht) {
320
320
  }
321
321
 
@@ -349,7 +349,7 @@ public:
349
349
  partition_tasks.reserve(local_hts.size());
350
350
  for (auto &local_ht : local_hts) {
351
351
  partition_tasks.push_back(
352
- make_uniq<HashJoinPartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
352
+ make_uniq<HashJoinRepartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
353
353
  }
354
354
  SetTasks(std::move(partition_tasks));
355
355
  }
@@ -434,7 +434,7 @@ public:
434
434
  };
435
435
 
436
436
  unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &context) const {
437
- auto &allocator = Allocator::Get(context.client);
437
+ auto &allocator = BufferAllocator::Get(context.client);
438
438
  auto &sink = sink_state->Cast<HashJoinGlobalSinkState>();
439
439
  auto state = make_uniq<HashJoinOperatorState>(context.client);
440
440
  if (sink.perfect_join_executor) {
@@ -532,7 +532,18 @@ public:
532
532
  bool AssignTask(HashJoinGlobalSinkState &sink, HashJoinLocalSourceState &lstate);
533
533
 
534
534
  idx_t MaxThreads() override {
535
- return probe_count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
535
+ D_ASSERT(op.sink_state);
536
+ auto &gstate = op.sink_state->Cast<HashJoinGlobalSinkState>();
537
+
538
+ idx_t count;
539
+ if (gstate.probe_spill) {
540
+ count = probe_count;
541
+ } else if (IsRightOuterJoin(op.join_type)) {
542
+ count = gstate.hash_table->Count();
543
+ } else {
544
+ return 0;
545
+ }
546
+ return count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
536
547
  }
537
548
 
538
549
  public:
@@ -611,7 +622,7 @@ unique_ptr<GlobalSourceState> PhysicalHashJoin::GetGlobalSourceState(ClientConte
611
622
 
612
623
  unique_ptr<LocalSourceState> PhysicalHashJoin::GetLocalSourceState(ExecutionContext &context,
613
624
  GlobalSourceState &gstate) const {
614
- return make_uniq<HashJoinLocalSourceState>(*this, Allocator::Get(context.client));
625
+ return make_uniq<HashJoinLocalSourceState>(*this, BufferAllocator::Get(context.client));
615
626
  }
616
627
 
617
628
  HashJoinGlobalSourceState::HashJoinGlobalSourceState(const PhysicalHashJoin &op, ClientContext &context)
@@ -60,7 +60,7 @@ void PhysicalJoin::BuildJoinPipelines(Pipeline &current, MetaPipeline &meta_pipe
60
60
  // Join can become a source operator if it's RIGHT/OUTER, or if the hash join goes out-of-core
61
61
  bool add_child_pipeline = false;
62
62
  auto &join_op = op.Cast<PhysicalJoin>();
63
- if (IsRightOuterJoin(join_op.join_type) || join_op.type == PhysicalOperatorType::HASH_JOIN) {
63
+ if (join_op.IsSource()) {
64
64
  add_child_pipeline = true;
65
65
  }
66
66
 
@@ -208,6 +208,7 @@ public:
208
208
  idx_t right_position;
209
209
  idx_t right_chunk_index;
210
210
  idx_t right_base;
211
+ idx_t prev_left_index;
211
212
 
212
213
  // Secondary predicate shared data
213
214
  SelectionVector sel;
@@ -431,7 +432,8 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
431
432
  }
432
433
  }
433
434
 
434
- static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison) {
435
+ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison,
436
+ idx_t &prev_left_index) {
435
437
  const auto cmp = MergeJoinComparisonValue(comparison);
436
438
 
437
439
  // The sort parameters should all be the same
@@ -465,6 +467,20 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
465
467
 
466
468
  idx_t result_count = 0;
467
469
  while (true) {
470
+ if (l.entry_idx < prev_left_index) {
471
+ // left side smaller: found match
472
+ l.result.set_index(result_count, sel_t(l.entry_idx));
473
+ r.result.set_index(result_count, sel_t(r.entry_idx));
474
+ result_count++;
475
+ // move left side forward
476
+ l.entry_idx++;
477
+ l_ptr += entry_size;
478
+ if (result_count == STANDARD_VECTOR_SIZE) {
479
+ // out of space!
480
+ break;
481
+ }
482
+ continue;
483
+ }
468
484
  if (l.entry_idx < l.not_null) {
469
485
  int comp_res;
470
486
  if (all_constant) {
@@ -474,7 +490,6 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
474
490
  rread.entry_idx = r.entry_idx;
475
491
  comp_res = Comparators::CompareTuple(lread, rread, l_ptr, r_ptr, l.state.sort_layout, external);
476
492
  }
477
-
478
493
  if (comp_res <= cmp) {
479
494
  // left side smaller: found match
480
495
  l.result.set_index(result_count, sel_t(l.entry_idx));
@@ -490,6 +505,8 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
490
505
  continue;
491
506
  }
492
507
  }
508
+
509
+ prev_left_index = l.entry_idx;
493
510
  // right side smaller or equal, or left side exhausted: move
494
511
  // right pointer forward reset left side to start
495
512
  r.entry_idx++;
@@ -521,6 +538,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
521
538
  state.right_chunk_index = 0;
522
539
  state.right_base = 0;
523
540
  state.left_position = 0;
541
+ state.prev_left_index = 0;
524
542
  state.right_position = 0;
525
543
  state.first_fetch = false;
526
544
  state.finished = false;
@@ -547,7 +565,8 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
547
565
  BlockMergeInfo right_info(gstate.table->global_sort_state, state.right_chunk_index, state.right_position,
548
566
  rhs_not_null);
549
567
 
550
- idx_t result_count = MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison);
568
+ idx_t result_count =
569
+ MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison, state.prev_left_index);
551
570
  if (result_count == 0) {
552
571
  // exhausted this chunk on the right side
553
572
  // move to the next right chunk
@@ -167,8 +167,8 @@ PhysicalRangeJoin::PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType t
167
167
  // TODO: use stats to improve the choice?
168
168
  // TODO: Prefer fixed length types?
169
169
  if (conditions.size() > 1) {
170
- auto conditions_p = std::move(conditions);
171
- conditions.resize(conditions_p.size());
170
+ vector<JoinCondition> conditions_p(conditions.size());
171
+ std::swap(conditions_p, conditions);
172
172
  idx_t range_position = 0;
173
173
  idx_t other_position = conditions_p.size();
174
174
  for (idx_t i = 0; i < conditions_p.size(); ++i) {
@@ -1,5 +1,4 @@
1
1
  #include "duckdb/execution/operator/persistent/base_csv_reader.hpp"
2
-
3
2
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
4
3
  #include "duckdb/common/file_system.hpp"
5
4
  #include "duckdb/common/string_util.hpp"
@@ -10,6 +9,7 @@
10
9
  #include "duckdb/common/vector_operations/unary_executor.hpp"
11
10
  #include "duckdb/common/vector_operations/vector_operations.hpp"
12
11
  #include "duckdb/function/scalar/strftime_format.hpp"
12
+ #include "duckdb/main/appender.hpp"
13
13
  #include "duckdb/main/database.hpp"
14
14
  #include "duckdb/parser/column_definition.hpp"
15
15
  #include "duckdb/storage/data_table.hpp"
@@ -18,7 +18,8 @@
18
18
  #include "duckdb/parser/keyword_helper.hpp"
19
19
  #include "duckdb/main/error_manager.hpp"
20
20
  #include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
21
-
21
+ #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
22
+ #include "duckdb/main/client_data.hpp"
22
23
  #include <algorithm>
23
24
  #include <cctype>
24
25
  #include <cstring>
@@ -448,6 +449,17 @@ bool TryCastFloatingVectorCommaSeparated(BufferedCSVReaderOptions &options, Vect
448
449
  }
449
450
  }
450
451
 
452
+ // Location of erroneous value in the current parse chunk
453
+ struct ErrorLocation {
454
+ idx_t row_idx;
455
+ idx_t col_idx;
456
+ idx_t row_line;
457
+
458
+ ErrorLocation(idx_t row_idx, idx_t col_idx, idx_t row_line)
459
+ : row_idx(row_idx), col_idx(col_idx), row_line(row_line) {
460
+ }
461
+ };
462
+
451
463
  bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_add_line) {
452
464
  if (parse_chunk.size() == 0) {
453
465
  return true;
@@ -506,10 +518,7 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
506
518
  if (try_add_line) {
507
519
  return false;
508
520
  }
509
- if (options.ignore_errors) {
510
- conversion_error_ignored = true;
511
- continue;
512
- }
521
+
513
522
  string col_name = to_string(col_idx);
514
523
  if (col_idx < names.size()) {
515
524
  col_name = "\"" + names[col_idx] + "\"";
@@ -527,16 +536,18 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
527
536
  }
528
537
  }
529
538
 
530
- idx_t error_line;
531
539
  // The line_error must be summed with linenr (All lines emmited from this batch)
532
540
  // But subtracted from the parse_chunk
533
541
  D_ASSERT(line_error + linenr >= parse_chunk.size());
534
542
  line_error += linenr;
535
543
  line_error -= parse_chunk.size();
536
544
 
537
- error_line = GetLineError(line_error, buffer_idx);
545
+ auto error_line = GetLineError(line_error, buffer_idx);
546
+
547
+ if (options.ignore_errors) {
548
+ conversion_error_ignored = true;
538
549
 
539
- if (options.auto_detect) {
550
+ } else if (options.auto_detect) {
540
551
  throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "
541
552
  "options:\n%s.\n\nConsider either increasing the sample size "
542
553
  "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
@@ -550,11 +561,19 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
550
561
  }
551
562
  if (conversion_error_ignored) {
552
563
  D_ASSERT(options.ignore_errors);
564
+
553
565
  SelectionVector succesful_rows(parse_chunk.size());
554
566
  idx_t sel_size = 0;
555
567
 
568
+ // Keep track of failed cells
569
+ vector<ErrorLocation> failed_cells;
570
+
556
571
  for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
557
- bool failed = false;
572
+
573
+ auto global_row_idx = row_idx + linenr - parse_chunk.size();
574
+ auto row_line = GetLineError(global_row_idx, buffer_idx, false);
575
+
576
+ bool row_failed = false;
558
577
  for (idx_t c = 0; c < reader_data.column_ids.size(); c++) {
559
578
  auto col_idx = reader_data.column_ids[c];
560
579
  auto result_idx = reader_data.column_mapping[c];
@@ -564,14 +583,82 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
564
583
 
565
584
  bool was_already_null = FlatVector::IsNull(parse_vector, row_idx);
566
585
  if (!was_already_null && FlatVector::IsNull(result_vector, row_idx)) {
567
- failed = true;
568
- break;
586
+ row_failed = true;
587
+ failed_cells.emplace_back(row_idx, col_idx, row_line);
569
588
  }
570
589
  }
571
- if (!failed) {
590
+ if (!row_failed) {
572
591
  succesful_rows.set_index(sel_size++, row_idx);
573
592
  }
574
593
  }
594
+
595
+ // Now do a second pass to produce the reject table entries
596
+ if (!failed_cells.empty() && !options.rejects_table_name.empty()) {
597
+ auto limit = options.rejects_limit;
598
+
599
+ auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name);
600
+ lock_guard<mutex> lock(rejects->write_lock);
601
+
602
+ // short circuit if we already have too many rejects
603
+ if (limit == 0 || rejects->count < limit) {
604
+ auto &table = rejects->GetTable(context);
605
+ InternalAppender appender(context, table);
606
+ auto file_name = GetFileName();
607
+
608
+ for (auto &cell : failed_cells) {
609
+ if (limit != 0 && rejects->count >= limit) {
610
+ break;
611
+ }
612
+ rejects->count++;
613
+
614
+ auto row_idx = cell.row_idx;
615
+ auto col_idx = cell.col_idx;
616
+ auto row_line = cell.row_line;
617
+
618
+ auto col_name = to_string(col_idx);
619
+ if (col_idx < names.size()) {
620
+ col_name = "\"" + names[col_idx] + "\"";
621
+ }
622
+
623
+ auto &parse_vector = parse_chunk.data[col_idx];
624
+ auto parsed_str = FlatVector::GetData<string_t>(parse_vector)[row_idx];
625
+ auto &type = insert_chunk.data[col_idx].GetType();
626
+ auto row_error_msg = StringUtil::Format("Could not convert string '%s' to '%s'",
627
+ parsed_str.GetString(), type.ToString());
628
+
629
+ // Add the row to the rejects table
630
+ appender.BeginRow();
631
+ appender.Append(string_t(file_name));
632
+ appender.Append(row_line);
633
+ appender.Append(col_idx);
634
+ appender.Append(string_t(col_name));
635
+ appender.Append(parsed_str);
636
+
637
+ if (!options.rejects_recovery_columns.empty()) {
638
+ child_list_t<Value> recovery_key;
639
+ for (auto &key_idx : options.rejects_recovery_column_ids) {
640
+ // Figure out if the recovery key is valid.
641
+ // If not, error out for real.
642
+ auto &component_vector = parse_chunk.data[key_idx];
643
+ if (FlatVector::IsNull(component_vector, row_idx)) {
644
+ throw InvalidInputException("%s at line %llu in column %s. Parser options:\n%s ",
645
+ "Could not parse recovery column", row_line, col_name,
646
+ options.ToString());
647
+ }
648
+ auto component = Value(FlatVector::GetData<string_t>(component_vector)[row_idx]);
649
+ recovery_key.emplace_back(names[key_idx], component);
650
+ }
651
+ appender.Append(Value::STRUCT(recovery_key));
652
+ }
653
+
654
+ appender.Append(string_t(row_error_msg));
655
+ appender.EndRow();
656
+ }
657
+ appender.Close();
658
+ }
659
+ }
660
+
661
+ // Now slice the insert chunk to only include the succesful rows
575
662
  insert_chunk.Slice(succesful_rows, sel_size);
576
663
  }
577
664
  parse_chunk.Reset();
@@ -13,7 +13,7 @@ CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<Fi
13
13
 
14
14
  unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
15
15
  FileCompressionType compression) {
16
- auto file_handle = fs.OpenFile(path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
16
+ auto file_handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
17
17
  if (file_handle->CanSeek()) {
18
18
  file_handle->Reset();
19
19
  }
@@ -179,6 +179,26 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
179
179
  allow_quoted_nulls = ParseBoolean(value, loption);
180
180
  } else if (loption == "parallel") {
181
181
  parallel_mode = ParseBoolean(value, loption) ? ParallelMode::PARALLEL : ParallelMode::SINGLE_THREADED;
182
+ } else if (loption == "rejects_table") {
183
+ // skip, handled in SetRejectsOptions
184
+ auto table_name = ParseString(value, loption);
185
+ if (table_name.empty()) {
186
+ throw BinderException("REJECTS_TABLE option cannot be empty");
187
+ }
188
+ rejects_table_name = table_name;
189
+ } else if (loption == "rejects_recovery_columns") {
190
+ // Get the list of columns to use as a recovery key
191
+ auto &children = ListValue::GetChildren(value);
192
+ for (auto &child : children) {
193
+ auto col_name = child.GetValue<string>();
194
+ rejects_recovery_columns.push_back(col_name);
195
+ }
196
+ } else if (loption == "rejects_limit") {
197
+ int64_t limit = ParseInteger(value, loption);
198
+ if (limit < 0) {
199
+ throw BinderException("Unsupported parameter for REJECTS_LIMIT: cannot be negative");
200
+ }
201
+ rejects_limit = limit;
182
202
  } else {
183
203
  throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
184
204
  }
@@ -0,0 +1,48 @@
1
+ #include "duckdb/main/appender.hpp"
2
+ #include "duckdb/parser/parsed_data/create_table_info.hpp"
3
+ #include "duckdb/function/table/read_csv.hpp"
4
+ #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
5
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) {
10
+ auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
11
+ auto &table_entry = temp_catalog.GetEntry<TableCatalogEntry>(context, TEMP_CATALOG, DEFAULT_SCHEMA, name);
12
+ return table_entry;
13
+ }
14
+
15
+ shared_ptr<CSVRejectsTable> CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) {
16
+ auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name);
17
+ auto &cache = ObjectCache::GetObjectCache(context);
18
+ return cache.GetOrCreate<CSVRejectsTable>(key, name);
19
+ }
20
+
21
+ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) {
22
+ // (Re)Create the temporary rejects table
23
+ auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
24
+ auto info = make_uniq<CreateTableInfo>(TEMP_CATALOG, DEFAULT_SCHEMA, name);
25
+ info->temporary = true;
26
+ info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT;
27
+ info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR));
28
+ info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT));
29
+ info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT));
30
+ info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR));
31
+ info->columns.AddColumn(ColumnDefinition("parsed_value", LogicalType::VARCHAR));
32
+
33
+ if (!data.options.rejects_recovery_columns.empty()) {
34
+ child_list_t<LogicalType> recovery_key_components;
35
+ for (auto &col_name : data.options.rejects_recovery_columns) {
36
+ recovery_key_components.emplace_back(col_name, LogicalType::VARCHAR);
37
+ }
38
+ info->columns.AddColumn(ColumnDefinition("recovery_columns", LogicalType::STRUCT(recovery_key_components)));
39
+ }
40
+
41
+ info->columns.AddColumn(ColumnDefinition("error", LogicalType::VARCHAR));
42
+
43
+ catalog.CreateTable(context, std::move(info));
44
+
45
+ count = 0;
46
+ }
47
+
48
+ } // namespace duckdb
@@ -635,12 +635,11 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
635
635
  }
636
636
  }
637
637
 
638
- idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
638
+ idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first) {
639
639
  while (true) {
640
640
  if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
641
641
  auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
642
- // line errors are 1-indexed
643
- return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
642
+ return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false, stop_at_first);
644
643
  }
645
644
  }
646
645
  }
@@ -1,9 +1,11 @@
1
1
  #include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
2
+
3
+ #include "duckdb/common/allocator.hpp"
4
+ #include "duckdb/common/types/batched_data_collection.hpp"
5
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
2
6
  #include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
3
7
  #include "duckdb/parallel/base_pipeline_event.hpp"
4
- #include "duckdb/common/vector_operations/vector_operations.hpp"
5
- #include "duckdb/common/types/batched_data_collection.hpp"
6
- #include "duckdb/common/allocator.hpp"
8
+
7
9
  #include <algorithm>
8
10
 
9
11
  namespace duckdb {
@@ -67,7 +69,7 @@ public:
67
69
  optional_idx batch_index;
68
70
 
69
71
  void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
70
- collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
72
+ collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
71
73
  collection->InitializeAppend(append_state);
72
74
  }
73
75
  };
@@ -116,7 +116,7 @@ public:
116
116
  optional_idx batch_index;
117
117
 
118
118
  void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
119
- collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
119
+ collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
120
120
  collection->InitializeAppend(append_state);
121
121
  }
122
122
  };
@@ -353,7 +353,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
353
353
  } else {
354
354
  // the collection is too large for a batch - we need to repartition
355
355
  // create an empty collection
356
- current_collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), children[0]->types);
356
+ current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
357
357
  }
358
358
  if (current_collection) {
359
359
  current_collection->InitializeAppend(append_state);
@@ -373,7 +373,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
373
373
  }
374
374
  // the collection is full - move it to the result and create a new one
375
375
  gstate.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(current_collection)));
376
- current_collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), children[0]->types);
376
+ current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
377
377
  current_collection->InitializeAppend(append_state);
378
378
  }
379
379
  }
@@ -41,7 +41,7 @@ PhysicalInsert::PhysicalInsert(vector<LogicalType> types_p, TableCatalogEntry &t
41
41
  return;
42
42
  }
43
43
 
44
- D_ASSERT(set_expressions.size() == set_columns.size());
44
+ D_ASSERT(this->set_expressions.size() == this->set_columns.size());
45
45
 
46
46
  // One or more columns are referenced from the existing table,
47
47
  // we use the 'insert_types' to figure out which types these columns have
@@ -16,6 +16,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
16
16
  pivot_map[bound_pivot.pivot_values[p]] = bound_pivot.group_count + p;
17
17
  }
18
18
  // extract the empty aggregate expressions
19
+ ArenaAllocator allocator(Allocator::DefaultAllocator());
19
20
  for (auto &aggr_expr : bound_pivot.aggregates) {
20
21
  auto &aggr = aggr_expr->Cast<BoundAggregateExpression>();
21
22
  // for each aggregate, initialize an empty aggregate state and finalize it immediately
@@ -23,7 +24,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
23
24
  aggr.function.initialize(state.get());
24
25
  Vector state_vector(Value::POINTER(CastPointerToValue(state.get())));
25
26
  Vector result_vector(aggr_expr->return_type);
26
- AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
27
+ AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
27
28
  aggr.function.finalize(state_vector, aggr_input_data, result_vector, 1, 0);
28
29
  empty_aggregates.push_back(result_vector.GetValue(0));
29
30
  }
@@ -64,6 +64,9 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
64
64
  state.SetPipelineSource(current, delim_join.distinct->Cast<PhysicalOperator>());
65
65
  return;
66
66
  }
67
+ case PhysicalOperatorType::CTE_SCAN: {
68
+ break;
69
+ }
67
70
  case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
68
71
  if (!meta_pipeline.HasRecursiveCTE()) {
69
72
  throw InternalException("Recursive CTE scan found without recursive CTE node");
@@ -76,4 +79,20 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
76
79
  state.SetPipelineSource(current, *this);
77
80
  }
78
81
 
82
+ string PhysicalColumnDataScan::ParamsToString() const {
83
+ string result = "";
84
+ switch (type) {
85
+ case PhysicalOperatorType::CTE_SCAN:
86
+ case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
87
+ result += "\n[INFOSEPARATOR]\n";
88
+ result += StringUtil::Format("idx: %llu", cte_index);
89
+ break;
90
+ }
91
+ default:
92
+ break;
93
+ }
94
+
95
+ return result;
96
+ }
97
+
79
98
  } // namespace duckdb
@@ -16,17 +16,18 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
16
16
  : PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
17
17
  function(std::move(function_p)), bind_data(std::move(bind_data_p)), column_ids(std::move(column_ids_p)),
18
18
  names(std::move(names_p)), table_filters(std::move(table_filters_p)) {
19
+ extra_info.file_filters = "";
19
20
  }
20
21
 
21
22
  PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction function_p,
22
23
  unique_ptr<FunctionData> bind_data_p, vector<LogicalType> returned_types_p,
23
24
  vector<column_t> column_ids_p, vector<idx_t> projection_ids_p,
24
25
  vector<string> names_p, unique_ptr<TableFilterSet> table_filters_p,
25
- idx_t estimated_cardinality)
26
+ idx_t estimated_cardinality, ExtraOperatorInfo extra_info)
26
27
  : PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
27
28
  function(std::move(function_p)), bind_data(std::move(bind_data_p)), returned_types(std::move(returned_types_p)),
28
29
  column_ids(std::move(column_ids_p)), projection_ids(std::move(projection_ids_p)), names(std::move(names_p)),
29
- table_filters(std::move(table_filters_p)) {
30
+ table_filters(std::move(table_filters_p)), extra_info(extra_info) {
30
31
  }
31
32
 
32
33
  class TableScanGlobalSourceState : public GlobalSourceState {
@@ -149,6 +150,10 @@ string PhysicalTableScan::ParamsToString() const {
149
150
  }
150
151
  }
151
152
  }
153
+ if (!extra_info.file_filters.empty()) {
154
+ result += "\n[INFOSEPARATOR]\n";
155
+ result += "File Filters: " + extra_info.file_filters;
156
+ }
152
157
  result += "\n[INFOSEPARATOR]\n";
153
158
  result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
154
159
  return result;