duckdb 0.8.2-dev161.0 → 0.8.2-dev1724.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (504):
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  23. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  24. package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
  25. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  26. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  27. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  28. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  29. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  30. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  31. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  32. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  33. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  34. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  39. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  40. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  41. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  42. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  43. package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
  44. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  45. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  46. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  47. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  48. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  49. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
  55. package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/exception.cpp +2 -2
  62. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  63. package/src/duckdb/src/common/file_system.cpp +19 -0
  64. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  65. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  66. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  67. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  68. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  69. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  70. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  71. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  72. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  73. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  74. package/src/duckdb/src/common/types/bit.cpp +51 -0
  75. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  76. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  77. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  78. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  79. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  80. package/src/duckdb/src/common/types/date.cpp +9 -0
  81. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  82. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  83. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  84. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  85. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  86. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  87. package/src/duckdb/src/common/types/value.cpp +11 -6
  88. package/src/duckdb/src/common/types.cpp +9 -656
  89. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  90. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  91. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  94. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  95. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  96. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  97. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  98. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  99. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  100. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  101. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  102. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  103. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  104. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  105. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  106. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  107. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  108. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  109. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  110. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  111. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  112. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  113. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  114. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  115. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  116. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  117. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  118. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  119. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  120. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  121. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  122. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  124. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  125. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  126. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  127. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  128. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  129. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  131. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  132. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  133. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  134. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  135. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  136. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  137. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  138. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  139. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  140. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  141. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  142. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  143. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  144. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  145. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  146. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  147. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  148. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  149. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  150. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  151. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  152. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  153. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  154. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  155. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  156. package/src/duckdb/src/function/function.cpp +3 -1
  157. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  158. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  159. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  160. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  161. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  162. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  163. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  164. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  165. package/src/duckdb/src/function/table/arrow.cpp +19 -0
  166. package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
  167. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  168. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  169. package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
  170. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  171. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  172. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  173. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  174. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  176. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  177. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  178. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  179. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  180. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  181. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  182. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  183. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  184. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  185. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  186. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  187. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  188. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  189. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  190. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
  191. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  192. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  193. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  194. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  195. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  196. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  197. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  198. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  200. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  201. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  202. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  205. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  206. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  207. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  208. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  210. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  211. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  213. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  215. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  222. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  229. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  230. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  231. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  232. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  233. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  234. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  235. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  236. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  237. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  238. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  239. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  240. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  241. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  242. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  243. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  244. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  245. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  246. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  247. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  248. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  249. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  251. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  252. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  253. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  254. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  255. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  256. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  257. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  258. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  259. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  260. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  261. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  262. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  263. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  264. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  265. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  266. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  267. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  268. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  269. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  270. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  272. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  273. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  274. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  275. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
  276. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  277. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  278. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  279. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  280. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  281. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  284. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  286. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  287. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  288. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  291. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  293. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  294. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  295. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  296. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  297. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  300. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  302. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  303. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  304. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  306. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  307. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  308. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  309. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  310. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  311. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  312. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  314. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  315. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  316. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  317. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  318. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  319. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  320. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  321. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  322. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  323. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  324. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  325. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  326. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  327. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  328. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  329. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  330. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  331. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  332. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  333. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  334. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  335. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  336. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  337. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  338. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  339. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  340. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  341. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  342. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  343. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  344. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  345. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  346. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  347. package/src/duckdb/src/include/duckdb.h +28 -0
  348. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  349. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  350. package/src/duckdb/src/main/config.cpp +4 -0
  351. package/src/duckdb/src/main/database.cpp +1 -1
  352. package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
  353. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  354. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  355. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  356. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  357. package/src/duckdb/src/main/relation.cpp +6 -5
  358. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  359. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  360. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  361. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  362. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  363. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  364. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  365. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  366. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  367. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  368. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  369. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  370. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  371. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  372. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  373. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  374. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  375. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  376. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  377. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  378. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  379. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  380. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  381. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  382. package/src/duckdb/src/parallel/executor.cpp +15 -0
  383. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  384. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  385. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  386. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  387. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  388. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  389. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  390. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  391. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  392. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  393. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  394. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  395. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  396. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  397. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  398. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  399. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  400. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  401. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  402. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  403. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  404. package/src/duckdb/src/parser/parser.cpp +8 -2
  405. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  406. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  407. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  408. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  409. package/src/duckdb/src/parser/query_node.cpp +15 -37
  410. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  411. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  412. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  413. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  414. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  415. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  416. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  417. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  418. package/src/duckdb/src/parser/tableref.cpp +0 -44
  419. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  420. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  421. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  422. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  423. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  424. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  425. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  426. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  427. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  428. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  429. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  430. package/src/duckdb/src/parser/transformer.cpp +15 -0
  431. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  432. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  433. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  434. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  435. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  436. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  437. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  438. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  439. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  440. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  441. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  442. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  443. package/src/duckdb/src/planner/binder.cpp +44 -31
  444. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  445. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  446. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  447. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  448. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  449. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  450. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  451. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  452. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  453. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  454. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  455. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  456. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  457. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  458. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  459. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  460. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  461. package/src/duckdb/src/storage/data_table.cpp +1 -1
  462. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  463. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  464. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  465. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  466. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  467. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  468. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  469. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  470. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  471. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  472. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  473. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  474. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  475. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  476. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  477. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  478. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  479. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  480. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  481. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  482. package/src/duckdb/ub_src_common.cpp +2 -0
  483. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  484. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  485. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  486. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  487. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  488. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  489. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  490. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  491. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  492. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  493. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  494. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  495. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  496. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  497. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  498. package/src/statement.cpp +10 -3
  499. package/test/columns.test.ts +24 -1
  500. package/test/test_all_types.test.ts +234 -0
  501. package/tsconfig.json +1 -0
  502. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  503. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  504. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -32,7 +32,7 @@ string PhysicalComparisonJoin::ParamsToString() const {
32
32
  }
33
33
  extra_info += "\n[INFOSEPARATOR]\n";
34
34
  extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
35
- extra_info += StringUtil::Format("Cost: %llu", (idx_t)estimated_props->GetCost());
35
+ extra_info += StringUtil::Format("Cost: %llu", estimated_props->GetCost<idx_t>());
36
36
  return extra_info;
37
37
  }
38
38
 
@@ -96,7 +96,7 @@ public:
96
96
  class HashJoinLocalSinkState : public LocalSinkState {
97
97
  public:
98
98
  HashJoinLocalSinkState(const PhysicalHashJoin &op, ClientContext &context) : build_executor(context) {
99
- auto &allocator = Allocator::Get(context);
99
+ auto &allocator = BufferAllocator::Get(context);
100
100
  if (!op.right_projection_map.empty()) {
101
101
  build_chunk.Initialize(allocator, op.build_types);
102
102
  }
@@ -124,7 +124,7 @@ public:
124
124
  unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
125
125
  auto result =
126
126
  make_uniq<JoinHashTable>(BufferManager::GetBufferManager(context), conditions, build_types, join_type);
127
- result->max_ht_size = double(BufferManager::GetBufferManager(context).GetMaxMemory()) * 0.6;
127
+ result->max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
128
128
  if (!delim_types.empty() && join_type == JoinType::MARK) {
129
129
  // correlated MARK join
130
130
  if (delim_types.size() + 1 == conditions.size()) {
@@ -162,7 +162,7 @@ unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &c
162
162
  payload_types.push_back(aggr->return_type);
163
163
  info.correlated_aggregates.push_back(std::move(aggr));
164
164
 
165
- auto &allocator = Allocator::Get(context);
165
+ auto &allocator = BufferAllocator::Get(context);
166
166
  info.correlated_counts = make_uniq<GroupedAggregateHashTable>(context, allocator, delim_types,
167
167
  payload_types, correlated_aggregates);
168
168
  info.correlated_types = delim_types;
@@ -312,10 +312,10 @@ void HashJoinGlobalSinkState::InitializeProbeSpill() {
312
312
  }
313
313
  }
314
314
 
315
- class HashJoinPartitionTask : public ExecutorTask {
315
+ class HashJoinRepartitionTask : public ExecutorTask {
316
316
  public:
317
- HashJoinPartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
318
- JoinHashTable &local_ht)
317
+ HashJoinRepartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
318
+ JoinHashTable &local_ht)
319
319
  : ExecutorTask(context), event(std::move(event_p)), global_ht(global_ht), local_ht(local_ht) {
320
320
  }
321
321
 
@@ -349,7 +349,7 @@ public:
349
349
  partition_tasks.reserve(local_hts.size());
350
350
  for (auto &local_ht : local_hts) {
351
351
  partition_tasks.push_back(
352
- make_uniq<HashJoinPartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
352
+ make_uniq<HashJoinRepartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
353
353
  }
354
354
  SetTasks(std::move(partition_tasks));
355
355
  }
@@ -434,7 +434,7 @@ public:
434
434
  };
435
435
 
436
436
  unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &context) const {
437
- auto &allocator = Allocator::Get(context.client);
437
+ auto &allocator = BufferAllocator::Get(context.client);
438
438
  auto &sink = sink_state->Cast<HashJoinGlobalSinkState>();
439
439
  auto state = make_uniq<HashJoinOperatorState>(context.client);
440
440
  if (sink.perfect_join_executor) {
@@ -532,7 +532,18 @@ public:
532
532
  bool AssignTask(HashJoinGlobalSinkState &sink, HashJoinLocalSourceState &lstate);
533
533
 
534
534
  idx_t MaxThreads() override {
535
- return probe_count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
535
+ D_ASSERT(op.sink_state);
536
+ auto &gstate = op.sink_state->Cast<HashJoinGlobalSinkState>();
537
+
538
+ idx_t count;
539
+ if (gstate.probe_spill) {
540
+ count = probe_count;
541
+ } else if (IsRightOuterJoin(op.join_type)) {
542
+ count = gstate.hash_table->Count();
543
+ } else {
544
+ return 0;
545
+ }
546
+ return count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
536
547
  }
537
548
 
538
549
  public:
@@ -611,7 +622,7 @@ unique_ptr<GlobalSourceState> PhysicalHashJoin::GetGlobalSourceState(ClientConte
611
622
 
612
623
  unique_ptr<LocalSourceState> PhysicalHashJoin::GetLocalSourceState(ExecutionContext &context,
613
624
  GlobalSourceState &gstate) const {
614
- return make_uniq<HashJoinLocalSourceState>(*this, Allocator::Get(context.client));
625
+ return make_uniq<HashJoinLocalSourceState>(*this, BufferAllocator::Get(context.client));
615
626
  }
616
627
 
617
628
  HashJoinGlobalSourceState::HashJoinGlobalSourceState(const PhysicalHashJoin &op, ClientContext &context)
@@ -16,7 +16,7 @@
16
16
 
17
17
  namespace duckdb {
18
18
 
19
- PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
19
+ PhysicalIEJoin::PhysicalIEJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
20
20
  unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
21
21
  idx_t estimated_cardinality)
22
22
  : PhysicalRangeJoin(op, PhysicalOperatorType::IE_JOIN, std::move(left), std::move(right), std::move(cond),
@@ -641,6 +641,8 @@ public:
641
641
  : op(op), true_sel(STANDARD_VECTOR_SIZE), left_executor(context), right_executor(context),
642
642
  left_matches(nullptr), right_matches(nullptr) {
643
643
  auto &allocator = Allocator::Get(context);
644
+ unprojected.Initialize(allocator, op.unprojected_types);
645
+
644
646
  if (op.conditions.size() < 3) {
645
647
  return;
646
648
  }
@@ -696,6 +698,8 @@ public:
696
698
  ExpressionExecutor right_executor;
697
699
  DataChunk right_keys;
698
700
 
701
+ DataChunk unprojected;
702
+
699
703
  // Outer joins
700
704
  idx_t outer_idx;
701
705
  idx_t outer_count;
@@ -703,13 +707,14 @@ public:
703
707
  bool *right_matches;
704
708
  };
705
709
 
706
- void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, LocalSourceState &state_p) const {
710
+ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &result, LocalSourceState &state_p) const {
707
711
  auto &state = state_p.Cast<IEJoinLocalSourceState>();
708
712
  auto &ie_sink = sink_state->Cast<IEJoinGlobalState>();
709
713
  auto &left_table = *ie_sink.tables[0];
710
714
  auto &right_table = *ie_sink.tables[1];
711
715
 
712
716
  const auto left_cols = children[0]->GetTypes().size();
717
+ auto &chunk = state.unprojected;
713
718
  do {
714
719
  SelectionVector lsel(STANDARD_VECTOR_SIZE);
715
720
  SelectionVector rsel(STANDARD_VECTOR_SIZE);
@@ -720,6 +725,7 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
720
725
  }
721
726
 
722
727
  // found matches: extract them
728
+
723
729
  chunk.Reset();
724
730
  SliceSortedPayload(chunk, left_table.global_sort_state, state.left_block_index, lsel, result_count, 0);
725
731
  SliceSortedPayload(chunk, right_table.global_sort_state, state.right_block_index, rsel, result_count,
@@ -762,6 +768,10 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
762
768
  }
763
769
  }
764
770
 
771
+ // We need all of the data to compute other predicates,
772
+ // but we only return what is in the projection map
773
+ ProjectResult(chunk, result);
774
+
765
775
  // found matches: mark the found matches if required
766
776
  if (left_table.found_match) {
767
777
  for (idx_t i = 0; i < result_count; i++) {
@@ -773,8 +783,8 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
773
783
  right_table.found_match[state.right_base + rsel[sel->get_index(i)]] = true;
774
784
  }
775
785
  }
776
- chunk.Verify();
777
- } while (chunk.size() == 0);
786
+ result.Verify();
787
+ } while (result.size() == 0);
778
788
  }
779
789
 
780
790
  class IEJoinGlobalSourceState : public GlobalSourceState {
@@ -961,15 +971,18 @@ SourceResultType PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &r
961
971
  ie_gstate.GetNextPair(context.client, ie_sink, ie_lstate);
962
972
  continue;
963
973
  }
964
- SliceSortedPayload(result, ie_sink.tables[0]->global_sort_state, ie_lstate.left_block_index, ie_lstate.true_sel,
974
+ auto &chunk = ie_lstate.unprojected;
975
+ chunk.Reset();
976
+ SliceSortedPayload(chunk, ie_sink.tables[0]->global_sort_state, ie_lstate.left_block_index, ie_lstate.true_sel,
965
977
  count);
966
978
 
967
979
  // Fill in NULLs to the right
968
- for (auto col_idx = left_cols; col_idx < result.ColumnCount(); ++col_idx) {
969
- result.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
970
- ConstantVector::SetNull(result.data[col_idx], true);
980
+ for (auto col_idx = left_cols; col_idx < chunk.ColumnCount(); ++col_idx) {
981
+ chunk.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
982
+ ConstantVector::SetNull(chunk.data[col_idx], true);
971
983
  }
972
984
 
985
+ ProjectResult(chunk, result);
973
986
  result.SetCardinality(count);
974
987
  result.Verify();
975
988
 
@@ -984,15 +997,18 @@ SourceResultType PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &r
984
997
  continue;
985
998
  }
986
999
 
987
- SliceSortedPayload(result, ie_sink.tables[1]->global_sort_state, ie_lstate.right_block_index,
988
- ie_lstate.true_sel, count, left_cols);
1000
+ auto &chunk = ie_lstate.unprojected;
1001
+ chunk.Reset();
1002
+ SliceSortedPayload(chunk, ie_sink.tables[1]->global_sort_state, ie_lstate.right_block_index, ie_lstate.true_sel,
1003
+ count, left_cols);
989
1004
 
990
1005
  // Fill in NULLs to the left
991
1006
  for (idx_t col_idx = 0; col_idx < left_cols; ++col_idx) {
992
- result.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
993
- ConstantVector::SetNull(result.data[col_idx], true);
1007
+ chunk.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
1008
+ ConstantVector::SetNull(chunk.data[col_idx], true);
994
1009
  }
995
1010
 
1011
+ ProjectResult(chunk, result);
996
1012
  result.SetCardinality(count);
997
1013
  result.Verify();
998
1014
 
@@ -60,7 +60,7 @@ void PhysicalJoin::BuildJoinPipelines(Pipeline &current, MetaPipeline &meta_pipe
60
60
  // Join can become a source operator if it's RIGHT/OUTER, or if the hash join goes out-of-core
61
61
  bool add_child_pipeline = false;
62
62
  auto &join_op = op.Cast<PhysicalJoin>();
63
- if (IsRightOuterJoin(join_op.join_type) || join_op.type == PhysicalOperatorType::HASH_JOIN) {
63
+ if (join_op.IsSource()) {
64
64
  add_child_pipeline = true;
65
65
  }
66
66
 
@@ -14,7 +14,7 @@
14
14
 
15
15
  namespace duckdb {
16
16
 
17
- PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
17
+ PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
18
18
  unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond,
19
19
  JoinType join_type, idx_t estimated_cardinality)
20
20
  : PhysicalRangeJoin(op, PhysicalOperatorType::PIECEWISE_MERGE_JOIN, std::move(left), std::move(right),
@@ -208,6 +208,7 @@ public:
208
208
  idx_t right_position;
209
209
  idx_t right_chunk_index;
210
210
  idx_t right_base;
211
+ idx_t prev_left_index;
211
212
 
212
213
  // Secondary predicate shared data
213
214
  SelectionVector sel;
@@ -431,7 +432,8 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
431
432
  }
432
433
  }
433
434
 
434
- static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison) {
435
+ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison,
436
+ idx_t &prev_left_index) {
435
437
  const auto cmp = MergeJoinComparisonValue(comparison);
436
438
 
437
439
  // The sort parameters should all be the same
@@ -465,6 +467,20 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
465
467
 
466
468
  idx_t result_count = 0;
467
469
  while (true) {
470
+ if (l.entry_idx < prev_left_index) {
471
+ // left side smaller: found match
472
+ l.result.set_index(result_count, sel_t(l.entry_idx));
473
+ r.result.set_index(result_count, sel_t(r.entry_idx));
474
+ result_count++;
475
+ // move left side forward
476
+ l.entry_idx++;
477
+ l_ptr += entry_size;
478
+ if (result_count == STANDARD_VECTOR_SIZE) {
479
+ // out of space!
480
+ break;
481
+ }
482
+ continue;
483
+ }
468
484
  if (l.entry_idx < l.not_null) {
469
485
  int comp_res;
470
486
  if (all_constant) {
@@ -474,7 +490,6 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
474
490
  rread.entry_idx = r.entry_idx;
475
491
  comp_res = Comparators::CompareTuple(lread, rread, l_ptr, r_ptr, l.state.sort_layout, external);
476
492
  }
477
-
478
493
  if (comp_res <= cmp) {
479
494
  // left side smaller: found match
480
495
  l.result.set_index(result_count, sel_t(l.entry_idx));
@@ -490,6 +505,8 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
490
505
  continue;
491
506
  }
492
507
  }
508
+
509
+ prev_left_index = l.entry_idx;
493
510
  // right side smaller or equal, or left side exhausted: move
494
511
  // right pointer forward reset left side to start
495
512
  r.entry_idx++;
@@ -521,6 +538,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
521
538
  state.right_chunk_index = 0;
522
539
  state.right_base = 0;
523
540
  state.left_position = 0;
541
+ state.prev_left_index = 0;
524
542
  state.right_position = 0;
525
543
  state.first_fetch = false;
526
544
  state.finished = false;
@@ -547,7 +565,8 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
547
565
  BlockMergeInfo right_info(gstate.table->global_sort_state, state.right_chunk_index, state.right_position,
548
566
  rhs_not_null);
549
567
 
550
- idx_t result_count = MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison);
568
+ idx_t result_count =
569
+ MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison, state.prev_left_index);
551
570
  if (result_count == 0) {
552
571
  // exhausted this chunk on the right side
553
572
  // move to the next right chunk
@@ -159,16 +159,16 @@ void PhysicalRangeJoin::GlobalSortedTable::Finalize(Pipeline &pipeline, Event &e
159
159
  }
160
160
  }
161
161
 
162
- PhysicalRangeJoin::PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType type, unique_ptr<PhysicalOperator> left,
163
- unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
164
- idx_t estimated_cardinality)
162
+ PhysicalRangeJoin::PhysicalRangeJoin(LogicalComparisonJoin &op, PhysicalOperatorType type,
163
+ unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right,
164
+ vector<JoinCondition> cond, JoinType join_type, idx_t estimated_cardinality)
165
165
  : PhysicalComparisonJoin(op, type, std::move(cond), join_type, estimated_cardinality) {
166
166
  // Reorder the conditions so that ranges are at the front.
167
167
  // TODO: use stats to improve the choice?
168
168
  // TODO: Prefer fixed length types?
169
169
  if (conditions.size() > 1) {
170
- auto conditions_p = std::move(conditions);
171
- conditions.resize(conditions_p.size());
170
+ vector<JoinCondition> conditions_p(conditions.size());
171
+ std::swap(conditions_p, conditions);
172
172
  idx_t range_position = 0;
173
173
  idx_t other_position = conditions_p.size();
174
174
  for (idx_t i = 0; i < conditions_p.size(); ++i) {
@@ -188,6 +188,30 @@ PhysicalRangeJoin::PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType t
188
188
 
189
189
  children.push_back(std::move(left));
190
190
  children.push_back(std::move(right));
191
+
192
+ // Fill out the left projection map.
193
+ left_projection_map = op.left_projection_map;
194
+ if (left_projection_map.empty()) {
195
+ const auto left_count = children[0]->types.size();
196
+ left_projection_map.reserve(left_count);
197
+ for (column_t i = 0; i < left_count; ++i) {
198
+ left_projection_map.emplace_back(i);
199
+ }
200
+ }
201
+ // Fill out the right projection map.
202
+ right_projection_map = op.right_projection_map;
203
+ if (right_projection_map.empty()) {
204
+ const auto right_count = children[1]->types.size();
205
+ right_projection_map.reserve(right_count);
206
+ for (column_t i = 0; i < right_count; ++i) {
207
+ right_projection_map.emplace_back(i);
208
+ }
209
+ }
210
+
211
+ // Construct the unprojected type layout from the children's types
212
+ unprojected_types = children[0]->GetTypes();
213
+ auto &types = children[1]->GetTypes();
214
+ unprojected_types.insert(unprojected_types.end(), types.begin(), types.end());
191
215
  }
192
216
 
193
217
  idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition> &conditions) {
@@ -266,6 +290,18 @@ idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition
266
290
  }
267
291
  }
268
292
 
293
+ void PhysicalRangeJoin::ProjectResult(DataChunk &chunk, DataChunk &result) const {
294
+ const auto left_projected = left_projection_map.size();
295
+ for (idx_t i = 0; i < left_projected; ++i) {
296
+ result.data[i].Reference(chunk.data[left_projection_map[i]]);
297
+ }
298
+ const auto left_width = children[0]->types.size();
299
+ for (idx_t i = 0; i < right_projection_map.size(); ++i) {
300
+ result.data[left_projected + i].Reference(chunk.data[left_width + right_projection_map[i]]);
301
+ }
302
+ result.SetCardinality(chunk);
303
+ }
304
+
269
305
  BufferHandle PhysicalRangeJoin::SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx,
270
306
  const SelectionVector &result, const idx_t result_count,
271
307
  const idx_t left_cols) {
@@ -1,5 +1,4 @@
1
1
  #include "duckdb/execution/operator/persistent/base_csv_reader.hpp"
2
-
3
2
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
4
3
  #include "duckdb/common/file_system.hpp"
5
4
  #include "duckdb/common/string_util.hpp"
@@ -10,6 +9,7 @@
10
9
  #include "duckdb/common/vector_operations/unary_executor.hpp"
11
10
  #include "duckdb/common/vector_operations/vector_operations.hpp"
12
11
  #include "duckdb/function/scalar/strftime_format.hpp"
12
+ #include "duckdb/main/appender.hpp"
13
13
  #include "duckdb/main/database.hpp"
14
14
  #include "duckdb/parser/column_definition.hpp"
15
15
  #include "duckdb/storage/data_table.hpp"
@@ -18,7 +18,8 @@
18
18
  #include "duckdb/parser/keyword_helper.hpp"
19
19
  #include "duckdb/main/error_manager.hpp"
20
20
  #include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
21
-
21
+ #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
22
+ #include "duckdb/main/client_data.hpp"
22
23
  #include <algorithm>
23
24
  #include <cctype>
24
25
  #include <cstring>
@@ -448,6 +449,17 @@ bool TryCastFloatingVectorCommaSeparated(BufferedCSVReaderOptions &options, Vect
448
449
  }
449
450
  }
450
451
 
452
+ // Location of erroneous value in the current parse chunk
453
+ struct ErrorLocation {
454
+ idx_t row_idx;
455
+ idx_t col_idx;
456
+ idx_t row_line;
457
+
458
+ ErrorLocation(idx_t row_idx, idx_t col_idx, idx_t row_line)
459
+ : row_idx(row_idx), col_idx(col_idx), row_line(row_line) {
460
+ }
461
+ };
462
+
451
463
  bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_add_line) {
452
464
  if (parse_chunk.size() == 0) {
453
465
  return true;
@@ -506,10 +518,7 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
506
518
  if (try_add_line) {
507
519
  return false;
508
520
  }
509
- if (options.ignore_errors) {
510
- conversion_error_ignored = true;
511
- continue;
512
- }
521
+
513
522
  string col_name = to_string(col_idx);
514
523
  if (col_idx < names.size()) {
515
524
  col_name = "\"" + names[col_idx] + "\"";
@@ -527,16 +536,18 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
527
536
  }
528
537
  }
529
538
 
530
- idx_t error_line;
531
539
  // The line_error must be summed with linenr (All lines emmited from this batch)
532
540
  // But subtracted from the parse_chunk
533
541
  D_ASSERT(line_error + linenr >= parse_chunk.size());
534
542
  line_error += linenr;
535
543
  line_error -= parse_chunk.size();
536
544
 
537
- error_line = GetLineError(line_error, buffer_idx);
545
+ auto error_line = GetLineError(line_error, buffer_idx);
546
+
547
+ if (options.ignore_errors) {
548
+ conversion_error_ignored = true;
538
549
 
539
- if (options.auto_detect) {
550
+ } else if (options.auto_detect) {
540
551
  throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "
541
552
  "options:\n%s.\n\nConsider either increasing the sample size "
542
553
  "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
@@ -550,11 +561,19 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
550
561
  }
551
562
  if (conversion_error_ignored) {
552
563
  D_ASSERT(options.ignore_errors);
564
+
553
565
  SelectionVector succesful_rows(parse_chunk.size());
554
566
  idx_t sel_size = 0;
555
567
 
568
+ // Keep track of failed cells
569
+ vector<ErrorLocation> failed_cells;
570
+
556
571
  for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
557
- bool failed = false;
572
+
573
+ auto global_row_idx = row_idx + linenr - parse_chunk.size();
574
+ auto row_line = GetLineError(global_row_idx, buffer_idx, false);
575
+
576
+ bool row_failed = false;
558
577
  for (idx_t c = 0; c < reader_data.column_ids.size(); c++) {
559
578
  auto col_idx = reader_data.column_ids[c];
560
579
  auto result_idx = reader_data.column_mapping[c];
@@ -564,14 +583,82 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
564
583
 
565
584
  bool was_already_null = FlatVector::IsNull(parse_vector, row_idx);
566
585
  if (!was_already_null && FlatVector::IsNull(result_vector, row_idx)) {
567
- failed = true;
568
- break;
586
+ row_failed = true;
587
+ failed_cells.emplace_back(row_idx, col_idx, row_line);
569
588
  }
570
589
  }
571
- if (!failed) {
590
+ if (!row_failed) {
572
591
  succesful_rows.set_index(sel_size++, row_idx);
573
592
  }
574
593
  }
594
+
595
+ // Now do a second pass to produce the reject table entries
596
+ if (!failed_cells.empty() && !options.rejects_table_name.empty()) {
597
+ auto limit = options.rejects_limit;
598
+
599
+ auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name);
600
+ lock_guard<mutex> lock(rejects->write_lock);
601
+
602
+ // short circuit if we already have too many rejects
603
+ if (limit == 0 || rejects->count < limit) {
604
+ auto &table = rejects->GetTable(context);
605
+ InternalAppender appender(context, table);
606
+ auto file_name = GetFileName();
607
+
608
+ for (auto &cell : failed_cells) {
609
+ if (limit != 0 && rejects->count >= limit) {
610
+ break;
611
+ }
612
+ rejects->count++;
613
+
614
+ auto row_idx = cell.row_idx;
615
+ auto col_idx = cell.col_idx;
616
+ auto row_line = cell.row_line;
617
+
618
+ auto col_name = to_string(col_idx);
619
+ if (col_idx < names.size()) {
620
+ col_name = "\"" + names[col_idx] + "\"";
621
+ }
622
+
623
+ auto &parse_vector = parse_chunk.data[col_idx];
624
+ auto parsed_str = FlatVector::GetData<string_t>(parse_vector)[row_idx];
625
+ auto &type = insert_chunk.data[col_idx].GetType();
626
+ auto row_error_msg = StringUtil::Format("Could not convert string '%s' to '%s'",
627
+ parsed_str.GetString(), type.ToString());
628
+
629
+ // Add the row to the rejects table
630
+ appender.BeginRow();
631
+ appender.Append(string_t(file_name));
632
+ appender.Append(row_line);
633
+ appender.Append(col_idx);
634
+ appender.Append(string_t(col_name));
635
+ appender.Append(parsed_str);
636
+
637
+ if (!options.rejects_recovery_columns.empty()) {
638
+ child_list_t<Value> recovery_key;
639
+ for (auto &key_idx : options.rejects_recovery_column_ids) {
640
+ // Figure out if the recovery key is valid.
641
+ // If not, error out for real.
642
+ auto &component_vector = parse_chunk.data[key_idx];
643
+ if (FlatVector::IsNull(component_vector, row_idx)) {
644
+ throw InvalidInputException("%s at line %llu in column %s. Parser options:\n%s ",
645
+ "Could not parse recovery column", row_line, col_name,
646
+ options.ToString());
647
+ }
648
+ auto component = Value(FlatVector::GetData<string_t>(component_vector)[row_idx]);
649
+ recovery_key.emplace_back(names[key_idx], component);
650
+ }
651
+ appender.Append(Value::STRUCT(recovery_key));
652
+ }
653
+
654
+ appender.Append(string_t(row_error_msg));
655
+ appender.EndRow();
656
+ }
657
+ appender.Close();
658
+ }
659
+ }
660
+
661
+ // Now slice the insert chunk to only include the succesful rows
575
662
  insert_chunk.Slice(succesful_rows, sel_size);
576
663
  }
577
664
  parse_chunk.Reset();
@@ -13,7 +13,7 @@ CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<Fi
13
13
 
14
14
  unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
15
15
  FileCompressionType compression) {
16
- auto file_handle = fs.OpenFile(path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
16
+ auto file_handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
17
17
  if (file_handle->CanSeek()) {
18
18
  file_handle->Reset();
19
19
  }
@@ -179,6 +179,26 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
179
179
  allow_quoted_nulls = ParseBoolean(value, loption);
180
180
  } else if (loption == "parallel") {
181
181
  parallel_mode = ParseBoolean(value, loption) ? ParallelMode::PARALLEL : ParallelMode::SINGLE_THREADED;
182
+ } else if (loption == "rejects_table") {
183
+ // Parse the rejects table name, reject an empty name, and store it
184
+ auto table_name = ParseString(value, loption);
185
+ if (table_name.empty()) {
186
+ throw BinderException("REJECTS_TABLE option cannot be empty");
187
+ }
188
+ rejects_table_name = table_name;
189
+ } else if (loption == "rejects_recovery_columns") {
190
+ // Get the list of columns to use as a recovery key
191
+ auto &children = ListValue::GetChildren(value);
192
+ for (auto &child : children) {
193
+ auto col_name = child.GetValue<string>();
194
+ rejects_recovery_columns.push_back(col_name);
195
+ }
196
+ } else if (loption == "rejects_limit") {
197
+ int64_t limit = ParseInteger(value, loption);
198
+ if (limit < 0) {
199
+ throw BinderException("Unsupported parameter for REJECTS_LIMIT: cannot be negative");
200
+ }
201
+ rejects_limit = limit;
182
202
  } else {
183
203
  throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
184
204
  }
@@ -0,0 +1,48 @@
1
+ #include "duckdb/main/appender.hpp"
2
+ #include "duckdb/parser/parsed_data/create_table_info.hpp"
3
+ #include "duckdb/function/table/read_csv.hpp"
4
+ #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
5
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) {
10
+ auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
11
+ auto &table_entry = temp_catalog.GetEntry<TableCatalogEntry>(context, TEMP_CATALOG, DEFAULT_SCHEMA, name);
12
+ return table_entry;
13
+ }
14
+
15
+ shared_ptr<CSVRejectsTable> CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) {
16
+ auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name);
17
+ auto &cache = ObjectCache::GetObjectCache(context);
18
+ return cache.GetOrCreate<CSVRejectsTable>(key, name);
19
+ }
20
+
21
+ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) {
22
+ // Create the temporary rejects table (on_conflict is ERROR_ON_CONFLICT, so an existing table is presumably not replaced)
23
+ auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
24
+ auto info = make_uniq<CreateTableInfo>(TEMP_CATALOG, DEFAULT_SCHEMA, name);
25
+ info->temporary = true;
26
+ info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT;
27
+ info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR));
28
+ info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT));
29
+ info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT));
30
+ info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR));
31
+ info->columns.AddColumn(ColumnDefinition("parsed_value", LogicalType::VARCHAR));
32
+
33
+ if (!data.options.rejects_recovery_columns.empty()) {
34
+ child_list_t<LogicalType> recovery_key_components;
35
+ for (auto &col_name : data.options.rejects_recovery_columns) {
36
+ recovery_key_components.emplace_back(col_name, LogicalType::VARCHAR);
37
+ }
38
+ info->columns.AddColumn(ColumnDefinition("recovery_columns", LogicalType::STRUCT(recovery_key_components)));
39
+ }
40
+
41
+ info->columns.AddColumn(ColumnDefinition("error", LogicalType::VARCHAR));
42
+
43
+ catalog.CreateTable(context, std::move(info));
44
+
45
+ count = 0;
46
+ }
47
+
48
+ } // namespace duckdb
@@ -635,12 +635,11 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
635
635
  }
636
636
  }
637
637
 
638
- idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
638
+ idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first) {
639
639
  while (true) {
640
640
  if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
641
641
  auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
642
- // line errors are 1-indexed
643
- return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
642
+ return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false, stop_at_first);
644
643
  }
645
644
  }
646
645
  }