duckdb 0.8.2-dev11.0 → 0.8.2-dev1182.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (381)
  1. package/binding.gyp +14 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/lib/duckdb.d.ts +59 -0
  6. package/lib/duckdb.js +21 -0
  7. package/package.json +1 -1
  8. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  10. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  11. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  12. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  14. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  15. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  16. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  17. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  18. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  19. package/src/duckdb/extension/icu/icu_extension.cpp +5 -7
  20. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  21. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  22. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  23. package/src/duckdb/extension/json/json_common.cpp +272 -40
  24. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  26. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  27. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  28. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  29. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  30. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  31. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  32. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  33. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  34. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  35. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  37. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  39. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  40. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  41. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  42. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  43. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  44. package/src/duckdb/extension/parquet/parquet_extension.cpp +191 -19
  45. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -5
  46. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  47. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  48. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  49. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -10
  55. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +42 -5
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/file_system.cpp +15 -0
  62. package/src/duckdb/src/common/local_file_system.cpp +1 -1
  63. package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
  64. package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
  65. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  66. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  67. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  68. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  69. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  70. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  71. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  72. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  73. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  74. package/src/duckdb/src/common/types/date.cpp +9 -0
  75. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  76. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  77. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  78. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  79. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  80. package/src/duckdb/src/common/types/vector.cpp +15 -14
  81. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
  82. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  83. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  84. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  85. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  86. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  88. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  89. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  90. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
  91. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  92. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  93. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  94. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  95. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  96. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  97. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  98. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  99. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  100. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  101. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  102. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  103. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  104. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  105. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  106. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  107. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  108. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  109. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  110. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  111. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +413 -282
  112. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  113. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  114. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  115. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  116. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  117. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  118. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  119. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  120. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  121. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
  122. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  123. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  124. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  125. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  126. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  127. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  128. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  129. package/src/duckdb/src/execution/physical_operator.cpp +17 -14
  130. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  131. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  132. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  133. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  134. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  135. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  136. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  137. package/src/duckdb/src/function/function.cpp +2 -0
  138. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  139. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  140. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  141. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  142. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  143. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  144. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  145. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  146. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  147. package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
  148. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  149. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  150. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  151. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  152. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  153. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  154. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  155. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  156. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  157. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  158. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  159. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  160. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  161. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  162. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  163. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  164. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  165. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  166. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  167. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  168. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  169. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  170. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  171. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  172. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  173. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  174. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  175. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  176. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  177. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  178. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  179. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  180. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  181. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  182. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
  183. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  184. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  185. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  186. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  187. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  188. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  189. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  190. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  191. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  192. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  193. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  194. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  195. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -1
  196. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  197. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  198. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  199. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  200. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  201. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  202. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  203. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  204. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
  205. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  206. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  207. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  208. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  209. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  210. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  211. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  212. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  213. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  214. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  215. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
  216. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  217. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  218. package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
  219. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  220. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  221. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  222. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  223. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  224. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  225. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  226. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  227. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  228. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  229. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  230. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  231. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  232. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  233. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  234. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  235. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  236. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  237. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  238. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  239. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  240. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  241. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  242. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  243. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  244. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  246. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  247. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  248. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  249. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  250. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  251. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  252. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  253. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  254. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  255. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  256. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  257. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  258. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  259. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  260. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  262. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  263. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  264. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  265. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  266. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  267. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  268. package/src/duckdb/src/include/duckdb.h +28 -0
  269. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  270. package/src/duckdb/src/main/config.cpp +2 -0
  271. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  272. package/src/duckdb/src/main/settings/settings.cpp +40 -18
  273. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  274. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  275. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  276. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  277. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  278. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  279. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  280. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  281. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  282. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  283. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  284. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  285. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  286. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  287. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  288. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  289. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  290. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  291. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  292. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  293. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  294. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  295. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  296. package/src/duckdb/src/parallel/executor.cpp +15 -0
  297. package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
  298. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  299. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  300. package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
  301. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  302. package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
  303. package/src/duckdb/src/parser/query_node.cpp +18 -1
  304. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
  305. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  306. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  307. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  308. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  309. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  310. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  311. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  312. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  313. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  314. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  315. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  316. package/src/duckdb/src/parser/transformer.cpp +15 -0
  317. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  318. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  319. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  320. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  321. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  322. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  323. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  324. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  325. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  326. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  327. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  328. package/src/duckdb/src/planner/binder.cpp +5 -0
  329. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  330. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  331. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  332. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  333. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  334. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  335. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  336. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  337. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  338. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  339. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  340. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  341. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  342. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  343. package/src/duckdb/src/storage/data_table.cpp +1 -1
  344. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  345. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  346. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  347. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  348. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  349. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  350. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  351. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  352. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  353. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  354. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  355. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  356. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  357. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  358. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  359. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  360. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  361. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  362. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  363. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  364. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  365. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  366. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  367. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  368. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  369. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  370. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  371. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  372. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  373. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  374. package/src/duckdb_node.hpp +1 -0
  375. package/src/statement.cpp +103 -4
  376. package/test/columns.test.ts +243 -0
  377. package/test/test_all_types.test.ts +233 -0
  378. package/tsconfig.json +1 -0
  379. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  380. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  381. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -32,7 +32,7 @@ string PhysicalComparisonJoin::ParamsToString() const {
32
32
  }
33
33
  extra_info += "\n[INFOSEPARATOR]\n";
34
34
  extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
35
- extra_info += StringUtil::Format("Cost: %llu", (idx_t)estimated_props->GetCost());
35
+ extra_info += StringUtil::Format("Cost: %llu", estimated_props->GetCost<idx_t>());
36
36
  return extra_info;
37
37
  }
38
38
 
@@ -96,7 +96,7 @@ public:
96
96
  class HashJoinLocalSinkState : public LocalSinkState {
97
97
  public:
98
98
  HashJoinLocalSinkState(const PhysicalHashJoin &op, ClientContext &context) : build_executor(context) {
99
- auto &allocator = Allocator::Get(context);
99
+ auto &allocator = BufferAllocator::Get(context);
100
100
  if (!op.right_projection_map.empty()) {
101
101
  build_chunk.Initialize(allocator, op.build_types);
102
102
  }
@@ -124,7 +124,7 @@ public:
124
124
  unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
125
125
  auto result =
126
126
  make_uniq<JoinHashTable>(BufferManager::GetBufferManager(context), conditions, build_types, join_type);
127
- result->max_ht_size = double(BufferManager::GetBufferManager(context).GetMaxMemory()) * 0.6;
127
+ result->max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
128
128
  if (!delim_types.empty() && join_type == JoinType::MARK) {
129
129
  // correlated MARK join
130
130
  if (delim_types.size() + 1 == conditions.size()) {
@@ -162,7 +162,7 @@ unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &c
162
162
  payload_types.push_back(aggr->return_type);
163
163
  info.correlated_aggregates.push_back(std::move(aggr));
164
164
 
165
- auto &allocator = Allocator::Get(context);
165
+ auto &allocator = BufferAllocator::Get(context);
166
166
  info.correlated_counts = make_uniq<GroupedAggregateHashTable>(context, allocator, delim_types,
167
167
  payload_types, correlated_aggregates);
168
168
  info.correlated_types = delim_types;
@@ -312,10 +312,10 @@ void HashJoinGlobalSinkState::InitializeProbeSpill() {
312
312
  }
313
313
  }
314
314
 
315
- class HashJoinPartitionTask : public ExecutorTask {
315
+ class HashJoinRepartitionTask : public ExecutorTask {
316
316
  public:
317
- HashJoinPartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
318
- JoinHashTable &local_ht)
317
+ HashJoinRepartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
318
+ JoinHashTable &local_ht)
319
319
  : ExecutorTask(context), event(std::move(event_p)), global_ht(global_ht), local_ht(local_ht) {
320
320
  }
321
321
 
@@ -349,7 +349,7 @@ public:
349
349
  partition_tasks.reserve(local_hts.size());
350
350
  for (auto &local_ht : local_hts) {
351
351
  partition_tasks.push_back(
352
- make_uniq<HashJoinPartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
352
+ make_uniq<HashJoinRepartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
353
353
  }
354
354
  SetTasks(std::move(partition_tasks));
355
355
  }
@@ -434,7 +434,7 @@ public:
434
434
  };
435
435
 
436
436
  unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &context) const {
437
- auto &allocator = Allocator::Get(context.client);
437
+ auto &allocator = BufferAllocator::Get(context.client);
438
438
  auto &sink = sink_state->Cast<HashJoinGlobalSinkState>();
439
439
  auto state = make_uniq<HashJoinOperatorState>(context.client);
440
440
  if (sink.perfect_join_executor) {
@@ -532,7 +532,18 @@ public:
532
532
  bool AssignTask(HashJoinGlobalSinkState &sink, HashJoinLocalSourceState &lstate);
533
533
 
534
534
  idx_t MaxThreads() override {
535
- return probe_count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
535
+ D_ASSERT(op.sink_state);
536
+ auto &gstate = op.sink_state->Cast<HashJoinGlobalSinkState>();
537
+
538
+ idx_t count;
539
+ if (gstate.probe_spill) {
540
+ count = probe_count;
541
+ } else if (IsRightOuterJoin(op.join_type)) {
542
+ count = gstate.hash_table->Count();
543
+ } else {
544
+ return 0;
545
+ }
546
+ return count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
536
547
  }
537
548
 
538
549
  public:
@@ -611,7 +622,7 @@ unique_ptr<GlobalSourceState> PhysicalHashJoin::GetGlobalSourceState(ClientConte
611
622
 
612
623
  unique_ptr<LocalSourceState> PhysicalHashJoin::GetLocalSourceState(ExecutionContext &context,
613
624
  GlobalSourceState &gstate) const {
614
- return make_uniq<HashJoinLocalSourceState>(*this, Allocator::Get(context.client));
625
+ return make_uniq<HashJoinLocalSourceState>(*this, BufferAllocator::Get(context.client));
615
626
  }
616
627
 
617
628
  HashJoinGlobalSourceState::HashJoinGlobalSourceState(const PhysicalHashJoin &op, ClientContext &context)
@@ -60,7 +60,7 @@ void PhysicalJoin::BuildJoinPipelines(Pipeline &current, MetaPipeline &meta_pipe
60
60
  // Join can become a source operator if it's RIGHT/OUTER, or if the hash join goes out-of-core
61
61
  bool add_child_pipeline = false;
62
62
  auto &join_op = op.Cast<PhysicalJoin>();
63
- if (IsRightOuterJoin(join_op.join_type) || join_op.type == PhysicalOperatorType::HASH_JOIN) {
63
+ if (join_op.IsSource()) {
64
64
  add_child_pipeline = true;
65
65
  }
66
66
 
@@ -208,6 +208,7 @@ public:
208
208
  idx_t right_position;
209
209
  idx_t right_chunk_index;
210
210
  idx_t right_base;
211
+ idx_t prev_left_index;
211
212
 
212
213
  // Secondary predicate shared data
213
214
  SelectionVector sel;
@@ -431,7 +432,8 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
431
432
  }
432
433
  }
433
434
 
434
- static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison) {
435
+ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison,
436
+ idx_t &prev_left_index) {
435
437
  const auto cmp = MergeJoinComparisonValue(comparison);
436
438
 
437
439
  // The sort parameters should all be the same
@@ -465,6 +467,20 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
465
467
 
466
468
  idx_t result_count = 0;
467
469
  while (true) {
470
+ if (l.entry_idx < prev_left_index) {
471
+ // left side smaller: found match
472
+ l.result.set_index(result_count, sel_t(l.entry_idx));
473
+ r.result.set_index(result_count, sel_t(r.entry_idx));
474
+ result_count++;
475
+ // move left side forward
476
+ l.entry_idx++;
477
+ l_ptr += entry_size;
478
+ if (result_count == STANDARD_VECTOR_SIZE) {
479
+ // out of space!
480
+ break;
481
+ }
482
+ continue;
483
+ }
468
484
  if (l.entry_idx < l.not_null) {
469
485
  int comp_res;
470
486
  if (all_constant) {
@@ -474,7 +490,6 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
474
490
  rread.entry_idx = r.entry_idx;
475
491
  comp_res = Comparators::CompareTuple(lread, rread, l_ptr, r_ptr, l.state.sort_layout, external);
476
492
  }
477
-
478
493
  if (comp_res <= cmp) {
479
494
  // left side smaller: found match
480
495
  l.result.set_index(result_count, sel_t(l.entry_idx));
@@ -490,6 +505,8 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
490
505
  continue;
491
506
  }
492
507
  }
508
+
509
+ prev_left_index = l.entry_idx;
493
510
  // right side smaller or equal, or left side exhausted: move
494
511
  // right pointer forward reset left side to start
495
512
  r.entry_idx++;
@@ -521,6 +538,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
521
538
  state.right_chunk_index = 0;
522
539
  state.right_base = 0;
523
540
  state.left_position = 0;
541
+ state.prev_left_index = 0;
524
542
  state.right_position = 0;
525
543
  state.first_fetch = false;
526
544
  state.finished = false;
@@ -547,7 +565,8 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
547
565
  BlockMergeInfo right_info(gstate.table->global_sort_state, state.right_chunk_index, state.right_position,
548
566
  rhs_not_null);
549
567
 
550
- idx_t result_count = MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison);
568
+ idx_t result_count =
569
+ MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison, state.prev_left_index);
551
570
  if (result_count == 0) {
552
571
  // exhausted this chunk on the right side
553
572
  // move to the next right chunk
@@ -1,5 +1,4 @@
1
1
  #include "duckdb/execution/operator/persistent/base_csv_reader.hpp"
2
-
3
2
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
4
3
  #include "duckdb/common/file_system.hpp"
5
4
  #include "duckdb/common/string_util.hpp"
@@ -10,6 +9,7 @@
10
9
  #include "duckdb/common/vector_operations/unary_executor.hpp"
11
10
  #include "duckdb/common/vector_operations/vector_operations.hpp"
12
11
  #include "duckdb/function/scalar/strftime_format.hpp"
12
+ #include "duckdb/main/appender.hpp"
13
13
  #include "duckdb/main/database.hpp"
14
14
  #include "duckdb/parser/column_definition.hpp"
15
15
  #include "duckdb/storage/data_table.hpp"
@@ -18,7 +18,8 @@
18
18
  #include "duckdb/parser/keyword_helper.hpp"
19
19
  #include "duckdb/main/error_manager.hpp"
20
20
  #include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
21
-
21
+ #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
22
+ #include "duckdb/main/client_data.hpp"
22
23
  #include <algorithm>
23
24
  #include <cctype>
24
25
  #include <cstring>
@@ -448,6 +449,17 @@ bool TryCastFloatingVectorCommaSeparated(BufferedCSVReaderOptions &options, Vect
448
449
  }
449
450
  }
450
451
 
452
+ // Location of erroneous value in the current parse chunk
453
+ struct ErrorLocation {
454
+ idx_t row_idx;
455
+ idx_t col_idx;
456
+ idx_t row_line;
457
+
458
+ ErrorLocation(idx_t row_idx, idx_t col_idx, idx_t row_line)
459
+ : row_idx(row_idx), col_idx(col_idx), row_line(row_line) {
460
+ }
461
+ };
462
+
451
463
  bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_add_line) {
452
464
  if (parse_chunk.size() == 0) {
453
465
  return true;
@@ -506,10 +518,7 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
506
518
  if (try_add_line) {
507
519
  return false;
508
520
  }
509
- if (options.ignore_errors) {
510
- conversion_error_ignored = true;
511
- continue;
512
- }
521
+
513
522
  string col_name = to_string(col_idx);
514
523
  if (col_idx < names.size()) {
515
524
  col_name = "\"" + names[col_idx] + "\"";
@@ -527,16 +536,18 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
527
536
  }
528
537
  }
529
538
 
530
- idx_t error_line;
531
539
  // The line_error must be summed with linenr (All lines emitted from this batch)
532
540
  // But subtracted from the parse_chunk
533
541
  D_ASSERT(line_error + linenr >= parse_chunk.size());
534
542
  line_error += linenr;
535
543
  line_error -= parse_chunk.size();
536
544
 
537
- error_line = GetLineError(line_error, buffer_idx);
545
+ auto error_line = GetLineError(line_error, buffer_idx);
546
+
547
+ if (options.ignore_errors) {
548
+ conversion_error_ignored = true;
538
549
 
539
- if (options.auto_detect) {
550
+ } else if (options.auto_detect) {
540
551
  throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "
541
552
  "options:\n%s.\n\nConsider either increasing the sample size "
542
553
  "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
@@ -550,11 +561,19 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
550
561
  }
551
562
  if (conversion_error_ignored) {
552
563
  D_ASSERT(options.ignore_errors);
564
+
553
565
  SelectionVector succesful_rows(parse_chunk.size());
554
566
  idx_t sel_size = 0;
555
567
 
568
+ // Keep track of failed cells
569
+ vector<ErrorLocation> failed_cells;
570
+
556
571
  for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
557
- bool failed = false;
572
+
573
+ auto global_row_idx = row_idx + linenr - parse_chunk.size();
574
+ auto row_line = GetLineError(global_row_idx, buffer_idx, false);
575
+
576
+ bool row_failed = false;
558
577
  for (idx_t c = 0; c < reader_data.column_ids.size(); c++) {
559
578
  auto col_idx = reader_data.column_ids[c];
560
579
  auto result_idx = reader_data.column_mapping[c];
@@ -564,14 +583,82 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
564
583
 
565
584
  bool was_already_null = FlatVector::IsNull(parse_vector, row_idx);
566
585
  if (!was_already_null && FlatVector::IsNull(result_vector, row_idx)) {
567
- failed = true;
568
- break;
586
+ row_failed = true;
587
+ failed_cells.emplace_back(row_idx, col_idx, row_line);
569
588
  }
570
589
  }
571
- if (!failed) {
590
+ if (!row_failed) {
572
591
  succesful_rows.set_index(sel_size++, row_idx);
573
592
  }
574
593
  }
594
+
595
+ // Now do a second pass to produce the reject table entries
596
+ if (!failed_cells.empty() && !options.rejects_table_name.empty()) {
597
+ auto limit = options.rejects_limit;
598
+
599
+ auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name);
600
+ lock_guard<mutex> lock(rejects->write_lock);
601
+
602
+ // short circuit if we already have too many rejects
603
+ if (limit == 0 || rejects->count < limit) {
604
+ auto &table = rejects->GetTable(context);
605
+ InternalAppender appender(context, table);
606
+ auto file_name = GetFileName();
607
+
608
+ for (auto &cell : failed_cells) {
609
+ if (limit != 0 && rejects->count >= limit) {
610
+ break;
611
+ }
612
+ rejects->count++;
613
+
614
+ auto row_idx = cell.row_idx;
615
+ auto col_idx = cell.col_idx;
616
+ auto row_line = cell.row_line;
617
+
618
+ auto col_name = to_string(col_idx);
619
+ if (col_idx < names.size()) {
620
+ col_name = "\"" + names[col_idx] + "\"";
621
+ }
622
+
623
+ auto &parse_vector = parse_chunk.data[col_idx];
624
+ auto parsed_str = FlatVector::GetData<string_t>(parse_vector)[row_idx];
625
+ auto &type = insert_chunk.data[col_idx].GetType();
626
+ auto row_error_msg = StringUtil::Format("Could not convert string '%s' to '%s'",
627
+ parsed_str.GetString(), type.ToString());
628
+
629
+ // Add the row to the rejects table
630
+ appender.BeginRow();
631
+ appender.Append(string_t(file_name));
632
+ appender.Append(row_line);
633
+ appender.Append(col_idx);
634
+ appender.Append(string_t(col_name));
635
+ appender.Append(parsed_str);
636
+
637
+ if (!options.rejects_recovery_columns.empty()) {
638
+ child_list_t<Value> recovery_key;
639
+ for (auto &key_idx : options.rejects_recovery_column_ids) {
640
+ // Figure out if the recovery key is valid.
641
+ // If not, error out for real.
642
+ auto &component_vector = parse_chunk.data[key_idx];
643
+ if (FlatVector::IsNull(component_vector, row_idx)) {
644
+ throw InvalidInputException("%s at line %llu in column %s. Parser options:\n%s ",
645
+ "Could not parse recovery column", row_line, col_name,
646
+ options.ToString());
647
+ }
648
+ auto component = Value(FlatVector::GetData<string_t>(component_vector)[row_idx]);
649
+ recovery_key.emplace_back(names[key_idx], component);
650
+ }
651
+ appender.Append(Value::STRUCT(recovery_key));
652
+ }
653
+
654
+ appender.Append(string_t(row_error_msg));
655
+ appender.EndRow();
656
+ }
657
+ appender.Close();
658
+ }
659
+ }
660
+
661
+ // Now slice the insert chunk to only include the successful rows
575
662
  insert_chunk.Slice(succesful_rows, sel_size);
576
663
  }
577
664
  parse_chunk.Reset();
@@ -179,6 +179,26 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
179
179
  allow_quoted_nulls = ParseBoolean(value, loption);
180
180
  } else if (loption == "parallel") {
181
181
  parallel_mode = ParseBoolean(value, loption) ? ParallelMode::PARALLEL : ParallelMode::SINGLE_THREADED;
182
+ } else if (loption == "rejects_table") {
183
+ // skip, handled in SetRejectsOptions
184
+ auto table_name = ParseString(value, loption);
185
+ if (table_name.empty()) {
186
+ throw BinderException("REJECTS_TABLE option cannot be empty");
187
+ }
188
+ rejects_table_name = table_name;
189
+ } else if (loption == "rejects_recovery_columns") {
190
+ // Get the list of columns to use as a recovery key
191
+ auto &children = ListValue::GetChildren(value);
192
+ for (auto &child : children) {
193
+ auto col_name = child.GetValue<string>();
194
+ rejects_recovery_columns.push_back(col_name);
195
+ }
196
+ } else if (loption == "rejects_limit") {
197
+ int64_t limit = ParseInteger(value, loption);
198
+ if (limit < 0) {
199
+ throw BinderException("Unsupported parameter for REJECTS_LIMIT: cannot be negative");
200
+ }
201
+ rejects_limit = limit;
182
202
  } else {
183
203
  throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
184
204
  }
@@ -0,0 +1,48 @@
1
+ #include "duckdb/main/appender.hpp"
2
+ #include "duckdb/parser/parsed_data/create_table_info.hpp"
3
+ #include "duckdb/function/table/read_csv.hpp"
4
+ #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
5
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) {
10
+ auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
11
+ auto &table_entry = temp_catalog.GetEntry<TableCatalogEntry>(context, TEMP_CATALOG, DEFAULT_SCHEMA, name);
12
+ return table_entry;
13
+ }
14
+
15
+ shared_ptr<CSVRejectsTable> CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) {
16
+ auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name);
17
+ auto &cache = ObjectCache::GetObjectCache(context);
18
+ return cache.GetOrCreate<CSVRejectsTable>(key, name);
19
+ }
20
+
21
+ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) {
22
+ // (Re)Create the temporary rejects table
23
+ auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
24
+ auto info = make_uniq<CreateTableInfo>(TEMP_CATALOG, DEFAULT_SCHEMA, name);
25
+ info->temporary = true;
26
+ info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT;
27
+ info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR));
28
+ info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT));
29
+ info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT));
30
+ info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR));
31
+ info->columns.AddColumn(ColumnDefinition("parsed_value", LogicalType::VARCHAR));
32
+
33
+ if (!data.options.rejects_recovery_columns.empty()) {
34
+ child_list_t<LogicalType> recovery_key_components;
35
+ for (auto &col_name : data.options.rejects_recovery_columns) {
36
+ recovery_key_components.emplace_back(col_name, LogicalType::VARCHAR);
37
+ }
38
+ info->columns.AddColumn(ColumnDefinition("recovery_columns", LogicalType::STRUCT(recovery_key_components)));
39
+ }
40
+
41
+ info->columns.AddColumn(ColumnDefinition("error", LogicalType::VARCHAR));
42
+
43
+ catalog.CreateTable(context, std::move(info));
44
+
45
+ count = 0;
46
+ }
47
+
48
+ } // namespace duckdb
@@ -635,12 +635,11 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
635
635
  }
636
636
  }
637
637
 
638
- idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
638
+ idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first) {
639
639
  while (true) {
640
640
  if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
641
641
  auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
642
- // line errors are 1-indexed
643
- return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
642
+ return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false, stop_at_first);
644
643
  }
645
644
  }
646
645
  }
@@ -1,9 +1,11 @@
1
1
  #include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
2
+
3
+ #include "duckdb/common/allocator.hpp"
4
+ #include "duckdb/common/types/batched_data_collection.hpp"
5
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
2
6
  #include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
3
7
  #include "duckdb/parallel/base_pipeline_event.hpp"
4
- #include "duckdb/common/vector_operations/vector_operations.hpp"
5
- #include "duckdb/common/types/batched_data_collection.hpp"
6
- #include "duckdb/common/allocator.hpp"
8
+
7
9
  #include <algorithm>
8
10
 
9
11
  namespace duckdb {
@@ -67,7 +69,7 @@ public:
67
69
  optional_idx batch_index;
68
70
 
69
71
  void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
70
- collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
72
+ collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
71
73
  collection->InitializeAppend(append_state);
72
74
  }
73
75
  };
@@ -327,10 +327,11 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &c
327
327
  // no collection yet: create a new one
328
328
  lstate.CreateNewCollection(table, insert_types);
329
329
  lstate.writer = &table.GetStorage().CreateOptimisticWriter(context.client);
330
- } else if (lstate.current_index != batch_index) {
330
+ }
331
+
332
+ if (lstate.current_index != batch_index) {
331
333
  throw InternalException("Current batch differs from batch - but NextBatch was not called!?");
332
334
  }
333
- lstate.current_index = batch_index;
334
335
 
335
336
  table.GetStorage().VerifyAppendConstraints(table, context.client, lstate.insert_chunk);
336
337
 
@@ -116,7 +116,7 @@ public:
116
116
  optional_idx batch_index;
117
117
 
118
118
  void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
119
- collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
119
+ collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
120
120
  collection->InitializeAppend(append_state);
121
121
  }
122
122
  };
@@ -353,7 +353,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
353
353
  } else {
354
354
  // the collection is too large for a batch - we need to repartition
355
355
  // create an empty collection
356
- current_collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), children[0]->types);
356
+ current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
357
357
  }
358
358
  if (current_collection) {
359
359
  current_collection->InitializeAppend(append_state);
@@ -373,7 +373,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
373
373
  }
374
374
  // the collection is full - move it to the result and create a new one
375
375
  gstate.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(current_collection)));
376
- current_collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), children[0]->types);
376
+ current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
377
377
  current_collection->InitializeAppend(append_state);
378
378
  }
379
379
  }
@@ -16,6 +16,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
16
16
  pivot_map[bound_pivot.pivot_values[p]] = bound_pivot.group_count + p;
17
17
  }
18
18
  // extract the empty aggregate expressions
19
+ ArenaAllocator allocator(Allocator::DefaultAllocator());
19
20
  for (auto &aggr_expr : bound_pivot.aggregates) {
20
21
  auto &aggr = aggr_expr->Cast<BoundAggregateExpression>();
21
22
  // for each aggregate, initialize an empty aggregate state and finalize it immediately
@@ -23,7 +24,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
23
24
  aggr.function.initialize(state.get());
24
25
  Vector state_vector(Value::POINTER(CastPointerToValue(state.get())));
25
26
  Vector result_vector(aggr_expr->return_type);
26
- AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
27
+ AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
27
28
  aggr.function.finalize(state_vector, aggr_input_data, result_vector, 1, 0);
28
29
  empty_aggregates.push_back(result_vector.GetValue(0));
29
30
  }
@@ -64,6 +64,9 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
64
64
  state.SetPipelineSource(current, delim_join.distinct->Cast<PhysicalOperator>());
65
65
  return;
66
66
  }
67
+ case PhysicalOperatorType::CTE_SCAN: {
68
+ break;
69
+ }
67
70
  case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
68
71
  if (!meta_pipeline.HasRecursiveCTE()) {
69
72
  throw InternalException("Recursive CTE scan found without recursive CTE node");
@@ -76,4 +79,20 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
76
79
  state.SetPipelineSource(current, *this);
77
80
  }
78
81
 
82
+ string PhysicalColumnDataScan::ParamsToString() const {
83
+ string result = "";
84
+ switch (type) {
85
+ case PhysicalOperatorType::CTE_SCAN:
86
+ case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
87
+ result += "\n[INFOSEPARATOR]\n";
88
+ result += StringUtil::Format("idx: %llu", cte_index);
89
+ break;
90
+ }
91
+ default:
92
+ break;
93
+ }
94
+
95
+ return result;
96
+ }
97
+
79
98
  } // namespace duckdb