duckdb 0.8.2-dev11.0 → 0.8.2-dev1182.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (381):
  1. package/binding.gyp +14 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/lib/duckdb.d.ts +59 -0
  6. package/lib/duckdb.js +21 -0
  7. package/package.json +1 -1
  8. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  10. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  11. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  12. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  14. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  15. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  16. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  17. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  18. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  19. package/src/duckdb/extension/icu/icu_extension.cpp +5 -7
  20. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  21. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  22. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  23. package/src/duckdb/extension/json/json_common.cpp +272 -40
  24. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  26. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  27. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  28. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  29. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  30. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  31. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  32. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  33. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  34. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  35. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  37. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  39. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  40. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  41. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  42. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  43. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  44. package/src/duckdb/extension/parquet/parquet_extension.cpp +191 -19
  45. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -5
  46. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  47. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  48. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  49. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -10
  55. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +42 -5
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/file_system.cpp +15 -0
  62. package/src/duckdb/src/common/local_file_system.cpp +1 -1
  63. package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
  64. package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
  65. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  66. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  67. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  68. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  69. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  70. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  71. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  72. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  73. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  74. package/src/duckdb/src/common/types/date.cpp +9 -0
  75. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  76. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  77. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  78. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  79. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  80. package/src/duckdb/src/common/types/vector.cpp +15 -14
  81. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
  82. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  83. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  84. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  85. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  86. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  88. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  89. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  90. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
  91. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  92. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  93. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  94. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  95. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  96. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  97. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  98. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  99. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  100. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  101. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  102. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  103. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  104. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  105. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  106. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  107. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  108. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  109. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  110. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  111. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +413 -282
  112. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  113. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  114. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  115. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  116. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  117. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  118. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  119. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  120. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  121. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
  122. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  123. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  124. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  125. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  126. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  127. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  128. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  129. package/src/duckdb/src/execution/physical_operator.cpp +17 -14
  130. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  131. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  132. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  133. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  134. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  135. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  136. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  137. package/src/duckdb/src/function/function.cpp +2 -0
  138. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  139. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  140. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  141. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  142. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  143. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  144. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  145. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  146. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  147. package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
  148. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  149. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  150. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  151. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  152. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  153. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  154. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  155. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  156. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  157. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  158. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  159. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  160. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  161. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  162. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  163. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  164. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  165. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  166. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  167. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  168. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  169. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  170. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  171. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  172. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  173. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  174. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  175. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  176. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  177. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  178. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  179. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  180. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  181. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  182. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
  183. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  184. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  185. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  186. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  187. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  188. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  189. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  190. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  191. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  192. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  193. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  194. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  195. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -1
  196. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  197. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  198. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  199. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  200. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  201. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  202. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  203. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  204. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
  205. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  206. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  207. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  208. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  209. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  210. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  211. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  212. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  213. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  214. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  215. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
  216. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  217. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  218. package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
  219. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  220. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  221. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  222. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  223. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  224. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  225. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  226. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  227. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  228. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  229. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  230. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  231. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  232. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  233. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  234. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  235. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  236. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  237. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  238. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  239. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  240. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  241. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  242. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  243. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  244. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  246. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  247. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  248. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  249. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  250. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  251. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  252. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  253. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  254. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  255. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  256. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  257. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  258. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  259. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  260. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  262. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  263. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  264. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  265. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  266. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  267. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  268. package/src/duckdb/src/include/duckdb.h +28 -0
  269. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  270. package/src/duckdb/src/main/config.cpp +2 -0
  271. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  272. package/src/duckdb/src/main/settings/settings.cpp +40 -18
  273. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  274. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  275. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  276. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  277. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  278. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  279. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  280. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  281. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  282. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  283. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  284. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  285. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  286. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  287. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  288. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  289. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  290. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  291. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  292. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  293. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  294. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  295. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  296. package/src/duckdb/src/parallel/executor.cpp +15 -0
  297. package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
  298. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  299. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  300. package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
  301. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  302. package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
  303. package/src/duckdb/src/parser/query_node.cpp +18 -1
  304. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
  305. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  306. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  307. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  308. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  309. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  310. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  311. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  312. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  313. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  314. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  315. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  316. package/src/duckdb/src/parser/transformer.cpp +15 -0
  317. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  318. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  319. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  320. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  321. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  322. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  323. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  324. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  325. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  326. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  327. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  328. package/src/duckdb/src/planner/binder.cpp +5 -0
  329. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  330. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  331. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  332. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  333. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  334. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  335. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  336. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  337. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  338. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  339. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  340. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  341. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  342. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  343. package/src/duckdb/src/storage/data_table.cpp +1 -1
  344. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  345. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  346. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  347. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  348. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  349. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  350. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  351. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  352. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  353. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  354. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  355. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  356. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  357. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  358. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  359. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  360. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  361. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  362. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  363. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  364. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  365. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  366. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  367. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  368. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  369. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  370. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  371. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  372. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  373. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  374. package/src/duckdb_node.hpp +1 -0
  375. package/src/statement.cpp +103 -4
  376. package/test/columns.test.ts +243 -0
  377. package/test/test_all_types.test.ts +233 -0
  378. package/tsconfig.json +1 -0
  379. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  380. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  381. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -700,7 +700,7 @@ void LocalFileSystem::CreateDirectory(const string &directory) {
700
700
  }
701
701
  auto unicode_path = WindowsUtil::UTF8ToUnicode(directory.c_str());
702
702
  if (directory.empty() || !CreateDirectoryW(unicode_path.c_str(), NULL) || !DirectoryExists(directory)) {
703
- throw IOException("Could not create directory!");
703
+ throw IOException("Could not create directory: \'%s\'", directory.c_str());
704
704
  }
705
705
  }
706
706
 
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/common/exception.hpp"
7
7
  #include "duckdb/function/function_set.hpp"
8
8
  #include "duckdb/common/hive_partitioning.hpp"
9
+ #include "duckdb/common/types.hpp"
9
10
 
10
11
  namespace duckdb {
11
12
 
@@ -13,6 +14,8 @@ void MultiFileReader::AddParameters(TableFunction &table_function) {
13
14
  table_function.named_parameters["filename"] = LogicalType::BOOLEAN;
14
15
  table_function.named_parameters["hive_partitioning"] = LogicalType::BOOLEAN;
15
16
  table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
17
+ table_function.named_parameters["hive_types"] = LogicalType::ANY;
18
+ table_function.named_parameters["hive_types_autocast"] = LogicalType::BOOLEAN;
16
19
  }
17
20
 
18
21
  vector<string> MultiFileReader::GetFileList(ClientContext &context, const Value &input, const string &name,
@@ -49,7 +52,8 @@ vector<string> MultiFileReader::GetFileList(ClientContext &context, const Value
49
52
  return files;
50
53
  }
51
54
 
52
- bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options) {
55
+ bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options,
56
+ ClientContext &context) {
53
57
  auto loption = StringUtil::Lower(key);
54
58
  if (loption == "filename") {
55
59
  options.filename = BooleanValue::Get(val);
@@ -58,6 +62,28 @@ bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFile
58
62
  options.auto_detect_hive_partitioning = false;
59
63
  } else if (loption == "union_by_name") {
60
64
  options.union_by_name = BooleanValue::Get(val);
65
+ } else if (loption == "hive_types_autocast" || loption == "hive_type_autocast") {
66
+ options.hive_types_autocast = BooleanValue::Get(val);
67
+ } else if (loption == "hive_types" || loption == "hive_type") {
68
+ if (val.type().id() != LogicalTypeId::STRUCT) {
69
+ throw InvalidInputException(
70
+ "'hive_types' only accepts a STRUCT('name':VARCHAR, ...), but '%s' was provided",
71
+ val.type().ToString());
72
+ }
73
+ // verify that that all the children of the struct value are VARCHAR
74
+ auto &children = StructValue::GetChildren(val);
75
+ for (idx_t i = 0; i < children.size(); i++) {
76
+ const Value &child = children[i];
77
+ if (child.type().id() != LogicalType::VARCHAR) {
78
+ throw InvalidInputException("hive_types: '%s' must be a VARCHAR, instead: '%s' was provided",
79
+ StructType::GetChildName(val.type(), i), child.type().ToString());
80
+ }
81
+ // for every child of the struct, get the logical type
82
+ LogicalType transformed_type = TransformStringToLogicalType(child.ToString(), context);
83
+ const string &name = StructType::GetChildName(val.type(), i);
84
+ options.hive_types_schema[name] = transformed_type;
85
+ }
86
+ D_ASSERT(!options.hive_types_schema.empty());
61
87
  } else {
62
88
  return false;
63
89
  }
@@ -111,28 +137,28 @@ MultiFileReaderBindData MultiFileReader::BindOptions(MultiFileReaderOptions &opt
111
137
  auto file_partitions = HivePartitioning::Parse(f);
112
138
  for (auto &part_info : partitions) {
113
139
  if (file_partitions.find(part_info.first) == file_partitions.end()) {
140
+ string error = "Hive partition mismatch between file \"%s\" and \"%s\": key \"%s\" not found";
114
141
  if (options.auto_detect_hive_partitioning == true) {
115
- throw BinderException(
116
- "Hive partitioning was enabled automatically, but an error was encountered: Hive partition "
117
- "mismatch between file \"%s\" and \"%s\": key \"%s\" not found\n\nTo switch off hive "
118
- "partition, set: HIVE_PARTITIONING=0",
119
- files[0], f, part_info.first);
142
+ throw InternalException(error + "(hive partitioning was autodetected)", files[0], f,
143
+ part_info.first);
120
144
  }
121
- throw BinderException(
122
- "Hive partition mismatch between file \"%s\" and \"%s\": key \"%s\" not found", files[0], f,
123
- part_info.first);
145
+ throw BinderException(error.c_str(), files[0], f, part_info.first);
124
146
  }
125
147
  }
126
148
  if (partitions.size() != file_partitions.size()) {
149
+ string error_msg = "Hive partition mismatch between file \"%s\" and \"%s\"";
127
150
  if (options.auto_detect_hive_partitioning == true) {
128
- throw BinderException("Hive partitioning was enabled automatically, but an error was encountered: "
129
- "Hive partition mismatch between file \"%s\" and \"%s\"\n\nTo switch off "
130
- "hive partition, set: HIVE_PARTITIONING=0",
131
- files[0], f);
151
+ throw InternalException(error_msg + "(hive partitioning was autodetected)", files[0], f);
132
152
  }
133
- throw BinderException("Hive partition mismatch between file \"%s\" and \"%s\"", files[0], f);
153
+ throw BinderException(error_msg.c_str(), files[0], f);
134
154
  }
135
155
  }
156
+
157
+ if (!options.hive_types_schema.empty()) {
158
+ // verify that all hive_types are existing partitions
159
+ options.VerifyHiveTypesArePartitions(partitions);
160
+ }
161
+
136
162
  for (auto &part : partitions) {
137
163
  idx_t hive_partitioning_index = DConstants::INVALID_INDEX;
138
164
  auto lookup = std::find(names.begin(), names.end(), part.first);
@@ -140,11 +166,11 @@ MultiFileReaderBindData MultiFileReader::BindOptions(MultiFileReaderOptions &opt
140
166
  // hive partitioning column also exists in file - override
141
167
  auto idx = lookup - names.begin();
142
168
  hive_partitioning_index = idx;
143
- return_types[idx] = LogicalType::VARCHAR;
169
+ return_types[idx] = options.GetHiveLogicalType(part.first);
144
170
  } else {
145
171
  // hive partitioning column does not exist in file - add a new column containing the key
146
172
  hive_partitioning_index = names.size();
147
- return_types.emplace_back(LogicalType::VARCHAR);
173
+ return_types.emplace_back(options.GetHiveLogicalType(part.first));
148
174
  names.emplace_back(part.first);
149
175
  }
150
176
  bind_data.hive_partitioning_indexes.emplace_back(part.first, hive_partitioning_index);
@@ -156,7 +182,9 @@ MultiFileReaderBindData MultiFileReader::BindOptions(MultiFileReaderOptions &opt
156
182
  void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
157
183
  const string &filename, const vector<string> &local_names,
158
184
  const vector<LogicalType> &global_types, const vector<string> &global_names,
159
- const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data) {
185
+ const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
186
+ ClientContext &context) {
187
+
160
188
  // create a map of name -> column index
161
189
  case_insensitive_map_t<idx_t> name_map;
162
190
  if (file_options.union_by_name) {
@@ -183,7 +211,8 @@ void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, c
183
211
  bool found_partition = false;
184
212
  for (auto &entry : options.hive_partitioning_indexes) {
185
213
  if (column_id == entry.index) {
186
- reader_data.constant_map.emplace_back(i, Value(partitions[entry.value]));
214
+ Value value = file_options.GetHivePartitionValue(partitions[entry.value], entry.value, context);
215
+ reader_data.constant_map.emplace_back(i, value);
187
216
  found_partition = true;
188
217
  break;
189
218
  }
@@ -313,7 +342,16 @@ void MultiFileReaderOptions::Serialize(Serializer &serializer) const {
313
342
  FieldWriter writer(serializer);
314
343
  writer.WriteField<bool>(filename);
315
344
  writer.WriteField<bool>(hive_partitioning);
345
+ writer.WriteField<bool>(auto_detect_hive_partitioning);
316
346
  writer.WriteField<bool>(union_by_name);
347
+ writer.WriteField<bool>(hive_types_autocast);
348
+ // serialize hive_types_schema
349
+ const uint32_t schema_size = hive_types_schema.size();
350
+ writer.WriteField<uint32_t>(schema_size);
351
+ for (auto &hive_type : hive_types_schema) {
352
+ writer.WriteString(hive_type.first);
353
+ writer.WriteString(hive_type.second.ToString());
354
+ }
317
355
  writer.Finalize();
318
356
  }
319
357
 
@@ -322,7 +360,16 @@ MultiFileReaderOptions MultiFileReaderOptions::Deserialize(Deserializer &source)
322
360
  FieldReader reader(source);
323
361
  result.filename = reader.ReadRequired<bool>();
324
362
  result.hive_partitioning = reader.ReadRequired<bool>();
363
+ result.auto_detect_hive_partitioning = reader.ReadRequired<bool>();
325
364
  result.union_by_name = reader.ReadRequired<bool>();
365
+ result.hive_types_autocast = reader.ReadRequired<bool>();
366
+ // deserialize hive_types_schema
367
+ const uint32_t schema_size = reader.ReadRequired<uint32_t>();
368
+ for (idx_t i = 0; i < schema_size; i++) {
369
+ const string name = reader.ReadRequired<string>();
370
+ const LogicalType type = TransformStringToLogicalType(reader.ReadRequired<string>());
371
+ result.hive_types_schema[name] = type;
372
+ }
326
373
  reader.Finalize();
327
374
  return result;
328
375
  }
@@ -365,7 +412,9 @@ HivePartitioningIndex HivePartitioningIndex::Deserialize(Deserializer &source) {
365
412
  void MultiFileReaderOptions::AddBatchInfo(BindInfo &bind_info) const {
366
413
  bind_info.InsertOption("filename", Value::BOOLEAN(filename));
367
414
  bind_info.InsertOption("hive_partitioning", Value::BOOLEAN(hive_partitioning));
415
+ bind_info.InsertOption("auto_detect_hive_partitioning", Value::BOOLEAN(auto_detect_hive_partitioning));
368
416
  bind_info.InsertOption("union_by_name", Value::BOOLEAN(union_by_name));
417
+ bind_info.InsertOption("hive_types_autocast", Value::BOOLEAN(hive_types_autocast));
369
418
  }
370
419
 
371
420
  void UnionByName::CombineUnionTypes(const vector<string> &col_names, const vector<LogicalType> &sql_types,
@@ -390,4 +439,118 @@ void UnionByName::CombineUnionTypes(const vector<string> &col_names, const vecto
390
439
  }
391
440
  }
392
441
 
442
+ bool MultiFileReaderOptions::AutoDetectHivePartitioningInternal(const vector<string> &files) {
443
+ std::unordered_set<string> partitions;
444
+
445
+ auto splits_first_file = StringUtil::Split(files.front(), FileSystem::PathSeparator());
446
+ if (splits_first_file.size() < 2) {
447
+ return false;
448
+ }
449
+ for (auto it = splits_first_file.begin(); it != splits_first_file.end(); it++) {
450
+ auto partition = StringUtil::Split(*it, "=");
451
+ if (partition.size() == 2) {
452
+ partitions.insert(partition.front());
453
+ }
454
+ }
455
+ if (partitions.empty()) {
456
+ return false;
457
+ }
458
+ for (auto &file : files) {
459
+ auto splits = StringUtil::Split(file, FileSystem::PathSeparator());
460
+ if (splits.size() != splits_first_file.size()) {
461
+ return false;
462
+ }
463
+ for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
464
+ auto part = StringUtil::Split(*it, "=");
465
+ if (part.size() != 2) {
466
+ continue;
467
+ }
468
+ if (partitions.find(part.front()) == partitions.end()) {
469
+ return false;
470
+ }
471
+ }
472
+ }
473
+ return true;
474
+ }
475
+ void MultiFileReaderOptions::AutoDetectHiveTypesInternal(const string &file, ClientContext &context) {
476
+ std::map<string, string> partitions;
477
+ auto splits = StringUtil::Split(file, FileSystem::PathSeparator());
478
+ if (splits.size() < 2) {
479
+ return;
480
+ }
481
+ for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
482
+ auto part = StringUtil::Split(*it, "=");
483
+ if (part.size() == 2) {
484
+ partitions[part.front()] = part.back();
485
+ }
486
+ }
487
+ if (partitions.empty()) {
488
+ return;
489
+ }
490
+
491
+ const LogicalType candidates[] = {LogicalType::DATE, LogicalType::TIMESTAMP, LogicalType::BIGINT};
492
+ for (auto &part : partitions) {
493
+ const string &name = part.first;
494
+ if (hive_types_schema.find(name) != hive_types_schema.end()) {
495
+ continue;
496
+ }
497
+ Value value(part.second);
498
+ for (auto &candidate : candidates) {
499
+ const bool success = value.TryCastAs(context, candidate);
500
+ if (success) {
501
+ hive_types_schema[name] = candidate;
502
+ break;
503
+ }
504
+ }
505
+ }
506
+ }
507
+ void MultiFileReaderOptions::AutoDetectHivePartitioning(const vector<string> &files, ClientContext &context) {
508
+ D_ASSERT(!files.empty());
509
+ const bool hp_explicitly_disabled = !auto_detect_hive_partitioning && !hive_partitioning;
510
+ const bool ht_enabled = !hive_types_schema.empty();
511
+ if (hp_explicitly_disabled && ht_enabled) {
512
+ throw InvalidInputException("cannot disable hive_partitioning when hive_types is enabled");
513
+ }
514
+ if (ht_enabled && auto_detect_hive_partitioning && !hive_partitioning) {
515
+ // hive_types flag implies hive_partitioning
516
+ hive_partitioning = true;
517
+ auto_detect_hive_partitioning = false;
518
+ }
519
+ if (auto_detect_hive_partitioning) {
520
+ hive_partitioning = AutoDetectHivePartitioningInternal(files);
521
+ }
522
+ if (hive_partitioning && hive_types_autocast) {
523
+ AutoDetectHiveTypesInternal(files.front(), context);
524
+ }
525
+ }
526
+ void MultiFileReaderOptions::VerifyHiveTypesArePartitions(const std::map<string, string> &partitions) const {
527
+ for (auto &hive_type : hive_types_schema) {
528
+ if (partitions.find(hive_type.first) == partitions.end()) {
529
+ throw InvalidInputException("Unknown hive_type: \"%s\" does not appear to be a partition", hive_type.first);
530
+ }
531
+ }
532
+ }
533
+ LogicalType MultiFileReaderOptions::GetHiveLogicalType(const string &hive_partition_column) const {
534
+ if (!hive_types_schema.empty()) {
535
+ auto it = hive_types_schema.find(hive_partition_column);
536
+ if (it != hive_types_schema.end()) {
537
+ return it->second;
538
+ }
539
+ }
540
+ return LogicalType::VARCHAR;
541
+ }
542
+ Value MultiFileReaderOptions::GetHivePartitionValue(const string &base, const string &entry,
543
+ ClientContext &context) const {
544
+ Value value(base);
545
+ auto it = hive_types_schema.find(entry);
546
+ if (it == hive_types_schema.end()) {
547
+ return value;
548
+ }
549
+ if (!value.TryCastAs(context, it->second)) {
550
+ throw InvalidInputException("Unable to cast '%s' (from hive partition column '%s') to: '%s'", value.ToString(),
551
+ StringUtil::Upper(it->first), it->second.ToString());
552
+ }
553
+ return value;
554
+ }
555
+
393
556
  } // namespace duckdb
@@ -8,9 +8,26 @@
8
8
 
9
9
  namespace duckdb {
10
10
 
11
+ //! Templated radix partitioning constants, can be templated to the number of radix bits
12
+ template <idx_t radix_bits>
13
+ struct RadixPartitioningConstants {
14
+ public:
15
+ //! Bitmask of the upper bits starting at the 5th byte
16
+ static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits);
17
+ static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits);
18
+ static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits);
19
+
20
+ public:
21
+ //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS
22
+ static inline hash_t ApplyMask(hash_t hash) {
23
+ D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS);
24
+ return (hash & MASK) >> SHIFT;
25
+ }
26
+ };
27
+
11
28
  template <class OP, class RETURN_TYPE, typename... ARGS>
12
- RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
13
- D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
29
+ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
30
+ D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
14
31
  switch (radix_bits) {
15
32
  case 1:
16
33
  return OP::template Operation<1>(std::forward<ARGS>(args)...);
@@ -20,7 +37,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
20
37
  return OP::template Operation<3>(std::forward<ARGS>(args)...);
21
38
  case 4:
22
39
  return OP::template Operation<4>(std::forward<ARGS>(args)...);
23
- case 5:
40
+ case 5: // LCOV_EXCL_START
24
41
  return OP::template Operation<5>(std::forward<ARGS>(args)...);
25
42
  case 6:
26
43
  return OP::template Operation<6>(std::forward<ARGS>(args)...);
@@ -32,9 +49,14 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
32
49
  return OP::template Operation<9>(std::forward<ARGS>(args)...);
33
50
  case 10:
34
51
  return OP::template Operation<10>(std::forward<ARGS>(args)...);
52
+ case 11:
53
+ return OP::template Operation<10>(std::forward<ARGS>(args)...);
54
+ case 12:
55
+ return OP::template Operation<10>(std::forward<ARGS>(args)...);
35
56
  default:
36
- throw InternalException("TODO");
37
- }
57
+ throw InternalException(
58
+ "radix_bits higher than RadixPartitioning::MAX_RADIX_BITS encountered in RadixBitsSwitch");
59
+ } // LCOV_EXCL_STOP
38
60
  }
39
61
 
40
62
  template <idx_t radix_bits>
@@ -69,10 +91,6 @@ struct HashsToBinsFunctor {
69
91
  }
70
92
  };
71
93
 
72
- void RadixPartitioning::HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count) {
73
- return RadixBitsSwitch<HashsToBinsFunctor, void>(radix_bits, hashes, bins, count);
74
- }
75
-
76
94
  //===--------------------------------------------------------------------===//
77
95
  // Row Data Partitioning
78
96
  //===--------------------------------------------------------------------===//
@@ -99,7 +99,7 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
99
99
 
100
100
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
101
101
  #ifdef DEBUG
102
- if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
102
+ if (layout.GetTypes()[col_idx].id() != LogicalTypeId::VARCHAR) {
103
103
  return;
104
104
  }
105
105
  idx_t entry_idx;
@@ -273,16 +273,13 @@ void MergeSorter::ComputeMerge(const idx_t &count, bool left_smaller[]) {
273
273
  break;
274
274
  }
275
275
  // Pin the radix sorting data
276
- if (!l_done) {
277
- left->PinRadix(l.block_idx);
278
- l_radix_ptr = left->RadixPtr();
279
- }
280
- if (!r_done) {
281
- right->PinRadix(r.block_idx);
282
- r_radix_ptr = right->RadixPtr();
283
- }
284
- const idx_t &l_count = !l_done ? l_sorted_block.radix_sorting_data[l.block_idx]->count : 0;
285
- const idx_t &r_count = !r_done ? r_sorted_block.radix_sorting_data[r.block_idx]->count : 0;
276
+ left->PinRadix(l.block_idx);
277
+ l_radix_ptr = left->RadixPtr();
278
+ right->PinRadix(r.block_idx);
279
+ r_radix_ptr = right->RadixPtr();
280
+
281
+ const idx_t l_count = l_sorted_block.radix_sorting_data[l.block_idx]->count;
282
+ const idx_t r_count = r_sorted_block.radix_sorting_data[r.block_idx]->count;
286
283
  // Compute the merge
287
284
  if (sort_layout.all_constant) {
288
285
  // All sorting columns are constant size
@@ -298,12 +295,8 @@ void MergeSorter::ComputeMerge(const idx_t &count, bool left_smaller[]) {
298
295
  }
299
296
  } else {
300
297
  // Pin the blob data
301
- if (!l_done) {
302
- left->PinData(*l_sorted_block.blob_sorting_data);
303
- }
304
- if (!r_done) {
305
- right->PinData(*r_sorted_block.blob_sorting_data);
306
- }
298
+ left->PinData(*l_sorted_block.blob_sorting_data);
299
+ right->PinData(*r_sorted_block.blob_sorting_data);
307
300
  // Merge with variable size sorting columns
308
301
  for (; compared < count && l.entry_idx < l_count && r.entry_idx < r_count; compared++) {
309
302
  left_smaller[compared] =
@@ -87,7 +87,7 @@ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
87
87
  const vector<unique_ptr<BaseStatistics>> &partition_stats,
88
88
  idx_t estimated_cardinality)
89
89
  : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
90
- payload_types(payload_types), memory_per_thread(0), count(0) {
90
+ fixed_bits(0), payload_types(payload_types), memory_per_thread(0), count(0) {
91
91
 
92
92
  GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
93
93
 
@@ -102,9 +102,19 @@ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
102
102
  }
103
103
  }
104
104
 
105
+ void PartitionGlobalSinkState::SyncPartitioning(const PartitionGlobalSinkState &other) {
106
+ fixed_bits = other.grouping_data ? other.grouping_data->GetRadixBits() : 0;
107
+
108
+ const auto old_bits = grouping_data ? grouping_data->GetRadixBits() : 0;
109
+ if (fixed_bits != old_bits) {
110
+ const auto hash_col_idx = payload_types.size();
111
+ grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, fixed_bits, hash_col_idx);
112
+ }
113
+ }
114
+
105
115
  void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
106
116
  // Have we started to combine? Then just live with it.
107
- if (grouping_data && !grouping_data->GetPartitions().empty()) {
117
+ if (fixed_bits || (grouping_data && !grouping_data->GetPartitions().empty())) {
108
118
  return;
109
119
  }
110
120
  // Is the average partition size too large?
@@ -186,9 +196,7 @@ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_pa
186
196
  grouping_data->Combine(*local_partition);
187
197
  }
188
198
 
189
- void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
190
- auto &global_sort = *hash_group.global_sort;
191
-
199
+ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, GlobalSortState &global_sort) const {
192
200
  // Set up the sort expression computation.
193
201
  vector<LogicalType> sort_types;
194
202
  ExpressionExecutor executor(context);
@@ -234,6 +242,10 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
234
242
  }
235
243
 
236
244
  global_sort.AddLocalState(local_sort);
245
+ }
246
+
247
+ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
248
+ BuildSortState(group_data, *hash_group.global_sort);
237
249
 
238
250
  hash_group.count += group_data.Count();
239
251
  }
@@ -482,18 +494,29 @@ public:
482
494
  TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
483
495
 
484
496
  private:
497
+ struct ExecutorCallback : public PartitionGlobalMergeStates::Callback {
498
+ explicit ExecutorCallback(Executor &executor) : executor(executor) {
499
+ }
500
+
501
+ bool HasError() const override {
502
+ return executor.HasError();
503
+ }
504
+
505
+ Executor &executor;
506
+ };
507
+
485
508
  shared_ptr<Event> event;
486
509
  PartitionLocalMergeState local_state;
487
510
  PartitionGlobalMergeStates &hash_groups;
488
511
  };
489
512
 
490
- TaskExecutionResult PartitionMergeTask::ExecuteTask(TaskExecutionMode mode) {
513
+ bool PartitionGlobalMergeStates::ExecuteTask(PartitionLocalMergeState &local_state, Callback &callback) {
491
514
  // Loop until all hash groups are done
492
515
  size_t sorted = 0;
493
- while (sorted < hash_groups.states.size()) {
516
+ while (sorted < states.size()) {
494
517
  // First check if there is an unfinished task for this thread
495
- if (executor.HasError()) {
496
- return TaskExecutionResult::TASK_ERROR;
518
+ if (callback.HasError()) {
519
+ return false;
497
520
  }
498
521
  if (!local_state.TaskFinished()) {
499
522
  local_state.ExecuteTask();
@@ -501,8 +524,8 @@ TaskExecutionResult PartitionMergeTask::ExecuteTask(TaskExecutionMode mode) {
501
524
  }
502
525
 
503
526
  // Thread is done with its assigned task, try to fetch new work
504
- for (auto group = sorted; group < hash_groups.states.size(); ++group) {
505
- auto &global_state = hash_groups.states[group];
527
+ for (auto group = sorted; group < states.size(); ++group) {
528
+ auto &global_state = states[group];
506
529
  if (global_state->IsSorted()) {
507
530
  // This hash group is done
508
531
  // Update the high water mark of densely completed groups
@@ -543,6 +566,16 @@ TaskExecutionResult PartitionMergeTask::ExecuteTask(TaskExecutionMode mode) {
543
566
  }
544
567
  }
545
568
 
569
+ return true;
570
+ }
571
+
572
+ TaskExecutionResult PartitionMergeTask::ExecuteTask(TaskExecutionMode mode) {
573
+ ExecutorCallback callback(executor);
574
+
575
+ if (!hash_groups.ExecuteTask(local_state, callback)) {
576
+ return TaskExecutionResult::TASK_ERROR;
577
+ }
578
+
546
579
  event->FinishTask();
547
580
  return TaskExecutionResult::TASK_FINISHED;
548
581
  }
@@ -1,11 +1,14 @@
1
1
  #include "duckdb/common/types/batched_data_collection.hpp"
2
+
3
+ #include "duckdb/common/optional_ptr.hpp"
2
4
  #include "duckdb/common/printer.hpp"
3
5
  #include "duckdb/storage/buffer_manager.hpp"
4
- #include "duckdb/common/optional_ptr.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
8
- BatchedDataCollection::BatchedDataCollection(vector<LogicalType> types_p) : types(std::move(types_p)) {
9
+ BatchedDataCollection::BatchedDataCollection(ClientContext &context_p, vector<LogicalType> types_p,
10
+ bool buffer_managed_p)
11
+ : context(context_p), types(std::move(types_p)), buffer_managed(buffer_managed_p) {
9
12
  }
10
13
 
11
14
  void BatchedDataCollection::Append(DataChunk &input, idx_t batch_index) {
@@ -20,6 +23,8 @@ void BatchedDataCollection::Append(DataChunk &input, idx_t batch_index) {
20
23
  unique_ptr<ColumnDataCollection> new_collection;
21
24
  if (last_collection.collection) {
22
25
  new_collection = make_uniq<ColumnDataCollection>(*last_collection.collection);
26
+ } else if (buffer_managed) {
27
+ new_collection = make_uniq<ColumnDataCollection>(BufferManager::GetBufferManager(context), types);
23
28
  } else {
24
29
  new_collection = make_uniq<ColumnDataCollection>(Allocator::DefaultAllocator(), types);
25
30
  }
@@ -1,8 +1,8 @@
1
1
  #include "duckdb/common/types/column/column_data_allocator.hpp"
2
2
 
3
3
  #include "duckdb/common/types/column/column_data_collection_segment.hpp"
4
- #include "duckdb/storage/buffer_manager.hpp"
5
4
  #include "duckdb/storage/buffer/block_handle.hpp"
5
+ #include "duckdb/storage/buffer_manager.hpp"
6
6
 
7
7
  namespace duckdb {
8
8
 
@@ -19,6 +19,7 @@ ColumnDataAllocator::ColumnDataAllocator(ClientContext &context, ColumnDataAlloc
19
19
  : type(allocator_type) {
20
20
  switch (type) {
21
21
  case ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR:
22
+ case ColumnDataAllocatorType::HYBRID:
22
23
  alloc.buffer_manager = &BufferManager::GetBufferManager(context);
23
24
  break;
24
25
  case ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR:
@@ -33,6 +34,7 @@ ColumnDataAllocator::ColumnDataAllocator(ColumnDataAllocator &other) {
33
34
  type = other.GetType();
34
35
  switch (type) {
35
36
  case ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR:
37
+ case ColumnDataAllocatorType::HYBRID:
36
38
  alloc.allocator = other.alloc.allocator;
37
39
  break;
38
40
  case ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR:
@@ -44,7 +46,7 @@ ColumnDataAllocator::ColumnDataAllocator(ColumnDataAllocator &other) {
44
46
  }
45
47
 
46
48
  BufferHandle ColumnDataAllocator::Pin(uint32_t block_id) {
47
- D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR);
49
+ D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR || type == ColumnDataAllocatorType::HYBRID);
48
50
  shared_ptr<BlockHandle> handle;
49
51
  if (shared) {
50
52
  // we only need to grab the lock when accessing the vector, because vector access is not thread-safe:
@@ -58,7 +60,7 @@ BufferHandle ColumnDataAllocator::Pin(uint32_t block_id) {
58
60
  }
59
61
 
60
62
  BufferHandle ColumnDataAllocator::AllocateBlock(idx_t size) {
61
- D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR);
63
+ D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR || type == ColumnDataAllocatorType::HYBRID);
62
64
  auto block_size = MaxValue<idx_t>(size, Storage::BLOCK_SIZE);
63
65
  BlockMetaData data;
64
66
  data.size = 0;
@@ -136,6 +138,7 @@ void ColumnDataAllocator::AllocateData(idx_t size, uint32_t &block_id, uint32_t
136
138
  ChunkManagementState *chunk_state) {
137
139
  switch (type) {
138
140
  case ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR:
141
+ case ColumnDataAllocatorType::HYBRID:
139
142
  if (shared) {
140
143
  lock_guard<mutex> guard(lock);
141
144
  AllocateBuffer(size, block_id, offset, chunk_state);
@@ -174,8 +177,8 @@ data_ptr_t ColumnDataAllocator::GetDataPointer(ChunkManagementState &state, uint
174
177
  return state.handles[block_id].Ptr() + offset;
175
178
  }
176
179
 
177
- void ColumnDataAllocator::UnswizzlePointers(ChunkManagementState &state, Vector &result, uint16_t v_offset,
178
- uint16_t count, uint32_t block_id, uint32_t offset) {
180
+ void ColumnDataAllocator::UnswizzlePointers(ChunkManagementState &state, Vector &result, idx_t v_offset, uint16_t count,
181
+ uint32_t block_id, uint32_t offset) {
179
182
  D_ASSERT(result.GetType().InternalType() == PhysicalType::VARCHAR);
180
183
  lock_guard<mutex> guard(lock);
181
184
 
@@ -225,7 +228,7 @@ Allocator &ColumnDataAllocator::GetAllocator() {
225
228
  }
226
229
 
227
230
  void ColumnDataAllocator::InitializeChunkState(ChunkManagementState &state, ChunkMetaData &chunk) {
228
- if (type != ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR) {
231
+ if (type != ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR && type != ColumnDataAllocatorType::HYBRID) {
229
232
  // nothing to pin
230
233
  return;
231
234
  }