duckdb 0.8.2-dev1.0 → 0.8.2-dev1182.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (385):
  1. package/binding.gyp +16 -14
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/lib/duckdb.d.ts +59 -0
  6. package/lib/duckdb.js +21 -0
  7. package/package.json +1 -1
  8. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  10. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  11. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  12. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  14. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  15. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  16. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  17. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  18. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  19. package/src/duckdb/extension/icu/{icu-extension.cpp → icu_extension.cpp} +29 -34
  20. package/src/duckdb/extension/icu/include/{icu-extension.hpp → icu_extension.hpp} +2 -2
  21. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  22. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  23. package/src/duckdb/extension/json/include/{json-extension.hpp → json_extension.hpp} +2 -2
  24. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  25. package/src/duckdb/extension/json/json_common.cpp +272 -40
  26. package/src/duckdb/extension/json/{json-extension.cpp → json_extension.cpp} +4 -4
  27. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  28. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  29. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  30. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  31. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  32. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  33. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  34. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  35. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  36. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  37. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  39. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  40. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  41. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  42. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  43. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  44. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  45. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  46. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  47. package/src/duckdb/extension/parquet/{parquet-extension.cpp → parquet_extension.cpp} +190 -19
  48. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -5
  49. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  50. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  51. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  52. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  53. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  54. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  55. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  56. package/src/duckdb/src/common/allocator.cpp +16 -4
  57. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -10
  58. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
  59. package/src/duckdb/src/common/assert.cpp +3 -0
  60. package/src/duckdb/src/common/enum_util.cpp +42 -5
  61. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  62. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  63. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  64. package/src/duckdb/src/common/file_system.cpp +15 -0
  65. package/src/duckdb/src/common/local_file_system.cpp +1 -1
  66. package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
  67. package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  73. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  74. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  75. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  76. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  77. package/src/duckdb/src/common/types/date.cpp +9 -0
  78. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  81. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  82. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  83. package/src/duckdb/src/common/types/vector.cpp +15 -14
  84. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
  85. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  86. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  91. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  92. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  93. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
  94. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  95. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  96. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  97. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  98. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  100. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  101. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  102. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  103. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  104. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  105. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  106. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  107. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  108. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  109. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  110. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  111. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  112. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  113. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  114. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +413 -282
  115. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  116. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  117. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  119. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  120. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  121. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  122. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  123. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  124. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
  125. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  126. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  127. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  128. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  129. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  130. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  131. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  132. package/src/duckdb/src/execution/physical_operator.cpp +17 -14
  133. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  134. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  135. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  136. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  137. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  138. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  139. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  140. package/src/duckdb/src/function/function.cpp +2 -0
  141. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  142. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  143. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  144. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  145. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  146. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  147. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  148. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  149. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  150. package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
  151. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  152. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  153. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  154. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  155. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  156. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  157. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  158. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  159. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  160. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  161. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  162. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  163. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  164. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  165. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  166. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  167. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  168. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  169. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  170. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  171. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  172. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  173. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  174. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  175. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  176. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  177. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  178. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  179. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  180. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  181. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  182. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  183. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  184. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  185. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
  186. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  187. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  188. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  189. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  190. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  191. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  192. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  193. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  194. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  195. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  196. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  197. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  198. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -1
  199. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  200. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  201. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  202. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  204. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  205. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  206. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  207. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
  208. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  209. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  210. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  211. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  212. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  213. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  214. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  215. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  216. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  217. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  218. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
  219. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  220. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  221. package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
  222. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  223. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  224. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  225. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  226. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  227. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  228. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  229. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  230. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  231. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  232. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  233. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  234. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  235. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  236. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  237. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  238. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  239. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  240. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  241. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  242. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  243. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  244. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  246. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  247. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  248. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  249. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  250. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  251. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  252. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  253. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  254. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  255. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  257. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  258. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  259. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  260. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  261. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  262. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  263. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  265. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  266. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  267. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  268. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  269. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  270. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  271. package/src/duckdb/src/include/duckdb.h +28 -0
  272. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  273. package/src/duckdb/src/main/config.cpp +2 -0
  274. package/src/duckdb/src/main/extension/extension_helper.cpp +106 -99
  275. package/src/duckdb/src/main/settings/settings.cpp +40 -18
  276. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  277. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  278. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  279. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  280. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  281. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  282. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  283. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  284. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  285. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  286. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  287. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  288. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  289. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  290. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  291. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  292. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  293. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  294. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  295. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  296. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  297. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  298. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  299. package/src/duckdb/src/parallel/executor.cpp +15 -0
  300. package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
  301. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  302. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  303. package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
  304. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  305. package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
  306. package/src/duckdb/src/parser/query_node.cpp +18 -1
  307. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
  308. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  309. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  310. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  311. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  312. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  313. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  314. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  315. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  316. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  317. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  318. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  319. package/src/duckdb/src/parser/transformer.cpp +15 -0
  320. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  321. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  322. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  323. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  324. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  325. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  326. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  327. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  328. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  329. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  330. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  331. package/src/duckdb/src/planner/binder.cpp +5 -0
  332. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  333. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  334. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  335. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  336. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  337. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  338. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  339. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  340. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  341. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  342. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  343. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  344. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  345. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  346. package/src/duckdb/src/storage/data_table.cpp +1 -1
  347. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  348. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  349. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  350. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  351. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  352. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  353. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  354. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  355. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  356. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  357. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  358. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  359. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  360. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  361. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  362. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  363. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  364. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  365. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  366. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  367. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  368. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  369. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  370. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  371. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  372. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  373. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  374. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  375. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  376. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  377. package/src/duckdb_node.hpp +1 -0
  378. package/src/statement.cpp +103 -4
  379. package/test/columns.test.ts +243 -0
  380. package/test/test_all_types.test.ts +233 -0
  381. package/tsconfig.json +1 -0
  382. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  383. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  384. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
  385. /package/src/duckdb/extension/parquet/include/{parquet-extension.hpp → parquet_extension.hpp} +0 -0
@@ -15,7 +15,7 @@ public:
15
15
  // some derivatives
16
16
  D_ASSERT(miniblocks_per_block > 0);
17
17
  values_per_miniblock = block_value_count / miniblocks_per_block;
18
- miniblock_bit_widths = duckdb::unique_ptr<uint8_t[]>(new data_t[miniblocks_per_block]);
18
+ miniblock_bit_widths = unique_ptr<uint8_t[]>(new data_t[miniblocks_per_block]);
19
19
 
20
20
  // init state to something sane
21
21
  values_left_in_block = 0;
@@ -96,7 +96,7 @@ public:
96
96
  if (values_left_in_miniblock == 0) {
97
97
  return;
98
98
  }
99
- auto data = duckdb::unique_ptr<uint32_t[]>(new uint32_t[values_left_in_miniblock]);
99
+ auto data = unique_ptr<uint32_t[]>(new uint32_t[values_left_in_miniblock]);
100
100
  GetBatch<uint32_t>(data_ptr_cast(data.get()), values_left_in_miniblock);
101
101
  }
102
102
 
@@ -112,7 +112,7 @@ private:
112
112
  int64_t start_value;
113
113
  idx_t values_per_miniblock;
114
114
 
115
- duckdb::unique_ptr<uint8_t[]> miniblock_bit_widths;
115
+ unique_ptr<uint8_t[]> miniblock_bit_widths;
116
116
  idx_t values_left_in_block;
117
117
  idx_t values_left_in_miniblock;
118
118
  idx_t miniblock_offset;
@@ -35,9 +35,9 @@ public:
35
35
  return res;
36
36
  }
37
37
 
38
- static duckdb::unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
39
- const SchemaElement &schema_p, idx_t file_idx_p,
40
- idx_t max_define, idx_t max_repeat);
38
+ static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
39
+ const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define,
40
+ idx_t max_repeat);
41
41
  };
42
42
 
43
43
  } // namespace duckdb
@@ -20,14 +20,14 @@ class ParquetFileMetadataCache : public ObjectCacheEntry {
20
20
  public:
21
21
  ParquetFileMetadataCache() : metadata(nullptr) {
22
22
  }
23
- ParquetFileMetadataCache(duckdb::unique_ptr<duckdb_parquet::format::FileMetaData> file_metadata, time_t r_time)
23
+ ParquetFileMetadataCache(unique_ptr<duckdb_parquet::format::FileMetaData> file_metadata, time_t r_time)
24
24
  : metadata(std::move(file_metadata)), read_time(r_time) {
25
25
  }
26
26
 
27
27
  ~ParquetFileMetadataCache() override = default;
28
28
 
29
29
  //! Parquet file metadata
30
- duckdb::unique_ptr<const duckdb_parquet::format::FileMetaData> metadata;
30
+ unique_ptr<const duckdb_parquet::format::FileMetaData> metadata;
31
31
 
32
32
  //! read time
33
33
  time_t read_time;
@@ -15,8 +15,8 @@ struct LogicalType;
15
15
 
16
16
  struct ParquetStatisticsUtils {
17
17
 
18
- static duckdb::unique_ptr<BaseStatistics>
19
- TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type, const ColumnChunk &column_chunk);
18
+ static unique_ptr<BaseStatistics> TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type,
19
+ const ColumnChunk &column_chunk);
20
20
 
21
21
  static Value ConvertValue(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele,
22
22
  const std::string &stats);
@@ -37,8 +37,7 @@ public:
37
37
  * @param throwIfNotFound fail if a stream is required and not found
38
38
  * @return the new stream
39
39
  */
40
- virtual duckdb::unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound)
41
- const = 0;
40
+ virtual unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0;
42
41
 
43
42
  /**
44
43
  * visit all streams of given node and execute visitor logic
@@ -63,7 +62,7 @@ public:
63
62
  * Get the RowGroupIndex.
64
63
  * @return a vector of RowIndex belonging to the stripe
65
64
  */
66
- virtual duckdb::unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
65
+ virtual unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
67
66
 
68
67
  /**
69
68
  * Get stride index provider which is used by string dictionary reader to
@@ -84,8 +83,7 @@ public:
84
83
  * @param throwIfNotFound fail if a stream is required and not found
85
84
  * @return the new stream
86
85
  */
87
- virtual duckdb::unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si,
88
- bool throwIfNotFound) const = 0;
86
+ virtual unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0;
89
87
 
90
88
  /**
91
89
  * visit all streams of given node and execute visitor logic
@@ -110,7 +108,7 @@ public:
110
108
  * Get the RowGroupIndex.
111
109
  * @return a vector of RowIndex belonging to the stripe
112
110
  */
113
- virtual duckdb::unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
111
+ virtual unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
114
112
 
115
113
  /**
116
114
  * Get stride index provider which is used by string dictionary reader to
@@ -157,10 +155,10 @@ public:
157
155
  }
158
156
 
159
157
  // Creates a reader for the given stripe.
160
- static duckdb::unique_ptr<SelectiveColumnReader>
161
- build(const std::shared_ptr<const dwio::common::TypeWithId> &requestedType,
162
- const std::shared_ptr<const dwio::common::TypeWithId> &dataType, StripeStreams &stripe,
163
- common::ScanSpec *scanSpec, uint32_t sequence = 0);
158
+ static unique_ptr<SelectiveColumnReader> build(const std::shared_ptr<const dwio::common::TypeWithId> &requestedType,
159
+ const std::shared_ptr<const dwio::common::TypeWithId> &dataType,
160
+ StripeStreams &stripe, common::ScanSpec *scanSpec,
161
+ uint32_t sequence = 0);
164
162
 
165
163
  // Seeks to offset and reads the rows in 'rows' and applies
166
164
  // filters and value processing as given by 'scanSpec supplied at
@@ -336,7 +334,7 @@ public:
336
334
  return kind_;
337
335
  }
338
336
 
339
- virtual duckdb::unique_ptr<Filter> clone() const = 0;
337
+ virtual unique_ptr<Filter> clone() const = 0;
340
338
 
341
339
  /**
342
340
  * A filter becomes non-deterministic when applies to nested column,
@@ -17,8 +17,8 @@
17
17
  #include "duckdb/common/types/column/column_data_collection.hpp"
18
18
  #endif
19
19
 
20
- #include "parquet_types.h"
21
20
  #include "column_writer.hpp"
21
+ #include "parquet_types.h"
22
22
  #include "thrift/protocol/TCompactProtocol.h"
23
23
 
24
24
  namespace duckdb {
@@ -27,13 +27,31 @@ class FileOpener;
27
27
 
28
28
  struct PreparedRowGroup {
29
29
  duckdb_parquet::format::RowGroup row_group;
30
- vector<duckdb::unique_ptr<ColumnWriterState>> states;
30
+ vector<unique_ptr<ColumnWriterState>> states;
31
+ vector<shared_ptr<StringHeap>> heaps;
32
+ };
33
+
34
+ struct FieldID;
35
+ struct ChildFieldIDs {
36
+ ChildFieldIDs();
37
+ ChildFieldIDs Copy() const;
38
+ unique_ptr<case_insensitive_map_t<FieldID>> ids;
39
+ };
40
+
41
+ struct FieldID {
42
+ static constexpr const auto DUCKDB_FIELD_ID = "__duckdb_field_id";
43
+ FieldID();
44
+ explicit FieldID(int32_t field_id);
45
+ FieldID Copy() const;
46
+ bool set;
47
+ int32_t field_id;
48
+ ChildFieldIDs child_field_ids;
31
49
  };
32
50
 
33
51
  class ParquetWriter {
34
52
  public:
35
53
  ParquetWriter(FileSystem &fs, string file_name, vector<LogicalType> types, vector<string> names,
36
- duckdb_parquet::format::CompressionCodec::type codec);
54
+ duckdb_parquet::format::CompressionCodec::type codec, ChildFieldIDs field_ids);
37
55
 
38
56
  public:
39
57
  void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
@@ -62,13 +80,14 @@ private:
62
80
  vector<LogicalType> sql_types;
63
81
  vector<string> column_names;
64
82
  duckdb_parquet::format::CompressionCodec::type codec;
83
+ ChildFieldIDs field_ids;
65
84
 
66
- duckdb::unique_ptr<BufferedFileWriter> writer;
85
+ unique_ptr<BufferedFileWriter> writer;
67
86
  shared_ptr<duckdb_apache::thrift::protocol::TProtocol> protocol;
68
87
  duckdb_parquet::format::FileMetaData file_meta_data;
69
88
  std::mutex lock;
70
89
 
71
- vector<duckdb::unique_ptr<ColumnWriter>> column_writers;
90
+ vector<unique_ptr<ColumnWriter>> column_writers;
72
91
  };
73
92
 
74
93
  } // namespace duckdb
@@ -28,7 +28,7 @@ public:
28
28
  StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p,
29
29
  idx_t max_define_p, idx_t max_repeat_p);
30
30
 
31
- duckdb::unique_ptr<string_t[]> dict_strings;
31
+ unique_ptr<string_t[]> dict_strings;
32
32
  idx_t fixed_width_string_length;
33
33
  idx_t delta_offset = 0;
34
34
 
@@ -19,10 +19,9 @@ public:
19
19
 
20
20
  public:
21
21
  StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p,
22
- idx_t max_define_p, idx_t max_repeat_p,
23
- vector<duckdb::unique_ptr<ColumnReader>> child_readers_p);
22
+ idx_t max_define_p, idx_t max_repeat_p, vector<unique_ptr<ColumnReader>> child_readers_p);
24
23
 
25
- vector<duckdb::unique_ptr<ColumnReader>> child_readers;
24
+ vector<unique_ptr<ColumnReader>> child_readers;
26
25
 
27
26
  public:
28
27
  ColumnReader *GetChildReader(idx_t child_idx);
@@ -17,13 +17,13 @@ namespace duckdb {
17
17
 
18
18
  class ZStdFileSystem : public CompressedFileSystem {
19
19
  public:
20
- duckdb::unique_ptr<FileHandle> OpenCompressedFile(duckdb::unique_ptr<FileHandle> handle, bool write) override;
20
+ unique_ptr<FileHandle> OpenCompressedFile(unique_ptr<FileHandle> handle, bool write) override;
21
21
 
22
22
  std::string GetName() const override {
23
23
  return "ZStdFileSystem";
24
24
  }
25
25
 
26
- duckdb::unique_ptr<StreamWrapper> CreateStream() override;
26
+ unique_ptr<StreamWrapper> CreateStream() override;
27
27
  idx_t InBufferSize() override;
28
28
  idx_t OutBufferSize() override;
29
29
  };
@@ -1,6 +1,6 @@
1
1
  #define DUCKDB_EXTENSION_MAIN
2
2
 
3
- #include "parquet-extension.hpp"
3
+ #include "parquet_extension.hpp"
4
4
 
5
5
  #include "duckdb.hpp"
6
6
  #include "parquet_metadata.hpp"
@@ -15,15 +15,18 @@
15
15
  #include <vector>
16
16
  #ifndef DUCKDB_AMALGAMATION
17
17
  #include "duckdb/catalog/catalog.hpp"
18
+ #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
18
19
  #include "duckdb/common/constants.hpp"
19
20
  #include "duckdb/common/enums/file_compression_type.hpp"
20
21
  #include "duckdb/common/field_writer.hpp"
21
22
  #include "duckdb/common/file_system.hpp"
23
+ #include "duckdb/common/multi_file_reader.hpp"
22
24
  #include "duckdb/common/types/chunk_collection.hpp"
23
25
  #include "duckdb/function/copy_function.hpp"
24
26
  #include "duckdb/function/table_function.hpp"
25
27
  #include "duckdb/main/client_context.hpp"
26
28
  #include "duckdb/main/config.hpp"
29
+ #include "duckdb/main/extension_util.hpp"
27
30
  #include "duckdb/parser/expression/constant_expression.hpp"
28
31
  #include "duckdb/parser/expression/function_expression.hpp"
29
32
  #include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
@@ -31,10 +34,7 @@
31
34
  #include "duckdb/parser/tableref/table_function_ref.hpp"
32
35
  #include "duckdb/planner/operator/logical_get.hpp"
33
36
  #include "duckdb/storage/statistics/base_statistics.hpp"
34
- #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
35
- #include "duckdb/common/multi_file_reader.hpp"
36
37
  #include "duckdb/storage/table/row_group.hpp"
37
- #include "duckdb/main/extension_util.hpp"
38
38
  #endif
39
39
 
40
40
  namespace duckdb {
@@ -116,6 +116,7 @@ struct ParquetWriteBindData : public TableFunctionData {
116
116
  vector<string> column_names;
117
117
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
118
118
  idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
119
+ ChildFieldIDs field_ids;
119
120
  };
120
121
 
121
122
  struct ParquetWriteGlobalState : public GlobalFunctionData {
@@ -124,10 +125,12 @@ struct ParquetWriteGlobalState : public GlobalFunctionData {
124
125
 
125
126
  struct ParquetWriteLocalState : public LocalFunctionData {
126
127
  explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
127
- : buffer(Allocator::Get(context), types) {
128
+ : buffer(context, types, ColumnDataAllocatorType::HYBRID) {
129
+ buffer.InitializeAppend(append_state);
128
130
  }
129
131
 
130
132
  ColumnDataCollection buffer;
133
+ ColumnDataAppendState append_state;
131
134
  };
132
135
 
133
136
  void ParquetOptions::Serialize(FieldWriter &writer) const {
@@ -294,7 +297,7 @@ public:
294
297
  ParquetOptions parquet_options(context);
295
298
  for (auto &kv : input.named_parameters) {
296
299
  auto loption = StringUtil::Lower(kv.first);
297
- if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options)) {
300
+ if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options, context)) {
298
301
  continue;
299
302
  }
300
303
  if (loption == "binary_as_string") {
@@ -303,9 +306,7 @@ public:
303
306
  parquet_options.file_row_number = BooleanValue::Get(kv.second);
304
307
  }
305
308
  }
306
- if (parquet_options.file_options.auto_detect_hive_partitioning) {
307
- parquet_options.file_options.hive_partitioning = MultiFileReaderOptions::AutoDetectHivePartitioning(files);
308
- }
309
+ parquet_options.file_options.AutoDetectHivePartitioning(files, context);
309
310
  return ParquetScanBindInternal(context, std::move(files), return_types, names, parquet_options);
310
311
  }
311
312
 
@@ -372,7 +373,7 @@ public:
372
373
  }
373
374
  MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options, bind_data.reader_bind,
374
375
  bind_data.types, bind_data.names, input.column_ids, input.filters,
375
- bind_data.files[0]);
376
+ bind_data.files[0], context);
376
377
  }
377
378
 
378
379
  result->column_ids = input.column_ids;
@@ -565,9 +566,10 @@ public:
565
566
  shared_ptr<ParquetReader> reader;
566
567
  try {
567
568
  reader = make_shared<ParquetReader>(context, file, pq_options);
568
- MultiFileReader::InitializeReader(
569
- *reader, bind_data.parquet_options.file_options, bind_data.reader_bind, bind_data.types,
570
- bind_data.names, parallel_state.column_ids, parallel_state.filters, bind_data.files.front());
569
+ MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options,
570
+ bind_data.reader_bind, bind_data.types, bind_data.names,
571
+ parallel_state.column_ids, parallel_state.filters,
572
+ bind_data.files.front(), context);
571
573
  } catch (...) {
572
574
  parallel_lock.lock();
573
575
  parallel_state.error_opening_file = true;
@@ -586,8 +588,157 @@ public:
586
588
  }
587
589
  };
588
590
 
591
+ static case_insensitive_map_t<LogicalType> GetChildNameToTypeMap(const LogicalType &type) {
592
+ case_insensitive_map_t<LogicalType> name_to_type_map;
593
+ switch (type.id()) {
594
+ case LogicalTypeId::LIST:
595
+ name_to_type_map.emplace("element", ListType::GetChildType(type));
596
+ break;
597
+ case LogicalTypeId::MAP:
598
+ name_to_type_map.emplace("key", MapType::KeyType(type));
599
+ name_to_type_map.emplace("value", MapType::ValueType(type));
600
+ break;
601
+ case LogicalTypeId::STRUCT:
602
+ for (auto &child_type : StructType::GetChildTypes(type)) {
603
+ if (child_type.first == FieldID::DUCKDB_FIELD_ID) {
604
+ throw BinderException("Cannot have column named \"%s\" with FIELD_IDS", FieldID::DUCKDB_FIELD_ID);
605
+ }
606
+ name_to_type_map.emplace(child_type);
607
+ }
608
+ break;
609
+ default: // LCOV_EXCL_START
610
+ throw InternalException("Unexpected type in GetChildNameToTypeMap");
611
+ } // LCOV_EXCL_STOP
612
+ return name_to_type_map;
613
+ }
614
+
615
+ static void GetChildNamesAndTypes(const LogicalType &type, vector<string> &child_names,
616
+ vector<LogicalType> &child_types) {
617
+ switch (type.id()) {
618
+ case LogicalTypeId::LIST:
619
+ child_names.emplace_back("element");
620
+ child_types.emplace_back(ListType::GetChildType(type));
621
+ break;
622
+ case LogicalTypeId::MAP:
623
+ child_names.emplace_back("key");
624
+ child_names.emplace_back("value");
625
+ child_types.emplace_back(MapType::KeyType(type));
626
+ child_types.emplace_back(MapType::ValueType(type));
627
+ break;
628
+ case LogicalTypeId::STRUCT:
629
+ for (auto &child_type : StructType::GetChildTypes(type)) {
630
+ child_names.emplace_back(child_type.first);
631
+ child_types.emplace_back(child_type.second);
632
+ }
633
+ break;
634
+ default: // LCOV_EXCL_START
635
+ throw InternalException("Unexpected type in GetChildNamesAndTypes");
636
+ } // LCOV_EXCL_STOP
637
+ }
638
+
639
+ static void GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
640
+ const vector<LogicalType> &sql_types) {
641
+ D_ASSERT(names.size() == sql_types.size());
642
+ for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
643
+ const auto &col_name = names[col_idx];
644
+ auto inserted = field_ids.ids->insert(make_pair(col_name, FieldID(field_id++)));
645
+ D_ASSERT(inserted.second);
646
+
647
+ const auto &col_type = sql_types[col_idx];
648
+ if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
649
+ col_type.id() != LogicalTypeId::STRUCT) {
650
+ continue;
651
+ }
652
+
653
+ // Cannot use GetChildNameToTypeMap here because we lose order, and we want to generate depth-first
654
+ vector<string> child_names;
655
+ vector<LogicalType> child_types;
656
+ GetChildNamesAndTypes(col_type, child_names, child_types);
657
+
658
+ GenerateFieldIDs(inserted.first->second.child_field_ids, field_id, child_names, child_types);
659
+ }
660
+ }
661
+
662
+ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
663
+ unordered_set<uint32_t> &unique_field_ids,
664
+ const case_insensitive_map_t<LogicalType> &name_to_type_map) {
665
+ const auto &struct_type = field_ids_value.type();
666
+ if (struct_type.id() != LogicalTypeId::STRUCT) {
667
+ throw BinderException(
668
+ "Expected FIELD_IDS to be a STRUCT, e.g., {col1: 42, col2: {%s: 43, nested_col: 44}, col3: 44}",
669
+ FieldID::DUCKDB_FIELD_ID);
670
+ }
671
+ const auto &struct_children = StructValue::GetChildren(field_ids_value);
672
+ D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size());
673
+ for (idx_t i = 0; i < struct_children.size(); i++) {
674
+ const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i));
675
+ if (col_name == FieldID::DUCKDB_FIELD_ID) {
676
+ continue;
677
+ }
678
+
679
+ auto it = name_to_type_map.find(col_name);
680
+ if (it == name_to_type_map.end()) {
681
+ string names;
682
+ for (const auto &name : name_to_type_map) {
683
+ if (!names.empty()) {
684
+ names += ", ";
685
+ }
686
+ names += name.first;
687
+ }
688
+ throw BinderException("Column name \"%s\" specified in FIELD_IDS not found. Available column names: [%s]",
689
+ col_name, names);
690
+ }
691
+ D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys
692
+
693
+ const auto &child_value = struct_children[i];
694
+ const auto &child_type = child_value.type();
695
+ optional_ptr<const Value> field_id_value;
696
+ optional_ptr<const Value> child_field_ids_value;
697
+
698
+ if (child_type.id() == LogicalTypeId::STRUCT) {
699
+ const auto &nested_children = StructValue::GetChildren(child_value);
700
+ D_ASSERT(StructType::GetChildTypes(child_type).size() == nested_children.size());
701
+ for (idx_t nested_i = 0; nested_i < nested_children.size(); nested_i++) {
702
+ const auto &field_id_or_nested_col = StructType::GetChildName(child_type, nested_i);
703
+ if (field_id_or_nested_col == FieldID::DUCKDB_FIELD_ID) {
704
+ field_id_value = &nested_children[nested_i];
705
+ } else {
706
+ child_field_ids_value = &child_value;
707
+ }
708
+ }
709
+ } else {
710
+ field_id_value = &child_value;
711
+ }
712
+
713
+ FieldID field_id;
714
+ if (field_id_value) {
715
+ Value field_id_integer_value = field_id_value->DefaultCastAs(LogicalType::INTEGER);
716
+ const uint32_t field_id_int = IntegerValue::Get(field_id_integer_value);
717
+ if (!unique_field_ids.insert(field_id_int).second) {
718
+ throw BinderException("Duplicate field_id %s found in FIELD_IDS", field_id_integer_value.ToString());
719
+ }
720
+ field_id = FieldID(field_id_int);
721
+ }
722
+ auto inserted = field_ids.ids->insert(make_pair(col_name, std::move(field_id)));
723
+ D_ASSERT(inserted.second);
724
+
725
+ if (child_field_ids_value) {
726
+ const auto &col_type = it->second;
727
+ if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
728
+ col_type.id() != LogicalTypeId::STRUCT) {
729
+ throw BinderException("Column \"%s\" with type \"%s\" cannot have a nested FIELD_IDS specification",
730
+ col_name, LogicalTypeIdToString(col_type.id()));
731
+ }
732
+
733
+ GetFieldIDs(*child_field_ids_value, inserted.first->second.child_field_ids, unique_field_ids,
734
+ GetChildNameToTypeMap(col_type));
735
+ }
736
+ }
737
+ }
738
+
589
739
  unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
590
740
  vector<LogicalType> &sql_types) {
741
+ D_ASSERT(names.size() == sql_types.size());
591
742
  auto bind_data = make_uniq<ParquetWriteBindData>();
592
743
  for (auto &option : info.options) {
593
744
  auto loption = StringUtil::Lower(option.first);
@@ -610,7 +761,27 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
610
761
  continue;
611
762
  }
612
763
  }
613
- throw ParserException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
764
+ throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
765
+ } else if (loption == "field_ids") {
766
+ if (option.second.size() != 1) {
767
+ throw BinderException("FIELD_IDS requires exactly one argument");
768
+ }
769
+ if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
770
+ StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
771
+ idx_t field_id = 0;
772
+ GenerateFieldIDs(bind_data->field_ids, field_id, names, sql_types);
773
+ } else {
774
+ unordered_set<uint32_t> unique_field_ids;
775
+ case_insensitive_map_t<LogicalType> name_to_type_map;
776
+ for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
777
+ if (names[col_idx] == FieldID::DUCKDB_FIELD_ID) {
778
+ throw BinderException("Cannot have a column named \"%s\" when writing FIELD_IDS",
779
+ FieldID::DUCKDB_FIELD_ID);
780
+ }
781
+ name_to_type_map.emplace(names[col_idx], sql_types[col_idx]);
782
+ }
783
+ GetFieldIDs(option.second[0], bind_data->field_ids, unique_field_ids, name_to_type_map);
784
+ }
614
785
  } else {
615
786
  throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
616
787
  }
@@ -626,8 +797,8 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
626
797
  auto &parquet_bind = bind_data.Cast<ParquetWriteBindData>();
627
798
 
628
799
  auto &fs = FileSystem::GetFileSystem(context);
629
- global_state->writer =
630
- make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec);
800
+ global_state->writer = make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names,
801
+ parquet_bind.codec, parquet_bind.field_ids.Copy());
631
802
  return std::move(global_state);
632
803
  }
633
804
 
@@ -638,12 +809,12 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
638
809
  auto &local_state = lstate.Cast<ParquetWriteLocalState>();
639
810
 
640
811
  // append data to the local (buffered) chunk collection
641
- local_state.buffer.Append(input);
812
+ local_state.buffer.Append(local_state.append_state, input);
642
813
  if (local_state.buffer.Count() > bind_data.row_group_size) {
643
814
  // if the chunk collection exceeds a certain size we flush it to the parquet file
815
+ local_state.append_state.current_chunk_state.handles.clear();
644
816
  global_state.writer->Flush(local_state.buffer);
645
- // and reset the buffer
646
- local_state.buffer.Reset();
817
+ local_state.buffer.InitializeAppend(local_state.append_state);
647
818
  }
648
819
  }
649
820
 
@@ -48,7 +48,7 @@ using duckdb_parquet::format::SchemaElement;
48
48
  using duckdb_parquet::format::Statistics;
49
49
  using duckdb_parquet::format::Type;
50
50
 
51
- static duckdb::unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
51
+ static unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
52
52
  CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, bool prefetch_mode) {
53
53
  auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, prefetch_mode);
54
54
  return make_uniq<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>>(std::move(transport));
@@ -271,7 +271,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
271
271
  }
272
272
  if (s_ele.__isset.num_children && s_ele.num_children > 0) { // inner node
273
273
  child_list_t<LogicalType> child_types;
274
- vector<duckdb::unique_ptr<ColumnReader>> child_readers;
274
+ vector<unique_ptr<ColumnReader>> child_readers;
275
275
 
276
276
  idx_t c_idx = 0;
277
277
  while (c_idx < (idx_t)s_ele.num_children) {
@@ -287,7 +287,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
287
287
  c_idx++;
288
288
  }
289
289
  D_ASSERT(!child_types.empty());
290
- duckdb::unique_ptr<ColumnReader> result;
290
+ unique_ptr<ColumnReader> result;
291
291
  LogicalType result_type;
292
292
 
293
293
  bool is_repeated = repetition_type == FieldRepetitionType::REPEATED;
@@ -429,7 +429,7 @@ ParquetOptions::ParquetOptions(ClientContext &context) {
429
429
 
430
430
  ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, ParquetOptions parquet_options_p)
431
431
  : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
432
- parquet_options(parquet_options_p) {
432
+ parquet_options(std::move(parquet_options_p)) {
433
433
  file_name = std::move(file_name_p);
434
434
  file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
435
435
  if (!file_handle->CanSeek()) {
@@ -457,7 +457,7 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
457
457
  ParquetReader::ParquetReader(ClientContext &context_p, ParquetOptions parquet_options_p,
458
458
  shared_ptr<ParquetFileMetadataCache> metadata_p)
459
459
  : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
460
- metadata(std::move(metadata_p)), parquet_options(parquet_options_p) {
460
+ metadata(std::move(metadata_p)), parquet_options(std::move(parquet_options_p)) {
461
461
  InitializeSchema();
462
462
  }
463
463
 
@@ -1,12 +1,13 @@
1
1
  #include "parquet_statistics.hpp"
2
+
3
+ #include "duckdb.hpp"
2
4
  #include "parquet_decimal_utils.hpp"
3
5
  #include "parquet_timestamp.hpp"
4
6
  #include "string_column_reader.hpp"
5
- #include "duckdb.hpp"
6
7
  #ifndef DUCKDB_AMALGAMATION
7
8
  #include "duckdb/common/types/blob.hpp"
8
- #include "duckdb/common/types/value.hpp"
9
9
  #include "duckdb/common/types/time.hpp"
10
+ #include "duckdb/common/types/value.hpp"
10
11
  #endif
11
12
 
12
13
  namespace duckdb {
@@ -14,9 +15,9 @@ namespace duckdb {
14
15
  using duckdb_parquet::format::ConvertedType;
15
16
  using duckdb_parquet::format::Type;
16
17
 
17
- static duckdb::unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type,
18
- const duckdb_parquet::format::SchemaElement &schema_ele,
19
- const duckdb_parquet::format::Statistics &parquet_stats) {
18
+ static unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type,
19
+ const duckdb_parquet::format::SchemaElement &schema_ele,
20
+ const duckdb_parquet::format::Statistics &parquet_stats) {
20
21
  auto stats = NumericStats::CreateUnknown(type);
21
22
 
22
23
  // for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and
@@ -226,7 +227,7 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
226
227
  return nullptr;
227
228
  }
228
229
  auto &parquet_stats = column_chunk.meta_data.statistics;
229
- duckdb::unique_ptr<BaseStatistics> row_group_stats;
230
+ unique_ptr<BaseStatistics> row_group_stats;
230
231
 
231
232
  switch (type.id()) {
232
233
  case LogicalTypeId::UTINYINT: