duckdb 0.8.2-dev150.0 → 0.8.2-dev1549.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (489)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  61. package/src/duckdb/src/common/file_system.cpp +19 -0
  62. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  63. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  64. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  65. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  66. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  67. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/bit.cpp +51 -0
  73. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  74. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  75. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  76. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  77. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  78. package/src/duckdb/src/common/types/date.cpp +9 -0
  79. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  80. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  83. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  84. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  85. package/src/duckdb/src/common/types.cpp +8 -655
  86. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  87. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  88. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  94. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  95. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  96. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  97. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  98. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  99. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  100. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  102. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  103. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  104. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  105. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  106. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  107. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  108. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  109. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  110. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  111. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  112. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  113. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  114. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  115. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  116. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  117. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  118. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  119. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  121. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  122. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  124. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  125. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  126. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  127. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  128. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  129. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  131. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  132. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  133. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  134. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  135. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  136. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  137. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  138. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  139. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  140. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  141. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +13 -22
  142. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  143. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  144. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  145. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  146. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  147. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  148. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  149. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  150. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  151. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  152. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  153. package/src/duckdb/src/function/function.cpp +3 -1
  154. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  155. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  156. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  157. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  158. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  159. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  160. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  161. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  162. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  163. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  164. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  165. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  166. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  168. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  169. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  170. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  171. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  172. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  173. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  174. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  176. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  177. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  178. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  179. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  180. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  181. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  182. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  183. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  184. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  185. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  186. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  187. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  188. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  189. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  190. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  191. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  192. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  193. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  194. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  195. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  196. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  197. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  198. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  199. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  200. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  201. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  202. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  204. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  207. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  211. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  221. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  222. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  226. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  227. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  228. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  229. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  230. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  231. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  232. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  233. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  234. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  235. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  236. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  237. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  238. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  239. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  240. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  241. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  242. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  245. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  246. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  247. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  248. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  249. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  250. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  251. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  252. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  253. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  254. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  255. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  257. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  258. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  260. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  261. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  262. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  263. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  264. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  265. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  266. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  267. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  269. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  270. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  271. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  272. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  273. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  274. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  275. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  277. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  279. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  280. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  281. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  282. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  283. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  284. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  286. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  287. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  295. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  296. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  297. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  298. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  299. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  300. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  302. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  303. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  304. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  306. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  307. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  308. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  309. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  310. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  311. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  312. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  313. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  314. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  315. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  316. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  317. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  318. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  319. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  320. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  321. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  322. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  323. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  324. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  325. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  326. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  327. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  328. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  329. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  330. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  331. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  332. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  333. package/src/duckdb/src/include/duckdb.h +28 -0
  334. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  335. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  336. package/src/duckdb/src/main/config.cpp +4 -0
  337. package/src/duckdb/src/main/database.cpp +1 -1
  338. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  339. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  340. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  341. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  342. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  343. package/src/duckdb/src/main/relation.cpp +6 -5
  344. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  345. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  346. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  347. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  348. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  349. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  350. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  351. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  352. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  353. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  354. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  355. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  356. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  357. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  358. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  359. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  360. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  361. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  362. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  363. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  364. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  365. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  366. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  367. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  368. package/src/duckdb/src/parallel/executor.cpp +15 -0
  369. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  370. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  371. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  372. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  373. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  374. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  375. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  376. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  377. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  378. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  379. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  380. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  381. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  382. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  383. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  384. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  385. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  386. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  387. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  388. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  389. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  390. package/src/duckdb/src/parser/parser.cpp +8 -2
  391. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  392. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  393. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  394. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  395. package/src/duckdb/src/parser/query_node.cpp +15 -37
  396. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  397. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  398. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  399. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  400. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  401. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  402. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  403. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  404. package/src/duckdb/src/parser/tableref.cpp +0 -44
  405. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  406. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  407. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  408. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  409. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  410. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  411. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  412. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  413. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  414. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  415. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  416. package/src/duckdb/src/parser/transformer.cpp +15 -0
  417. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  418. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  419. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  420. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  421. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  422. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  423. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  424. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  425. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  426. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  427. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  428. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  429. package/src/duckdb/src/planner/binder.cpp +5 -0
  430. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  431. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  432. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  433. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  434. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  435. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  436. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  437. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  438. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  439. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  440. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  441. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  442. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  443. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  444. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  445. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  446. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  447. package/src/duckdb/src/storage/data_table.cpp +1 -1
  448. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  449. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  450. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  451. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  452. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  453. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  454. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  455. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  456. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  457. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  458. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  459. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  460. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  461. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  462. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  463. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  464. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  465. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  466. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  467. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  468. package/src/duckdb/ub_src_common.cpp +2 -0
  469. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  470. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  471. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  472. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  473. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  474. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  475. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  476. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  477. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  478. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  479. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  480. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  481. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  482. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  483. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  484. package/src/statement.cpp +10 -3
  485. package/test/test_all_types.test.ts +233 -0
  486. package/tsconfig.json +1 -0
  487. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  488. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  489. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -15,7 +15,7 @@ public:
15
15
  // some derivatives
16
16
  D_ASSERT(miniblocks_per_block > 0);
17
17
  values_per_miniblock = block_value_count / miniblocks_per_block;
18
- miniblock_bit_widths = duckdb::unique_ptr<uint8_t[]>(new data_t[miniblocks_per_block]);
18
+ miniblock_bit_widths = unique_ptr<uint8_t[]>(new data_t[miniblocks_per_block]);
19
19
 
20
20
  // init state to something sane
21
21
  values_left_in_block = 0;
@@ -96,7 +96,7 @@ public:
96
96
  if (values_left_in_miniblock == 0) {
97
97
  return;
98
98
  }
99
- auto data = duckdb::unique_ptr<uint32_t[]>(new uint32_t[values_left_in_miniblock]);
99
+ auto data = unique_ptr<uint32_t[]>(new uint32_t[values_left_in_miniblock]);
100
100
  GetBatch<uint32_t>(data_ptr_cast(data.get()), values_left_in_miniblock);
101
101
  }
102
102
 
@@ -112,7 +112,7 @@ private:
112
112
  int64_t start_value;
113
113
  idx_t values_per_miniblock;
114
114
 
115
- duckdb::unique_ptr<uint8_t[]> miniblock_bit_widths;
115
+ unique_ptr<uint8_t[]> miniblock_bit_widths;
116
116
  idx_t values_left_in_block;
117
117
  idx_t values_left_in_miniblock;
118
118
  idx_t miniblock_offset;
@@ -35,9 +35,9 @@ public:
35
35
  return res;
36
36
  }
37
37
 
38
- static duckdb::unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
39
- const SchemaElement &schema_p, idx_t file_idx_p,
40
- idx_t max_define, idx_t max_repeat);
38
+ static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
39
+ const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define,
40
+ idx_t max_repeat);
41
41
  };
42
42
 
43
43
  } // namespace duckdb
@@ -20,14 +20,14 @@ class ParquetFileMetadataCache : public ObjectCacheEntry {
20
20
  public:
21
21
  ParquetFileMetadataCache() : metadata(nullptr) {
22
22
  }
23
- ParquetFileMetadataCache(duckdb::unique_ptr<duckdb_parquet::format::FileMetaData> file_metadata, time_t r_time)
23
+ ParquetFileMetadataCache(unique_ptr<duckdb_parquet::format::FileMetaData> file_metadata, time_t r_time)
24
24
  : metadata(std::move(file_metadata)), read_time(r_time) {
25
25
  }
26
26
 
27
27
  ~ParquetFileMetadataCache() override = default;
28
28
 
29
29
  //! Parquet file metadata
30
- duckdb::unique_ptr<const duckdb_parquet::format::FileMetaData> metadata;
30
+ unique_ptr<const duckdb_parquet::format::FileMetaData> metadata;
31
31
 
32
32
  //! read time
33
33
  time_t read_time;
@@ -15,8 +15,8 @@ struct LogicalType;
15
15
 
16
16
  struct ParquetStatisticsUtils {
17
17
 
18
- static duckdb::unique_ptr<BaseStatistics>
19
- TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type, const ColumnChunk &column_chunk);
18
+ static unique_ptr<BaseStatistics> TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type,
19
+ const ColumnChunk &column_chunk);
20
20
 
21
21
  static Value ConvertValue(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele,
22
22
  const std::string &stats);
@@ -37,8 +37,7 @@ public:
37
37
  * @param throwIfNotFound fail if a stream is required and not found
38
38
  * @return the new stream
39
39
  */
40
- virtual duckdb::unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound)
41
- const = 0;
40
+ virtual unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0;
42
41
 
43
42
  /**
44
43
  * visit all streams of given node and execute visitor logic
@@ -63,7 +62,7 @@ public:
63
62
  * Get the RowGroupIndex.
64
63
  * @return a vector of RowIndex belonging to the stripe
65
64
  */
66
- virtual duckdb::unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
65
+ virtual unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
67
66
 
68
67
  /**
69
68
  * Get stride index provider which is used by string dictionary reader to
@@ -84,8 +83,7 @@ public:
84
83
  * @param throwIfNotFound fail if a stream is required and not found
85
84
  * @return the new stream
86
85
  */
87
- virtual duckdb::unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si,
88
- bool throwIfNotFound) const = 0;
86
+ virtual unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0;
89
87
 
90
88
  /**
91
89
  * visit all streams of given node and execute visitor logic
@@ -110,7 +108,7 @@ public:
110
108
  * Get the RowGroupIndex.
111
109
  * @return a vector of RowIndex belonging to the stripe
112
110
  */
113
- virtual duckdb::unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
111
+ virtual unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
114
112
 
115
113
  /**
116
114
  * Get stride index provider which is used by string dictionary reader to
@@ -157,10 +155,10 @@ public:
157
155
  }
158
156
 
159
157
  // Creates a reader for the given stripe.
160
- static duckdb::unique_ptr<SelectiveColumnReader>
161
- build(const std::shared_ptr<const dwio::common::TypeWithId> &requestedType,
162
- const std::shared_ptr<const dwio::common::TypeWithId> &dataType, StripeStreams &stripe,
163
- common::ScanSpec *scanSpec, uint32_t sequence = 0);
158
+ static unique_ptr<SelectiveColumnReader> build(const std::shared_ptr<const dwio::common::TypeWithId> &requestedType,
159
+ const std::shared_ptr<const dwio::common::TypeWithId> &dataType,
160
+ StripeStreams &stripe, common::ScanSpec *scanSpec,
161
+ uint32_t sequence = 0);
164
162
 
165
163
  // Seeks to offset and reads the rows in 'rows' and applies
166
164
  // filters and value processing as given by 'scanSpec supplied at
@@ -336,7 +334,7 @@ public:
336
334
  return kind_;
337
335
  }
338
336
 
339
- virtual duckdb::unique_ptr<Filter> clone() const = 0;
337
+ virtual unique_ptr<Filter> clone() const = 0;
340
338
 
341
339
  /**
342
340
  * A filter becomes non-deterministic when applies to nested column,
@@ -17,8 +17,8 @@
17
17
  #include "duckdb/common/types/column/column_data_collection.hpp"
18
18
  #endif
19
19
 
20
- #include "parquet_types.h"
21
20
  #include "column_writer.hpp"
21
+ #include "parquet_types.h"
22
22
  #include "thrift/protocol/TCompactProtocol.h"
23
23
 
24
24
  namespace duckdb {
@@ -27,13 +27,31 @@ class FileOpener;
27
27
 
28
28
  struct PreparedRowGroup {
29
29
  duckdb_parquet::format::RowGroup row_group;
30
- vector<duckdb::unique_ptr<ColumnWriterState>> states;
30
+ vector<unique_ptr<ColumnWriterState>> states;
31
+ vector<shared_ptr<StringHeap>> heaps;
32
+ };
33
+
34
+ struct FieldID;
35
+ struct ChildFieldIDs {
36
+ ChildFieldIDs();
37
+ ChildFieldIDs Copy() const;
38
+ unique_ptr<case_insensitive_map_t<FieldID>> ids;
39
+ };
40
+
41
+ struct FieldID {
42
+ static constexpr const auto DUCKDB_FIELD_ID = "__duckdb_field_id";
43
+ FieldID();
44
+ explicit FieldID(int32_t field_id);
45
+ FieldID Copy() const;
46
+ bool set;
47
+ int32_t field_id;
48
+ ChildFieldIDs child_field_ids;
31
49
  };
32
50
 
33
51
  class ParquetWriter {
34
52
  public:
35
53
  ParquetWriter(FileSystem &fs, string file_name, vector<LogicalType> types, vector<string> names,
36
- duckdb_parquet::format::CompressionCodec::type codec);
54
+ duckdb_parquet::format::CompressionCodec::type codec, ChildFieldIDs field_ids);
37
55
 
38
56
  public:
39
57
  void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
@@ -62,13 +80,14 @@ private:
62
80
  vector<LogicalType> sql_types;
63
81
  vector<string> column_names;
64
82
  duckdb_parquet::format::CompressionCodec::type codec;
83
+ ChildFieldIDs field_ids;
65
84
 
66
- duckdb::unique_ptr<BufferedFileWriter> writer;
85
+ unique_ptr<BufferedFileWriter> writer;
67
86
  shared_ptr<duckdb_apache::thrift::protocol::TProtocol> protocol;
68
87
  duckdb_parquet::format::FileMetaData file_meta_data;
69
88
  std::mutex lock;
70
89
 
71
- vector<duckdb::unique_ptr<ColumnWriter>> column_writers;
90
+ vector<unique_ptr<ColumnWriter>> column_writers;
72
91
  };
73
92
 
74
93
  } // namespace duckdb
@@ -28,7 +28,7 @@ public:
28
28
  StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p,
29
29
  idx_t max_define_p, idx_t max_repeat_p);
30
30
 
31
- duckdb::unique_ptr<string_t[]> dict_strings;
31
+ unique_ptr<string_t[]> dict_strings;
32
32
  idx_t fixed_width_string_length;
33
33
  idx_t delta_offset = 0;
34
34
 
@@ -19,10 +19,9 @@ public:
19
19
 
20
20
  public:
21
21
  StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p,
22
- idx_t max_define_p, idx_t max_repeat_p,
23
- vector<duckdb::unique_ptr<ColumnReader>> child_readers_p);
22
+ idx_t max_define_p, idx_t max_repeat_p, vector<unique_ptr<ColumnReader>> child_readers_p);
24
23
 
25
- vector<duckdb::unique_ptr<ColumnReader>> child_readers;
24
+ vector<unique_ptr<ColumnReader>> child_readers;
26
25
 
27
26
  public:
28
27
  ColumnReader *GetChildReader(idx_t child_idx);
@@ -17,13 +17,13 @@ namespace duckdb {
17
17
 
18
18
  class ZStdFileSystem : public CompressedFileSystem {
19
19
  public:
20
- duckdb::unique_ptr<FileHandle> OpenCompressedFile(duckdb::unique_ptr<FileHandle> handle, bool write) override;
20
+ unique_ptr<FileHandle> OpenCompressedFile(unique_ptr<FileHandle> handle, bool write) override;
21
21
 
22
22
  std::string GetName() const override {
23
23
  return "ZStdFileSystem";
24
24
  }
25
25
 
26
- duckdb::unique_ptr<StreamWrapper> CreateStream() override;
26
+ unique_ptr<StreamWrapper> CreateStream() override;
27
27
  idx_t InBufferSize() override;
28
28
  idx_t OutBufferSize() override;
29
29
  };
@@ -1,7 +1,8 @@
1
1
  #define DUCKDB_EXTENSION_MAIN
2
2
 
3
- #include "duckdb.hpp"
4
3
  #include "parquet_extension.hpp"
4
+
5
+ #include "duckdb.hpp"
5
6
  #include "parquet_metadata.hpp"
6
7
  #include "parquet_reader.hpp"
7
8
  #include "parquet_writer.hpp"
@@ -14,15 +15,18 @@
14
15
  #include <vector>
15
16
  #ifndef DUCKDB_AMALGAMATION
16
17
  #include "duckdb/catalog/catalog.hpp"
18
+ #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
17
19
  #include "duckdb/common/constants.hpp"
18
20
  #include "duckdb/common/enums/file_compression_type.hpp"
19
21
  #include "duckdb/common/field_writer.hpp"
20
22
  #include "duckdb/common/file_system.hpp"
23
+ #include "duckdb/common/multi_file_reader.hpp"
21
24
  #include "duckdb/common/types/chunk_collection.hpp"
22
25
  #include "duckdb/function/copy_function.hpp"
23
26
  #include "duckdb/function/table_function.hpp"
24
27
  #include "duckdb/main/client_context.hpp"
25
28
  #include "duckdb/main/config.hpp"
29
+ #include "duckdb/main/extension_util.hpp"
26
30
  #include "duckdb/parser/expression/constant_expression.hpp"
27
31
  #include "duckdb/parser/expression/function_expression.hpp"
28
32
  #include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
@@ -30,10 +34,7 @@
30
34
  #include "duckdb/parser/tableref/table_function_ref.hpp"
31
35
  #include "duckdb/planner/operator/logical_get.hpp"
32
36
  #include "duckdb/storage/statistics/base_statistics.hpp"
33
- #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
34
- #include "duckdb/common/multi_file_reader.hpp"
35
37
  #include "duckdb/storage/table/row_group.hpp"
36
- #include "duckdb/main/extension_util.hpp"
37
38
  #endif
38
39
 
39
40
  namespace duckdb {
@@ -115,6 +116,7 @@ struct ParquetWriteBindData : public TableFunctionData {
115
116
  vector<string> column_names;
116
117
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
117
118
  idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
119
+ ChildFieldIDs field_ids;
118
120
  };
119
121
 
120
122
  struct ParquetWriteGlobalState : public GlobalFunctionData {
@@ -123,10 +125,12 @@ struct ParquetWriteGlobalState : public GlobalFunctionData {
123
125
 
124
126
  struct ParquetWriteLocalState : public LocalFunctionData {
125
127
  explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
126
- : buffer(Allocator::Get(context), types) {
128
+ : buffer(context, types, ColumnDataAllocatorType::HYBRID) {
129
+ buffer.InitializeAppend(append_state);
127
130
  }
128
131
 
129
132
  ColumnDataCollection buffer;
133
+ ColumnDataAppendState append_state;
130
134
  };
131
135
 
132
136
  void ParquetOptions::Serialize(FieldWriter &writer) const {
@@ -171,7 +175,6 @@ public:
171
175
  table_function.serialize = ParquetScanSerialize;
172
176
  table_function.deserialize = ParquetScanDeserialize;
173
177
  table_function.get_batch_info = ParquetGetBatchInfo;
174
-
175
178
  table_function.projection_pushdown = true;
176
179
  table_function.filter_pushdown = true;
177
180
  table_function.filter_prune = true;
@@ -293,7 +296,7 @@ public:
293
296
  ParquetOptions parquet_options(context);
294
297
  for (auto &kv : input.named_parameters) {
295
298
  auto loption = StringUtil::Lower(kv.first);
296
- if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options)) {
299
+ if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options, context)) {
297
300
  continue;
298
301
  }
299
302
  if (loption == "binary_as_string") {
@@ -302,9 +305,7 @@ public:
302
305
  parquet_options.file_row_number = BooleanValue::Get(kv.second);
303
306
  }
304
307
  }
305
- if (parquet_options.file_options.auto_detect_hive_partitioning) {
306
- parquet_options.file_options.hive_partitioning = MultiFileReaderOptions::AutoDetectHivePartitioning(files);
307
- }
308
+ parquet_options.file_options.AutoDetectHivePartitioning(files, context);
308
309
  return ParquetScanBindInternal(context, std::move(files), return_types, names, parquet_options);
309
310
  }
310
311
 
@@ -371,7 +372,7 @@ public:
371
372
  }
372
373
  MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options, bind_data.reader_bind,
373
374
  bind_data.types, bind_data.names, input.column_ids, input.filters,
374
- bind_data.files[0]);
375
+ bind_data.files[0], context);
375
376
  }
376
377
 
377
378
  result->column_ids = input.column_ids;
@@ -518,6 +519,7 @@ public:
518
519
  static void ParquetComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
519
520
  vector<unique_ptr<Expression>> &filters) {
520
521
  auto &data = bind_data_p->Cast<ParquetReadBindData>();
522
+
521
523
  auto reset_reader = MultiFileReader::ComplexFilterPushdown(context, data.files,
522
524
  data.parquet_options.file_options, get, filters);
523
525
  if (reset_reader) {
@@ -564,9 +566,10 @@ public:
564
566
  shared_ptr<ParquetReader> reader;
565
567
  try {
566
568
  reader = make_shared<ParquetReader>(context, file, pq_options);
567
- MultiFileReader::InitializeReader(
568
- *reader, bind_data.parquet_options.file_options, bind_data.reader_bind, bind_data.types,
569
- bind_data.names, parallel_state.column_ids, parallel_state.filters, bind_data.files.front());
569
+ MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options,
570
+ bind_data.reader_bind, bind_data.types, bind_data.names,
571
+ parallel_state.column_ids, parallel_state.filters,
572
+ bind_data.files.front(), context);
570
573
  } catch (...) {
571
574
  parallel_lock.lock();
572
575
  parallel_state.error_opening_file = true;
@@ -585,8 +588,157 @@ public:
585
588
  }
586
589
  };
587
590
 
591
+ static case_insensitive_map_t<LogicalType> GetChildNameToTypeMap(const LogicalType &type) {
592
+ case_insensitive_map_t<LogicalType> name_to_type_map;
593
+ switch (type.id()) {
594
+ case LogicalTypeId::LIST:
595
+ name_to_type_map.emplace("element", ListType::GetChildType(type));
596
+ break;
597
+ case LogicalTypeId::MAP:
598
+ name_to_type_map.emplace("key", MapType::KeyType(type));
599
+ name_to_type_map.emplace("value", MapType::ValueType(type));
600
+ break;
601
+ case LogicalTypeId::STRUCT:
602
+ for (auto &child_type : StructType::GetChildTypes(type)) {
603
+ if (child_type.first == FieldID::DUCKDB_FIELD_ID) {
604
+ throw BinderException("Cannot have column named \"%s\" with FIELD_IDS", FieldID::DUCKDB_FIELD_ID);
605
+ }
606
+ name_to_type_map.emplace(child_type);
607
+ }
608
+ break;
609
+ default: // LCOV_EXCL_START
610
+ throw InternalException("Unexpected type in GetChildNameToTypeMap");
611
+ } // LCOV_EXCL_STOP
612
+ return name_to_type_map;
613
+ }
614
+
615
+ static void GetChildNamesAndTypes(const LogicalType &type, vector<string> &child_names,
616
+ vector<LogicalType> &child_types) {
617
+ switch (type.id()) {
618
+ case LogicalTypeId::LIST:
619
+ child_names.emplace_back("element");
620
+ child_types.emplace_back(ListType::GetChildType(type));
621
+ break;
622
+ case LogicalTypeId::MAP:
623
+ child_names.emplace_back("key");
624
+ child_names.emplace_back("value");
625
+ child_types.emplace_back(MapType::KeyType(type));
626
+ child_types.emplace_back(MapType::ValueType(type));
627
+ break;
628
+ case LogicalTypeId::STRUCT:
629
+ for (auto &child_type : StructType::GetChildTypes(type)) {
630
+ child_names.emplace_back(child_type.first);
631
+ child_types.emplace_back(child_type.second);
632
+ }
633
+ break;
634
+ default: // LCOV_EXCL_START
635
+ throw InternalException("Unexpected type in GetChildNamesAndTypes");
636
+ } // LCOV_EXCL_STOP
637
+ }
638
+
639
+ static void GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
640
+ const vector<LogicalType> &sql_types) {
641
+ D_ASSERT(names.size() == sql_types.size());
642
+ for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
643
+ const auto &col_name = names[col_idx];
644
+ auto inserted = field_ids.ids->insert(make_pair(col_name, FieldID(field_id++)));
645
+ D_ASSERT(inserted.second);
646
+
647
+ const auto &col_type = sql_types[col_idx];
648
+ if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
649
+ col_type.id() != LogicalTypeId::STRUCT) {
650
+ continue;
651
+ }
652
+
653
+ // Cannot use GetChildNameToTypeMap here because we lose order, and we want to generate depth-first
654
+ vector<string> child_names;
655
+ vector<LogicalType> child_types;
656
+ GetChildNamesAndTypes(col_type, child_names, child_types);
657
+
658
+ GenerateFieldIDs(inserted.first->second.child_field_ids, field_id, child_names, child_types);
659
+ }
660
+ }
661
+
662
+ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
663
+ unordered_set<uint32_t> &unique_field_ids,
664
+ const case_insensitive_map_t<LogicalType> &name_to_type_map) {
665
+ const auto &struct_type = field_ids_value.type();
666
+ if (struct_type.id() != LogicalTypeId::STRUCT) {
667
+ throw BinderException(
668
+ "Expected FIELD_IDS to be a STRUCT, e.g., {col1: 42, col2: {%s: 43, nested_col: 44}, col3: 44}",
669
+ FieldID::DUCKDB_FIELD_ID);
670
+ }
671
+ const auto &struct_children = StructValue::GetChildren(field_ids_value);
672
+ D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size());
673
+ for (idx_t i = 0; i < struct_children.size(); i++) {
674
+ const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i));
675
+ if (col_name == FieldID::DUCKDB_FIELD_ID) {
676
+ continue;
677
+ }
678
+
679
+ auto it = name_to_type_map.find(col_name);
680
+ if (it == name_to_type_map.end()) {
681
+ string names;
682
+ for (const auto &name : name_to_type_map) {
683
+ if (!names.empty()) {
684
+ names += ", ";
685
+ }
686
+ names += name.first;
687
+ }
688
+ throw BinderException("Column name \"%s\" specified in FIELD_IDS not found. Available column names: [%s]",
689
+ col_name, names);
690
+ }
691
+ D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys
692
+
693
+ const auto &child_value = struct_children[i];
694
+ const auto &child_type = child_value.type();
695
+ optional_ptr<const Value> field_id_value;
696
+ optional_ptr<const Value> child_field_ids_value;
697
+
698
+ if (child_type.id() == LogicalTypeId::STRUCT) {
699
+ const auto &nested_children = StructValue::GetChildren(child_value);
700
+ D_ASSERT(StructType::GetChildTypes(child_type).size() == nested_children.size());
701
+ for (idx_t nested_i = 0; nested_i < nested_children.size(); nested_i++) {
702
+ const auto &field_id_or_nested_col = StructType::GetChildName(child_type, nested_i);
703
+ if (field_id_or_nested_col == FieldID::DUCKDB_FIELD_ID) {
704
+ field_id_value = &nested_children[nested_i];
705
+ } else {
706
+ child_field_ids_value = &child_value;
707
+ }
708
+ }
709
+ } else {
710
+ field_id_value = &child_value;
711
+ }
712
+
713
+ FieldID field_id;
714
+ if (field_id_value) {
715
+ Value field_id_integer_value = field_id_value->DefaultCastAs(LogicalType::INTEGER);
716
+ const uint32_t field_id_int = IntegerValue::Get(field_id_integer_value);
717
+ if (!unique_field_ids.insert(field_id_int).second) {
718
+ throw BinderException("Duplicate field_id %s found in FIELD_IDS", field_id_integer_value.ToString());
719
+ }
720
+ field_id = FieldID(field_id_int);
721
+ }
722
+ auto inserted = field_ids.ids->insert(make_pair(col_name, std::move(field_id)));
723
+ D_ASSERT(inserted.second);
724
+
725
+ if (child_field_ids_value) {
726
+ const auto &col_type = it->second;
727
+ if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
728
+ col_type.id() != LogicalTypeId::STRUCT) {
729
+ throw BinderException("Column \"%s\" with type \"%s\" cannot have a nested FIELD_IDS specification",
730
+ col_name, LogicalTypeIdToString(col_type.id()));
731
+ }
732
+
733
+ GetFieldIDs(*child_field_ids_value, inserted.first->second.child_field_ids, unique_field_ids,
734
+ GetChildNameToTypeMap(col_type));
735
+ }
736
+ }
737
+ }
738
+
588
739
  unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
589
740
  vector<LogicalType> &sql_types) {
741
+ D_ASSERT(names.size() == sql_types.size());
590
742
  auto bind_data = make_uniq<ParquetWriteBindData>();
591
743
  for (auto &option : info.options) {
592
744
  auto loption = StringUtil::Lower(option.first);
@@ -609,7 +761,27 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
609
761
  continue;
610
762
  }
611
763
  }
612
- throw ParserException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
764
+ throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
765
+ } else if (loption == "field_ids") {
766
+ if (option.second.size() != 1) {
767
+ throw BinderException("FIELD_IDS requires exactly one argument");
768
+ }
769
+ if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
770
+ StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
771
+ idx_t field_id = 0;
772
+ GenerateFieldIDs(bind_data->field_ids, field_id, names, sql_types);
773
+ } else {
774
+ unordered_set<uint32_t> unique_field_ids;
775
+ case_insensitive_map_t<LogicalType> name_to_type_map;
776
+ for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
777
+ if (names[col_idx] == FieldID::DUCKDB_FIELD_ID) {
778
+ throw BinderException("Cannot have a column named \"%s\" when writing FIELD_IDS",
779
+ FieldID::DUCKDB_FIELD_ID);
780
+ }
781
+ name_to_type_map.emplace(names[col_idx], sql_types[col_idx]);
782
+ }
783
+ GetFieldIDs(option.second[0], bind_data->field_ids, unique_field_ids, name_to_type_map);
784
+ }
613
785
  } else {
614
786
  throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
615
787
  }
@@ -625,8 +797,8 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
625
797
  auto &parquet_bind = bind_data.Cast<ParquetWriteBindData>();
626
798
 
627
799
  auto &fs = FileSystem::GetFileSystem(context);
628
- global_state->writer =
629
- make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec);
800
+ global_state->writer = make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names,
801
+ parquet_bind.codec, parquet_bind.field_ids.Copy());
630
802
  return std::move(global_state);
631
803
  }
632
804
 
@@ -637,12 +809,12 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
637
809
  auto &local_state = lstate.Cast<ParquetWriteLocalState>();
638
810
 
639
811
  // append data to the local (buffered) chunk collection
640
- local_state.buffer.Append(input);
812
+ local_state.buffer.Append(local_state.append_state, input);
641
813
  if (local_state.buffer.Count() > bind_data.row_group_size) {
642
814
  // if the chunk collection exceeds a certain size we flush it to the parquet file
815
+ local_state.append_state.current_chunk_state.handles.clear();
643
816
  global_state.writer->Flush(local_state.buffer);
644
- // and reset the buffer
645
- local_state.buffer.Reset();
817
+ local_state.buffer.InitializeAppend(local_state.append_state);
646
818
  }
647
819
  }
648
820
 
@@ -48,7 +48,7 @@ using duckdb_parquet::format::SchemaElement;
48
48
  using duckdb_parquet::format::Statistics;
49
49
  using duckdb_parquet::format::Type;
50
50
 
51
- static duckdb::unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
51
+ static unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
52
52
  CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, bool prefetch_mode) {
53
53
  auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, prefetch_mode);
54
54
  return make_uniq<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>>(std::move(transport));
@@ -76,7 +76,7 @@ static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, F
76
76
  }
77
77
  // read four-byte footer length from just before the end magic bytes
78
78
  auto footer_len = *reinterpret_cast<uint32_t *>(buf.ptr);
79
- if (footer_len <= 0 || file_size < 12 + footer_len) {
79
+ if (footer_len == 0 || file_size < 12 + footer_len) {
80
80
  throw InvalidInputException("Footer length error in file '%s'", file_handle.path);
81
81
  }
82
82
  auto metadata_pos = file_size - (footer_len + 8);
@@ -271,7 +271,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
271
271
  }
272
272
  if (s_ele.__isset.num_children && s_ele.num_children > 0) { // inner node
273
273
  child_list_t<LogicalType> child_types;
274
- vector<duckdb::unique_ptr<ColumnReader>> child_readers;
274
+ vector<unique_ptr<ColumnReader>> child_readers;
275
275
 
276
276
  idx_t c_idx = 0;
277
277
  while (c_idx < (idx_t)s_ele.num_children) {
@@ -287,7 +287,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
287
287
  c_idx++;
288
288
  }
289
289
  D_ASSERT(!child_types.empty());
290
- duckdb::unique_ptr<ColumnReader> result;
290
+ unique_ptr<ColumnReader> result;
291
291
  LogicalType result_type;
292
292
 
293
293
  bool is_repeated = repetition_type == FieldRepetitionType::REPEATED;
@@ -429,7 +429,7 @@ ParquetOptions::ParquetOptions(ClientContext &context) {
429
429
 
430
430
  ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, ParquetOptions parquet_options_p)
431
431
  : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
432
- parquet_options(parquet_options_p) {
432
+ parquet_options(std::move(parquet_options_p)) {
433
433
  file_name = std::move(file_name_p);
434
434
  file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
435
435
  if (!file_handle->CanSeek()) {
@@ -457,7 +457,7 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
457
457
  ParquetReader::ParquetReader(ClientContext &context_p, ParquetOptions parquet_options_p,
458
458
  shared_ptr<ParquetFileMetadataCache> metadata_p)
459
459
  : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
460
- metadata(std::move(metadata_p)), parquet_options(parquet_options_p) {
460
+ metadata(std::move(metadata_p)), parquet_options(std::move(parquet_options_p)) {
461
461
  InitializeSchema();
462
462
  }
463
463
 
@@ -1,12 +1,13 @@
1
1
  #include "parquet_statistics.hpp"
2
+
3
+ #include "duckdb.hpp"
2
4
  #include "parquet_decimal_utils.hpp"
3
5
  #include "parquet_timestamp.hpp"
4
6
  #include "string_column_reader.hpp"
5
- #include "duckdb.hpp"
6
7
  #ifndef DUCKDB_AMALGAMATION
7
8
  #include "duckdb/common/types/blob.hpp"
8
- #include "duckdb/common/types/value.hpp"
9
9
  #include "duckdb/common/types/time.hpp"
10
+ #include "duckdb/common/types/value.hpp"
10
11
  #endif
11
12
 
12
13
  namespace duckdb {
@@ -14,9 +15,9 @@ namespace duckdb {
14
15
  using duckdb_parquet::format::ConvertedType;
15
16
  using duckdb_parquet::format::Type;
16
17
 
17
- static duckdb::unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type,
18
- const duckdb_parquet::format::SchemaElement &schema_ele,
19
- const duckdb_parquet::format::Statistics &parquet_stats) {
18
+ static unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type,
19
+ const duckdb_parquet::format::SchemaElement &schema_ele,
20
+ const duckdb_parquet::format::Statistics &parquet_stats) {
20
21
  auto stats = NumericStats::CreateUnknown(type);
21
22
 
22
23
  // for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and
@@ -226,7 +227,7 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
226
227
  return nullptr;
227
228
  }
228
229
  auto &parquet_stats = column_chunk.meta_data.statistics;
229
- duckdb::unique_ptr<BaseStatistics> row_group_stats;
230
+ unique_ptr<BaseStatistics> row_group_stats;
230
231
 
231
232
  switch (type.id()) {
232
233
  case LogicalTypeId::UTINYINT: