duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +42 -5
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/file_system.cpp +19 -0
  61. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  62. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  63. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  64. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  65. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  66. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  67. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  68. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  69. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  70. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  71. package/src/duckdb/src/common/types/bit.cpp +51 -0
  72. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  73. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  74. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  75. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  76. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  77. package/src/duckdb/src/common/types/date.cpp +9 -0
  78. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  81. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  82. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  83. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  84. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  85. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  86. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  92. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  93. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  94. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  95. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  96. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  97. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  98. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  100. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  102. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  103. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  104. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  105. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  106. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  107. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  108. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  109. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  110. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  111. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  112. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  113. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  114. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  115. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  116. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  117. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  119. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  121. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
  122. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  123. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  124. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  125. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  126. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  127. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  128. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  129. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  131. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  132. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  133. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  134. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  135. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  136. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  137. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  138. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  139. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  140. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  141. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  142. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  143. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  144. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  145. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  146. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  147. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  148. package/src/duckdb/src/function/function.cpp +3 -1
  149. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  150. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  151. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  152. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  153. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  154. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  155. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  156. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  157. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  158. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  159. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  160. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  161. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  162. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  163. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  164. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  165. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  166. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  168. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  169. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  170. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  171. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  172. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  173. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  174. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  175. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  176. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  177. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  178. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  179. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  181. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  182. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  183. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  184. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  185. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  186. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  187. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  188. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  189. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  190. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  191. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  192. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  193. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  194. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  195. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  196. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  197. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  198. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  200. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  201. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  207. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  210. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  211. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  218. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  219. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  220. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  221. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  222. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  223. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  224. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  225. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  226. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  227. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  228. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  229. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  230. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  231. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  232. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  233. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  234. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  235. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  236. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  237. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  238. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  239. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  240. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  241. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  242. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  243. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  246. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  247. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  248. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  249. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  250. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  251. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  252. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  253. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  254. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  255. package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
  256. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  257. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  258. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  259. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  260. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  262. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  263. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  265. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  266. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  267. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  269. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  270. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  272. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  273. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  274. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  275. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  277. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  279. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  280. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  281. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  282. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  283. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  284. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  285. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  286. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  287. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  288. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  292. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  293. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  294. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  295. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  296. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  297. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  298. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  299. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  300. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  301. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  302. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  303. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  304. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  305. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  306. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  307. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  308. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  309. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  310. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  311. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  312. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  313. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  314. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  315. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  316. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  317. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  318. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  319. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  320. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  321. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  322. package/src/duckdb/src/include/duckdb.h +28 -0
  323. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  324. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  325. package/src/duckdb/src/main/config.cpp +3 -0
  326. package/src/duckdb/src/main/database.cpp +1 -1
  327. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  328. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  329. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  330. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  331. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  332. package/src/duckdb/src/main/relation.cpp +6 -5
  333. package/src/duckdb/src/main/settings/settings.cpp +64 -18
  334. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  335. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  336. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  337. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  338. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  339. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  340. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  341. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  342. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  343. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  344. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  345. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  346. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  347. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  348. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  349. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  350. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  351. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  352. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  353. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  354. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  355. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  356. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  357. package/src/duckdb/src/parallel/executor.cpp +15 -0
  358. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  359. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  360. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  361. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  362. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  363. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  364. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  365. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  366. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  367. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  368. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  369. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  370. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  371. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  372. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  373. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  374. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  375. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  376. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  377. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  378. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  379. package/src/duckdb/src/parser/parser.cpp +8 -2
  380. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  381. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  382. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  383. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  384. package/src/duckdb/src/parser/query_node.cpp +15 -37
  385. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  386. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  387. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  388. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  389. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  390. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  391. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  392. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  393. package/src/duckdb/src/parser/tableref.cpp +0 -44
  394. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  395. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  396. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  397. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  398. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  399. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  400. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  401. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  402. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  403. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  404. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  405. package/src/duckdb/src/parser/transformer.cpp +15 -0
  406. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  407. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  408. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  409. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  410. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  411. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  412. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  413. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  414. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  415. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  416. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  417. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  418. package/src/duckdb/src/planner/binder.cpp +5 -0
  419. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  420. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  421. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  422. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  423. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  424. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  425. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  426. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  427. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  428. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  429. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  430. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  431. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  432. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  433. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  434. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  435. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  436. package/src/duckdb/src/storage/data_table.cpp +1 -1
  437. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  438. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  439. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  440. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  441. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  442. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  443. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  444. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  445. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  446. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  447. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  448. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  449. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  450. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  451. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  452. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  453. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  454. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  455. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  456. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  457. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  458. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  459. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  460. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  461. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  462. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  463. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  464. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  465. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  466. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  467. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  468. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  469. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  470. package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
  471. package/src/statement.cpp +10 -3
  472. package/test/test_all_types.test.ts +233 -0
  473. package/tsconfig.json +1 -0
  474. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  475. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  476. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,27 +1,25 @@
1
1
  #include "column_reader.hpp"
2
- #include "parquet_timestamp.hpp"
3
- #include "utf8proc_wrapper.hpp"
4
- #include "parquet_reader.hpp"
5
2
 
6
3
  #include "boolean_column_reader.hpp"
7
- #include "cast_column_reader.hpp"
8
- #include "row_number_column_reader.hpp"
9
4
  #include "callback_column_reader.hpp"
10
- #include "parquet_decimal_utils.hpp"
5
+ #include "cast_column_reader.hpp"
6
+ #include "duckdb.hpp"
11
7
  #include "list_column_reader.hpp"
8
+ #include "miniz_wrapper.hpp"
9
+ #include "parquet_decimal_utils.hpp"
10
+ #include "parquet_reader.hpp"
11
+ #include "parquet_timestamp.hpp"
12
+ #include "row_number_column_reader.hpp"
13
+ #include "snappy.h"
12
14
  #include "string_column_reader.hpp"
13
15
  #include "struct_column_reader.hpp"
14
16
  #include "templated_column_reader.hpp"
15
-
16
- #include "snappy.h"
17
- #include "miniz_wrapper.hpp"
17
+ #include "utf8proc_wrapper.hpp"
18
18
  #include "zstd.h"
19
- #include <iostream>
20
19
 
21
- #include "duckdb.hpp"
22
20
  #ifndef DUCKDB_AMALGAMATION
23
- #include "duckdb/common/types/blob.hpp"
24
21
  #include "duckdb/common/types/bit.hpp"
22
+ #include "duckdb/common/types/blob.hpp"
25
23
  #include "duckdb/common/types/chunk_collection.hpp"
26
24
  #endif
27
25
 
@@ -614,7 +612,7 @@ uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len
614
612
 
615
613
  void StringColumnReader::Dictionary(shared_ptr<ResizeableBuffer> data, idx_t num_entries) {
616
614
  dict = std::move(data);
617
- dict_strings = duckdb::unique_ptr<string_t[]>(new string_t[num_entries]);
615
+ dict_strings = unique_ptr<string_t[]>(new string_t[num_entries]);
618
616
  for (idx_t dict_idx = 0; dict_idx < num_entries; dict_idx++) {
619
617
  uint32_t str_len;
620
618
  if (fixed_width_string_length == 0) {
@@ -873,7 +871,7 @@ idx_t ListColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data
873
871
 
874
872
  ListColumnReader::ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p,
875
873
  idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p,
876
- duckdb::unique_ptr<ColumnReader> child_column_reader_p)
874
+ unique_ptr<ColumnReader> child_column_reader_p)
877
875
  : ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p),
878
876
  child_column_reader(std::move(child_column_reader_p)),
879
877
  read_cache(reader.allocator, ListType::GetChildType(Type())), read_vector(read_cache), overflow_child_count(0) {
@@ -889,8 +887,8 @@ ListColumnReader::ListColumnReader(ParquetReader &reader, LogicalType type_p, co
889
887
  void ListColumnReader::ApplyPendingSkips(idx_t num_values) {
890
888
  pending_skips -= num_values;
891
889
 
892
- auto define_out = duckdb::unique_ptr<uint8_t[]>(new uint8_t[num_values]);
893
- auto repeat_out = duckdb::unique_ptr<uint8_t[]>(new uint8_t[num_values]);
890
+ auto define_out = unique_ptr<uint8_t[]>(new uint8_t[num_values]);
891
+ auto repeat_out = unique_ptr<uint8_t[]>(new uint8_t[num_values]);
894
892
 
895
893
  idx_t remaining = num_values;
896
894
  idx_t read = 0;
@@ -953,7 +951,7 @@ idx_t RowNumberColumnReader::Read(uint64_t num_values, parquet_filter_t &filter,
953
951
  //===--------------------------------------------------------------------===//
954
952
  // Cast Column Reader
955
953
  //===--------------------------------------------------------------------===//
956
- CastColumnReader::CastColumnReader(duckdb::unique_ptr<ColumnReader> child_reader_p, LogicalType target_type_p)
954
+ CastColumnReader::CastColumnReader(unique_ptr<ColumnReader> child_reader_p, LogicalType target_type_p)
957
955
  : ColumnReader(child_reader_p->Reader(), std::move(target_type_p), child_reader_p->Schema(),
958
956
  child_reader_p->FileIdx(), child_reader_p->MaxDefine(), child_reader_p->MaxRepeat()),
959
957
  child_reader(std::move(child_reader_p)) {
@@ -1005,7 +1003,7 @@ idx_t CastColumnReader::GroupRowsAvailable() {
1005
1003
  //===--------------------------------------------------------------------===//
1006
1004
  StructColumnReader::StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p,
1007
1005
  idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p,
1008
- vector<duckdb::unique_ptr<ColumnReader>> child_readers_p)
1006
+ vector<unique_ptr<ColumnReader>> child_readers_p)
1009
1007
  : ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p),
1010
1008
  child_readers(std::move(child_readers_p)) {
1011
1009
  D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
@@ -1155,9 +1153,9 @@ protected:
1155
1153
  };
1156
1154
 
1157
1155
  template <bool FIXED_LENGTH>
1158
- static duckdb::unique_ptr<ColumnReader> CreateDecimalReaderInternal(ParquetReader &reader, const LogicalType &type_p,
1159
- const SchemaElement &schema_p, idx_t file_idx_p,
1160
- idx_t max_define, idx_t max_repeat) {
1156
+ static unique_ptr<ColumnReader> CreateDecimalReaderInternal(ParquetReader &reader, const LogicalType &type_p,
1157
+ const SchemaElement &schema_p, idx_t file_idx_p,
1158
+ idx_t max_define, idx_t max_repeat) {
1161
1159
  switch (type_p.InternalType()) {
1162
1160
  case PhysicalType::INT16:
1163
1161
  return make_uniq<DecimalColumnReader<int16_t, FIXED_LENGTH>>(reader, type_p, schema_p, file_idx_p, max_define,
@@ -184,7 +184,7 @@ ColumnWriterState::~ColumnWriterState() {
184
184
  }
185
185
 
186
186
  void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
187
- duckdb::unique_ptr<data_t[]> &compressed_buf) {
187
+ unique_ptr<data_t[]> &compressed_buf) {
188
188
  switch (writer.GetCodec()) {
189
189
  case CompressionCodec::UNCOMPRESSED:
190
190
  compressed_size = temp_writer.blob.size;
@@ -192,7 +192,7 @@ void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compres
192
192
  break;
193
193
  case CompressionCodec::SNAPPY: {
194
194
  compressed_size = duckdb_snappy::MaxCompressedLength(temp_writer.blob.size);
195
- compressed_buf = duckdb::unique_ptr<data_t[]>(new data_t[compressed_size]);
195
+ compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
196
196
  duckdb_snappy::RawCompress(const_char_ptr_cast(temp_writer.blob.data.get()), temp_writer.blob.size,
197
197
  char_ptr_cast(compressed_buf.get()), &compressed_size);
198
198
  compressed_data = compressed_buf.get();
@@ -202,7 +202,7 @@ void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compres
202
202
  case CompressionCodec::GZIP: {
203
203
  MiniZStream s;
204
204
  compressed_size = s.MaxCompressedLength(temp_writer.blob.size);
205
- compressed_buf = duckdb::unique_ptr<data_t[]>(new data_t[compressed_size]);
205
+ compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
206
206
  s.Compress(const_char_ptr_cast(temp_writer.blob.data.get()), temp_writer.blob.size,
207
207
  char_ptr_cast(compressed_buf.get()), &compressed_size);
208
208
  compressed_data = compressed_buf.get();
@@ -210,7 +210,7 @@ void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compres
210
210
  }
211
211
  case CompressionCodec::ZSTD: {
212
212
  compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.blob.size);
213
- compressed_buf = duckdb::unique_ptr<data_t[]>(new data_t[compressed_size]);
213
+ compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
214
214
  compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size,
215
215
  (const void *)temp_writer.blob.data.get(), temp_writer.blob.size,
216
216
  ZSTD_CLEVEL_DEFAULT);
@@ -303,14 +303,14 @@ struct PageInformation {
303
303
 
304
304
  struct PageWriteInformation {
305
305
  PageHeader page_header;
306
- duckdb::unique_ptr<BufferedSerializer> temp_writer;
307
- duckdb::unique_ptr<ColumnWriterPageState> page_state;
306
+ unique_ptr<BufferedSerializer> temp_writer;
307
+ unique_ptr<ColumnWriterPageState> page_state;
308
308
  idx_t write_page_idx = 0;
309
309
  idx_t write_count = 0;
310
310
  idx_t max_write_count = 0;
311
311
  size_t compressed_size;
312
312
  data_ptr_t compressed_data;
313
- duckdb::unique_ptr<data_t[]> compressed_buf;
313
+ unique_ptr<data_t[]> compressed_buf;
314
314
  };
315
315
 
316
316
  class BasicColumnWriterState : public ColumnWriterState {
@@ -325,7 +325,7 @@ public:
325
325
  idx_t col_idx;
326
326
  vector<PageInformation> page_info;
327
327
  vector<PageWriteInformation> write_info;
328
- duckdb::unique_ptr<ColumnWriterStatistics> stats_state;
328
+ unique_ptr<ColumnWriterStatistics> stats_state;
329
329
  idx_t current_page = 0;
330
330
  };
331
331
 
@@ -355,8 +355,7 @@ public:
355
355
  static constexpr const idx_t STRING_LENGTH_SIZE = sizeof(uint32_t);
356
356
 
357
357
  public:
358
- duckdb::unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
359
- Allocator &allocator) override;
358
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
360
359
  void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
361
360
  void BeginWrite(ColumnWriterState &state) override;
362
361
  void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
@@ -372,10 +371,10 @@ protected:
372
371
  void FlushPage(BasicColumnWriterState &state);
373
372
 
374
373
  //! Initializes the state used to track statistics during writing. Only used for scalar types.
375
- virtual duckdb::unique_ptr<ColumnWriterStatistics> InitializeStatsState();
374
+ virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
376
375
 
377
376
  //! Initialize the writer for a specific page. Only used for scalar types.
378
- virtual duckdb::unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state);
377
+ virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state);
379
378
 
380
379
  //! Flushes the writer for a specific page. Only used for scalar types.
381
380
  virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state);
@@ -391,16 +390,14 @@ protected:
391
390
  }
392
391
  //! The number of elements in the dictionary
393
392
  virtual idx_t DictionarySize(BasicColumnWriterState &state_p);
394
- void WriteDictionary(BasicColumnWriterState &state, duckdb::unique_ptr<BufferedSerializer> temp_writer,
395
- idx_t row_count);
393
+ void WriteDictionary(BasicColumnWriterState &state, unique_ptr<BufferedSerializer> temp_writer, idx_t row_count);
396
394
  virtual void FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats);
397
395
 
398
396
  void SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
399
397
  void RegisterToRowGroup(duckdb_parquet::format::RowGroup &row_group);
400
398
  };
401
399
 
402
- unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
403
- Allocator &allocator) {
400
+ unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
404
401
  auto result = make_uniq<BasicColumnWriterState>(row_group, row_group.columns.size());
405
402
  RegisterToRowGroup(row_group);
406
403
  return std::move(result);
@@ -698,8 +695,8 @@ idx_t BasicColumnWriter::DictionarySize(BasicColumnWriterState &state) {
698
695
  throw InternalException("This page does not have a dictionary");
699
696
  }
700
697
 
701
- void BasicColumnWriter::WriteDictionary(BasicColumnWriterState &state,
702
- duckdb::unique_ptr<BufferedSerializer> temp_writer, idx_t row_count) {
698
+ void BasicColumnWriter::WriteDictionary(BasicColumnWriterState &state, unique_ptr<BufferedSerializer> temp_writer,
699
+ idx_t row_count) {
703
700
  D_ASSERT(temp_writer);
704
701
  D_ASSERT(temp_writer->blob.size > 0);
705
702
 
@@ -761,7 +758,7 @@ public:
761
758
 
762
759
  struct BaseParquetOperator {
763
760
  template <class SRC, class TGT>
764
- static duckdb::unique_ptr<ColumnWriterStatistics> InitializeStats() {
761
+ static unique_ptr<ColumnWriterStatistics> InitializeStats() {
765
762
  return make_uniq<NumericStatisticsState<SRC, TGT, BaseParquetOperator>>();
766
763
  }
767
764
 
@@ -805,7 +802,7 @@ struct ParquetHugeintOperator {
805
802
  }
806
803
 
807
804
  template <class SRC, class TGT>
808
- static duckdb::unique_ptr<ColumnWriterStatistics> InitializeStats() {
805
+ static unique_ptr<ColumnWriterStatistics> InitializeStats() {
809
806
  return make_uniq<ColumnWriterStatistics>();
810
807
  }
811
808
 
@@ -837,7 +834,7 @@ public:
837
834
  ~StandardColumnWriter() override = default;
838
835
 
839
836
  public:
840
- duckdb::unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
837
+ unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
841
838
  return OP::template InitializeStats<SRC, TGT>();
842
839
  }
843
840
 
@@ -897,7 +894,7 @@ public:
897
894
  ~BooleanColumnWriter() override = default;
898
895
 
899
896
  public:
900
- duckdb::unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
897
+ unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
901
898
  return make_uniq<BooleanStatisticsState>();
902
899
  }
903
900
 
@@ -928,7 +925,7 @@ public:
928
925
  }
929
926
  }
930
927
 
931
- duckdb::unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
928
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
932
929
  return make_uniq<BooleanWriterPageState>();
933
930
  }
934
931
 
@@ -1022,7 +1019,7 @@ public:
1022
1019
  ~FixedDecimalColumnWriter() override = default;
1023
1020
 
1024
1021
  public:
1025
- duckdb::unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1022
+ unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1026
1023
  return make_uniq<FixedDecimalStatistics>();
1027
1024
  }
1028
1025
 
@@ -1196,8 +1193,8 @@ public:
1196
1193
 
1197
1194
  class StringColumnWriterState : public BasicColumnWriterState {
1198
1195
  public:
1199
- StringColumnWriterState(duckdb_parquet::format::RowGroup &row_group, Allocator &allocator, idx_t col_idx)
1200
- : BasicColumnWriterState(row_group, col_idx), dictionary_heap(allocator) {
1196
+ StringColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
1197
+ : BasicColumnWriterState(row_group, col_idx) {
1201
1198
  }
1202
1199
  ~StringColumnWriterState() override = default;
1203
1200
 
@@ -1208,7 +1205,6 @@ public:
1208
1205
 
1209
1206
  // Dictionary and accompanying string heap
1210
1207
  string_map_t<uint32_t> dictionary;
1211
- StringHeap dictionary_heap;
1212
1208
  // key_bit_width== 0 signifies the chunk is written in plain encoding
1213
1209
  uint32_t key_bit_width;
1214
1210
 
@@ -1243,13 +1239,12 @@ public:
1243
1239
  ~StringColumnWriter() override = default;
1244
1240
 
1245
1241
  public:
1246
- duckdb::unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1242
+ unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1247
1243
  return make_uniq<StringStatisticsState>();
1248
1244
  }
1249
1245
 
1250
- duckdb::unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
1251
- Allocator &allocator) override {
1252
- auto result = make_uniq<StringColumnWriterState>(row_group, allocator, row_group.columns.size());
1246
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override {
1247
+ auto result = make_uniq<StringColumnWriterState>(row_group, row_group.columns.size());
1253
1248
  RegisterToRowGroup(row_group);
1254
1249
  return std::move(result);
1255
1250
  }
@@ -1279,11 +1274,8 @@ public:
1279
1274
  if (validity.RowIsValid(vector_index)) {
1280
1275
  run_length++;
1281
1276
  const auto &value = strings[vector_index];
1282
- // If the value did not yet exist in the dictionary we add it to the StringHeap
1283
- auto found = !value.IsInlined() && state.dictionary.find(value) == state.dictionary.end()
1284
- ? state.dictionary.insert(string_map_t<uint32_t>::value_type(
1285
- state.dictionary_heap.AddBlob(value), new_value_index))
1286
- : state.dictionary.insert(string_map_t<uint32_t>::value_type(value, new_value_index));
1277
+ // Try to insert into the dictionary. If it's already there, we get back the value index
1278
+ auto found = state.dictionary.insert(string_map_t<uint32_t>::value_type(value, new_value_index));
1287
1279
  state.estimated_plain_size += value.GetSize() + STRING_LENGTH_SIZE;
1288
1280
  if (found.second) {
1289
1281
  // string didn't exist yet in the dictionary
@@ -1359,7 +1351,7 @@ public:
1359
1351
  }
1360
1352
  }
1361
1353
 
1362
- duckdb::unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
1354
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
1363
1355
  auto &state = state_p.Cast<StringColumnWriterState>();
1364
1356
  return make_uniq<StringWriterPageState>(state.key_bit_width, state.dictionary);
1365
1357
  }
@@ -1456,7 +1448,7 @@ public:
1456
1448
  uint32_t bit_width;
1457
1449
 
1458
1450
  public:
1459
- duckdb::unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1451
+ unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1460
1452
  return make_uniq<StringStatisticsState>();
1461
1453
  }
1462
1454
 
@@ -1499,7 +1491,7 @@ public:
1499
1491
  }
1500
1492
  }
1501
1493
 
1502
- duckdb::unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
1494
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
1503
1495
  return make_uniq<EnumWriterPageState>(bit_width);
1504
1496
  }
1505
1497
 
@@ -1557,17 +1549,16 @@ public:
1557
1549
  class StructColumnWriter : public ColumnWriter {
1558
1550
  public:
1559
1551
  StructColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1560
- idx_t max_define, vector<duckdb::unique_ptr<ColumnWriter>> child_writers_p, bool can_have_nulls)
1552
+ idx_t max_define, vector<unique_ptr<ColumnWriter>> child_writers_p, bool can_have_nulls)
1561
1553
  : ColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
1562
1554
  child_writers(std::move(child_writers_p)) {
1563
1555
  }
1564
1556
  ~StructColumnWriter() override = default;
1565
1557
 
1566
- vector<duckdb::unique_ptr<ColumnWriter>> child_writers;
1558
+ vector<unique_ptr<ColumnWriter>> child_writers;
1567
1559
 
1568
1560
  public:
1569
- duckdb::unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
1570
- Allocator &allocator) override;
1561
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
1571
1562
  bool HasAnalyze() override;
1572
1563
  void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
1573
1564
  void FinalizeAnalyze(ColumnWriterState &state) override;
@@ -1587,16 +1578,15 @@ public:
1587
1578
 
1588
1579
  duckdb_parquet::format::RowGroup &row_group;
1589
1580
  idx_t col_idx;
1590
- vector<duckdb::unique_ptr<ColumnWriterState>> child_states;
1581
+ vector<unique_ptr<ColumnWriterState>> child_states;
1591
1582
  };
1592
1583
 
1593
- unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
1594
- Allocator &allocator) {
1584
+ unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
1595
1585
  auto result = make_uniq<StructColumnWriterState>(row_group, row_group.columns.size());
1596
1586
 
1597
1587
  result->child_states.reserve(child_writers.size());
1598
1588
  for (auto &child_writer : child_writers) {
1599
- result->child_states.push_back(child_writer->InitializeWriteState(row_group, allocator));
1589
+ result->child_states.push_back(child_writer->InitializeWriteState(row_group));
1600
1590
  }
1601
1591
  return std::move(result);
1602
1592
  }
@@ -1680,17 +1670,16 @@ void StructColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
1680
1670
  class ListColumnWriter : public ColumnWriter {
1681
1671
  public:
1682
1672
  ListColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1683
- idx_t max_define, duckdb::unique_ptr<ColumnWriter> child_writer_p, bool can_have_nulls)
1673
+ idx_t max_define, unique_ptr<ColumnWriter> child_writer_p, bool can_have_nulls)
1684
1674
  : ColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
1685
1675
  child_writer(std::move(child_writer_p)) {
1686
1676
  }
1687
1677
  ~ListColumnWriter() override = default;
1688
1678
 
1689
- duckdb::unique_ptr<ColumnWriter> child_writer;
1679
+ unique_ptr<ColumnWriter> child_writer;
1690
1680
 
1691
1681
  public:
1692
- duckdb::unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
1693
- Allocator &allocator) override;
1682
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
1694
1683
  bool HasAnalyze() override;
1695
1684
  void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
1696
1685
  void FinalizeAnalyze(ColumnWriterState &state) override;
@@ -1710,14 +1699,13 @@ public:
1710
1699
 
1711
1700
  duckdb_parquet::format::RowGroup &row_group;
1712
1701
  idx_t col_idx;
1713
- duckdb::unique_ptr<ColumnWriterState> child_state;
1702
+ unique_ptr<ColumnWriterState> child_state;
1714
1703
  idx_t parent_index = 0;
1715
1704
  };
1716
1705
 
1717
- unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
1718
- Allocator &allocator) {
1706
+ unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
1719
1707
  auto result = make_uniq<ListColumnWriterState>(row_group, row_group.columns.size());
1720
- result->child_state = child_writer->InitializeWriteState(row_group, allocator);
1708
+ result->child_state = child_writer->InitializeWriteState(row_group);
1721
1709
  return std::move(result);
1722
1710
  }
1723
1711
 
@@ -1818,12 +1806,24 @@ void ListColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
1818
1806
  unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parquet::format::SchemaElement> &schemas,
1819
1807
  ParquetWriter &writer, const LogicalType &type,
1820
1808
  const string &name, vector<string> schema_path,
1809
+ optional_ptr<const ChildFieldIDs> field_ids,
1821
1810
  idx_t max_repeat, idx_t max_define, bool can_have_nulls) {
1822
1811
  auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
1823
1812
  if (!can_have_nulls) {
1824
1813
  max_define--;
1825
1814
  }
1826
1815
  idx_t schema_idx = schemas.size();
1816
+
1817
+ optional_ptr<const FieldID> field_id;
1818
+ optional_ptr<const ChildFieldIDs> child_field_ids;
1819
+ if (field_ids) {
1820
+ auto field_id_it = field_ids->ids->find(name);
1821
+ if (field_id_it != field_ids->ids->end()) {
1822
+ field_id = &field_id_it->second;
1823
+ child_field_ids = &field_id->child_field_ids;
1824
+ }
1825
+ }
1826
+
1827
1827
  if (type.id() == LogicalTypeId::STRUCT) {
1828
1828
  auto &child_types = StructType::GetChildTypes(type);
1829
1829
  // set up the schema element for this struct
@@ -1834,15 +1834,19 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
1834
1834
  schema_element.__isset.type = false;
1835
1835
  schema_element.__isset.repetition_type = true;
1836
1836
  schema_element.name = name;
1837
+ if (field_id && field_id->set) {
1838
+ schema_element.__isset.field_id = true;
1839
+ schema_element.field_id = field_id->field_id;
1840
+ }
1837
1841
  schemas.push_back(std::move(schema_element));
1838
1842
  schema_path.push_back(name);
1839
1843
 
1840
1844
  // construct the child types recursively
1841
- vector<duckdb::unique_ptr<ColumnWriter>> child_writers;
1845
+ vector<unique_ptr<ColumnWriter>> child_writers;
1842
1846
  child_writers.reserve(child_types.size());
1843
1847
  for (auto &child_type : child_types) {
1844
1848
  child_writers.push_back(CreateWriterRecursive(schemas, writer, child_type.second, child_type.first,
1845
- schema_path, max_repeat, max_define + 1));
1849
+ schema_path, child_field_ids, max_repeat, max_define + 1));
1846
1850
  }
1847
1851
  return make_uniq<StructColumnWriter>(writer, schema_idx, std::move(schema_path), max_repeat, max_define,
1848
1852
  std::move(child_writers), can_have_nulls);
@@ -1861,6 +1865,10 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
1861
1865
  optional_element.__isset.repetition_type = true;
1862
1866
  optional_element.__isset.converted_type = true;
1863
1867
  optional_element.name = name;
1868
+ if (field_id && field_id->set) {
1869
+ optional_element.__isset.field_id = true;
1870
+ optional_element.field_id = field_id->field_id;
1871
+ }
1864
1872
  schemas.push_back(std::move(optional_element));
1865
1873
  schema_path.push_back(name);
1866
1874
 
@@ -1875,8 +1883,8 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
1875
1883
  schemas.push_back(std::move(repeated_element));
1876
1884
  schema_path.emplace_back("list");
1877
1885
 
1878
- auto child_writer =
1879
- CreateWriterRecursive(schemas, writer, child_type, "element", schema_path, max_repeat + 1, max_define + 2);
1886
+ auto child_writer = CreateWriterRecursive(schemas, writer, child_type, "element", schema_path, child_field_ids,
1887
+ max_repeat + 1, max_define + 2);
1880
1888
  return make_uniq<ListColumnWriter>(writer, schema_idx, std::move(schema_path), max_repeat, max_define,
1881
1889
  std::move(child_writer), can_have_nulls);
1882
1890
  }
@@ -1899,6 +1907,10 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
1899
1907
  top_element.__isset.converted_type = true;
1900
1908
  top_element.__isset.type = false;
1901
1909
  top_element.name = name;
1910
+ if (field_id && field_id->set) {
1911
+ top_element.__isset.field_id = true;
1912
+ top_element.field_id = field_id->field_id;
1913
+ }
1902
1914
  schemas.push_back(std::move(top_element));
1903
1915
  schema_path.push_back(name);
1904
1916
 
@@ -1916,13 +1928,13 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
1916
1928
  // construct the child types recursively
1917
1929
  vector<LogicalType> kv_types {MapType::KeyType(type), MapType::ValueType(type)};
1918
1930
  vector<string> kv_names {"key", "value"};
1919
- vector<duckdb::unique_ptr<ColumnWriter>> child_writers;
1931
+ vector<unique_ptr<ColumnWriter>> child_writers;
1920
1932
  child_writers.reserve(2);
1921
1933
  for (idx_t i = 0; i < 2; i++) {
1922
1934
  // key needs to be marked as REQUIRED
1923
1935
  bool is_key = i == 0;
1924
1936
  auto child_writer = CreateWriterRecursive(schemas, writer, kv_types[i], kv_names[i], schema_path,
1925
- max_repeat + 1, max_define + 2, !is_key);
1937
+ child_field_ids, max_repeat + 1, max_define + 2, !is_key);
1926
1938
 
1927
1939
  child_writers.push_back(std::move(child_writer));
1928
1940
  }
@@ -1938,6 +1950,10 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
1938
1950
  schema_element.__isset.type = true;
1939
1951
  schema_element.__isset.repetition_type = true;
1940
1952
  schema_element.name = name;
1953
+ if (field_id && field_id->set) {
1954
+ schema_element.__isset.field_id = true;
1955
+ schema_element.field_id = field_id->field_id;
1956
+ }
1941
1957
  ParquetWriter::SetSchemaProperties(type, schema_element);
1942
1958
  schemas.push_back(std::move(schema_element));
1943
1959
  schema_path.push_back(name);
@@ -19,9 +19,9 @@ public:
19
19
  static constexpr const PhysicalType TYPE = PhysicalType::INVALID;
20
20
 
21
21
  public:
22
- CastColumnReader(duckdb::unique_ptr<ColumnReader> child_reader, LogicalType target_type);
22
+ CastColumnReader(unique_ptr<ColumnReader> child_reader, LogicalType target_type);
23
23
 
24
- duckdb::unique_ptr<ColumnReader> child_reader;
24
+ unique_ptr<ColumnReader> child_reader;
25
25
  DataChunk intermediate_chunk;
26
26
 
27
27
  public:
@@ -8,21 +8,19 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "parquet_types.h"
12
- #include "thrift_tools.hpp"
13
- #include "resizable_buffer.hpp"
14
-
15
- #include "parquet_rle_bp_decoder.hpp"
11
+ #include "duckdb.hpp"
16
12
  #include "parquet_dbp_decoder.hpp"
13
+ #include "parquet_rle_bp_decoder.hpp"
17
14
  #include "parquet_statistics.hpp"
18
-
19
- #include "duckdb.hpp"
15
+ #include "parquet_types.h"
16
+ #include "resizable_buffer.hpp"
17
+ #include "thrift_tools.hpp"
20
18
  #ifndef DUCKDB_AMALGAMATION
21
19
 
22
- #include "duckdb/common/types/vector.hpp"
23
- #include "duckdb/common/types/string_type.hpp"
24
- #include "duckdb/common/types/chunk_collection.hpp"
25
20
  #include "duckdb/common/operator/cast_operators.hpp"
21
+ #include "duckdb/common/types/chunk_collection.hpp"
22
+ #include "duckdb/common/types/string_type.hpp"
23
+ #include "duckdb/common/types/vector.hpp"
26
24
  #include "duckdb/common/types/vector_cache.hpp"
27
25
  #endif
28
26
 
@@ -129,7 +127,7 @@ protected:
129
127
 
130
128
  ParquetReader &reader;
131
129
  LogicalType type;
132
- duckdb::unique_ptr<Vector> byte_array_data;
130
+ unique_ptr<Vector> byte_array_data;
133
131
  idx_t byte_array_count = 0;
134
132
 
135
133
  idx_t pending_skips = 0;
@@ -158,11 +156,11 @@ private:
158
156
  ResizeableBuffer compressed_buffer;
159
157
  ResizeableBuffer offset_buffer;
160
158
 
161
- duckdb::unique_ptr<RleBpDecoder> dict_decoder;
162
- duckdb::unique_ptr<RleBpDecoder> defined_decoder;
163
- duckdb::unique_ptr<RleBpDecoder> repeated_decoder;
164
- duckdb::unique_ptr<DbpDecoder> dbp_decoder;
165
- duckdb::unique_ptr<RleBpDecoder> rle_decoder;
159
+ unique_ptr<RleBpDecoder> dict_decoder;
160
+ unique_ptr<RleBpDecoder> defined_decoder;
161
+ unique_ptr<RleBpDecoder> repeated_decoder;
162
+ unique_ptr<DbpDecoder> dbp_decoder;
163
+ unique_ptr<RleBpDecoder> rle_decoder;
166
164
 
167
165
  // dummies for Skip()
168
166
  parquet_filter_t none_filter;
@@ -16,6 +16,7 @@ class BufferedSerializer;
16
16
  class ParquetWriter;
17
17
  class ColumnWriterPageState;
18
18
  class BasicColumnWriterState;
19
+ struct ChildFieldIDs;
19
20
 
20
21
  class ColumnWriterState {
21
22
  public:
@@ -78,13 +79,14 @@ public:
78
79
 
79
80
  public:
80
81
  //! Create the column writer for a specific type recursively
81
- static duckdb::unique_ptr<ColumnWriter>
82
- CreateWriterRecursive(vector<duckdb_parquet::format::SchemaElement> &schemas, ParquetWriter &writer,
83
- const LogicalType &type, const string &name, vector<string> schema_path, idx_t max_repeat = 0,
84
- idx_t max_define = 1, bool can_have_nulls = true);
82
+ static unique_ptr<ColumnWriter> CreateWriterRecursive(vector<duckdb_parquet::format::SchemaElement> &schemas,
83
+ ParquetWriter &writer, const LogicalType &type,
84
+ const string &name, vector<string> schema_path,
85
+ optional_ptr<const ChildFieldIDs> field_ids,
86
+ idx_t max_repeat = 0, idx_t max_define = 1,
87
+ bool can_have_nulls = true);
85
88
 
86
- virtual duckdb::unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group,
87
- Allocator &allocator) = 0;
89
+ virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) = 0;
88
90
 
89
91
  //! indicates whether the write need to analyse the data before preparing it
90
92
  virtual bool HasAnalyze() {
@@ -112,7 +114,7 @@ protected:
112
114
  void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
113
115
 
114
116
  void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
115
- duckdb::unique_ptr<data_t[]> &compressed_buf);
117
+ unique_ptr<data_t[]> &compressed_buf);
116
118
  };
117
119
 
118
120
  } // namespace duckdb
@@ -19,7 +19,7 @@ public:
19
19
 
20
20
  public:
21
21
  ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p,
22
- idx_t max_define_p, idx_t max_repeat_p, duckdb::unique_ptr<ColumnReader> child_column_reader_p);
22
+ idx_t max_define_p, idx_t max_repeat_p, unique_ptr<ColumnReader> child_column_reader_p);
23
23
 
24
24
  idx_t Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, data_ptr_t repeat_out,
25
25
  Vector &result_out) override;
@@ -43,7 +43,7 @@ public:
43
43
  }
44
44
 
45
45
  private:
46
- duckdb::unique_ptr<ColumnReader> child_column_reader;
46
+ unique_ptr<ColumnReader> child_column_reader;
47
47
  ResizeableBuffer child_defines;
48
48
  ResizeableBuffer child_repeats;
49
49
  uint8_t *child_defines_ptr;