duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  23. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  24. package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
  25. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  26. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  27. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  28. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  29. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  30. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  31. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  32. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  33. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  34. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  39. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  40. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  41. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  42. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  43. package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
  44. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  45. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  46. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  47. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  48. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  49. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
  55. package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/exception.cpp +2 -2
  62. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  63. package/src/duckdb/src/common/file_system.cpp +19 -0
  64. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  65. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  66. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  67. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  68. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  69. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  70. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  71. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  72. package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
  73. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  74. package/src/duckdb/src/common/types/bit.cpp +51 -0
  75. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  76. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  77. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  78. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  79. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  80. package/src/duckdb/src/common/types/date.cpp +9 -0
  81. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  82. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  83. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  84. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  85. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  86. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  87. package/src/duckdb/src/common/types/value.cpp +11 -6
  88. package/src/duckdb/src/common/types.cpp +9 -656
  89. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  90. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  91. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  94. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  95. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  96. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  97. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  98. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  99. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  100. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  101. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  102. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  103. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  104. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  105. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  106. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  107. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  108. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  109. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  110. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  111. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  112. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  113. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  114. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  115. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  116. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  117. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  118. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  119. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  120. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  121. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  122. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  124. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  125. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  126. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  127. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  128. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  129. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  131. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  132. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  133. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  134. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  135. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  136. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  137. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  138. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  139. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  140. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  141. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  142. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  143. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  144. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  145. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  146. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  147. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  148. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  149. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  150. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  151. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  152. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  153. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  154. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  155. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  156. package/src/duckdb/src/function/function.cpp +3 -1
  157. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  158. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  159. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  160. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  161. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  162. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  163. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  164. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  165. package/src/duckdb/src/function/table/arrow.cpp +19 -0
  166. package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
  167. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  168. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  169. package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
  170. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  171. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  172. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  173. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  174. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  176. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  177. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  178. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  179. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  180. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  181. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  182. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  183. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  184. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  185. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  186. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  187. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  188. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  189. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  190. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
  191. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  192. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  193. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  194. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  195. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  196. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
  197. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  198. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  200. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  201. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  202. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  205. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  206. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  207. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
  208. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  210. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  211. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  213. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  215. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  222. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  229. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  230. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  231. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  232. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  233. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  234. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  235. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  236. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  237. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  238. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  239. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  240. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  241. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  242. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  243. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  244. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  245. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  246. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  247. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  248. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  249. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  251. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  252. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  253. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  254. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  255. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  256. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  257. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  258. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  259. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  260. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  261. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  262. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  263. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  264. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  265. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  266. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  267. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  268. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  269. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  270. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  272. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  273. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  274. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  275. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
  276. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  277. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  278. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  279. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  280. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  281. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  284. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  286. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  287. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  288. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  291. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  293. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  294. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  295. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  296. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  297. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  300. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  302. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  303. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  304. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  306. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  307. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  308. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  309. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  310. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  311. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  312. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  314. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  315. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  316. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  317. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  318. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  319. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  320. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  321. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  322. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  323. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  324. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  325. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  326. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  327. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  328. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  329. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  330. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  331. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  332. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  333. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  334. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  335. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  336. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  337. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  338. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  339. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  340. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  341. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  342. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  343. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  344. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  345. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  346. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  347. package/src/duckdb/src/include/duckdb.h +28 -0
  348. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  349. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  350. package/src/duckdb/src/main/config.cpp +4 -0
  351. package/src/duckdb/src/main/database.cpp +1 -1
  352. package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
  353. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  354. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  355. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  356. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  357. package/src/duckdb/src/main/relation.cpp +6 -5
  358. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  359. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  360. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  361. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  362. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  363. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  364. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  365. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  366. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  367. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  368. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  369. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  370. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  371. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  372. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  373. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  374. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  375. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  376. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  377. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  378. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  379. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  380. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  381. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  382. package/src/duckdb/src/parallel/executor.cpp +15 -0
  383. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  384. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  385. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  386. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  387. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  388. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  389. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  390. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  391. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  392. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  393. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  394. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  395. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  396. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  397. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  398. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  399. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  400. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  401. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  402. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  403. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  404. package/src/duckdb/src/parser/parser.cpp +8 -2
  405. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  406. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  407. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  408. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  409. package/src/duckdb/src/parser/query_node.cpp +15 -37
  410. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  411. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  412. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  413. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  414. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  415. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  416. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  417. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  418. package/src/duckdb/src/parser/tableref.cpp +0 -44
  419. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  420. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  421. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  422. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  423. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  424. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  425. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  426. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  427. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  428. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  429. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  430. package/src/duckdb/src/parser/transformer.cpp +15 -0
  431. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  432. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  433. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  434. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  435. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  436. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  437. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  438. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  439. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  440. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  441. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  442. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  443. package/src/duckdb/src/planner/binder.cpp +44 -31
  444. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  445. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  446. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  447. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  448. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  449. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  450. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  451. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  452. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  453. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  454. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  455. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  456. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  457. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  458. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  459. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  460. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  461. package/src/duckdb/src/storage/data_table.cpp +1 -1
  462. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  463. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  464. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  465. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  466. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  467. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  468. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  469. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  470. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  471. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  472. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  473. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  474. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  475. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  476. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  477. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  478. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  479. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  480. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  481. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  482. package/src/duckdb/ub_src_common.cpp +2 -0
  483. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  484. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  485. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  486. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  487. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  488. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  489. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  490. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  491. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  492. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  493. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  494. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  495. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  496. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  497. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  498. package/src/statement.cpp +10 -3
  499. package/test/columns.test.ts +24 -1
  500. package/test/test_all_types.test.ts +234 -0
  501. package/tsconfig.json +1 -0
  502. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  503. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  504. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,7 +1,8 @@
1
1
  #define DUCKDB_EXTENSION_MAIN
2
2
 
3
- #include "duckdb.hpp"
4
3
  #include "parquet_extension.hpp"
4
+
5
+ #include "duckdb.hpp"
5
6
  #include "parquet_metadata.hpp"
6
7
  #include "parquet_reader.hpp"
7
8
  #include "parquet_writer.hpp"
@@ -14,15 +15,18 @@
14
15
  #include <vector>
15
16
  #ifndef DUCKDB_AMALGAMATION
16
17
  #include "duckdb/catalog/catalog.hpp"
18
+ #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
17
19
  #include "duckdb/common/constants.hpp"
18
20
  #include "duckdb/common/enums/file_compression_type.hpp"
19
21
  #include "duckdb/common/field_writer.hpp"
20
22
  #include "duckdb/common/file_system.hpp"
23
+ #include "duckdb/common/multi_file_reader.hpp"
21
24
  #include "duckdb/common/types/chunk_collection.hpp"
22
25
  #include "duckdb/function/copy_function.hpp"
23
26
  #include "duckdb/function/table_function.hpp"
24
27
  #include "duckdb/main/client_context.hpp"
25
28
  #include "duckdb/main/config.hpp"
29
+ #include "duckdb/main/extension_util.hpp"
26
30
  #include "duckdb/parser/expression/constant_expression.hpp"
27
31
  #include "duckdb/parser/expression/function_expression.hpp"
28
32
  #include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
@@ -30,10 +34,7 @@
30
34
  #include "duckdb/parser/tableref/table_function_ref.hpp"
31
35
  #include "duckdb/planner/operator/logical_get.hpp"
32
36
  #include "duckdb/storage/statistics/base_statistics.hpp"
33
- #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
34
- #include "duckdb/common/multi_file_reader.hpp"
35
37
  #include "duckdb/storage/table/row_group.hpp"
36
- #include "duckdb/main/extension_util.hpp"
37
38
  #endif
38
39
 
39
40
  namespace duckdb {
@@ -115,6 +116,7 @@ struct ParquetWriteBindData : public TableFunctionData {
115
116
  vector<string> column_names;
116
117
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
117
118
  idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
119
+ ChildFieldIDs field_ids;
118
120
  };
119
121
 
120
122
  struct ParquetWriteGlobalState : public GlobalFunctionData {
@@ -123,10 +125,12 @@ struct ParquetWriteGlobalState : public GlobalFunctionData {
123
125
 
124
126
  struct ParquetWriteLocalState : public LocalFunctionData {
125
127
  explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
126
- : buffer(Allocator::Get(context), types) {
128
+ : buffer(context, types, ColumnDataAllocatorType::HYBRID) {
129
+ buffer.InitializeAppend(append_state);
127
130
  }
128
131
 
129
132
  ColumnDataCollection buffer;
133
+ ColumnDataAppendState append_state;
130
134
  };
131
135
 
132
136
  void ParquetOptions::Serialize(FieldWriter &writer) const {
@@ -148,10 +152,12 @@ BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) {
148
152
  for (auto &path : parquet_bind.files) {
149
153
  file_path.emplace_back(path);
150
154
  }
155
+ // LCOV_EXCL_START
151
156
  bind_info.InsertOption("file_path", Value::LIST(LogicalType::VARCHAR, file_path));
152
157
  bind_info.InsertOption("binary_as_string", Value::BOOLEAN(parquet_bind.parquet_options.binary_as_string));
153
158
  bind_info.InsertOption("file_row_number", Value::BOOLEAN(parquet_bind.parquet_options.file_row_number));
154
159
  parquet_bind.parquet_options.file_options.AddBatchInfo(bind_info);
160
+ // LCOV_EXCL_STOP
155
161
  return bind_info;
156
162
  }
157
163
 
@@ -171,7 +177,6 @@ public:
171
177
  table_function.serialize = ParquetScanSerialize;
172
178
  table_function.deserialize = ParquetScanDeserialize;
173
179
  table_function.get_batch_info = ParquetGetBatchInfo;
174
-
175
180
  table_function.projection_pushdown = true;
176
181
  table_function.filter_pushdown = true;
177
182
  table_function.filter_prune = true;
@@ -293,7 +298,7 @@ public:
293
298
  ParquetOptions parquet_options(context);
294
299
  for (auto &kv : input.named_parameters) {
295
300
  auto loption = StringUtil::Lower(kv.first);
296
- if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options)) {
301
+ if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options, context)) {
297
302
  continue;
298
303
  }
299
304
  if (loption == "binary_as_string") {
@@ -302,9 +307,7 @@ public:
302
307
  parquet_options.file_row_number = BooleanValue::Get(kv.second);
303
308
  }
304
309
  }
305
- if (parquet_options.file_options.auto_detect_hive_partitioning) {
306
- parquet_options.file_options.hive_partitioning = MultiFileReaderOptions::AutoDetectHivePartitioning(files);
307
- }
310
+ parquet_options.file_options.AutoDetectHivePartitioning(files, context);
308
311
  return ParquetScanBindInternal(context, std::move(files), return_types, names, parquet_options);
309
312
  }
310
313
 
@@ -371,7 +374,7 @@ public:
371
374
  }
372
375
  MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options, bind_data.reader_bind,
373
376
  bind_data.types, bind_data.names, input.column_ids, input.filters,
374
- bind_data.files[0]);
377
+ bind_data.files[0], context);
375
378
  }
376
379
 
377
380
  result->column_ids = input.column_ids;
@@ -518,6 +521,7 @@ public:
518
521
  static void ParquetComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
519
522
  vector<unique_ptr<Expression>> &filters) {
520
523
  auto &data = bind_data_p->Cast<ParquetReadBindData>();
524
+
521
525
  auto reset_reader = MultiFileReader::ComplexFilterPushdown(context, data.files,
522
526
  data.parquet_options.file_options, get, filters);
523
527
  if (reset_reader) {
@@ -564,9 +568,10 @@ public:
564
568
  shared_ptr<ParquetReader> reader;
565
569
  try {
566
570
  reader = make_shared<ParquetReader>(context, file, pq_options);
567
- MultiFileReader::InitializeReader(
568
- *reader, bind_data.parquet_options.file_options, bind_data.reader_bind, bind_data.types,
569
- bind_data.names, parallel_state.column_ids, parallel_state.filters, bind_data.files.front());
571
+ MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options,
572
+ bind_data.reader_bind, bind_data.types, bind_data.names,
573
+ parallel_state.column_ids, parallel_state.filters,
574
+ bind_data.files.front(), context);
570
575
  } catch (...) {
571
576
  parallel_lock.lock();
572
577
  parallel_state.error_opening_file = true;
@@ -585,8 +590,157 @@ public:
585
590
  }
586
591
  };
587
592
 
593
+ static case_insensitive_map_t<LogicalType> GetChildNameToTypeMap(const LogicalType &type) {
594
+ case_insensitive_map_t<LogicalType> name_to_type_map;
595
+ switch (type.id()) {
596
+ case LogicalTypeId::LIST:
597
+ name_to_type_map.emplace("element", ListType::GetChildType(type));
598
+ break;
599
+ case LogicalTypeId::MAP:
600
+ name_to_type_map.emplace("key", MapType::KeyType(type));
601
+ name_to_type_map.emplace("value", MapType::ValueType(type));
602
+ break;
603
+ case LogicalTypeId::STRUCT:
604
+ for (auto &child_type : StructType::GetChildTypes(type)) {
605
+ if (child_type.first == FieldID::DUCKDB_FIELD_ID) {
606
+ throw BinderException("Cannot have column named \"%s\" with FIELD_IDS", FieldID::DUCKDB_FIELD_ID);
607
+ }
608
+ name_to_type_map.emplace(child_type);
609
+ }
610
+ break;
611
+ default: // LCOV_EXCL_START
612
+ throw InternalException("Unexpected type in GetChildNameToTypeMap");
613
+ } // LCOV_EXCL_STOP
614
+ return name_to_type_map;
615
+ }
616
+
617
+ static void GetChildNamesAndTypes(const LogicalType &type, vector<string> &child_names,
618
+ vector<LogicalType> &child_types) {
619
+ switch (type.id()) {
620
+ case LogicalTypeId::LIST:
621
+ child_names.emplace_back("element");
622
+ child_types.emplace_back(ListType::GetChildType(type));
623
+ break;
624
+ case LogicalTypeId::MAP:
625
+ child_names.emplace_back("key");
626
+ child_names.emplace_back("value");
627
+ child_types.emplace_back(MapType::KeyType(type));
628
+ child_types.emplace_back(MapType::ValueType(type));
629
+ break;
630
+ case LogicalTypeId::STRUCT:
631
+ for (auto &child_type : StructType::GetChildTypes(type)) {
632
+ child_names.emplace_back(child_type.first);
633
+ child_types.emplace_back(child_type.second);
634
+ }
635
+ break;
636
+ default: // LCOV_EXCL_START
637
+ throw InternalException("Unexpected type in GetChildNamesAndTypes");
638
+ } // LCOV_EXCL_STOP
639
+ }
640
+
641
+ static void GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
642
+ const vector<LogicalType> &sql_types) {
643
+ D_ASSERT(names.size() == sql_types.size());
644
+ for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
645
+ const auto &col_name = names[col_idx];
646
+ auto inserted = field_ids.ids->insert(make_pair(col_name, FieldID(field_id++)));
647
+ D_ASSERT(inserted.second);
648
+
649
+ const auto &col_type = sql_types[col_idx];
650
+ if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
651
+ col_type.id() != LogicalTypeId::STRUCT) {
652
+ continue;
653
+ }
654
+
655
+ // Cannot use GetChildNameToTypeMap here because we lose order, and we want to generate depth-first
656
+ vector<string> child_names;
657
+ vector<LogicalType> child_types;
658
+ GetChildNamesAndTypes(col_type, child_names, child_types);
659
+
660
+ GenerateFieldIDs(inserted.first->second.child_field_ids, field_id, child_names, child_types);
661
+ }
662
+ }
663
+
664
+ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
665
+ unordered_set<uint32_t> &unique_field_ids,
666
+ const case_insensitive_map_t<LogicalType> &name_to_type_map) {
667
+ const auto &struct_type = field_ids_value.type();
668
+ if (struct_type.id() != LogicalTypeId::STRUCT) {
669
+ throw BinderException(
670
+ "Expected FIELD_IDS to be a STRUCT, e.g., {col1: 42, col2: {%s: 43, nested_col: 44}, col3: 44}",
671
+ FieldID::DUCKDB_FIELD_ID);
672
+ }
673
+ const auto &struct_children = StructValue::GetChildren(field_ids_value);
674
+ D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size());
675
+ for (idx_t i = 0; i < struct_children.size(); i++) {
676
+ const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i));
677
+ if (col_name == FieldID::DUCKDB_FIELD_ID) {
678
+ continue;
679
+ }
680
+
681
+ auto it = name_to_type_map.find(col_name);
682
+ if (it == name_to_type_map.end()) {
683
+ string names;
684
+ for (const auto &name : name_to_type_map) {
685
+ if (!names.empty()) {
686
+ names += ", ";
687
+ }
688
+ names += name.first;
689
+ }
690
+ throw BinderException("Column name \"%s\" specified in FIELD_IDS not found. Available column names: [%s]",
691
+ col_name, names);
692
+ }
693
+ D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys
694
+
695
+ const auto &child_value = struct_children[i];
696
+ const auto &child_type = child_value.type();
697
+ optional_ptr<const Value> field_id_value;
698
+ optional_ptr<const Value> child_field_ids_value;
699
+
700
+ if (child_type.id() == LogicalTypeId::STRUCT) {
701
+ const auto &nested_children = StructValue::GetChildren(child_value);
702
+ D_ASSERT(StructType::GetChildTypes(child_type).size() == nested_children.size());
703
+ for (idx_t nested_i = 0; nested_i < nested_children.size(); nested_i++) {
704
+ const auto &field_id_or_nested_col = StructType::GetChildName(child_type, nested_i);
705
+ if (field_id_or_nested_col == FieldID::DUCKDB_FIELD_ID) {
706
+ field_id_value = &nested_children[nested_i];
707
+ } else {
708
+ child_field_ids_value = &child_value;
709
+ }
710
+ }
711
+ } else {
712
+ field_id_value = &child_value;
713
+ }
714
+
715
+ FieldID field_id;
716
+ if (field_id_value) {
717
+ Value field_id_integer_value = field_id_value->DefaultCastAs(LogicalType::INTEGER);
718
+ const uint32_t field_id_int = IntegerValue::Get(field_id_integer_value);
719
+ if (!unique_field_ids.insert(field_id_int).second) {
720
+ throw BinderException("Duplicate field_id %s found in FIELD_IDS", field_id_integer_value.ToString());
721
+ }
722
+ field_id = FieldID(field_id_int);
723
+ }
724
+ auto inserted = field_ids.ids->insert(make_pair(col_name, std::move(field_id)));
725
+ D_ASSERT(inserted.second);
726
+
727
+ if (child_field_ids_value) {
728
+ const auto &col_type = it->second;
729
+ if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
730
+ col_type.id() != LogicalTypeId::STRUCT) {
731
+ throw BinderException("Column \"%s\" with type \"%s\" cannot have a nested FIELD_IDS specification",
732
+ col_name, LogicalTypeIdToString(col_type.id()));
733
+ }
734
+
735
+ GetFieldIDs(*child_field_ids_value, inserted.first->second.child_field_ids, unique_field_ids,
736
+ GetChildNameToTypeMap(col_type));
737
+ }
738
+ }
739
+ }
740
+
588
741
  unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
589
742
  vector<LogicalType> &sql_types) {
743
+ D_ASSERT(names.size() == sql_types.size());
590
744
  auto bind_data = make_uniq<ParquetWriteBindData>();
591
745
  for (auto &option : info.options) {
592
746
  auto loption = StringUtil::Lower(option.first);
@@ -609,7 +763,27 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
609
763
  continue;
610
764
  }
611
765
  }
612
- throw ParserException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
766
+ throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
767
+ } else if (loption == "field_ids") {
768
+ if (option.second.size() != 1) {
769
+ throw BinderException("FIELD_IDS requires exactly one argument");
770
+ }
771
+ if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
772
+ StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
773
+ idx_t field_id = 0;
774
+ GenerateFieldIDs(bind_data->field_ids, field_id, names, sql_types);
775
+ } else {
776
+ unordered_set<uint32_t> unique_field_ids;
777
+ case_insensitive_map_t<LogicalType> name_to_type_map;
778
+ for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
779
+ if (names[col_idx] == FieldID::DUCKDB_FIELD_ID) {
780
+ throw BinderException("Cannot have a column named \"%s\" when writing FIELD_IDS",
781
+ FieldID::DUCKDB_FIELD_ID);
782
+ }
783
+ name_to_type_map.emplace(names[col_idx], sql_types[col_idx]);
784
+ }
785
+ GetFieldIDs(option.second[0], bind_data->field_ids, unique_field_ids, name_to_type_map);
786
+ }
613
787
  } else {
614
788
  throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
615
789
  }
@@ -625,8 +799,8 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
625
799
  auto &parquet_bind = bind_data.Cast<ParquetWriteBindData>();
626
800
 
627
801
  auto &fs = FileSystem::GetFileSystem(context);
628
- global_state->writer =
629
- make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec);
802
+ global_state->writer = make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names,
803
+ parquet_bind.codec, parquet_bind.field_ids.Copy());
630
804
  return std::move(global_state);
631
805
  }
632
806
 
@@ -637,12 +811,12 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
637
811
  auto &local_state = lstate.Cast<ParquetWriteLocalState>();
638
812
 
639
813
  // append data to the local (buffered) chunk collection
640
- local_state.buffer.Append(input);
814
+ local_state.buffer.Append(local_state.append_state, input);
641
815
  if (local_state.buffer.Count() > bind_data.row_group_size) {
642
816
  // if the chunk collection exceeds a certain size we flush it to the parquet file
817
+ local_state.append_state.current_chunk_state.handles.clear();
643
818
  global_state.writer->Flush(local_state.buffer);
644
- // and reset the buffer
645
- local_state.buffer.Reset();
819
+ local_state.buffer.InitializeAppend(local_state.append_state);
646
820
  }
647
821
  }
648
822
 
@@ -48,7 +48,7 @@ using duckdb_parquet::format::SchemaElement;
48
48
  using duckdb_parquet::format::Statistics;
49
49
  using duckdb_parquet::format::Type;
50
50
 
51
- static duckdb::unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
51
+ static unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
52
52
  CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, bool prefetch_mode) {
53
53
  auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, prefetch_mode);
54
54
  return make_uniq<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>>(std::move(transport));
@@ -76,7 +76,7 @@ static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, F
76
76
  }
77
77
  // read four-byte footer length from just before the end magic bytes
78
78
  auto footer_len = *reinterpret_cast<uint32_t *>(buf.ptr);
79
- if (footer_len <= 0 || file_size < 12 + footer_len) {
79
+ if (footer_len == 0 || file_size < 12 + footer_len) {
80
80
  throw InvalidInputException("Footer length error in file '%s'", file_handle.path);
81
81
  }
82
82
  auto metadata_pos = file_size - (footer_len + 8);
@@ -271,7 +271,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
271
271
  }
272
272
  if (s_ele.__isset.num_children && s_ele.num_children > 0) { // inner node
273
273
  child_list_t<LogicalType> child_types;
274
- vector<duckdb::unique_ptr<ColumnReader>> child_readers;
274
+ vector<unique_ptr<ColumnReader>> child_readers;
275
275
 
276
276
  idx_t c_idx = 0;
277
277
  while (c_idx < (idx_t)s_ele.num_children) {
@@ -287,7 +287,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
287
287
  c_idx++;
288
288
  }
289
289
  D_ASSERT(!child_types.empty());
290
- duckdb::unique_ptr<ColumnReader> result;
290
+ unique_ptr<ColumnReader> result;
291
291
  LogicalType result_type;
292
292
 
293
293
  bool is_repeated = repetition_type == FieldRepetitionType::REPEATED;
@@ -429,7 +429,7 @@ ParquetOptions::ParquetOptions(ClientContext &context) {
429
429
 
430
430
  ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, ParquetOptions parquet_options_p)
431
431
  : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
432
- parquet_options(parquet_options_p) {
432
+ parquet_options(std::move(parquet_options_p)) {
433
433
  file_name = std::move(file_name_p);
434
434
  file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
435
435
  if (!file_handle->CanSeek()) {
@@ -457,7 +457,7 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
457
457
  ParquetReader::ParquetReader(ClientContext &context_p, ParquetOptions parquet_options_p,
458
458
  shared_ptr<ParquetFileMetadataCache> metadata_p)
459
459
  : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
460
- metadata(std::move(metadata_p)), parquet_options(parquet_options_p) {
460
+ metadata(std::move(metadata_p)), parquet_options(std::move(parquet_options_p)) {
461
461
  InitializeSchema();
462
462
  }
463
463
 
@@ -1,12 +1,13 @@
1
1
  #include "parquet_statistics.hpp"
2
+
3
+ #include "duckdb.hpp"
2
4
  #include "parquet_decimal_utils.hpp"
3
5
  #include "parquet_timestamp.hpp"
4
6
  #include "string_column_reader.hpp"
5
- #include "duckdb.hpp"
6
7
  #ifndef DUCKDB_AMALGAMATION
7
8
  #include "duckdb/common/types/blob.hpp"
8
- #include "duckdb/common/types/value.hpp"
9
9
  #include "duckdb/common/types/time.hpp"
10
+ #include "duckdb/common/types/value.hpp"
10
11
  #endif
11
12
 
12
13
  namespace duckdb {
@@ -14,9 +15,9 @@ namespace duckdb {
14
15
  using duckdb_parquet::format::ConvertedType;
15
16
  using duckdb_parquet::format::Type;
16
17
 
17
- static duckdb::unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type,
18
- const duckdb_parquet::format::SchemaElement &schema_ele,
19
- const duckdb_parquet::format::Statistics &parquet_stats) {
18
+ static unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type,
19
+ const duckdb_parquet::format::SchemaElement &schema_ele,
20
+ const duckdb_parquet::format::Statistics &parquet_stats) {
20
21
  auto stats = NumericStats::CreateUnknown(type);
21
22
 
22
23
  // for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and
@@ -226,7 +227,7 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
226
227
  return nullptr;
227
228
  }
228
229
  auto &parquet_stats = column_chunk.meta_data.statistics;
229
- duckdb::unique_ptr<BaseStatistics> row_group_stats;
230
+ unique_ptr<BaseStatistics> row_group_stats;
230
231
 
231
232
  switch (type.id()) {
232
233
  case LogicalTypeId::UTINYINT:
@@ -29,6 +29,30 @@ using duckdb_parquet::format::PageType;
29
29
  using ParquetRowGroup = duckdb_parquet::format::RowGroup;
30
30
  using duckdb_parquet::format::Type;
31
31
 
32
+ ChildFieldIDs::ChildFieldIDs() {
33
+ ids = make_uniq<case_insensitive_map_t<FieldID>>();
34
+ }
35
+
36
+ ChildFieldIDs ChildFieldIDs::Copy() const {
37
+ ChildFieldIDs result;
38
+ for (const auto &id : *ids) {
39
+ result.ids->emplace(id.first, id.second.Copy());
40
+ }
41
+ return result;
42
+ }
43
+
44
+ FieldID::FieldID() : set(false) {
45
+ }
46
+
47
+ FieldID::FieldID(int32_t field_id_p) : set(true), field_id(field_id_p) {
48
+ }
49
+
50
+ FieldID FieldID::Copy() const {
51
+ auto result = set ? FieldID(field_id) : FieldID();
52
+ result.child_field_ids = child_field_ids.Copy();
53
+ return result;
54
+ }
55
+
32
56
  class MyTransport : public TTransport {
33
57
  public:
34
58
  explicit MyTransport(Serializer &serializer) : serializer(serializer) {
@@ -226,8 +250,9 @@ void VerifyUniqueNames(const vector<string> &names) {
226
250
  }
227
251
 
228
252
  ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, vector<LogicalType> types_p, vector<string> names_p,
229
- CompressionCodec::type codec)
230
- : file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec) {
253
+ CompressionCodec::type codec, ChildFieldIDs field_ids_p)
254
+ : file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec),
255
+ field_ids(std::move(field_ids_p)) {
231
256
  // initialize the file writer
232
257
  writer = make_uniq<BufferedFileWriter>(fs, file_name.c_str(),
233
258
  FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
@@ -257,11 +282,18 @@ ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, vector<LogicalT
257
282
  vector<string> schema_path;
258
283
  for (idx_t i = 0; i < sql_types.size(); i++) {
259
284
  column_writers.push_back(ColumnWriter::CreateWriterRecursive(file_meta_data.schema, *this, sql_types[i],
260
- unique_names[i], schema_path));
285
+ unique_names[i], schema_path, &field_ids));
261
286
  }
262
287
  }
263
288
 
264
289
  void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result) {
290
+ // We write 8 columns at a time so that iterating over ColumnDataCollection is more efficient
291
+ static constexpr idx_t COLUMNS_PER_PASS = 8;
292
+
293
+ // We want these to be in-memory/hybrid so we don't have to copy over strings to the dictionary
294
+ D_ASSERT(buffer.GetAllocatorType() == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR ||
295
+ buffer.GetAllocatorType() == ColumnDataAllocatorType::HYBRID);
296
+
265
297
  // set up a new row group for this chunk collection
266
298
  auto &row_group = result.row_group;
267
299
  row_group.num_rows = buffer.Count();
@@ -270,24 +302,52 @@ void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGro
270
302
  auto &states = result.states;
271
303
  // iterate over each of the columns of the chunk collection and write them
272
304
  D_ASSERT(buffer.ColumnCount() == column_writers.size());
273
- for (idx_t col_idx = 0; col_idx < buffer.ColumnCount(); col_idx++) {
274
- const auto &col_writer = column_writers[col_idx];
275
- auto write_state = col_writer->InitializeWriteState(row_group, buffer.GetAllocator());
276
- if (col_writer->HasAnalyze()) {
277
- for (auto &chunk : buffer.Chunks()) {
278
- col_writer->Analyze(*write_state, nullptr, chunk.data[col_idx], chunk.size());
305
+ for (idx_t col_idx = 0; col_idx < buffer.ColumnCount(); col_idx += COLUMNS_PER_PASS) {
306
+ const auto next = MinValue<idx_t>(buffer.ColumnCount() - col_idx, COLUMNS_PER_PASS);
307
+ vector<column_t> column_ids;
308
+ vector<reference<ColumnWriter>> col_writers;
309
+ vector<unique_ptr<ColumnWriterState>> write_states;
310
+ for (idx_t i = 0; i < next; i++) {
311
+ column_ids.emplace_back(col_idx + i);
312
+ col_writers.emplace_back(*column_writers[column_ids.back()]);
313
+ write_states.emplace_back(col_writers.back().get().InitializeWriteState(row_group));
314
+ }
315
+
316
+ for (auto &chunk : buffer.Chunks({column_ids})) {
317
+ for (idx_t i = 0; i < next; i++) {
318
+ if (col_writers[i].get().HasAnalyze()) {
319
+ col_writers[i].get().Analyze(*write_states[i], nullptr, chunk.data[i], chunk.size());
320
+ }
321
+ }
322
+ }
323
+
324
+ for (idx_t i = 0; i < next; i++) {
325
+ if (col_writers[i].get().HasAnalyze()) {
326
+ col_writers[i].get().FinalizeAnalyze(*write_states[i]);
327
+ }
328
+ }
329
+
330
+ for (auto &chunk : buffer.Chunks({column_ids})) {
331
+ for (idx_t i = 0; i < next; i++) {
332
+ col_writers[i].get().Prepare(*write_states[i], nullptr, chunk.data[i], chunk.size());
279
333
  }
280
- col_writer->FinalizeAnalyze(*write_state);
281
334
  }
282
- for (auto &chunk : buffer.Chunks()) {
283
- col_writer->Prepare(*write_state, nullptr, chunk.data[col_idx], chunk.size());
335
+
336
+ for (idx_t i = 0; i < next; i++) {
337
+ col_writers[i].get().BeginWrite(*write_states[i]);
284
338
  }
285
- col_writer->BeginWrite(*write_state);
286
- for (auto &chunk : buffer.Chunks()) {
287
- col_writer->Write(*write_state, chunk.data[col_idx], chunk.size());
339
+
340
+ for (auto &chunk : buffer.Chunks({column_ids})) {
341
+ for (idx_t i = 0; i < next; i++) {
342
+ col_writers[i].get().Write(*write_states[i], chunk.data[i], chunk.size());
343
+ }
344
+ }
345
+
346
+ for (auto &write_state : write_states) {
347
+ states.push_back(std::move(write_state));
288
348
  }
289
- states.push_back(std::move(write_state));
290
349
  }
350
+ result.heaps = buffer.GetHeapReferences();
291
351
  }
292
352
 
293
353
  void ParquetWriter::FlushRowGroup(PreparedRowGroup &prepared) {
@@ -307,6 +367,8 @@ void ParquetWriter::FlushRowGroup(PreparedRowGroup &prepared) {
307
367
  // append the row group to the file meta data
308
368
  file_meta_data.row_groups.push_back(row_group);
309
369
  file_meta_data.num_rows += row_group.num_rows;
370
+
371
+ prepared.heaps.clear();
310
372
  }
311
373
 
312
374
  void ParquetWriter::Flush(ColumnDataCollection &buffer) {
@@ -316,6 +378,7 @@ void ParquetWriter::Flush(ColumnDataCollection &buffer) {
316
378
 
317
379
  PreparedRowGroup prepared_row_group;
318
380
  PrepareRowGroup(buffer, prepared_row_group);
381
+ buffer.Reset();
319
382
 
320
383
  FlushRowGroup(prepared_row_group);
321
384
  }
@@ -155,7 +155,7 @@ void ZstdStreamWrapper::Close() {
155
155
 
156
156
  class ZStdFile : public CompressedFile {
157
157
  public:
158
- ZStdFile(duckdb::unique_ptr<FileHandle> child_handle_p, const string &path, bool write)
158
+ ZStdFile(unique_ptr<FileHandle> child_handle_p, const string &path, bool write)
159
159
  : CompressedFile(zstd_fs, std::move(child_handle_p), path) {
160
160
  Initialize(write);
161
161
  }
@@ -163,7 +163,7 @@ public:
163
163
  ZStdFileSystem zstd_fs;
164
164
  };
165
165
 
166
- unique_ptr<FileHandle> ZStdFileSystem::OpenCompressedFile(duckdb::unique_ptr<FileHandle> handle, bool write) {
166
+ unique_ptr<FileHandle> ZStdFileSystem::OpenCompressedFile(unique_ptr<FileHandle> handle, bool write) {
167
167
  auto path = handle->path;
168
168
  return make_uniq<ZStdFile>(std::move(handle), path, write);
169
169
  }
@@ -298,7 +298,7 @@ unique_ptr<CatalogEntry> DuckTableEntry::AddColumn(ClientContext &context, AddCo
298
298
  auto binder = Binder::CreateBinder(context);
299
299
  auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info));
300
300
  auto new_storage =
301
- make_shared<DataTable>(context, *storage, info.new_column, bound_create_info->bound_defaults.back().get());
301
+ make_shared<DataTable>(context, *storage, info.new_column, *bound_create_info->bound_defaults.back());
302
302
  return make_uniq<DuckTableEntry>(catalog, schema, *bound_create_info, new_storage);
303
303
  }
304
304
 
@@ -1,10 +1,10 @@
1
1
  #include "duckdb/catalog/catalog_search_path.hpp"
2
2
 
3
+ #include "duckdb/catalog/catalog.hpp"
3
4
  #include "duckdb/common/constants.hpp"
4
5
  #include "duckdb/common/exception.hpp"
5
6
  #include "duckdb/common/string_util.hpp"
6
7
  #include "duckdb/main/client_context.hpp"
7
- #include "duckdb/catalog/catalog.hpp"
8
8
  #include "duckdb/main/database_manager.hpp"
9
9
 
10
10
  namespace duckdb {
@@ -249,13 +249,14 @@ void CatalogSearchPath::SetPaths(vector<CatalogSearchEntry> new_paths) {
249
249
  bool CatalogSearchPath::SchemaInSearchPath(ClientContext &context, const string &catalog_name,
250
250
  const string &schema_name) {
251
251
  for (auto &path : paths) {
252
- if (path.schema != schema_name) {
252
+ if (!StringUtil::CIEquals(path.schema, schema_name)) {
253
253
  continue;
254
254
  }
255
- if (path.catalog == catalog_name) {
255
+ if (StringUtil::CIEquals(path.catalog, catalog_name)) {
256
256
  return true;
257
257
  }
258
- if (IsInvalidCatalog(path.catalog) && catalog_name == DatabaseManager::GetDefaultDatabase(context)) {
258
+ if (IsInvalidCatalog(path.catalog) &&
259
+ StringUtil::CIEquals(catalog_name, DatabaseManager::GetDefaultDatabase(context))) {
259
260
  return true;
260
261
  }
261
262
  }
@@ -99,6 +99,19 @@ static DefaultMacro internal_macros[] = {
99
99
  {DEFAULT_SCHEMA, "count_if", {"l", nullptr}, "sum(if(l, 1, 0))"},
100
100
  {DEFAULT_SCHEMA, "split_part", {"string", "delimiter", "position", nullptr}, "coalesce(string_split(string, delimiter)[position],'')"},
101
101
 
102
+ // FIXME implement as actual function if we encounter a lot of performance issues. Complexity now: n * m, with hashing possibly n + m
103
+ {DEFAULT_SCHEMA, "list_intersect", {"l1", "l2", nullptr}, "list_filter(l1, (x) -> list_contains(l2, x))"},
104
+ {DEFAULT_SCHEMA, "array_intersect", {"l1", "l2", nullptr}, "list_intersect(l1, l2)"},
105
+
106
+ {DEFAULT_SCHEMA, "list_has_any", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_intersect(l1, l2)) > 0 THEN true ELSE false END"},
107
+ {DEFAULT_SCHEMA, "array_has_any", {"l1", "l2", nullptr}, "list_has_any(l1, l2)" },
108
+ {DEFAULT_SCHEMA, "&&", {"l1", "l2", nullptr}, "list_has_any(l1, l2)" }, // "&&" is the operator for "list_has_any
109
+
110
+ {DEFAULT_SCHEMA, "list_has_all", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_intersect(l2, l1)) = len(list_filter(l2, x -> x IS NOT NULL)) THEN true ELSE false END"},
111
+ {DEFAULT_SCHEMA, "array_has_all", {"l1", "l2", nullptr}, "list_has_all(l1, l2)" },
112
+ {DEFAULT_SCHEMA, "@>", {"l1", "l2", nullptr}, "list_has_all(l1, l2)" }, // "@>" is the operator for "list_has_all
113
+ {DEFAULT_SCHEMA, "<@", {"l1", "l2", nullptr}, "list_has_all(l2, l1)" }, // "<@" is the operator for "list_has_all
114
+
102
115
  // algebraic list aggregates
103
116
  {DEFAULT_SCHEMA, "list_avg", {"l", nullptr}, "list_aggr(l, 'avg')"},
104
117
  {DEFAULT_SCHEMA, "list_var_samp", {"l", nullptr}, "list_aggr(l, 'var_samp')"},
@@ -205,6 +218,9 @@ unique_ptr<CatalogEntry> DefaultFunctionGenerator::CreateDefaultEntry(ClientCont
205
218
  vector<string> DefaultFunctionGenerator::GetDefaultEntries() {
206
219
  vector<string> result;
207
220
  for (idx_t index = 0; internal_macros[index].name != nullptr; index++) {
221
+ if (StringUtil::Lower(internal_macros[index].name) != internal_macros[index].name) {
222
+ throw InternalException("Default macro name %s should be lowercase", internal_macros[index].name);
223
+ }
208
224
  if (internal_macros[index].schema == schema.name) {
209
225
  result.emplace_back(internal_macros[index].name);
210
226
  }