duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476):
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +42 -5
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/file_system.cpp +19 -0
  61. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  62. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  63. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  64. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  65. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  66. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  67. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  68. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  69. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  70. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  71. package/src/duckdb/src/common/types/bit.cpp +51 -0
  72. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  73. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  74. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  75. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  76. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  77. package/src/duckdb/src/common/types/date.cpp +9 -0
  78. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  81. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  82. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  83. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  84. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  85. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  86. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  92. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  93. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  94. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  95. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  96. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  97. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  98. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  100. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  102. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  103. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  104. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  105. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  106. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  107. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  108. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  109. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  110. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  111. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  112. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  113. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  114. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  115. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  116. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  117. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  119. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  121. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
  122. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  123. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  124. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  125. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  126. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  127. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  128. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  129. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  131. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  132. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  133. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  134. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  135. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  136. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  137. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  138. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  139. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  140. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  141. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  142. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  143. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  144. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  145. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  146. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  147. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  148. package/src/duckdb/src/function/function.cpp +3 -1
  149. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  150. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  151. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  152. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  153. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  154. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  155. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  156. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  157. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  158. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  159. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  160. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  161. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  162. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  163. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  164. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  165. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  166. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  168. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  169. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  170. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  171. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  172. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  173. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  174. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  175. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  176. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  177. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  178. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  179. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  181. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  182. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  183. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  184. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  185. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  186. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  187. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  188. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  189. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  190. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  191. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  192. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  193. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  194. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  195. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  196. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  197. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  198. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  200. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  201. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  207. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  210. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  211. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  218. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  219. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  220. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  221. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  222. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  223. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  224. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  225. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  226. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  227. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  228. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  229. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  230. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  231. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  232. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  233. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  234. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  235. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  236. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  237. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  238. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  239. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  240. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  241. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  242. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  243. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  246. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  247. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  248. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  249. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  250. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  251. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  252. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  253. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  254. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  255. package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
  256. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  257. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  258. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  259. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  260. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  262. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  263. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  265. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  266. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  267. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  269. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  270. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  272. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  273. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  274. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  275. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  277. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  279. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  280. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  281. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  282. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  283. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  284. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  285. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  286. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  287. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  288. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  292. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  293. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  294. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  295. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  296. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  297. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  298. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  299. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  300. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  301. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  302. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  303. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  304. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  305. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  306. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  307. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  308. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  309. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  310. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  311. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  312. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  313. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  314. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  315. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  316. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  317. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  318. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  319. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  320. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  321. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  322. package/src/duckdb/src/include/duckdb.h +28 -0
  323. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  324. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  325. package/src/duckdb/src/main/config.cpp +3 -0
  326. package/src/duckdb/src/main/database.cpp +1 -1
  327. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  328. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  329. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  330. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  331. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  332. package/src/duckdb/src/main/relation.cpp +6 -5
  333. package/src/duckdb/src/main/settings/settings.cpp +64 -18
  334. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  335. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  336. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  337. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  338. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  339. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  340. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  341. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  342. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  343. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  344. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  345. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  346. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  347. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  348. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  349. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  350. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  351. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  352. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  353. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  354. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  355. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  356. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  357. package/src/duckdb/src/parallel/executor.cpp +15 -0
  358. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  359. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  360. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  361. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  362. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  363. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  364. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  365. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  366. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  367. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  368. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  369. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  370. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  371. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  372. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  373. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  374. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  375. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  376. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  377. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  378. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  379. package/src/duckdb/src/parser/parser.cpp +8 -2
  380. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  381. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  382. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  383. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  384. package/src/duckdb/src/parser/query_node.cpp +15 -37
  385. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  386. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  387. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  388. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  389. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  390. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  391. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  392. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  393. package/src/duckdb/src/parser/tableref.cpp +0 -44
  394. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  395. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  396. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  397. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  398. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  399. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  400. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  401. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  402. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  403. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  404. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  405. package/src/duckdb/src/parser/transformer.cpp +15 -0
  406. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  407. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  408. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  409. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  410. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  411. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  412. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  413. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  414. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  415. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  416. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  417. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  418. package/src/duckdb/src/planner/binder.cpp +5 -0
  419. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  420. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  421. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  422. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  423. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  424. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  425. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  426. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  427. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  428. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  429. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  430. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  431. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  432. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  433. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  434. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  435. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  436. package/src/duckdb/src/storage/data_table.cpp +1 -1
  437. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  438. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  439. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  440. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  441. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  442. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  443. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  444. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  445. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  446. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  447. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  448. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  449. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  450. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  451. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  452. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  453. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  454. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  455. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  456. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  457. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  458. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  459. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  460. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  461. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  462. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  463. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  464. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  465. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  466. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  467. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  468. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  469. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  470. package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
  471. package/src/statement.cpp +10 -3
  472. package/test/test_all_types.test.ts +233 -0
  473. package/tsconfig.json +1 -0
  474. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  475. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  476. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -23,6 +23,14 @@
23
23
  #include <sys/stat.h>
24
24
  #include <sys/types.h>
25
25
  #include <unistd.h>
26
+
27
+ #ifdef __MVS__
28
+ #define _XOPEN_SOURCE_EXTENDED 1
29
+ #include <sys/resource.h>
30
+ // enjoy - https://reviews.llvm.org/D92110
31
+ #define PATH_MAX _XOPEN_PATH_MAX
32
+ #endif
33
+
26
34
  #else
27
35
  #include <string>
28
36
  #include <sysinfoapi.h>
@@ -79,7 +87,14 @@ void FileSystem::SetWorkingDirectory(const string &path) {
79
87
 
80
88
  idx_t FileSystem::GetAvailableMemory() {
81
89
  errno = 0;
90
+
91
+ #ifdef __MVS__
92
+ struct rlimit limit;
93
+ int rlim_rc = getrlimit(RLIMIT_AS, &limit);
94
+ idx_t max_memory = MinValue<idx_t>(limit.rlim_max, UINTPTR_MAX);
95
+ #else
82
96
  idx_t max_memory = MinValue<idx_t>((idx_t)sysconf(_SC_PHYS_PAGES) * (idx_t)sysconf(_SC_PAGESIZE), UINTPTR_MAX);
97
+ #endif
83
98
  if (errno != 0) {
84
99
  return DConstants::INVALID_INDEX;
85
100
  }
@@ -370,6 +385,10 @@ void FileSystem::UnregisterSubSystem(const string &name) {
370
385
  throw NotImplementedException("%s: Can't unregister a sub system on a non-virtual file system", GetName());
371
386
  }
372
387
 
388
+ void FileSystem::SetDisabledFileSystems(const vector<string> &names) {
389
+ throw NotImplementedException("%s: Can't disable file systems on a non-virtual file system", GetName());
390
+ }
391
+
373
392
  vector<string> FileSystem::ListSubSystems() {
374
393
  throw NotImplementedException("%s: Can't list sub systems on a non-virtual file system", GetName());
375
394
  }
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
7
7
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
8
8
  #include "duckdb/planner/expression_iterator.hpp"
9
+ #include "duckdb/planner/operator/logical_get.hpp"
9
10
  #include "duckdb/planner/table_filter.hpp"
10
11
  #include "re2/re2.h"
11
12
 
@@ -86,12 +87,15 @@ std::map<string, string> HivePartitioning::Parse(const string &filename) {
86
87
  // currently, only expressions that cannot be evaluated during pushdown are removed.
87
88
  void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<string> &files,
88
89
  vector<unique_ptr<Expression>> &filters,
89
- unordered_map<string, column_t> &column_map, idx_t table_index,
90
+ unordered_map<string, column_t> &column_map, LogicalGet &get,
90
91
  bool hive_enabled, bool filename_enabled) {
92
+
91
93
  vector<string> pruned_files;
92
94
  vector<bool> have_preserved_filter(filters.size(), false);
93
95
  vector<unique_ptr<Expression>> pruned_filters;
96
+ unordered_set<idx_t> filters_applied_to_files;
94
97
  duckdb_re2::RE2 regex(REGEX_STRING);
98
+ auto table_index = get.table_index;
95
99
 
96
100
  if ((!filename_enabled && !hive_enabled) || filters.empty()) {
97
101
  return;
@@ -121,11 +125,11 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
121
125
  } else if (!result_value.GetValue<bool>()) {
122
126
  // filter evaluates to false
123
127
  should_prune_file = true;
124
- }
125
-
126
- // Use filter combiner to determine that this filter makes
127
- if (!should_prune_file && combiner.AddFilter(std::move(filter_copy)) == FilterResult::UNSATISFIABLE) {
128
- should_prune_file = true;
128
+ // convert the filter to a table filter.
129
+ if (filters_applied_to_files.find(j) == filters_applied_to_files.end()) {
130
+ get.extra_info.file_filters += filter->ToString();
131
+ filters_applied_to_files.insert(j);
132
+ }
129
133
  }
130
134
  }
131
135
 
@@ -375,7 +375,7 @@ int RemoveDirectoryRecursive(const char *path) {
375
375
  continue;
376
376
  }
377
377
  len = path_len + (idx_t)strlen(p->d_name) + 2;
378
- buf = new char[len];
378
+ buf = new (std::nothrow) char[len];
379
379
  if (buf) {
380
380
  struct stat statbuf;
381
381
  snprintf(buf, len, "%s/%s", path, p->d_name);
@@ -700,7 +700,7 @@ void LocalFileSystem::CreateDirectory(const string &directory) {
700
700
  }
701
701
  auto unicode_path = WindowsUtil::UTF8ToUnicode(directory.c_str());
702
702
  if (directory.empty() || !CreateDirectoryW(unicode_path.c_str(), NULL) || !DirectoryExists(directory)) {
703
- throw IOException("Could not create directory!");
703
+ throw IOException("Could not create directory: \'%s\'", directory.c_str());
704
704
  }
705
705
  }
706
706
 
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/common/exception.hpp"
7
7
  #include "duckdb/function/function_set.hpp"
8
8
  #include "duckdb/common/hive_partitioning.hpp"
9
+ #include "duckdb/common/types.hpp"
9
10
 
10
11
  namespace duckdb {
11
12
 
@@ -13,6 +14,8 @@ void MultiFileReader::AddParameters(TableFunction &table_function) {
13
14
  table_function.named_parameters["filename"] = LogicalType::BOOLEAN;
14
15
  table_function.named_parameters["hive_partitioning"] = LogicalType::BOOLEAN;
15
16
  table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
17
+ table_function.named_parameters["hive_types"] = LogicalType::ANY;
18
+ table_function.named_parameters["hive_types_autocast"] = LogicalType::BOOLEAN;
16
19
  }
17
20
 
18
21
  vector<string> MultiFileReader::GetFileList(ClientContext &context, const Value &input, const string &name,
@@ -49,7 +52,8 @@ vector<string> MultiFileReader::GetFileList(ClientContext &context, const Value
49
52
  return files;
50
53
  }
51
54
 
52
- bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options) {
55
+ bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options,
56
+ ClientContext &context) {
53
57
  auto loption = StringUtil::Lower(key);
54
58
  if (loption == "filename") {
55
59
  options.filename = BooleanValue::Get(val);
@@ -58,6 +62,28 @@ bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFile
58
62
  options.auto_detect_hive_partitioning = false;
59
63
  } else if (loption == "union_by_name") {
60
64
  options.union_by_name = BooleanValue::Get(val);
65
+ } else if (loption == "hive_types_autocast" || loption == "hive_type_autocast") {
66
+ options.hive_types_autocast = BooleanValue::Get(val);
67
+ } else if (loption == "hive_types" || loption == "hive_type") {
68
+ if (val.type().id() != LogicalTypeId::STRUCT) {
69
+ throw InvalidInputException(
70
+ "'hive_types' only accepts a STRUCT('name':VARCHAR, ...), but '%s' was provided",
71
+ val.type().ToString());
72
+ }
73
+ // verify that that all the children of the struct value are VARCHAR
74
+ auto &children = StructValue::GetChildren(val);
75
+ for (idx_t i = 0; i < children.size(); i++) {
76
+ const Value &child = children[i];
77
+ if (child.type().id() != LogicalType::VARCHAR) {
78
+ throw InvalidInputException("hive_types: '%s' must be a VARCHAR, instead: '%s' was provided",
79
+ StructType::GetChildName(val.type(), i), child.type().ToString());
80
+ }
81
+ // for every child of the struct, get the logical type
82
+ LogicalType transformed_type = TransformStringToLogicalType(child.ToString(), context);
83
+ const string &name = StructType::GetChildName(val.type(), i);
84
+ options.hive_types_schema[name] = transformed_type;
85
+ }
86
+ D_ASSERT(!options.hive_types_schema.empty());
61
87
  } else {
62
88
  return false;
63
89
  }
@@ -80,8 +106,9 @@ bool MultiFileReader::ComplexFilterPushdown(ClientContext &context, vector<strin
80
106
  }
81
107
 
82
108
  auto start_files = files.size();
83
- HivePartitioning::ApplyFiltersToFileList(context, files, filters, column_map, get.table_index,
84
- options.hive_partitioning, options.filename);
109
+ HivePartitioning::ApplyFiltersToFileList(context, files, filters, column_map, get, options.hive_partitioning,
110
+ options.filename);
111
+
85
112
  if (files.size() != start_files) {
86
113
  // we have pruned files
87
114
  return true;
@@ -111,28 +138,28 @@ MultiFileReaderBindData MultiFileReader::BindOptions(MultiFileReaderOptions &opt
111
138
  auto file_partitions = HivePartitioning::Parse(f);
112
139
  for (auto &part_info : partitions) {
113
140
  if (file_partitions.find(part_info.first) == file_partitions.end()) {
141
+ string error = "Hive partition mismatch between file \"%s\" and \"%s\": key \"%s\" not found";
114
142
  if (options.auto_detect_hive_partitioning == true) {
115
- throw BinderException(
116
- "Hive partitioning was enabled automatically, but an error was encountered: Hive partition "
117
- "mismatch between file \"%s\" and \"%s\": key \"%s\" not found\n\nTo switch off hive "
118
- "partition, set: HIVE_PARTITIONING=0",
119
- files[0], f, part_info.first);
143
+ throw InternalException(error + "(hive partitioning was autodetected)", files[0], f,
144
+ part_info.first);
120
145
  }
121
- throw BinderException(
122
- "Hive partition mismatch between file \"%s\" and \"%s\": key \"%s\" not found", files[0], f,
123
- part_info.first);
146
+ throw BinderException(error.c_str(), files[0], f, part_info.first);
124
147
  }
125
148
  }
126
149
  if (partitions.size() != file_partitions.size()) {
150
+ string error_msg = "Hive partition mismatch between file \"%s\" and \"%s\"";
127
151
  if (options.auto_detect_hive_partitioning == true) {
128
- throw BinderException("Hive partitioning was enabled automatically, but an error was encountered: "
129
- "Hive partition mismatch between file \"%s\" and \"%s\"\n\nTo switch off "
130
- "hive partition, set: HIVE_PARTITIONING=0",
131
- files[0], f);
152
+ throw InternalException(error_msg + "(hive partitioning was autodetected)", files[0], f);
132
153
  }
133
- throw BinderException("Hive partition mismatch between file \"%s\" and \"%s\"", files[0], f);
154
+ throw BinderException(error_msg.c_str(), files[0], f);
134
155
  }
135
156
  }
157
+
158
+ if (!options.hive_types_schema.empty()) {
159
+ // verify that all hive_types are existing partitions
160
+ options.VerifyHiveTypesArePartitions(partitions);
161
+ }
162
+
136
163
  for (auto &part : partitions) {
137
164
  idx_t hive_partitioning_index = DConstants::INVALID_INDEX;
138
165
  auto lookup = std::find(names.begin(), names.end(), part.first);
@@ -140,11 +167,11 @@ MultiFileReaderBindData MultiFileReader::BindOptions(MultiFileReaderOptions &opt
140
167
  // hive partitioning column also exists in file - override
141
168
  auto idx = lookup - names.begin();
142
169
  hive_partitioning_index = idx;
143
- return_types[idx] = LogicalType::VARCHAR;
170
+ return_types[idx] = options.GetHiveLogicalType(part.first);
144
171
  } else {
145
172
  // hive partitioning column does not exist in file - add a new column containing the key
146
173
  hive_partitioning_index = names.size();
147
- return_types.emplace_back(LogicalType::VARCHAR);
174
+ return_types.emplace_back(options.GetHiveLogicalType(part.first));
148
175
  names.emplace_back(part.first);
149
176
  }
150
177
  bind_data.hive_partitioning_indexes.emplace_back(part.first, hive_partitioning_index);
@@ -156,7 +183,9 @@ MultiFileReaderBindData MultiFileReader::BindOptions(MultiFileReaderOptions &opt
156
183
  void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
157
184
  const string &filename, const vector<string> &local_names,
158
185
  const vector<LogicalType> &global_types, const vector<string> &global_names,
159
- const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data) {
186
+ const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
187
+ ClientContext &context) {
188
+
160
189
  // create a map of name -> column index
161
190
  case_insensitive_map_t<idx_t> name_map;
162
191
  if (file_options.union_by_name) {
@@ -183,7 +212,8 @@ void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, c
183
212
  bool found_partition = false;
184
213
  for (auto &entry : options.hive_partitioning_indexes) {
185
214
  if (column_id == entry.index) {
186
- reader_data.constant_map.emplace_back(i, Value(partitions[entry.value]));
215
+ Value value = file_options.GetHivePartitionValue(partitions[entry.value], entry.value, context);
216
+ reader_data.constant_map.emplace_back(i, value);
187
217
  found_partition = true;
188
218
  break;
189
219
  }
@@ -313,7 +343,16 @@ void MultiFileReaderOptions::Serialize(Serializer &serializer) const {
313
343
  FieldWriter writer(serializer);
314
344
  writer.WriteField<bool>(filename);
315
345
  writer.WriteField<bool>(hive_partitioning);
346
+ writer.WriteField<bool>(auto_detect_hive_partitioning);
316
347
  writer.WriteField<bool>(union_by_name);
348
+ writer.WriteField<bool>(hive_types_autocast);
349
+ // serialize hive_types_schema
350
+ const uint32_t schema_size = hive_types_schema.size();
351
+ writer.WriteField<uint32_t>(schema_size);
352
+ for (auto &hive_type : hive_types_schema) {
353
+ writer.WriteString(hive_type.first);
354
+ writer.WriteString(hive_type.second.ToString());
355
+ }
317
356
  writer.Finalize();
318
357
  }
319
358
 
@@ -322,7 +361,16 @@ MultiFileReaderOptions MultiFileReaderOptions::Deserialize(Deserializer &source)
322
361
  FieldReader reader(source);
323
362
  result.filename = reader.ReadRequired<bool>();
324
363
  result.hive_partitioning = reader.ReadRequired<bool>();
364
+ result.auto_detect_hive_partitioning = reader.ReadRequired<bool>();
325
365
  result.union_by_name = reader.ReadRequired<bool>();
366
+ result.hive_types_autocast = reader.ReadRequired<bool>();
367
+ // deserialize hive_types_schema
368
+ const uint32_t schema_size = reader.ReadRequired<uint32_t>();
369
+ for (idx_t i = 0; i < schema_size; i++) {
370
+ const string name = reader.ReadRequired<string>();
371
+ const LogicalType type = TransformStringToLogicalType(reader.ReadRequired<string>());
372
+ result.hive_types_schema[name] = type;
373
+ }
326
374
  reader.Finalize();
327
375
  return result;
328
376
  }
@@ -365,7 +413,9 @@ HivePartitioningIndex HivePartitioningIndex::Deserialize(Deserializer &source) {
365
413
  void MultiFileReaderOptions::AddBatchInfo(BindInfo &bind_info) const {
366
414
  bind_info.InsertOption("filename", Value::BOOLEAN(filename));
367
415
  bind_info.InsertOption("hive_partitioning", Value::BOOLEAN(hive_partitioning));
416
+ bind_info.InsertOption("auto_detect_hive_partitioning", Value::BOOLEAN(auto_detect_hive_partitioning));
368
417
  bind_info.InsertOption("union_by_name", Value::BOOLEAN(union_by_name));
418
+ bind_info.InsertOption("hive_types_autocast", Value::BOOLEAN(hive_types_autocast));
369
419
  }
370
420
 
371
421
  void UnionByName::CombineUnionTypes(const vector<string> &col_names, const vector<LogicalType> &sql_types,
@@ -390,4 +440,118 @@ void UnionByName::CombineUnionTypes(const vector<string> &col_names, const vecto
390
440
  }
391
441
  }
392
442
 
443
+ bool MultiFileReaderOptions::AutoDetectHivePartitioningInternal(const vector<string> &files) {
444
+ std::unordered_set<string> partitions;
445
+
446
+ auto splits_first_file = StringUtil::Split(files.front(), FileSystem::PathSeparator());
447
+ if (splits_first_file.size() < 2) {
448
+ return false;
449
+ }
450
+ for (auto it = splits_first_file.begin(); it != splits_first_file.end(); it++) {
451
+ auto partition = StringUtil::Split(*it, "=");
452
+ if (partition.size() == 2) {
453
+ partitions.insert(partition.front());
454
+ }
455
+ }
456
+ if (partitions.empty()) {
457
+ return false;
458
+ }
459
+ for (auto &file : files) {
460
+ auto splits = StringUtil::Split(file, FileSystem::PathSeparator());
461
+ if (splits.size() != splits_first_file.size()) {
462
+ return false;
463
+ }
464
+ for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
465
+ auto part = StringUtil::Split(*it, "=");
466
+ if (part.size() != 2) {
467
+ continue;
468
+ }
469
+ if (partitions.find(part.front()) == partitions.end()) {
470
+ return false;
471
+ }
472
+ }
473
+ }
474
+ return true;
475
+ }
476
+ void MultiFileReaderOptions::AutoDetectHiveTypesInternal(const string &file, ClientContext &context) {
477
+ std::map<string, string> partitions;
478
+ auto splits = StringUtil::Split(file, FileSystem::PathSeparator());
479
+ if (splits.size() < 2) {
480
+ return;
481
+ }
482
+ for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
483
+ auto part = StringUtil::Split(*it, "=");
484
+ if (part.size() == 2) {
485
+ partitions[part.front()] = part.back();
486
+ }
487
+ }
488
+ if (partitions.empty()) {
489
+ return;
490
+ }
491
+
492
+ const LogicalType candidates[] = {LogicalType::DATE, LogicalType::TIMESTAMP, LogicalType::BIGINT};
493
+ for (auto &part : partitions) {
494
+ const string &name = part.first;
495
+ if (hive_types_schema.find(name) != hive_types_schema.end()) {
496
+ continue;
497
+ }
498
+ Value value(part.second);
499
+ for (auto &candidate : candidates) {
500
+ const bool success = value.TryCastAs(context, candidate);
501
+ if (success) {
502
+ hive_types_schema[name] = candidate;
503
+ break;
504
+ }
505
+ }
506
+ }
507
+ }
508
+ void MultiFileReaderOptions::AutoDetectHivePartitioning(const vector<string> &files, ClientContext &context) {
509
+ D_ASSERT(!files.empty());
510
+ const bool hp_explicitly_disabled = !auto_detect_hive_partitioning && !hive_partitioning;
511
+ const bool ht_enabled = !hive_types_schema.empty();
512
+ if (hp_explicitly_disabled && ht_enabled) {
513
+ throw InvalidInputException("cannot disable hive_partitioning when hive_types is enabled");
514
+ }
515
+ if (ht_enabled && auto_detect_hive_partitioning && !hive_partitioning) {
516
+ // hive_types flag implies hive_partitioning
517
+ hive_partitioning = true;
518
+ auto_detect_hive_partitioning = false;
519
+ }
520
+ if (auto_detect_hive_partitioning) {
521
+ hive_partitioning = AutoDetectHivePartitioningInternal(files);
522
+ }
523
+ if (hive_partitioning && hive_types_autocast) {
524
+ AutoDetectHiveTypesInternal(files.front(), context);
525
+ }
526
+ }
527
+ void MultiFileReaderOptions::VerifyHiveTypesArePartitions(const std::map<string, string> &partitions) const {
528
+ for (auto &hive_type : hive_types_schema) {
529
+ if (partitions.find(hive_type.first) == partitions.end()) {
530
+ throw InvalidInputException("Unknown hive_type: \"%s\" does not appear to be a partition", hive_type.first);
531
+ }
532
+ }
533
+ }
534
+ LogicalType MultiFileReaderOptions::GetHiveLogicalType(const string &hive_partition_column) const {
535
+ if (!hive_types_schema.empty()) {
536
+ auto it = hive_types_schema.find(hive_partition_column);
537
+ if (it != hive_types_schema.end()) {
538
+ return it->second;
539
+ }
540
+ }
541
+ return LogicalType::VARCHAR;
542
+ }
543
+ Value MultiFileReaderOptions::GetHivePartitionValue(const string &base, const string &entry,
544
+ ClientContext &context) const {
545
+ Value value(base);
546
+ auto it = hive_types_schema.find(entry);
547
+ if (it == hive_types_schema.end()) {
548
+ return value;
549
+ }
550
+ if (!value.TryCastAs(context, it->second)) {
551
+ throw InvalidInputException("Unable to cast '%s' (from hive partition column '%s') to: '%s'", value.ToString(),
552
+ StringUtil::Upper(it->first), it->second.ToString());
553
+ }
554
+ return value;
555
+ }
556
+
393
557
  } // namespace duckdb
@@ -1,4 +1,5 @@
1
1
  #include "duckdb/common/operator/cast_operators.hpp"
2
+ #include "duckdb/common/hugeint.hpp"
2
3
  #include "duckdb/common/operator/string_cast.hpp"
3
4
  #include "duckdb/common/operator/numeric_cast.hpp"
4
5
  #include "duckdb/common/operator/decimal_cast_operators.hpp"
@@ -1425,11 +1426,20 @@ string_t CastFromBlob::Operation(string_t input, Vector &vector) {
1425
1426
  return result;
1426
1427
  }
1427
1428
 
1429
+ template <>
1430
+ string_t CastFromBlobToBit::Operation(string_t input, Vector &vector) {
1431
+ idx_t result_size = input.GetSize() + 1;
1432
+ if (result_size <= 1) {
1433
+ throw ConversionException("Cannot cast empty BLOB to BIT");
1434
+ }
1435
+ return StringVector::AddStringOrBlob(vector, Bit::BlobToBit(input));
1436
+ }
1437
+
1428
1438
  //===--------------------------------------------------------------------===//
1429
1439
  // Cast From Bit
1430
1440
  //===--------------------------------------------------------------------===//
1431
1441
  template <>
1432
- string_t CastFromBit::Operation(string_t input, Vector &vector) {
1442
+ string_t CastFromBitToString::Operation(string_t input, Vector &vector) {
1433
1443
 
1434
1444
  idx_t result_size = Bit::BitLength(input);
1435
1445
  string_t result = StringVector::EmptyString(vector, result_size);
@@ -1482,6 +1492,30 @@ bool TryCastToBit::Operation(string_t input, string_t &result, Vector &result_ve
1482
1492
  return true;
1483
1493
  }
1484
1494
 
1495
+ template <>
1496
+ bool CastFromBitToNumeric::Operation(string_t input, bool &result, bool strict) {
1497
+ D_ASSERT(input.GetSize() > 1);
1498
+
1499
+ uint8_t value;
1500
+ bool success = CastFromBitToNumeric::Operation(input, value, strict);
1501
+ result = (value > 0);
1502
+ return (success);
1503
+ }
1504
+
1505
+ template <>
1506
+ bool CastFromBitToNumeric::Operation(string_t input, hugeint_t &result, bool strict) {
1507
+ D_ASSERT(input.GetSize() > 1);
1508
+
1509
+ if (input.GetSize() - 1 > sizeof(hugeint_t)) {
1510
+ throw ConversionException("Bitstring doesn't fit inside of %s", GetTypeId<hugeint_t>());
1511
+ }
1512
+ Bit::BitToNumeric(input, result);
1513
+ if (result < NumericLimits<hugeint_t>::Minimum()) {
1514
+ throw ConversionException("Minimum limit for HUGEINT is %s", NumericLimits<hugeint_t>::Minimum().ToString());
1515
+ }
1516
+ return (true);
1517
+ }
1518
+
1485
1519
  //===--------------------------------------------------------------------===//
1486
1520
  // Cast From UUID
1487
1521
  //===--------------------------------------------------------------------===//
@@ -8,9 +8,26 @@
8
8
 
9
9
  namespace duckdb {
10
10
 
11
+ //! Templated radix partitioning constants, can be templated to the number of radix bits
12
+ template <idx_t radix_bits>
13
+ struct RadixPartitioningConstants {
14
+ public:
15
+ //! Bitmask of the upper bits starting at the 5th byte
16
+ static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits);
17
+ static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits);
18
+ static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits);
19
+
20
+ public:
21
+ //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS
22
+ static inline hash_t ApplyMask(hash_t hash) {
23
+ D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS);
24
+ return (hash & MASK) >> SHIFT;
25
+ }
26
+ };
27
+
11
28
  template <class OP, class RETURN_TYPE, typename... ARGS>
12
29
  RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
13
- D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
30
+ D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
14
31
  switch (radix_bits) {
15
32
  case 1:
16
33
  return OP::template Operation<1>(std::forward<ARGS>(args)...);
@@ -20,7 +37,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
20
37
  return OP::template Operation<3>(std::forward<ARGS>(args)...);
21
38
  case 4:
22
39
  return OP::template Operation<4>(std::forward<ARGS>(args)...);
23
- case 5:
40
+ case 5: // LCOV_EXCL_START
24
41
  return OP::template Operation<5>(std::forward<ARGS>(args)...);
25
42
  case 6:
26
43
  return OP::template Operation<6>(std::forward<ARGS>(args)...);
@@ -32,9 +49,14 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
32
49
  return OP::template Operation<9>(std::forward<ARGS>(args)...);
33
50
  case 10:
34
51
  return OP::template Operation<10>(std::forward<ARGS>(args)...);
52
+ case 11:
53
+ return OP::template Operation<10>(std::forward<ARGS>(args)...);
54
+ case 12:
55
+ return OP::template Operation<10>(std::forward<ARGS>(args)...);
35
56
  default:
36
- throw InternalException("TODO");
37
- }
57
+ throw InternalException(
58
+ "radix_bits higher than RadixPartitioning::MAX_RADIX_BITS encountered in RadixBitsSwitch");
59
+ } // LCOV_EXCL_STOP
38
60
  }
39
61
 
40
62
  template <idx_t radix_bits>
@@ -69,10 +91,6 @@ struct HashsToBinsFunctor {
69
91
  }
70
92
  };
71
93
 
72
- void RadixPartitioning::HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count) {
73
- return RadixBitsSwitch<HashsToBinsFunctor, void>(radix_bits, hashes, bins, count);
74
- }
75
-
76
94
  //===--------------------------------------------------------------------===//
77
95
  // Row Data Partitioning
78
96
  //===--------------------------------------------------------------------===//
@@ -54,7 +54,7 @@ duckdb::vector<Match> RegexFindAll(const std::string &input, const Regex &regex)
54
54
  Match match;
55
55
  while (RegexSearchInternal(input.c_str(), match, regex, RE2::UNANCHORED, position, input.size())) {
56
56
  position += match.position(0) + match.length(0);
57
- matches.emplace_back(std::move(match));
57
+ matches.emplace_back(match);
58
58
  }
59
59
  return matches;
60
60
  }
@@ -99,7 +99,7 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
99
99
 
100
100
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
101
101
  #ifdef DEBUG
102
- if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
102
+ if (layout.GetTypes()[col_idx].id() != LogicalTypeId::VARCHAR) {
103
103
  return;
104
104
  }
105
105
  idx_t entry_idx;
@@ -273,16 +273,13 @@ void MergeSorter::ComputeMerge(const idx_t &count, bool left_smaller[]) {
273
273
  break;
274
274
  }
275
275
  // Pin the radix sorting data
276
- if (!l_done) {
277
- left->PinRadix(l.block_idx);
278
- l_radix_ptr = left->RadixPtr();
279
- }
280
- if (!r_done) {
281
- right->PinRadix(r.block_idx);
282
- r_radix_ptr = right->RadixPtr();
283
- }
284
- const idx_t &l_count = !l_done ? l_sorted_block.radix_sorting_data[l.block_idx]->count : 0;
285
- const idx_t &r_count = !r_done ? r_sorted_block.radix_sorting_data[r.block_idx]->count : 0;
276
+ left->PinRadix(l.block_idx);
277
+ l_radix_ptr = left->RadixPtr();
278
+ right->PinRadix(r.block_idx);
279
+ r_radix_ptr = right->RadixPtr();
280
+
281
+ const idx_t l_count = l_sorted_block.radix_sorting_data[l.block_idx]->count;
282
+ const idx_t r_count = r_sorted_block.radix_sorting_data[r.block_idx]->count;
286
283
  // Compute the merge
287
284
  if (sort_layout.all_constant) {
288
285
  // All sorting columns are constant size
@@ -298,12 +295,8 @@ void MergeSorter::ComputeMerge(const idx_t &count, bool left_smaller[]) {
298
295
  }
299
296
  } else {
300
297
  // Pin the blob data
301
- if (!l_done) {
302
- left->PinData(*l_sorted_block.blob_sorting_data);
303
- }
304
- if (!r_done) {
305
- right->PinData(*r_sorted_block.blob_sorting_data);
306
- }
298
+ left->PinData(*l_sorted_block.blob_sorting_data);
299
+ right->PinData(*r_sorted_block.blob_sorting_data);
307
300
  // Merge with variable size sorting columns
308
301
  for (; compared < count && l.entry_idx < l_count && r.entry_idx < r_count; compared++) {
309
302
  left_smaller[compared] =