duckdb 0.8.2-dev11.0 → 0.8.2-dev1212.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (396)
  1. package/binding.gyp +14 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/lib/duckdb.d.ts +59 -0
  6. package/lib/duckdb.js +21 -0
  7. package/package.json +1 -1
  8. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  10. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  11. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  12. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  14. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  15. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  16. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  17. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  18. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  19. package/src/duckdb/extension/icu/icu_extension.cpp +5 -7
  20. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  21. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  22. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  23. package/src/duckdb/extension/json/json_common.cpp +272 -40
  24. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  26. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  27. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  28. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  29. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  30. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  31. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  32. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  33. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  34. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  35. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  37. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  39. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  40. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  41. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  42. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  43. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  44. package/src/duckdb/extension/parquet/parquet_extension.cpp +191 -19
  45. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  46. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  47. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  48. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  49. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  55. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +42 -5
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/exception.cpp +2 -2
  62. package/src/duckdb/src/common/file_system.cpp +15 -0
  63. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  64. package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
  65. package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
  66. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  67. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  68. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  69. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  70. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  71. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  72. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  73. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  74. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  75. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  76. package/src/duckdb/src/common/types/date.cpp +9 -0
  77. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  78. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  79. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  80. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  81. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  82. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  83. package/src/duckdb/src/common/types/vector.cpp +15 -14
  84. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
  85. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  86. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  92. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  93. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  94. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
  95. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  96. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  97. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  98. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  100. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  102. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  103. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  104. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  105. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  106. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  107. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  108. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  109. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  110. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  111. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  112. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  113. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  114. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  115. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  116. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  117. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  119. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  121. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
  122. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  123. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  124. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  125. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  126. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  127. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  128. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
  129. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  131. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  132. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  133. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  134. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  135. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  136. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  137. package/src/duckdb/src/execution/physical_operator.cpp +17 -14
  138. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  139. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  140. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  141. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  142. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  143. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  144. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  145. package/src/duckdb/src/function/function.cpp +3 -1
  146. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  147. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  148. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  149. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  150. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  151. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  152. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  153. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  154. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  155. package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
  156. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  157. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  158. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  159. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  160. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  161. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  162. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  163. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  164. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  165. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  166. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  168. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  169. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  170. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  171. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  172. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  173. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  174. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  175. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  176. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  177. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  178. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  179. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  180. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  181. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  182. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  183. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  184. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  185. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  186. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  187. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  188. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  189. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  190. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
  191. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  192. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  193. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  194. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  195. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  196. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  197. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  198. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  199. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  200. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  201. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  202. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  203. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  204. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  205. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  206. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  207. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  208. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  209. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  210. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  211. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  212. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
  213. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  214. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  215. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  216. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  217. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  218. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  219. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  220. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  221. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  222. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  223. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
  224. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  225. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  226. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  227. package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
  228. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  229. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  230. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  231. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  232. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  233. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  234. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  235. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  236. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  237. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  238. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  239. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  240. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  241. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  242. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  243. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  244. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  245. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  246. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  247. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  248. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  249. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  250. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  251. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  252. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  253. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  254. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  255. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  257. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  258. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  259. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  260. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  261. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  262. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  263. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  264. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  265. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  266. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  267. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  268. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  269. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  270. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  271. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  272. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  273. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  274. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  275. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  277. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  278. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  279. package/src/duckdb/src/include/duckdb.h +28 -0
  280. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  281. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  282. package/src/duckdb/src/main/config.cpp +2 -0
  283. package/src/duckdb/src/main/database.cpp +1 -1
  284. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  285. package/src/duckdb/src/main/settings/settings.cpp +40 -18
  286. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  287. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  288. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  289. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  290. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  291. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  292. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  293. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  294. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  295. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  296. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  297. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  298. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  299. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  300. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  301. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  302. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  303. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  304. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  305. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  306. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  307. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  308. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  309. package/src/duckdb/src/parallel/executor.cpp +15 -0
  310. package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
  311. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  312. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  313. package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
  314. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  315. package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
  316. package/src/duckdb/src/parser/query_node.cpp +18 -1
  317. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
  318. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  319. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  320. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  321. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  322. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  323. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  324. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  325. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  326. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  327. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  328. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  329. package/src/duckdb/src/parser/transformer.cpp +15 -0
  330. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  331. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  332. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  333. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  334. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  335. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  336. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  337. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  338. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  339. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  340. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  341. package/src/duckdb/src/planner/binder.cpp +5 -0
  342. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  343. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  344. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  345. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  346. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  347. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  348. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  349. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  350. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  351. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  352. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  353. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  354. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  355. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  356. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  357. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  358. package/src/duckdb/src/storage/data_table.cpp +1 -1
  359. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  360. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  361. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  362. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  363. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  364. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  365. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  366. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  367. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  368. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  369. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  370. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  371. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  372. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  373. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  374. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  375. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  376. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  377. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  378. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  379. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  380. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  381. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  382. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  383. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  384. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  385. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  386. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  387. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  388. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  389. package/src/duckdb_node.hpp +1 -0
  390. package/src/statement.cpp +104 -4
  391. package/test/columns.test.ts +243 -0
  392. package/test/test_all_types.test.ts +233 -0
  393. package/tsconfig.json +1 -0
  394. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  395. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  396. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -333,7 +333,7 @@ struct StandardValueCopy : public BaseValueCopy<T> {
333
333
 
334
334
  struct StringValueCopy : public BaseValueCopy<string_t> {
335
335
  static string_t Operation(ColumnDataMetaData &meta_data, string_t input) {
336
- return input.IsInlined() ? input : meta_data.segment.heap.AddBlob(input);
336
+ return input.IsInlined() ? input : meta_data.segment.heap->AddBlob(input);
337
337
  }
338
338
  };
339
339
 
@@ -423,7 +423,8 @@ void ColumnDataCopy<string_t>(ColumnDataMetaData &meta_data, const UnifiedVector
423
423
  idx_t offset, idx_t copy_count) {
424
424
 
425
425
  const auto &allocator_type = meta_data.segment.allocator->GetType();
426
- if (allocator_type == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR) {
426
+ if (allocator_type == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR ||
427
+ allocator_type == ColumnDataAllocatorType::HYBRID) {
427
428
  // strings cannot be spilled to disk - use StringHeap
428
429
  TemplatedColumnDataCopy<StringValueCopy>(meta_data, source_data, source, offset, copy_count);
429
430
  return;
@@ -930,6 +931,7 @@ void ColumnDataCollection::Verify() {
930
931
  #endif
931
932
  }
932
933
 
934
+ // LCOV_EXCL_START
933
935
  string ColumnDataCollection::ToString() const {
934
936
  DataChunk chunk;
935
937
  InitializeScanChunk(chunk);
@@ -950,6 +952,7 @@ string ColumnDataCollection::ToString() const {
950
952
 
951
953
  return result;
952
954
  }
955
+ // LCOV_EXCL_STOP
953
956
 
954
957
  void ColumnDataCollection::Print() const {
955
958
  Printer::Print(ToString());
@@ -1030,6 +1033,18 @@ bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const
1030
1033
  return true;
1031
1034
  }
1032
1035
 
1036
+ vector<shared_ptr<StringHeap>> ColumnDataCollection::GetHeapReferences() {
1037
+ vector<shared_ptr<StringHeap>> result(segments.size(), nullptr);
1038
+ for (idx_t segment_idx = 0; segment_idx < segments.size(); segment_idx++) {
1039
+ result[segment_idx] = segments[segment_idx]->heap;
1040
+ }
1041
+ return result;
1042
+ }
1043
+
1044
+ ColumnDataAllocatorType ColumnDataCollection::GetAllocatorType() const {
1045
+ return allocator->GetType();
1046
+ }
1047
+
1033
1048
  const vector<unique_ptr<ColumnDataCollectionSegment>> &ColumnDataCollection::GetSegments() const {
1034
1049
  return segments;
1035
1050
  }
@@ -6,7 +6,8 @@ namespace duckdb {
6
6
 
7
7
  ColumnDataCollectionSegment::ColumnDataCollectionSegment(shared_ptr<ColumnDataAllocator> allocator_p,
8
8
  vector<LogicalType> types_p)
9
- : allocator(std::move(allocator_p)), types(std::move(types_p)), count(0), heap(allocator->GetAllocator()) {
9
+ : allocator(std::move(allocator_p)), types(std::move(types_p)), count(0),
10
+ heap(make_shared<StringHeap>(allocator->GetAllocator())) {
10
11
  }
11
12
 
12
13
  idx_t ColumnDataCollectionSegment::GetDataSize(idx_t type_size) {
@@ -26,7 +27,8 @@ VectorDataIndex ColumnDataCollectionSegment::AllocateVectorInternal(const Logica
26
27
  auto type_size = internal_type == PhysicalType::STRUCT ? 0 : GetTypeIdSize(internal_type);
27
28
  allocator->AllocateData(GetDataSize(type_size) + ValidityMask::STANDARD_MASK_SIZE, meta_data.block_id,
28
29
  meta_data.offset, chunk_state);
29
- if (allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR) {
30
+ if (allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR ||
31
+ allocator->GetType() == ColumnDataAllocatorType::HYBRID) {
30
32
  chunk_meta.block_ids.insert(meta_data.block_id);
31
33
  }
32
34
 
@@ -203,10 +205,17 @@ idx_t ColumnDataCollectionSegment::ReadVector(ChunkManagementState &state, Vecto
203
205
  }
204
206
  } else if (internal_type == PhysicalType::VARCHAR) {
205
207
  if (allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR) {
206
- for (auto &swizzle_segment : vdata.swizzle_data) {
207
- auto &string_heap_segment = GetVectorData(swizzle_segment.child_index);
208
- allocator->UnswizzlePointers(state, result, swizzle_segment.offset, swizzle_segment.count,
209
- string_heap_segment.block_id, string_heap_segment.offset);
208
+ auto next_index = vector_index;
209
+ idx_t offset = 0;
210
+ while (next_index.IsValid()) {
211
+ auto &current_vdata = GetVectorData(next_index);
212
+ for (auto &swizzle_segment : current_vdata.swizzle_data) {
213
+ auto &string_heap_segment = GetVectorData(swizzle_segment.child_index);
214
+ allocator->UnswizzlePointers(state, result, offset + swizzle_segment.offset, swizzle_segment.count,
215
+ string_heap_segment.block_id, string_heap_segment.offset);
216
+ }
217
+ offset += current_vdata.count;
218
+ next_index = current_vdata.next_data;
210
219
  }
211
220
  }
212
221
  if (state.properties == ColumnDataScanProperties::DISALLOW_ZERO_COPY) {
@@ -32,13 +32,13 @@ PartitionedColumnData::~PartitionedColumnData() {
32
32
 
33
33
  void PartitionedColumnData::InitializeAppendState(PartitionedColumnDataAppendState &state) const {
34
34
  state.partition_sel.Initialize();
35
- state.slice_chunk.Initialize(context, types);
35
+ state.slice_chunk.Initialize(BufferAllocator::Get(context), types);
36
36
  InitializeAppendStateInternal(state);
37
37
  }
38
38
 
39
39
  unique_ptr<DataChunk> PartitionedColumnData::CreatePartitionBuffer() const {
40
40
  auto result = make_uniq<DataChunk>();
41
- result->Initialize(BufferManager::GetBufferManager(context).GetBufferAllocator(), types, BufferSize());
41
+ result->Initialize(BufferAllocator::Get(context), types, BufferSize());
42
42
  return result;
43
43
  }
44
44
 
@@ -309,7 +309,7 @@ void DataChunk::Hash(Vector &result) {
309
309
 
310
310
  void DataChunk::Hash(vector<idx_t> &column_ids, Vector &result) {
311
311
  D_ASSERT(result.GetType().id() == LogicalType::HASH);
312
- D_ASSERT(column_ids.size() > 0);
312
+ D_ASSERT(!column_ids.empty());
313
313
 
314
314
  VectorOperations::Hash(data[column_ids[0]], result, size());
315
315
  for (idx_t i = 1; i < column_ids.size(); i++) {
@@ -327,7 +327,7 @@ void DataChunk::Verify() {
327
327
  #endif
328
328
  }
329
329
 
330
- void DataChunk::Print() {
330
+ void DataChunk::Print() const {
331
331
  Printer::Print(ToString());
332
332
  }
333
333
 
@@ -441,6 +441,15 @@ int64_t Date::EpochMicroseconds(date_t date) {
441
441
  return result;
442
442
  }
443
443
 
444
+ int64_t Date::EpochMilliseconds(date_t date) {
445
+ int64_t result;
446
+ const auto MILLIS_PER_DAY = Interval::MICROS_PER_DAY / Interval::MICROS_PER_MSEC;
447
+ if (!TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(date.days, MILLIS_PER_DAY, result)) {
448
+ throw ConversionException("Could not convert DATE (%s) to milliseconds", Date::ToString(date));
449
+ }
450
+ return result;
451
+ }
452
+
444
453
  int32_t Date::ExtractYear(date_t d, int32_t *last_year) {
445
454
  auto n = d.days;
446
455
  // cached look up: check if year of this date is the same as the last one we looked up
@@ -12,8 +12,8 @@ static idx_t GetAllocationSize(uint16_t capacity) {
12
12
  }
13
13
 
14
14
  template <class T>
15
- static data_ptr_t AllocatePrimitiveData(Allocator &allocator, uint16_t capacity) {
16
- return allocator.AllocateData(GetAllocationSize<T>(capacity));
15
+ static data_ptr_t AllocatePrimitiveData(ArenaAllocator &allocator, uint16_t capacity) {
16
+ return allocator.Allocate(GetAllocationSize<T>(capacity));
17
17
  }
18
18
 
19
19
  template <class T>
@@ -34,8 +34,8 @@ static idx_t GetAllocationSizeList(uint16_t capacity) {
34
34
  return AlignValue(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList));
35
35
  }
36
36
 
37
- static data_ptr_t AllocateListData(Allocator &allocator, uint16_t capacity) {
38
- return allocator.AllocateData(GetAllocationSizeList(capacity));
37
+ static data_ptr_t AllocateListData(ArenaAllocator &allocator, uint16_t capacity) {
38
+ return allocator.Allocate(GetAllocationSizeList(capacity));
39
39
  }
40
40
 
41
41
  static uint64_t *GetListLengthData(ListSegment *segment) {
@@ -65,8 +65,8 @@ static idx_t GetAllocationSizeStruct(uint16_t capacity, idx_t child_count) {
65
65
  return AlignValue(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *));
66
66
  }
67
67
 
68
- static data_ptr_t AllocateStructData(Allocator &allocator, uint16_t capacity, idx_t child_count) {
69
- return allocator.AllocateData(GetAllocationSizeStruct(capacity, child_count));
68
+ static data_ptr_t AllocateStructData(ArenaAllocator &allocator, uint16_t capacity, idx_t child_count) {
69
+ return allocator.Allocate(GetAllocationSizeStruct(capacity, child_count));
70
70
  }
71
71
 
72
72
  static ListSegment **GetStructData(ListSegment *segment) {
@@ -98,20 +98,8 @@ static uint16_t GetCapacityForNewSegment(uint16_t capacity) {
98
98
  //===--------------------------------------------------------------------===//
99
99
  // Create & Destroy
100
100
  //===--------------------------------------------------------------------===//
101
- static void DestroyLinkedList(const ListSegmentFunctions &functions, Allocator &allocator, LinkedList &list) {
102
- auto segment = list.first_segment;
103
- while (segment) {
104
- auto next_segment = segment->next;
105
- functions.destroy(functions, segment, allocator);
106
- segment = next_segment;
107
- }
108
- list.first_segment = nullptr;
109
- list.last_segment = nullptr;
110
- list.total_capacity = 0;
111
- }
112
-
113
101
  template <class T>
114
- static ListSegment *CreatePrimitiveSegment(const ListSegmentFunctions &, Allocator &allocator, uint16_t capacity) {
102
+ static ListSegment *CreatePrimitiveSegment(const ListSegmentFunctions &, ArenaAllocator &allocator, uint16_t capacity) {
115
103
  // allocate data and set the header
116
104
  auto segment = (ListSegment *)AllocatePrimitiveData<T>(allocator, capacity);
117
105
  segment->capacity = capacity;
@@ -120,13 +108,7 @@ static ListSegment *CreatePrimitiveSegment(const ListSegmentFunctions &, Allocat
120
108
  return segment;
121
109
  }
122
110
 
123
- template <class T>
124
- void DestroyPrimitiveSegment(const ListSegmentFunctions &, ListSegment *segment, Allocator &allocator) {
125
- D_ASSERT(segment);
126
- allocator.FreeData(data_ptr_cast(segment), GetAllocationSize<T>(segment->capacity));
127
- }
128
-
129
- static ListSegment *CreateListSegment(const ListSegmentFunctions &, Allocator &allocator, uint16_t capacity) {
111
+ static ListSegment *CreateListSegment(const ListSegmentFunctions &, ArenaAllocator &allocator, uint16_t capacity) {
130
112
  // allocate data and set the header
131
113
  auto segment = reinterpret_cast<ListSegment *>(AllocateListData(allocator, capacity));
132
114
  segment->capacity = capacity;
@@ -141,16 +123,7 @@ static ListSegment *CreateListSegment(const ListSegmentFunctions &, Allocator &a
141
123
  return segment;
142
124
  }
143
125
 
144
- void DestroyListSegment(const ListSegmentFunctions &functions, ListSegment *segment, Allocator &allocator) {
145
- // destroy the child list
146
- auto linked_child_list = Load<LinkedList>(data_ptr_cast(GetListChildData(segment)));
147
- DestroyLinkedList(functions.child_functions[0], allocator, linked_child_list);
148
-
149
- // destroy the list segment itself
150
- allocator.FreeData(data_ptr_cast(segment), GetAllocationSizeList(segment->capacity));
151
- }
152
-
153
- static ListSegment *CreateStructSegment(const ListSegmentFunctions &functions, Allocator &allocator,
126
+ static ListSegment *CreateStructSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
154
127
  uint16_t capacity) {
155
128
  // allocate data and set header
156
129
  auto segment =
@@ -170,21 +143,8 @@ static ListSegment *CreateStructSegment(const ListSegmentFunctions &functions, A
170
143
  return segment;
171
144
  }
172
145
 
173
- void DestroyStructSegment(const ListSegmentFunctions &functions, ListSegment *segment, Allocator &allocator) {
174
- // destroy the child entries
175
- auto child_segments = GetStructData(segment);
176
- for (idx_t i = 0; i < functions.child_functions.size(); i++) {
177
- auto child_function = functions.child_functions[i];
178
- auto child_segment = Load<ListSegment *>(data_ptr_cast(child_segments + i));
179
- child_function.destroy(child_function, child_segment, allocator);
180
- }
181
-
182
- // destroy the struct segment itself
183
- allocator.FreeData(data_ptr_cast(segment),
184
- GetAllocationSizeStruct(segment->capacity, functions.child_functions.size()));
185
- }
186
-
187
- static ListSegment *GetSegment(const ListSegmentFunctions &functions, Allocator &allocator, LinkedList &linked_list) {
146
+ static ListSegment *GetSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
147
+ LinkedList &linked_list) {
188
148
  ListSegment *segment;
189
149
 
190
150
  // determine segment
@@ -214,7 +174,7 @@ static ListSegment *GetSegment(const ListSegmentFunctions &functions, Allocator
214
174
  // Append
215
175
  //===--------------------------------------------------------------------===//
216
176
  template <class T>
217
- static void WriteDataToPrimitiveSegment(const ListSegmentFunctions &functions, Allocator &allocator,
177
+ static void WriteDataToPrimitiveSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
218
178
  ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
219
179
 
220
180
  // get the vector data and the source index of the entry that we want to write
@@ -232,8 +192,8 @@ static void WriteDataToPrimitiveSegment(const ListSegmentFunctions &functions, A
232
192
  }
233
193
  }
234
194
 
235
- static void WriteDataToVarcharSegment(const ListSegmentFunctions &functions, Allocator &allocator, ListSegment *segment,
236
- Vector &input, idx_t &entry_idx, idx_t &count) {
195
+ static void WriteDataToVarcharSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
196
+ ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
237
197
 
238
198
  // get the vector data and the source index of the entry that we want to write
239
199
  auto input_data = FlatVector::GetData<string_t>(input);
@@ -275,8 +235,8 @@ static void WriteDataToVarcharSegment(const ListSegmentFunctions &functions, All
275
235
  Store<LinkedList>(child_segments, data_ptr_cast(GetListChildData(segment)));
276
236
  }
277
237
 
278
- static void WriteDataToListSegment(const ListSegmentFunctions &functions, Allocator &allocator, ListSegment *segment,
279
- Vector &input, idx_t &entry_idx, idx_t &count) {
238
+ static void WriteDataToListSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
239
+ ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
280
240
 
281
241
  // get the vector data and the source index of the entry that we want to write
282
242
  auto input_data = FlatVector::GetData<list_entry_t>(input);
@@ -315,8 +275,8 @@ static void WriteDataToListSegment(const ListSegmentFunctions &functions, Alloca
315
275
  Store<uint64_t>(list_length, data_ptr_cast(list_length_data + segment->count));
316
276
  }
317
277
 
318
- static void WriteDataToStructSegment(const ListSegmentFunctions &functions, Allocator &allocator, ListSegment *segment,
319
- Vector &input, idx_t &entry_idx, idx_t &count) {
278
+ static void WriteDataToStructSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
279
+ ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
320
280
 
321
281
  // write null validity
322
282
  auto null_mask = GetNullMask(segment);
@@ -338,8 +298,8 @@ static void WriteDataToStructSegment(const ListSegmentFunctions &functions, Allo
338
298
  }
339
299
  }
340
300
 
341
- void ListSegmentFunctions::AppendRow(Allocator &allocator, LinkedList &linked_list, Vector &input, idx_t &entry_idx,
342
- idx_t &count) const {
301
+ void ListSegmentFunctions::AppendRow(ArenaAllocator &allocator, LinkedList &linked_list, Vector &input,
302
+ idx_t &entry_idx, idx_t &count) const {
343
303
 
344
304
  D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR);
345
305
  auto &write_data_to_segment = *this;
@@ -503,7 +463,7 @@ void ListSegmentFunctions::BuildListVector(const LinkedList &linked_list, Vector
503
463
  //===--------------------------------------------------------------------===//
504
464
  template <class T>
505
465
  static ListSegment *CopyDataFromPrimitiveSegment(const ListSegmentFunctions &, const ListSegment *source,
506
- Allocator &allocator) {
466
+ ArenaAllocator &allocator) {
507
467
 
508
468
  auto target = (ListSegment *)AllocatePrimitiveData<T>(allocator, source->capacity);
509
469
  memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T)));
@@ -512,7 +472,7 @@ static ListSegment *CopyDataFromPrimitiveSegment(const ListSegmentFunctions &, c
512
472
  }
513
473
 
514
474
  static ListSegment *CopyDataFromListSegment(const ListSegmentFunctions &functions, const ListSegment *source,
515
- Allocator &allocator) {
475
+ ArenaAllocator &allocator) {
516
476
 
517
477
  // create an empty linked list for the child vector of target
518
478
  auto source_linked_child_list = Load<LinkedList>(const_data_ptr_cast(GetListChildData(source)));
@@ -538,7 +498,7 @@ static ListSegment *CopyDataFromListSegment(const ListSegmentFunctions &function
538
498
  }
539
499
 
540
500
  static ListSegment *CopyDataFromStructSegment(const ListSegmentFunctions &functions, const ListSegment *source,
541
- Allocator &allocator) {
501
+ ArenaAllocator &allocator) {
542
502
 
543
503
  auto source_child_count = functions.child_functions.size();
544
504
  auto target = reinterpret_cast<ListSegment *>(AllocateStructData(allocator, source->capacity, source_child_count));
@@ -560,7 +520,7 @@ static ListSegment *CopyDataFromStructSegment(const ListSegmentFunctions &functi
560
520
  }
561
521
 
562
522
  void ListSegmentFunctions::CopyLinkedList(const LinkedList &source_list, LinkedList &target_list,
563
- Allocator &allocator) const {
523
+ ArenaAllocator &allocator) const {
564
524
  auto &copy_data_from_segment = *this;
565
525
  auto source_segment = source_list.first_segment;
566
526
 
@@ -578,12 +538,6 @@ void ListSegmentFunctions::CopyLinkedList(const LinkedList &source_list, LinkedL
578
538
  }
579
539
  }
580
540
 
581
- //===--------------------------------------------------------------------===//
582
- // Destroy
583
- //===--------------------------------------------------------------------===//
584
- void ListSegmentFunctions::Destroy(Allocator &allocator, LinkedList &linked_list) const {
585
- DestroyLinkedList(*this, allocator, linked_list);
586
- }
587
541
  //===--------------------------------------------------------------------===//
588
542
  // Functions
589
543
  //===--------------------------------------------------------------------===//
@@ -593,7 +547,6 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {
593
547
  functions.write_data = WriteDataToPrimitiveSegment<T>;
594
548
  functions.read_data = ReadDataFromPrimitiveSegment<T>;
595
549
  functions.copy_data = CopyDataFromPrimitiveSegment<T>;
596
- functions.destroy = DestroyPrimitiveSegment<T>;
597
550
  }
598
551
 
599
552
  void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {
@@ -645,7 +598,6 @@ void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType
645
598
  functions.write_data = WriteDataToVarcharSegment;
646
599
  functions.read_data = ReadDataFromVarcharSegment;
647
600
  functions.copy_data = CopyDataFromListSegment;
648
- functions.destroy = DestroyListSegment;
649
601
 
650
602
  functions.child_functions.emplace_back();
651
603
  SegmentPrimitiveFunction<char>(functions.child_functions.back());
@@ -656,7 +608,6 @@ void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType
656
608
  functions.write_data = WriteDataToListSegment;
657
609
  functions.read_data = ReadDataFromListSegment;
658
610
  functions.copy_data = CopyDataFromListSegment;
659
- functions.destroy = DestroyListSegment;
660
611
 
661
612
  // recurse
662
613
  functions.child_functions.emplace_back();
@@ -668,7 +619,6 @@ void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType
668
619
  functions.write_data = WriteDataToStructSegment;
669
620
  functions.read_data = ReadDataFromStructSegment;
670
621
  functions.copy_data = CopyDataFromStructSegment;
671
- functions.destroy = DestroyStructSegment;
672
622
 
673
623
  // recurse
674
624
  auto child_types = StructType::GetChildTypes(type);
@@ -16,15 +16,6 @@ PartitionedTupleData::PartitionedTupleData(const PartitionedTupleData &other)
16
16
  : type(other.type), buffer_manager(other.buffer_manager), layout(other.layout.Copy()) {
17
17
  }
18
18
 
19
- unique_ptr<PartitionedTupleData> PartitionedTupleData::CreateShared() {
20
- switch (type) {
21
- case PartitionedTupleDataType::RADIX:
22
- return make_uniq<RadixPartitionedTupleData>(Cast<RadixPartitionedTupleData>());
23
- default:
24
- throw NotImplementedException("CreateShared for this type of PartitionedTupleData");
25
- }
26
- }
27
-
28
19
  PartitionedTupleData::~PartitionedTupleData() {
29
20
  }
30
21
 
@@ -233,6 +224,9 @@ void PartitionedTupleData::Combine(PartitionedTupleData &other) {
233
224
  }
234
225
 
235
226
  void PartitionedTupleData::Partition(TupleDataCollection &source, TupleDataPinProperties properties) {
227
+ if (source.Count() == 0) {
228
+ return;
229
+ }
236
230
  #ifdef DEBUG
237
231
  const auto count_before = source.Count();
238
232
  #endif
@@ -220,6 +220,8 @@ void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataC
220
220
 
221
221
  static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
222
222
  vector.ToUnifiedFormat(count, format.data);
223
+ format.original_sel = format.data.sel;
224
+ format.original_owned_sel.Initialize(format.data.owned_sel);
223
225
  switch (vector.GetType().InternalType()) {
224
226
  case PhysicalType::STRUCT: {
225
227
  auto &entries = StructVector::GetEntries(vector);
@@ -296,7 +296,7 @@ static void ApplySliceRecursive(const Vector &source_v, TupleDataVectorFormat &s
296
296
  D_ASSERT(source_format.combined_list_data);
297
297
  auto &combined_list_data = *source_format.combined_list_data;
298
298
 
299
- combined_list_data.selection_data = source_format.data.sel->Slice(combined_sel, count);
299
+ combined_list_data.selection_data = source_format.original_sel->Slice(combined_sel, count);
300
300
  source_format.data.owned_sel.Initialize(combined_list_data.selection_data);
301
301
  source_format.data.sel = &source_format.data.owned_sel;
302
302
 
@@ -376,8 +376,8 @@ void TupleDataCollection::ListWithinListComputeHeapSizes(Vector &heap_sizes_v, c
376
376
  for (idx_t i = 0; i < child_list_child_count; i++) {
377
377
  combined_sel.set_index(i, 0);
378
378
  }
379
- idx_t combined_list_offset = 0;
380
379
 
380
+ idx_t combined_list_offset = 0;
381
381
  for (idx_t i = 0; i < append_count; i++) {
382
382
  const auto list_idx = list_sel.get_index(append_sel.get_index(i));
383
383
  if (!list_validity.RowIsValid(list_idx)) {
@@ -49,7 +49,7 @@ bool UUID::FromString(string str, hugeint_t &result) {
49
49
  count++;
50
50
  }
51
51
  // Flip the first bit to make `order by uuid` same as `order by uuid::varchar`
52
- result.upper ^= (int64_t(1) << 63);
52
+ result.upper ^= (uint64_t(1) << 63);
53
53
  return count == 32;
54
54
  }
55
55
 
@@ -61,7 +61,7 @@ void UUID::ToString(hugeint_t input, char *buf) {
61
61
  };
62
62
 
63
63
  // Flip back before convert to string
64
- int64_t upper = input.upper ^ (int64_t(1) << 63);
64
+ int64_t upper = input.upper ^ (uint64_t(1) << 63);
65
65
  idx_t pos = 0;
66
66
  byte_to_hex(upper >> 56 & 0xFF, buf, pos);
67
67
  byte_to_hex(upper >> 48 & 0xFF, buf, pos);
@@ -96,6 +96,39 @@ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset,
96
96
  memcpy(target_validity + target_offset_entries, source_validity + source_offset_entries,
97
97
  sizeof(validity_t) * EntryCount(count));
98
98
  return;
99
+ } else if (IsAligned(target_offset)) {
100
+ // Simple common case where we are shifting into an aligned mask (e.g., 0 in Slice above)
101
+ const idx_t entire_units = count / BITS_PER_VALUE;
102
+ const idx_t ragged = count % BITS_PER_VALUE;
103
+ const idx_t tail = source_offset % BITS_PER_VALUE;
104
+ const idx_t head = BITS_PER_VALUE - tail;
105
+ auto source_validity = other.GetData() + (source_offset / BITS_PER_VALUE);
106
+ auto target_validity = this->GetData() + (target_offset / BITS_PER_VALUE);
107
+ auto src_entry = *source_validity++;
108
+ for (idx_t i = 0; i < entire_units; ++i) {
109
+ // Start with head of previous src
110
+ validity_t tgt_entry = src_entry >> tail;
111
+ src_entry = *source_validity++;
112
+ // Add in tail of current src
113
+ tgt_entry |= (src_entry << head);
114
+ *target_validity++ = tgt_entry;
115
+ }
116
+ // Finish last ragged entry
117
+ if (ragged) {
118
+ // Start with head of previous src
119
+ validity_t tgt_entry = (src_entry >> tail);
120
+ // Add in the tail of the next src, if head was too small
121
+ if (head < ragged) {
122
+ src_entry = *source_validity++;
123
+ tgt_entry |= (src_entry << head);
124
+ }
125
+ // Mask off the bits that go past the ragged end
126
+ tgt_entry &= (ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - ragged));
127
+ // Restore the ragged end of the target
128
+ tgt_entry |= *target_validity & (ValidityBuffer::MAX_ENTRY << ragged);
129
+ *target_validity++ = tgt_entry;
130
+ }
131
+ return;
99
132
  }
100
133
 
101
134
  // FIXME: use bitwise operations here
@@ -855,38 +855,39 @@ void Vector::Flatten(const SelectionVector &sel, idx_t count) {
855
855
  }
856
856
  }
857
857
 
858
- void Vector::ToUnifiedFormat(idx_t count, UnifiedVectorFormat &data) {
858
+ void Vector::ToUnifiedFormat(idx_t count, UnifiedVectorFormat &format) {
859
859
  switch (GetVectorType()) {
860
860
  case VectorType::DICTIONARY_VECTOR: {
861
861
  auto &sel = DictionaryVector::SelVector(*this);
862
+ format.owned_sel.Initialize(sel);
863
+ format.sel = &format.owned_sel;
864
+
862
865
  auto &child = DictionaryVector::Child(*this);
863
866
  if (child.GetVectorType() == VectorType::FLAT_VECTOR) {
864
- data.sel = &sel;
865
- data.data = FlatVector::GetData(child);
866
- data.validity = FlatVector::Validity(child);
867
+ format.data = FlatVector::GetData(child);
868
+ format.validity = FlatVector::Validity(child);
867
869
  } else {
868
- // dictionary with non-flat child: create a new reference to the child and normalify it
870
+ // dictionary with non-flat child: create a new reference to the child and flatten it
869
871
  Vector child_vector(child);
870
872
  child_vector.Flatten(sel, count);
871
873
  auto new_aux = make_buffer<VectorChildBuffer>(std::move(child_vector));
872
874
 
873
- data.sel = &sel;
874
- data.data = FlatVector::GetData(new_aux->data);
875
- data.validity = FlatVector::Validity(new_aux->data);
875
+ format.data = FlatVector::GetData(new_aux->data);
876
+ format.validity = FlatVector::Validity(new_aux->data);
876
877
  this->auxiliary = std::move(new_aux);
877
878
  }
878
879
  break;
879
880
  }
880
881
  case VectorType::CONSTANT_VECTOR:
881
- data.sel = ConstantVector::ZeroSelectionVector(count, data.owned_sel);
882
- data.data = ConstantVector::GetData(*this);
883
- data.validity = ConstantVector::Validity(*this);
882
+ format.sel = ConstantVector::ZeroSelectionVector(count, format.owned_sel);
883
+ format.data = ConstantVector::GetData(*this);
884
+ format.validity = ConstantVector::Validity(*this);
884
885
  break;
885
886
  default:
886
887
  Flatten(count);
887
- data.sel = FlatVector::IncrementalSelectionVector();
888
- data.data = FlatVector::GetData(*this);
889
- data.validity = FlatVector::Validity(*this);
888
+ format.sel = FlatVector::IncrementalSelectionVector();
889
+ format.data = FlatVector::GetData(*this);
890
+ format.validity = FlatVector::Validity(*this);
890
891
  break;
891
892
  }
892
893
  }
@@ -564,10 +564,12 @@ static idx_t DistinctSelectList(Vector &left, Vector &right, idx_t count, const
564
564
  SelectionVector lcursor(count);
565
565
  SelectionVector rcursor(count);
566
566
 
567
- ListVector::GetEntry(left).Flatten(ListVector::GetListSize(left));
568
- ListVector::GetEntry(right).Flatten(ListVector::GetListSize(right));
569
- Vector lchild(ListVector::GetEntry(left), lcursor, count);
570
- Vector rchild(ListVector::GetEntry(right), rcursor, count);
567
+ Vector lentry_flattened(ListVector::GetEntry(left));
568
+ Vector rentry_flattened(ListVector::GetEntry(right));
569
+ lentry_flattened.Flatten(ListVector::GetListSize(left));
570
+ rentry_flattened.Flatten(ListVector::GetListSize(right));
571
+ Vector lchild(lentry_flattened, lcursor, count);
572
+ Vector rchild(rentry_flattened, rcursor, count);
571
573
 
572
574
  // To perform the positional comparison, we use a vectorisation of the following algorithm:
573
575
  // bool CompareLists(T *left, idx_t nleft, T *right, nright) {
@@ -21,8 +21,10 @@ struct ReservoirQuantileState {
21
21
  if (new_len <= len) {
22
22
  return;
23
23
  }
24
+ T *old_v = v;
24
25
  v = (T *)realloc(v, new_len * sizeof(T));
25
26
  if (!v) {
27
+ free(old_v);
26
28
  throw InternalException("Memory allocation failure");
27
29
  }
28
30
  len = new_len;
@@ -63,9 +63,9 @@ struct ListFunction {
63
63
 
64
64
  template <class STATE>
65
65
  static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
66
- auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
67
- list_bind_data.functions.Destroy(aggr_input_data.allocator, state.linked_list);
66
+ // nop
68
67
  }
68
+
69
69
  static bool IgnoreNull() {
70
70
  return false;
71
71
  }
@@ -37,16 +37,16 @@ struct RegrAvgFunction {
37
37
  };
38
38
  struct RegrAvgXFunction : RegrAvgFunction {
39
39
  template <class A_TYPE, class B_TYPE, class STATE, class OP>
40
- static void Operation(STATE &state, const A_TYPE &x, const B_TYPE &y, AggregateBinaryInput &idata) {
41
- state.sum += y;
40
+ static void Operation(STATE &state, const A_TYPE &y, const B_TYPE &x, AggregateBinaryInput &idata) {
41
+ state.sum += x;
42
42
  state.count++;
43
43
  }
44
44
  };
45
45
 
46
46
  struct RegrAvgYFunction : RegrAvgFunction {
47
47
  template <class A_TYPE, class B_TYPE, class STATE, class OP>
48
- static void Operation(STATE &state, const A_TYPE &x, const B_TYPE &y, AggregateBinaryInput &idata) {
49
- state.sum += x;
48
+ static void Operation(STATE &state, const A_TYPE &y, const B_TYPE &x, AggregateBinaryInput &idata) {
49
+ state.sum += y;
50
50
  state.count++;
51
51
  }
52
52
  };
@@ -23,11 +23,11 @@ struct RegrInterceptOperation {
23
23
  }
24
24
 
25
25
  template <class A_TYPE, class B_TYPE, class STATE, class OP>
26
- static void Operation(STATE &state, const A_TYPE &x, const B_TYPE &y, AggregateBinaryInput &idata) {
26
+ static void Operation(STATE &state, const A_TYPE &y, const B_TYPE &x, AggregateBinaryInput &idata) {
27
27
  state.count++;
28
- state.sum_x += y;
29
- state.sum_y += x;
30
- RegrSlopeOperation::Operation<A_TYPE, B_TYPE, RegrSlopeState, OP>(state.slope, x, y, idata);
28
+ state.sum_x += x;
29
+ state.sum_y += y;
30
+ RegrSlopeOperation::Operation<A_TYPE, B_TYPE, RegrSlopeState, OP>(state.slope, y, x, idata);
31
31
  }
32
32
 
33
33
  template <class STATE, class OP>