duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504) hide show
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  23. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  24. package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
  25. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  26. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  27. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  28. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  29. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  30. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  31. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  32. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  33. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  34. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  39. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  40. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  41. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  42. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  43. package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
  44. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  45. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  46. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  47. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  48. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  49. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
  55. package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/exception.cpp +2 -2
  62. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  63. package/src/duckdb/src/common/file_system.cpp +19 -0
  64. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  65. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  66. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  67. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  68. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  69. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  70. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  71. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  72. package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
  73. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  74. package/src/duckdb/src/common/types/bit.cpp +51 -0
  75. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  76. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  77. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  78. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  79. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  80. package/src/duckdb/src/common/types/date.cpp +9 -0
  81. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  82. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  83. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  84. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  85. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  86. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  87. package/src/duckdb/src/common/types/value.cpp +11 -6
  88. package/src/duckdb/src/common/types.cpp +9 -656
  89. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  90. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  91. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  94. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  95. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  96. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  97. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  98. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  99. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  100. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  101. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  102. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  103. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  104. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  105. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  106. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  107. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  108. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  109. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  110. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  111. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  112. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  113. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  114. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  115. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  116. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  117. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  118. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  119. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  120. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  121. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  122. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  124. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  125. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  126. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  127. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  128. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  129. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  131. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  132. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  133. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  134. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  135. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  136. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  137. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  138. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  139. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  140. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  141. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  142. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  143. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  144. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  145. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  146. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  147. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  148. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  149. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  150. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  151. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  152. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  153. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  154. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  155. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  156. package/src/duckdb/src/function/function.cpp +3 -1
  157. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  158. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  159. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  160. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  161. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  162. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  163. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  164. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  165. package/src/duckdb/src/function/table/arrow.cpp +19 -0
  166. package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
  167. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  168. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  169. package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
  170. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  171. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  172. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  173. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  174. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  176. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  177. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  178. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  179. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  180. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  181. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  182. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  183. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  184. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  185. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  186. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  187. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  188. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  189. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  190. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
  191. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  192. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  193. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  194. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  195. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  196. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
  197. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  198. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  200. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  201. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  202. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  205. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  206. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  207. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
  208. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  210. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  211. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  213. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  215. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  222. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  229. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  230. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  231. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  232. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  233. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  234. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  235. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  236. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  237. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  238. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  239. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  240. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  241. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  242. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  243. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  244. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  245. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  246. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  247. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  248. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  249. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  251. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  252. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  253. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  254. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  255. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  256. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  257. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  258. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  259. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  260. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  261. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  262. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  263. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  264. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  265. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  266. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  267. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  268. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  269. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  270. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  272. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  273. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  274. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  275. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
  276. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  277. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  278. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  279. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  280. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  281. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  284. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  286. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  287. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  288. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  291. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  293. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  294. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  295. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  296. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  297. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  300. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  302. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  303. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  304. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  306. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  307. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  308. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  309. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  310. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  311. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  312. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  314. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  315. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  316. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  317. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  318. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  319. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  320. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  321. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  322. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  323. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  324. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  325. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  326. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  327. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  328. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  329. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  330. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  331. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  332. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  333. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  334. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  335. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  336. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  337. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  338. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  339. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  340. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  341. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  342. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  343. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  344. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  345. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  346. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  347. package/src/duckdb/src/include/duckdb.h +28 -0
  348. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  349. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  350. package/src/duckdb/src/main/config.cpp +4 -0
  351. package/src/duckdb/src/main/database.cpp +1 -1
  352. package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
  353. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  354. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  355. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  356. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  357. package/src/duckdb/src/main/relation.cpp +6 -5
  358. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  359. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  360. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  361. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  362. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  363. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  364. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  365. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  366. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  367. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  368. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  369. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  370. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  371. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  372. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  373. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  374. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  375. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  376. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  377. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  378. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  379. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  380. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  381. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  382. package/src/duckdb/src/parallel/executor.cpp +15 -0
  383. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  384. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  385. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  386. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  387. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  388. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  389. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  390. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  391. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  392. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  393. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  394. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  395. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  396. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  397. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  398. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  399. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  400. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  401. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  402. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  403. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  404. package/src/duckdb/src/parser/parser.cpp +8 -2
  405. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  406. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  407. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  408. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  409. package/src/duckdb/src/parser/query_node.cpp +15 -37
  410. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  411. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  412. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  413. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  414. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  415. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  416. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  417. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  418. package/src/duckdb/src/parser/tableref.cpp +0 -44
  419. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  420. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  421. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  422. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  423. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  424. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  425. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  426. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  427. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  428. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  429. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  430. package/src/duckdb/src/parser/transformer.cpp +15 -0
  431. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  432. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  433. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  434. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  435. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  436. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  437. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  438. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  439. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  440. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  441. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  442. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  443. package/src/duckdb/src/planner/binder.cpp +44 -31
  444. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  445. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  446. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  447. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  448. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  449. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  450. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  451. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  452. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  453. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  454. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  455. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  456. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  457. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  458. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  459. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  460. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  461. package/src/duckdb/src/storage/data_table.cpp +1 -1
  462. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  463. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  464. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  465. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  466. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  467. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  468. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  469. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  470. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  471. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  472. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  473. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  474. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  475. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  476. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  477. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  478. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  479. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  480. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  481. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  482. package/src/duckdb/ub_src_common.cpp +2 -0
  483. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  484. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  485. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  486. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  487. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  488. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  489. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  490. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  491. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  492. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  493. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  494. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  495. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  496. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  497. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  498. package/src/statement.cpp +10 -3
  499. package/test/columns.test.ts +24 -1
  500. package/test/test_all_types.test.ts +234 -0
  501. package/tsconfig.json +1 -0
  502. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  503. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  504. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -22,6 +22,9 @@ struct SelectionVector;
22
22
  //! Generic radix partitioning functions
23
23
  struct RadixPartitioning {
24
24
  public:
25
+ //! 4096 partitions ought to be enough to go out-of-core properly
26
+ static constexpr const idx_t MAX_RADIX_BITS = 12;
27
+
25
28
  //! The number of partitions for a given number of radix bits
26
29
  static inline constexpr idx_t NumberOfPartitions(idx_t radix_bits) {
27
30
  return idx_t(1) << radix_bits;
@@ -38,10 +41,12 @@ public:
38
41
  throw InternalException("RadixPartitioning::RadixBits unable to find partition count!");
39
42
  }
40
43
 
44
+ //! Radix bits begin after uint16_t because these bits are used as salt in the aggregate HT
41
45
  static inline constexpr idx_t Shift(idx_t radix_bits) {
42
- return 48 - radix_bits;
46
+ return (sizeof(hash_t) - sizeof(uint16_t)) * 8 - radix_bits;
43
47
  }
44
48
 
49
+ //! Mask of the radix bits of the hash
45
50
  static inline constexpr hash_t Mask(idx_t radix_bits) {
46
51
  return (hash_t(1 << radix_bits) - 1) << Shift(radix_bits);
47
52
  }
@@ -49,26 +54,6 @@ public:
49
54
  //! Select using a cutoff on the radix bits of the hash
50
55
  static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff,
51
56
  SelectionVector *true_sel, SelectionVector *false_sel);
52
-
53
- //! Convert hashes to bins
54
- static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count);
55
- };
56
-
57
- //! Templated radix partitioning constants, can be templated to the number of radix bits
58
- template <idx_t radix_bits>
59
- struct RadixPartitioningConstants {
60
- public:
61
- //! Bitmask of the upper bits of the 5th byte
62
- static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits);
63
- static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits);
64
- static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits);
65
-
66
- public:
67
- //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS
68
- static inline hash_t ApplyMask(hash_t hash) {
69
- D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS);
70
- return (hash & MASK) >> SHIFT;
71
- }
72
57
  };
73
58
 
74
59
  //! RadixPartitionedColumnData is a PartitionedColumnData that partitions input based on the radix of a hash
@@ -13,7 +13,7 @@
13
13
 
14
14
  namespace duckdb {
15
15
 
16
- class Allocator;
16
+ class ArenaAllocator;
17
17
  struct AggregateObject;
18
18
  struct AggregateFilterData;
19
19
  class DataChunk;
@@ -26,10 +26,10 @@ class Vector;
26
26
  struct UnifiedVectorFormat;
27
27
 
28
28
  struct RowOperationsState {
29
- RowOperationsState(Allocator &allocator) : allocator(allocator) {
29
+ explicit RowOperationsState(ArenaAllocator &allocator) : allocator(allocator) {
30
30
  }
31
31
 
32
- Allocator &allocator;
32
+ ArenaAllocator &allocator;
33
33
  };
34
34
 
35
35
  // RowOperations contains a set of operations that operate on data using a RowLayout
@@ -42,8 +42,8 @@ public:
42
42
  using Orders = vector<BoundOrderByNode>;
43
43
  using Types = vector<LogicalType>;
44
44
 
45
- using GroupingPartition = unique_ptr<PartitionedColumnData>;
46
- using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
45
+ using GroupingPartition = unique_ptr<PartitionedTupleData>;
46
+ using GroupingAppend = unique_ptr<PartitionedTupleDataAppendState>;
47
47
 
48
48
  static void GenerateOrderings(Orders &partitions, Orders &orders,
49
49
  const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
@@ -53,10 +53,14 @@ public:
53
53
  const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
54
54
  const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
55
55
 
56
+ unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
57
+ void SyncPartitioning(const PartitionGlobalSinkState &other);
58
+
56
59
  void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
57
60
  void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
58
61
 
59
- void BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
62
+ void BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const;
63
+ void BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
60
64
 
61
65
  ClientContext &context;
62
66
  BufferManager &buffer_manager;
@@ -64,9 +68,11 @@ public:
64
68
  mutex lock;
65
69
 
66
70
  // OVER(PARTITION BY...) (hash grouping)
67
- unique_ptr<RadixPartitionedColumnData> grouping_data;
71
+ unique_ptr<RadixPartitionedTupleData> grouping_data;
68
72
  //! Payload plus hash column
69
- Types grouping_types;
73
+ TupleDataLayout grouping_types;
74
+ //! The number of radix bits if this partition is being synced with another
75
+ idx_t fixed_bits;
70
76
 
71
77
  // OVER(...) (sorting)
72
78
  Orders partitions;
@@ -83,6 +89,7 @@ public:
83
89
 
84
90
  // Threading
85
91
  idx_t memory_per_thread;
92
+ idx_t max_bits;
86
93
  atomic<idx_t> count;
87
94
 
88
95
  private:
@@ -102,8 +109,8 @@ public:
102
109
  ExpressionExecutor executor;
103
110
  DataChunk group_chunk;
104
111
  DataChunk payload_chunk;
105
- unique_ptr<PartitionedColumnData> local_partition;
106
- unique_ptr<PartitionedColumnDataAppendState> local_append;
112
+ unique_ptr<PartitionedTupleData> local_partition;
113
+ unique_ptr<PartitionedTupleDataAppendState> local_append;
107
114
 
108
115
  // OVER(...) (sorting)
109
116
  size_t sort_cols;
@@ -127,7 +134,7 @@ class PartitionLocalMergeState;
127
134
 
128
135
  class PartitionGlobalMergeState {
129
136
  public:
130
- using GroupDataPtr = unique_ptr<ColumnDataCollection>;
137
+ using GroupDataPtr = unique_ptr<TupleDataCollection>;
131
138
 
132
139
  PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
133
140
 
@@ -175,10 +182,18 @@ public:
175
182
 
176
183
  class PartitionGlobalMergeStates {
177
184
  public:
185
+ struct Callback {
186
+ virtual bool HasError() const {
187
+ return false;
188
+ }
189
+ };
190
+
178
191
  using PartitionGlobalMergeStatePtr = unique_ptr<PartitionGlobalMergeState>;
179
192
 
180
193
  explicit PartitionGlobalMergeStates(PartitionGlobalSinkState &sink);
181
194
 
195
+ bool ExecuteTask(PartitionLocalMergeState &local_state, Callback &callback);
196
+
182
197
  vector<PartitionGlobalMergeStatePtr> states;
183
198
  };
184
199
 
@@ -16,6 +16,17 @@
16
16
 
17
17
  namespace duckdb {
18
18
 
19
+ #ifndef DUCKDB_QUOTE_DEFINE
20
+ // Preprocessor trick to allow text to be converted to C-string / string
21
+ // Expecte use is:
22
+ // #ifdef SOME_DEFINE
23
+ // string str = DUCKDB_QUOTE_DEFINE(SOME_DEFINE)
24
+ // ...do something with str
25
+ // #endif SOME_DEFINE
26
+ #define DUCKDB_QUOTE_DEFINE_IMPL(x) #x
27
+ #define DUCKDB_QUOTE_DEFINE(x) DUCKDB_QUOTE_DEFINE_IMPL(x)
28
+ #endif
29
+
19
30
  /**
20
31
  * String Utility Functions
21
32
  * Note that these are not the most efficient implementations (i.e., they copy
@@ -24,7 +24,7 @@ struct BatchedChunkScanState {
24
24
  //! Scans over a BatchedDataCollection are ordered by batch index
25
25
  class BatchedDataCollection {
26
26
  public:
27
- DUCKDB_API BatchedDataCollection(vector<LogicalType> types);
27
+ DUCKDB_API BatchedDataCollection(ClientContext &context, vector<LogicalType> types, bool buffer_managed = false);
28
28
 
29
29
  //! Appends a datachunk with the given batch index to the batched collection
30
30
  DUCKDB_API void Append(DataChunk &input, idx_t batch_index);
@@ -51,7 +51,9 @@ private:
51
51
  ColumnDataAppendState append_state;
52
52
  };
53
53
 
54
+ ClientContext &context;
54
55
  vector<LogicalType> types;
56
+ bool buffer_managed;
55
57
  //! The data of the batched chunk collection - a set of batch_index -> ColumnDataCollection pointers
56
58
  map<idx_t, unique_ptr<ColumnDataCollection>> data;
57
59
  //! The last batch collection that was inserted into
@@ -8,8 +8,12 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/common/assert.hpp"
11
12
  #include "duckdb/common/common.hpp"
13
+ #include "duckdb/common/hugeint.hpp"
14
+ #include "duckdb/common/limits.hpp"
12
15
  #include "duckdb/common/types.hpp"
16
+ #include "duckdb/common/types/string_type.hpp"
13
17
 
14
18
  namespace duckdb {
15
19
 
@@ -37,7 +41,33 @@ public:
37
41
  //! Convert a string to a bit. This function should ONLY be called after calling GetBitSize, since it does NOT
38
42
  //! perform data validation.
39
43
  DUCKDB_API static void ToBit(string_t str, string_t &output);
44
+
40
45
  DUCKDB_API static string ToBit(string_t str);
46
+
47
+ //! output needs to have enough space allocated before calling this function (blob size + 1)
48
+ DUCKDB_API static void BlobToBit(string_t blob, string_t &output);
49
+
50
+ DUCKDB_API static string BlobToBit(string_t blob);
51
+
52
+ //! output_str needs to have enough space allocated before calling this function (sizeof(T) + 1)
53
+ template <class T>
54
+ static void NumericToBit(T numeric, string_t &output_str);
55
+
56
+ template <class T>
57
+ static string NumericToBit(T numeric);
58
+
59
+ //! bit is expected to fit inside of output num (bit size <= sizeof(T) + 1)
60
+ template <class T>
61
+ static void BitToNumeric(string_t bit, T &output_num);
62
+
63
+ template <class T>
64
+ static T BitToNumeric(string_t bit);
65
+
66
+ //! bit is expected to fit inside of output_blob (bit size = output_blob + 1)
67
+ static void BitToBlob(string_t bit, string_t &output_blob);
68
+
69
+ static string BitToBlob(string_t bit);
70
+
41
71
  //! Creates a new bitstring of determined length
42
72
  DUCKDB_API static void BitString(const string_t &input, const idx_t &len, string_t &result);
43
73
  DUCKDB_API static void SetEmptyBitString(string_t &target, string_t &input);
@@ -58,5 +88,56 @@ private:
58
88
  static idx_t GetBitInternal(string_t bit_string, idx_t n);
59
89
  static void SetBitInternal(string_t &bit_string, idx_t n, idx_t new_value);
60
90
  static idx_t GetBitIndex(idx_t n);
91
+ static uint8_t GetFirstByte(const string_t &str);
61
92
  };
93
+
94
+ //===--------------------------------------------------------------------===//
95
+ // Bit Template definitions
96
+ //===--------------------------------------------------------------------===//
97
+ template <class T>
98
+ void Bit::NumericToBit(T numeric, string_t &output_str) {
99
+ D_ASSERT(output_str.GetSize() >= sizeof(T) + 1);
100
+
101
+ auto output = output_str.GetDataWriteable();
102
+ auto data = const_data_ptr_cast(&numeric);
103
+
104
+ *output = 0; // set padding to 0
105
+ ++output;
106
+ for (idx_t idx = 0; idx < sizeof(T); ++idx) {
107
+ output[idx] = data[sizeof(T) - idx - 1];
108
+ }
109
+ Bit::Finalize(output_str);
110
+ }
111
+
112
+ template <class T>
113
+ string Bit::NumericToBit(T numeric) {
114
+ auto bit_len = sizeof(T) + 1;
115
+ auto buffer = make_unsafe_uniq_array<char>(bit_len);
116
+ string_t output_str(buffer.get(), bit_len);
117
+ Bit::NumericToBit(numeric, output_str);
118
+ return output_str.GetString();
119
+ }
120
+
121
+ template <class T>
122
+ T Bit::BitToNumeric(string_t bit) {
123
+ T output;
124
+ Bit::BitToNumeric(bit, output);
125
+ return (output);
126
+ }
127
+
128
+ template <class T>
129
+ void Bit::BitToNumeric(string_t bit, T &output_num) {
130
+ D_ASSERT(bit.GetSize() <= sizeof(T) + 1);
131
+
132
+ output_num = 0;
133
+ auto data = const_data_ptr_cast(bit.GetData());
134
+ auto output = data_ptr_cast(&output_num);
135
+
136
+ idx_t padded_byte_idx = sizeof(T) - bit.GetSize() + 1;
137
+ output[sizeof(T) - 1 - padded_byte_idx] = GetFirstByte(bit);
138
+ for (idx_t idx = padded_byte_idx + 1; idx < sizeof(T); ++idx) {
139
+ output[sizeof(T) - 1 - idx] = data[1 + idx - padded_byte_idx];
140
+ }
141
+ }
142
+
62
143
  } // namespace duckdb
@@ -53,7 +53,7 @@ public:
53
53
  void Initialize(ColumnDataAllocator &other);
54
54
  void InitializeChunkState(ChunkManagementState &state, ChunkMetaData &meta_data);
55
55
  data_ptr_t GetDataPointer(ChunkManagementState &state, uint32_t block_id, uint32_t offset);
56
- void UnswizzlePointers(ChunkManagementState &state, Vector &result, uint16_t v_offset, uint16_t count,
56
+ void UnswizzlePointers(ChunkManagementState &state, Vector &result, idx_t v_offset, uint16_t count,
57
57
  uint32_t block_id, uint32_t offset);
58
58
 
59
59
  //! Deletes the block with the given id
@@ -143,7 +143,12 @@ public:
143
143
  //! Initialize the column data collection
144
144
  void Initialize(vector<LogicalType> types);
145
145
 
146
- //! Get a vector of references to every chunk (segment, index in segment), and optionally sort by block id
146
+ //! Get references to the string heaps in this ColumnDataCollection
147
+ vector<shared_ptr<StringHeap>> GetHeapReferences();
148
+ //! Get the allocator type of this ColumnDataCollection
149
+ ColumnDataAllocatorType GetAllocatorType() const;
150
+
151
+ //! Get a vector of the segments in this ColumnDataCollection
147
152
  const vector<unique_ptr<ColumnDataCollectionSegment>> &GetSegments() const;
148
153
 
149
154
  private:
@@ -94,7 +94,7 @@ public:
94
94
  //! The set of child indices
95
95
  vector<VectorDataIndex> child_indices;
96
96
  //! The string heap for the column data collection (only used for IN_MEMORY_ALLOCATOR)
97
- StringHeap heap;
97
+ shared_ptr<StringHeap> heap;
98
98
 
99
99
  public:
100
100
  void AllocateNewChunk();
@@ -20,7 +20,9 @@ enum class ColumnDataAllocatorType : uint8_t {
20
20
  BUFFER_MANAGER_ALLOCATOR,
21
21
  //! Use an in-memory allocator, allocating data for every chunk
22
22
  //! This causes the column data collection to allocate blocks that are not tied to a buffer manager
23
- IN_MEMORY_ALLOCATOR
23
+ IN_MEMORY_ALLOCATOR,
24
+ //! Use a buffer manager to allocate vectors, but use a StringHeap for strings
25
+ HYBRID
24
26
  };
25
27
 
26
28
  enum class ColumnDataScanProperties : uint8_t {
@@ -153,7 +153,7 @@ public:
153
153
 
154
154
  //! Converts this DataChunk to a printable string representation
155
155
  DUCKDB_API string ToString() const;
156
- DUCKDB_API void Print();
156
+ DUCKDB_API void Print() const;
157
157
 
158
158
  DataChunk(const DataChunk &) = delete;
159
159
 
@@ -72,13 +72,13 @@ struct date_t { // NOLINT
72
72
  };
73
73
 
74
74
  // special values
75
- static inline date_t infinity() {
75
+ static inline date_t infinity() { // NOLINT
76
76
  return date_t(NumericLimits<int32_t>::Maximum());
77
- } // NOLINT
78
- static inline date_t ninfinity() {
77
+ } // NOLINT
78
+ static inline date_t ninfinity() { // NOLINT
79
79
  return date_t(-NumericLimits<int32_t>::Maximum());
80
- } // NOLINT
81
- static inline date_t epoch() {
80
+ } // NOLINT
81
+ static inline date_t epoch() { // NOLINT
82
82
  return date_t(0);
83
83
  } // NOLINT
84
84
  };
@@ -158,6 +158,8 @@ public:
158
158
  DUCKDB_API static int64_t EpochNanoseconds(date_t date);
159
159
  //! Extract the epoch from the date (microseconds since 1970-01-01)
160
160
  DUCKDB_API static int64_t EpochMicroseconds(date_t date);
161
+ //! Extract the epoch from the date (milliseconds since 1970-01-01)
162
+ DUCKDB_API static int64_t EpochMilliseconds(date_t date);
161
163
  //! Convert the epoch (seconds since 1970-01-01) to a date_t
162
164
  DUCKDB_API static date_t EpochToDate(int64_t epoch);
163
165
 
@@ -34,28 +34,26 @@ struct LinkedList {
34
34
 
35
35
  // forward declarations
36
36
  struct ListSegmentFunctions;
37
- typedef ListSegment *(*create_segment_t)(const ListSegmentFunctions &functions, Allocator &allocator,
37
+ typedef ListSegment *(*create_segment_t)(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
38
38
  uint16_t capacity);
39
- typedef void (*write_data_to_segment_t)(const ListSegmentFunctions &functions, Allocator &allocator,
39
+ typedef void (*write_data_to_segment_t)(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
40
40
  ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count);
41
41
  typedef void (*read_data_from_segment_t)(const ListSegmentFunctions &functions, const ListSegment *segment,
42
42
  Vector &result, idx_t &total_count);
43
43
  typedef ListSegment *(*copy_data_from_segment_t)(const ListSegmentFunctions &functions, const ListSegment *source,
44
- Allocator &allocator);
45
- typedef void (*destroy_segment_t)(const ListSegmentFunctions &functions, ListSegment *segment, Allocator &allocator);
44
+ ArenaAllocator &allocator);
46
45
 
47
46
  struct ListSegmentFunctions {
48
47
  create_segment_t create_segment;
49
48
  write_data_to_segment_t write_data;
50
49
  read_data_from_segment_t read_data;
51
50
  copy_data_from_segment_t copy_data;
52
- destroy_segment_t destroy;
53
51
  vector<ListSegmentFunctions> child_functions;
54
52
 
55
- void AppendRow(Allocator &allocator, LinkedList &linked_list, Vector &input, idx_t &entry_idx, idx_t &count) const;
53
+ void AppendRow(ArenaAllocator &allocator, LinkedList &linked_list, Vector &input, idx_t &entry_idx,
54
+ idx_t &count) const;
56
55
  void BuildListVector(const LinkedList &linked_list, Vector &result, idx_t &initial_total_count) const;
57
- void CopyLinkedList(const LinkedList &source_list, LinkedList &target_list, Allocator &allocator) const;
58
- void Destroy(Allocator &allocator, LinkedList &linked_list) const;
56
+ void CopyLinkedList(const LinkedList &source_list, LinkedList &target_list, ArenaAllocator &allocator) const;
59
57
  };
60
58
 
61
59
  void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type);
@@ -48,7 +48,6 @@ struct PartitionTupleDataAllocators {
48
48
  //! partitioning, e.g., radix, hive
49
49
  class PartitionedTupleData {
50
50
  public:
51
- unique_ptr<PartitionedTupleData> CreateShared();
52
51
  virtual ~PartitionedTupleData();
53
52
 
54
53
  public:
@@ -124,7 +123,11 @@ protected:
124
123
  void BuildBufferSpace(PartitionedTupleDataAppendState &state);
125
124
  //! Create a collection for a specific a partition
126
125
  unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
127
- return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
126
+ if (allocators) {
127
+ return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
128
+ } else {
129
+ return make_uniq<TupleDataCollection>(buffer_manager, layout);
130
+ }
128
131
  }
129
132
 
130
133
  protected:
@@ -17,6 +17,7 @@ namespace duckdb {
17
17
  class TupleDataAllocator;
18
18
  struct TupleDataScatterFunction;
19
19
  struct TupleDataGatherFunction;
20
+ struct RowOperationsState;
20
21
 
21
22
  typedef void (*tuple_data_scatter_function_t)(const Vector &source, const TupleDataVectorFormat &source_format,
22
23
  const SelectionVector &append_sel, const idx_t append_count,
@@ -38,6 +38,9 @@ struct CombinedListData {
38
38
  };
39
39
 
40
40
  struct TupleDataVectorFormat {
41
+ const SelectionVector *original_sel;
42
+ SelectionVector original_owned_sel;
43
+
41
44
  UnifiedVectorFormat data;
42
45
  vector<TupleDataVectorFormat> child_formats;
43
46
  unique_ptr<CombinedListData> combined_list_data;
@@ -83,6 +83,10 @@ public:
83
83
  return value.pointer.prefix;
84
84
  }
85
85
 
86
+ char *GetPrefixWriteable() const {
87
+ return (char *)value.pointer.prefix;
88
+ }
89
+
86
90
  idx_t GetSize() const {
87
91
  return value.inlined.length;
88
92
  }
@@ -95,6 +99,11 @@ public:
95
99
  return GetString();
96
100
  }
97
101
 
102
+ char *GetPointer() const {
103
+ D_ASSERT(!IsInlined());
104
+ return value.pointer.ptr;
105
+ }
106
+
98
107
  void SetPointer(char *new_ptr) {
99
108
  D_ASSERT(!IsInlined());
100
109
  value.pointer.ptr = new_ptr;
@@ -394,6 +394,7 @@ struct ListValue {
394
394
  struct UnionValue {
395
395
  DUCKDB_API static const Value &GetValue(const Value &value);
396
396
  DUCKDB_API static uint8_t GetTag(const Value &value);
397
+ DUCKDB_API static const LogicalType &GetType(const Value &value);
397
398
  };
398
399
 
399
400
  //! Return the internal integral value for any type that is stored as an integral value internally
@@ -27,19 +27,6 @@ class Vector;
27
27
  class ClientContext;
28
28
  class FieldWriter;
29
29
 
30
- //! Extra Type Info Type
31
- enum class ExtraTypeInfoType : uint8_t {
32
- INVALID_TYPE_INFO = 0,
33
- GENERIC_TYPE_INFO = 1,
34
- DECIMAL_TYPE_INFO = 2,
35
- STRING_TYPE_INFO = 3,
36
- LIST_TYPE_INFO = 4,
37
- STRUCT_TYPE_INFO = 5,
38
- ENUM_TYPE_INFO = 6,
39
- USER_TYPE_INFO = 7,
40
- AGGREGATE_STATE_TYPE_INFO = 8
41
- };
42
-
43
30
  struct string_t;
44
31
 
45
32
  template <class T>
@@ -328,8 +315,6 @@ struct LogicalType {
328
315
 
329
316
  DUCKDB_API static LogicalType MaxLogicalType(const LogicalType &left, const LogicalType &right);
330
317
 
331
- DUCKDB_API static ExtraTypeInfoType GetExtraTypeInfoType(const ExtraTypeInfo &type);
332
-
333
318
  //! Gets the decimal properties of a numeric type. Fails if the type is not numeric.
334
319
  DUCKDB_API bool GetDecimalProperties(uint8_t &width, uint8_t &scale) const;
335
320
 
@@ -480,6 +465,7 @@ bool ApproxEqual(float l, float r);
480
465
  bool ApproxEqual(double l, double r);
481
466
 
482
467
  struct aggregate_state_t {
468
+ aggregate_state_t() {}
483
469
  aggregate_state_t(string function_name_p, LogicalType return_type_p, vector<LogicalType> bound_argument_types_p) : function_name(std::move(function_name_p)), return_type(std::move(return_type_p)), bound_argument_types(std::move(bound_argument_types_p)) {
484
470
  }
485
471