duckdb 0.7.2-dev12.0 → 0.7.2-dev1238.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (631)
  1. package/binding.gyp +12 -7
  2. package/lib/duckdb.d.ts +55 -2
  3. package/lib/duckdb.js +20 -1
  4. package/package.json +1 -1
  5. package/src/connection.cpp +1 -2
  6. package/src/database.cpp +1 -1
  7. package/src/duckdb/extension/icu/icu-extension.cpp +4 -0
  8. package/src/duckdb/extension/icu/icu-list-range.cpp +207 -0
  9. package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
  10. package/src/duckdb/extension/icu/include/icu-list-range.hpp +17 -0
  11. package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
  12. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  13. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  14. package/src/duckdb/extension/json/include/json_functions.hpp +2 -0
  15. package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
  16. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
  17. package/src/duckdb/extension/json/json_functions/read_json.cpp +6 -5
  18. package/src/duckdb/extension/json/json_functions.cpp +12 -4
  19. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  20. package/src/duckdb/extension/json/json_serializer.cpp +217 -0
  21. package/src/duckdb/extension/parquet/column_reader.cpp +94 -15
  22. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  23. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  24. package/src/duckdb/extension/parquet/include/decode_utils.hpp +5 -4
  25. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  26. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +2 -1
  27. package/src/duckdb/extension/parquet/parquet-extension.cpp +14 -3
  28. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -1
  29. package/src/duckdb/extension/parquet/parquet_statistics.cpp +49 -36
  30. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +16 -6
  31. package/src/duckdb/src/catalog/catalog.cpp +34 -5
  32. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
  33. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
  34. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
  35. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +3 -3
  36. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
  37. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
  38. package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
  39. package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
  40. package/src/duckdb/src/catalog/dependency_list.cpp +12 -0
  41. package/src/duckdb/src/catalog/duck_catalog.cpp +34 -7
  42. package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
  43. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  44. package/src/duckdb/src/common/box_renderer.cpp +109 -23
  45. package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
  46. package/src/duckdb/src/common/enums/join_type.cpp +3 -22
  47. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  48. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  49. package/src/duckdb/src/common/exception.cpp +15 -1
  50. package/src/duckdb/src/common/field_writer.cpp +1 -0
  51. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  52. package/src/duckdb/src/common/local_file_system.cpp +64 -7
  53. package/src/duckdb/src/common/operator/cast_operators.cpp +1 -1
  54. package/src/duckdb/src/common/preserved_error.cpp +7 -5
  55. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  56. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
  57. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
  58. package/src/duckdb/src/common/serializer/enum_serializer.cpp +1176 -0
  59. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  60. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  61. package/src/duckdb/src/common/sort/sorted_block.cpp +0 -1
  62. package/src/duckdb/src/common/string_util.cpp +18 -1
  63. package/src/duckdb/src/common/types/bit.cpp +166 -87
  64. package/src/duckdb/src/common/types/blob.cpp +1 -1
  65. package/src/duckdb/src/common/types/chunk_collection.cpp +2 -2
  66. package/src/duckdb/src/common/types/column_data_collection.cpp +39 -2
  67. package/src/duckdb/src/common/types/column_data_collection_segment.cpp +12 -10
  68. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  69. package/src/duckdb/src/common/types/interval.cpp +0 -41
  70. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  71. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  72. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  73. package/src/duckdb/src/common/types/time.cpp +13 -0
  74. package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
  75. package/src/duckdb/src/common/types/value.cpp +320 -154
  76. package/src/duckdb/src/common/types/vector.cpp +158 -134
  77. package/src/duckdb/src/common/types.cpp +313 -153
  78. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  79. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  80. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  81. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +2 -1
  82. package/src/duckdb/src/execution/aggregate_hashtable.cpp +98 -74
  83. package/src/duckdb/src/execution/column_binding_resolver.cpp +21 -5
  84. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  85. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  86. package/src/duckdb/src/execution/index/art/art.cpp +19 -5
  87. package/src/duckdb/src/execution/join_hashtable.cpp +3 -1
  88. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  89. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  90. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +117 -26
  91. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
  92. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +5 -3
  93. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -17
  94. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  95. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +2 -2
  96. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +13 -4
  97. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  98. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -11
  99. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +3 -1
  100. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +11 -4
  101. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +24 -19
  102. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  103. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  104. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +2 -2
  105. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  106. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  107. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
  108. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +20 -5
  109. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
  110. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +2 -5
  111. package/src/duckdb/src/execution/partitionable_hashtable.cpp +20 -5
  112. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +22 -16
  113. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
  114. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +95 -47
  115. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  116. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
  117. package/src/duckdb/src/execution/physical_plan/plan_positional_join.cpp +14 -5
  118. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
  119. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +23 -15
  120. package/src/duckdb/src/execution/window_segment_tree.cpp +173 -1
  121. package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
  122. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
  123. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +269 -0
  124. package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
  125. package/src/duckdb/src/function/aggregate/distributive/count.cpp +3 -4
  126. package/src/duckdb/src/function/aggregate/distributive/first.cpp +1 -0
  127. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
  128. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +19 -16
  129. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
  130. package/src/duckdb/src/function/aggregate/holistic/approximate_quantile.cpp +5 -2
  131. package/src/duckdb/src/function/aggregate/holistic/mode.cpp +1 -1
  132. package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +16 -1
  133. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  134. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +138 -45
  135. package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
  136. package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
  137. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -1
  138. package/src/duckdb/src/function/cast/enum_casts.cpp +25 -3
  139. package/src/duckdb/src/function/cast/list_casts.cpp +17 -4
  140. package/src/duckdb/src/function/cast/map_cast.cpp +5 -2
  141. package/src/duckdb/src/function/cast/string_cast.cpp +36 -10
  142. package/src/duckdb/src/function/cast/struct_cast.cpp +24 -4
  143. package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
  144. package/src/duckdb/src/function/cast/union_casts.cpp +33 -7
  145. package/src/duckdb/src/function/cast_rules.cpp +9 -4
  146. package/src/duckdb/src/function/function_binder.cpp +1 -8
  147. package/src/duckdb/src/function/pragma/pragma_queries.cpp +24 -1
  148. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +100 -0
  149. package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
  150. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  151. package/src/duckdb/src/function/scalar/date/date_part.cpp +18 -26
  152. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  153. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  154. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  155. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +4 -146
  156. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  157. package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
  158. package/src/duckdb/src/function/scalar/list/list_concat.cpp +8 -12
  159. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  160. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +7 -3
  161. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  162. package/src/duckdb/src/function/scalar/list/list_value.cpp +6 -10
  163. package/src/duckdb/src/function/scalar/map/map.cpp +47 -1
  164. package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
  165. package/src/duckdb/src/function/scalar/map/map_extract.cpp +68 -26
  166. package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
  167. package/src/duckdb/src/function/scalar/math/numeric.cpp +101 -17
  168. package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
  169. package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
  170. package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
  171. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
  172. package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
  173. package/src/duckdb/src/function/scalar/operators/multiply.cpp +5 -6
  174. package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
  175. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  176. package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
  177. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  178. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  179. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  180. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
  181. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
  182. package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
  183. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  184. package/src/duckdb/src/function/scalar/string_functions.cpp +2 -0
  185. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +5 -10
  186. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +11 -14
  187. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +6 -7
  188. package/src/duckdb/src/function/table/arrow.cpp +5 -2
  189. package/src/duckdb/src/function/table/arrow_conversion.cpp +25 -1
  190. package/src/duckdb/src/function/table/checkpoint.cpp +5 -1
  191. package/src/duckdb/src/function/table/read_csv.cpp +60 -0
  192. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +2 -2
  193. package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -2
  194. package/src/duckdb/src/function/table/table_scan.cpp +9 -12
  195. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  196. package/src/duckdb/src/function/table_function.cpp +30 -11
  197. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +6 -0
  198. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  199. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
  200. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +3 -0
  201. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -1
  202. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +8 -2
  203. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
  204. package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
  205. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
  206. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +7 -4
  207. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  208. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
  209. package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
  210. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
  211. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  212. package/src/duckdb/src/include/duckdb/common/exception.hpp +69 -2
  213. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
  214. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  215. package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
  216. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  217. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +2 -0
  218. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +45 -0
  219. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +6 -1
  220. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  221. package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
  222. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
  223. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
  224. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
  225. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
  226. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
  227. package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
  228. package/src/duckdb/src/include/duckdb/common/string_util.hpp +27 -0
  229. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +12 -7
  230. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  231. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  232. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  233. package/src/duckdb/src/include/duckdb/common/types/time.hpp +3 -0
  234. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  235. package/src/duckdb/src/include/duckdb/common/types/value.hpp +17 -48
  236. package/src/duckdb/src/include/duckdb/common/types/value_map.hpp +1 -1
  237. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -1
  238. package/src/duckdb/src/include/duckdb/common/types.hpp +45 -8
  239. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
  240. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +35 -20
  241. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -14
  242. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +2 -0
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +10 -0
  246. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
  247. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  248. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -3
  249. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +54 -0
  250. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
  251. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +18 -6
  252. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +84 -0
  253. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +2 -2
  254. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +28 -64
  255. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
  256. package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
  257. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +138 -0
  258. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +59 -0
  260. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
  261. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  262. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  263. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
  264. package/src/duckdb/src/include/duckdb/function/table_function.hpp +10 -0
  265. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  266. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  267. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
  268. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  269. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
  270. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  271. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  272. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +2 -0
  273. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +2 -1
  274. package/src/duckdb/src/include/duckdb/main/relation.hpp +2 -1
  275. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  276. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  277. package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
  278. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
  279. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +4 -0
  280. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  281. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
  282. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
  283. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
  284. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
  285. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
  286. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
  287. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -2
  291. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
  295. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +4 -2
  296. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
  297. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
  298. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +5 -1
  299. package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
  300. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
  301. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
  302. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
  303. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
  304. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
  305. package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
  306. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
  307. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
  308. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +13 -2
  309. package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
  310. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
  311. package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
  312. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
  313. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
  314. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
  315. package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
  316. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
  317. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  318. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +87 -0
  319. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  320. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
  321. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  322. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
  323. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +33 -0
  324. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  325. package/src/duckdb/src/include/duckdb/planner/binder.hpp +15 -4
  326. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
  327. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  328. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +64 -0
  329. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
  330. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
  331. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
  332. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +9 -38
  333. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -1
  334. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  335. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  336. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
  337. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
  338. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
  339. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +8 -2
  340. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
  341. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
  342. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  343. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  344. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  345. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  346. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +2 -1
  347. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  348. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  349. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +2 -1
  350. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +4 -3
  351. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +4 -3
  352. package/src/duckdb/src/include/duckdb/storage/index.hpp +5 -4
  353. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
  354. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
  355. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  356. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -6
  357. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  358. package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
  359. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +114 -0
  360. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
  361. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  362. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  363. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  364. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  365. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  366. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +21 -7
  367. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
  368. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +5 -6
  369. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  370. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  371. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +6 -3
  372. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +41 -45
  373. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +23 -7
  374. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +35 -0
  375. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +21 -29
  376. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +6 -6
  377. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +281 -26
  378. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  379. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  380. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -1
  381. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  382. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -3
  383. package/src/duckdb/src/include/duckdb.h +71 -2
  384. package/src/duckdb/src/include/duckdb.hpp +0 -1
  385. package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
  386. package/src/duckdb/src/main/capi/result-c.cpp +27 -1
  387. package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
  388. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  389. package/src/duckdb/src/main/client_context.cpp +38 -34
  390. package/src/duckdb/src/main/client_data.cpp +7 -6
  391. package/src/duckdb/src/main/config.cpp +70 -1
  392. package/src/duckdb/src/main/database.cpp +19 -2
  393. package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
  394. package/src/duckdb/src/main/prepared_statement.cpp +4 -0
  395. package/src/duckdb/src/main/query_profiler.cpp +17 -15
  396. package/src/duckdb/src/main/relation/explain_relation.cpp +3 -3
  397. package/src/duckdb/src/main/relation.cpp +3 -2
  398. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  399. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  400. package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
  401. package/src/duckdb/src/optimizer/filter_combiner.cpp +3 -6
  402. package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
  403. package/src/duckdb/src/optimizer/filter_pushdown.cpp +14 -8
  404. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +107 -71
  405. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +32 -12
  406. package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
  407. package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
  408. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +33 -5
  409. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
  410. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
  411. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +5 -12
  412. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +2 -2
  413. package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +1 -1
  414. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
  415. package/src/duckdb/src/optimizer/rule/move_constants.cpp +10 -4
  416. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
  417. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
  418. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  419. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  420. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  421. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  422. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  423. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  424. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  425. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  426. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  427. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  428. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +29 -32
  429. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +5 -5
  430. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  431. package/src/duckdb/src/optimizer/statistics_propagator.cpp +2 -1
  432. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +2 -2
  433. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -7
  434. package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
  435. package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
  436. package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
  437. package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
  438. package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
  439. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
  440. package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
  441. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +17 -0
  442. package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
  443. package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
  444. package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
  445. package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
  446. package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
  447. package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
  448. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
  449. package/src/duckdb/src/parser/expression/star_expression.cpp +26 -6
  450. package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
  451. package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
  452. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
  453. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
  454. package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
  455. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  456. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
  457. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
  458. package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
  459. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +15 -1
  460. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
  461. package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
  462. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
  463. package/src/duckdb/src/parser/query_node.cpp +51 -1
  464. package/src/duckdb/src/parser/result_modifier.cpp +78 -0
  465. package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
  466. package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
  467. package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
  468. package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
  469. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
  470. package/src/duckdb/src/parser/tableref/joinref.cpp +29 -0
  471. package/src/duckdb/src/parser/tableref/pivotref.cpp +373 -0
  472. package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
  473. package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
  474. package/src/duckdb/src/parser/tableref.cpp +49 -0
  475. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
  476. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
  477. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
  478. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +85 -42
  479. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  480. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  481. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
  482. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
  483. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
  484. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
  485. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +3 -2
  486. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
  487. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
  488. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +179 -0
  489. package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
  490. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
  491. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +2 -3
  492. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +12 -1
  493. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +121 -0
  494. package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
  495. package/src/duckdb/src/parser/transformer.cpp +15 -3
  496. package/src/duckdb/src/planner/bind_context.cpp +18 -25
  497. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +9 -7
  498. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +4 -3
  499. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +23 -12
  500. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
  501. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
  502. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
  503. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +163 -24
  504. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
  505. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +109 -94
  506. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
  507. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +9 -4
  508. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +5 -3
  509. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -2
  510. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +10 -1
  511. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  512. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +12 -8
  513. package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
  514. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +4 -2
  515. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +19 -3
  516. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +366 -0
  517. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
  518. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -0
  519. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -13
  520. package/src/duckdb/src/planner/binder.cpp +19 -24
  521. package/src/duckdb/src/planner/bound_result_modifier.cpp +27 -1
  522. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
  523. package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
  524. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  525. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +146 -0
  526. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +6 -3
  527. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -3
  528. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +1 -132
  529. package/src/duckdb/src/planner/expression_binder.cpp +10 -3
  530. package/src/duckdb/src/planner/expression_iterator.cpp +17 -10
  531. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  532. package/src/duckdb/src/planner/logical_operator.cpp +7 -2
  533. package/src/duckdb/src/planner/logical_operator_visitor.cpp +6 -0
  534. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
  535. package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
  536. package/src/duckdb/src/planner/planner.cpp +2 -1
  537. package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
  538. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
  539. package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
  540. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  541. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +4 -15
  542. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +10 -4
  543. package/src/duckdb/src/storage/checkpoint_manager.cpp +9 -3
  544. package/src/duckdb/src/storage/compression/bitpacking.cpp +29 -25
  545. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +45 -46
  546. package/src/duckdb/src/storage/compression/numeric_constant.cpp +10 -11
  547. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  548. package/src/duckdb/src/storage/compression/rle.cpp +20 -15
  549. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +6 -6
  550. package/src/duckdb/src/storage/data_table.cpp +23 -23
  551. package/src/duckdb/src/storage/index.cpp +12 -1
  552. package/src/duckdb/src/storage/local_storage.cpp +27 -23
  553. package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
  554. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  555. package/src/duckdb/src/storage/statistics/column_statistics.cpp +57 -3
  556. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +8 -9
  557. package/src/duckdb/src/storage/statistics/list_stats.cpp +121 -0
  558. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +591 -0
  559. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
  560. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  561. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  562. package/src/duckdb/src/storage/statistics/struct_stats.cpp +133 -0
  563. package/src/duckdb/src/storage/storage_info.cpp +2 -2
  564. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +4 -10
  565. package/src/duckdb/src/storage/table/column_data.cpp +118 -62
  566. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +10 -9
  567. package/src/duckdb/src/storage/table/column_segment.cpp +30 -45
  568. package/src/duckdb/src/storage/table/list_column_data.cpp +50 -71
  569. package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
  570. package/src/duckdb/src/storage/table/row_group.cpp +213 -143
  571. package/src/duckdb/src/storage/table/row_group_collection.cpp +151 -105
  572. package/src/duckdb/src/storage/table/scan_state.cpp +45 -33
  573. package/src/duckdb/src/storage/table/standard_column_data.cpp +11 -12
  574. package/src/duckdb/src/storage/table/struct_column_data.cpp +27 -34
  575. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  576. package/src/duckdb/src/storage/table/update_segment.cpp +23 -18
  577. package/src/duckdb/src/storage/wal_replay.cpp +8 -5
  578. package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
  579. package/src/duckdb/src/transaction/commit_state.cpp +11 -7
  580. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
  581. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +35 -0
  582. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +36 -2
  583. package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
  584. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1022 -530
  585. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +8 -0
  586. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24462 -22828
  587. package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
  588. package/src/duckdb/third_party/re2/re2/re2.h +2 -0
  589. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  590. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  591. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  592. package/src/duckdb/ub_src_common_types.cpp +2 -0
  593. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  594. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
  595. package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
  596. package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
  597. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  598. package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
  599. package/src/duckdb/ub_src_main_capi.cpp +2 -0
  600. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  601. package/src/duckdb/ub_src_parser.cpp +2 -0
  602. package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
  603. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  604. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  605. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  606. package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
  607. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
  608. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  609. package/src/duckdb/ub_src_planner_expression_binder.cpp +2 -0
  610. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
  611. package/src/duckdb/ub_src_storage_statistics.cpp +6 -6
  612. package/src/duckdb/ub_src_storage_table.cpp +0 -2
  613. package/src/duckdb_node.hpp +2 -1
  614. package/src/statement.cpp +5 -5
  615. package/src/utils.cpp +27 -2
  616. package/test/extension.test.ts +44 -26
  617. package/test/syntax_error.test.ts +3 -1
  618. package/filelist.cache +0 -0
  619. package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
  620. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  621. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  622. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  623. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  624. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  625. package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
  626. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  627. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  628. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  629. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  630. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
  631. package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/exception.hpp"
5
5
  #include "duckdb/common/field_writer.hpp"
6
6
  #include "duckdb/storage/table/column_data.hpp"
7
- #include "duckdb/storage/table/standard_column_data.hpp"
7
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
8
8
  #include "duckdb/storage/table/update_segment.hpp"
9
9
  #include "duckdb/common/chrono.hpp"
10
10
  #include "duckdb/planner/table_filter.hpp"
@@ -15,50 +15,40 @@
15
15
  #include "duckdb/main/database.hpp"
16
16
  #include "duckdb/main/attached_database.hpp"
17
17
  #include "duckdb/transaction/duck_transaction.hpp"
18
+ #include "duckdb/storage/table/append_state.hpp"
19
+ #include "duckdb/storage/table/scan_state.hpp"
18
20
 
19
21
  namespace duckdb {
20
22
 
21
23
  constexpr const idx_t RowGroup::ROW_GROUP_VECTOR_COUNT;
22
24
  constexpr const idx_t RowGroup::ROW_GROUP_SIZE;
23
25
 
24
- RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableInfo &table_info, idx_t start,
25
- idx_t count)
26
- : SegmentBase(start, count), db(db), block_manager(block_manager), table_info(table_info) {
27
-
26
+ RowGroup::RowGroup(RowGroupCollection &collection, idx_t start, idx_t count)
27
+ : SegmentBase<RowGroup>(start, count), collection(collection) {
28
28
  Verify();
29
29
  }
30
30
 
31
- RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableInfo &table_info,
32
- const vector<LogicalType> &types, RowGroupPointer &&pointer)
33
- : SegmentBase(pointer.row_start, pointer.tuple_count), db(db), block_manager(block_manager),
34
- table_info(table_info) {
31
+ RowGroup::RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer)
32
+ : SegmentBase<RowGroup>(pointer.row_start, pointer.tuple_count), collection(collection) {
35
33
  // deserialize the columns
36
- if (pointer.data_pointers.size() != types.size()) {
34
+ if (pointer.data_pointers.size() != collection.GetTypes().size()) {
37
35
  throw IOException("Row group column count is unaligned with table column count. Corrupt file?");
38
36
  }
39
- for (idx_t i = 0; i < pointer.data_pointers.size(); i++) {
40
- auto &block_pointer = pointer.data_pointers[i];
41
- MetaBlockReader column_data_reader(block_manager, block_pointer.block_id);
42
- column_data_reader.offset = block_pointer.offset;
43
- this->columns.push_back(
44
- ColumnData::Deserialize(block_manager, table_info, i, start, column_data_reader, types[i], nullptr));
45
- }
46
-
47
- // set up the statistics
48
- for (auto &stats : pointer.statistics) {
49
- auto stats_type = stats->type;
50
- this->stats.push_back(make_shared<SegmentStatistics>(stats_type, std::move(stats)));
37
+ this->column_pointers = std::move(pointer.data_pointers);
38
+ this->columns.resize(column_pointers.size());
39
+ this->is_loaded = unique_ptr<atomic<bool>[]>(new atomic<bool>[columns.size()]);
40
+ for (idx_t c = 0; c < columns.size(); c++) {
41
+ this->is_loaded[c] = false;
51
42
  }
52
43
  this->version_info = std::move(pointer.versions);
53
44
 
54
45
  Verify();
55
46
  }
56
47
 
57
- RowGroup::RowGroup(RowGroup &row_group, idx_t start)
58
- : SegmentBase(start, row_group.count), db(row_group.db), block_manager(row_group.block_manager),
59
- table_info(row_group.table_info), version_info(std::move(row_group.version_info)),
60
- stats(std::move(row_group.stats)) {
61
- for (auto &column : row_group.columns) {
48
+ RowGroup::RowGroup(RowGroup &row_group, RowGroupCollection &collection, idx_t start)
49
+ : SegmentBase<RowGroup>(start, row_group.count.load()), collection(collection),
50
+ version_info(std::move(row_group.version_info)) {
51
+ for (auto &column : row_group.GetColumns()) {
62
52
  this->columns.push_back(ColumnData::CreateColumn(*column, start));
63
53
  }
64
54
  if (version_info) {
@@ -80,23 +70,104 @@ void VersionNode::SetStart(idx_t start) {
80
70
  RowGroup::~RowGroup() {
81
71
  }
82
72
 
73
+ vector<shared_ptr<ColumnData>> &RowGroup::GetColumns() {
74
+ // ensure all columns are loaded
75
+ for (idx_t c = 0; c < GetColumnCount(); c++) {
76
+ GetColumn(c);
77
+ }
78
+ return columns;
79
+ }
80
+
81
+ idx_t RowGroup::GetColumnCount() const {
82
+ return columns.size();
83
+ }
84
+
85
+ ColumnData &RowGroup::GetColumn(idx_t c) {
86
+ D_ASSERT(c < columns.size());
87
+ if (!is_loaded) {
88
+ // not being lazy loaded
89
+ D_ASSERT(columns[c]);
90
+ return *columns[c];
91
+ }
92
+ if (is_loaded[c]) {
93
+ D_ASSERT(columns[c]);
94
+ return *columns[c];
95
+ }
96
+ lock_guard<mutex> l(row_group_lock);
97
+ if (columns[c]) {
98
+ D_ASSERT(is_loaded[c]);
99
+ return *columns[c];
100
+ }
101
+ if (column_pointers.size() != columns.size()) {
102
+ throw InternalException("Lazy loading a column but the pointer was not set");
103
+ }
104
+ auto &block_manager = collection.GetBlockManager();
105
+ auto &types = collection.GetTypes();
106
+ auto &block_pointer = column_pointers[c];
107
+ MetaBlockReader column_data_reader(block_manager, block_pointer.block_id);
108
+ column_data_reader.offset = block_pointer.offset;
109
+ this->columns[c] =
110
+ ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), c, start, column_data_reader, types[c], nullptr);
111
+ is_loaded[c] = true;
112
+ return *columns[c];
113
+ }
114
+
83
115
  DatabaseInstance &RowGroup::GetDatabase() {
84
- return db.GetDatabase();
116
+ return collection.GetDatabase();
117
+ }
118
+
119
+ BlockManager &RowGroup::GetBlockManager() {
120
+ return collection.GetBlockManager();
121
+ }
122
+ DataTableInfo &RowGroup::GetTableInfo() {
123
+ return collection.GetTableInfo();
85
124
  }
86
125
 
87
126
  void RowGroup::InitializeEmpty(const vector<LogicalType> &types) {
88
127
  // set up the segment trees for the column segments
128
+ D_ASSERT(columns.empty());
89
129
  for (idx_t i = 0; i < types.size(); i++) {
90
- auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), i, start, types[i]);
91
- stats.push_back(make_shared<SegmentStatistics>(types[i]));
130
+ auto column_data = ColumnData::CreateColumn(GetBlockManager(), GetTableInfo(), i, start, types[i]);
92
131
  columns.push_back(std::move(column_data));
93
132
  }
94
133
  }
95
134
 
96
- bool RowGroup::InitializeScanWithOffset(RowGroupScanState &state, idx_t vector_offset) {
135
+ void ColumnScanState::Initialize(const LogicalType &type) {
136
+ if (type.id() == LogicalTypeId::VALIDITY) {
137
+ // validity - nothing to initialize
138
+ return;
139
+ }
140
+ if (type.InternalType() == PhysicalType::STRUCT) {
141
+ // validity + struct children
142
+ auto &struct_children = StructType::GetChildTypes(type);
143
+ child_states.resize(struct_children.size() + 1);
144
+ for (idx_t i = 0; i < struct_children.size(); i++) {
145
+ child_states[i + 1].Initialize(struct_children[i].second);
146
+ }
147
+ } else if (type.InternalType() == PhysicalType::LIST) {
148
+ // validity + list child
149
+ child_states.resize(2);
150
+ child_states[1].Initialize(ListType::GetChildType(type));
151
+ } else {
152
+ // validity
153
+ child_states.resize(1);
154
+ }
155
+ }
156
+
157
+ void CollectionScanState::Initialize(const vector<LogicalType> &types) {
158
+ auto &column_ids = GetColumnIds();
159
+ column_scans = unique_ptr<ColumnScanState[]>(new ColumnScanState[column_ids.size()]);
160
+ for (idx_t i = 0; i < column_ids.size(); i++) {
161
+ if (column_ids[i] == COLUMN_IDENTIFIER_ROW_ID) {
162
+ continue;
163
+ }
164
+ column_scans[i].Initialize(types[column_ids[i]]);
165
+ }
166
+ }
167
+
168
+ bool RowGroup::InitializeScanWithOffset(CollectionScanState &state, idx_t vector_offset) {
97
169
  auto &column_ids = state.GetColumnIds();
98
170
  auto filters = state.GetFilters();
99
- auto parent_max_row = state.GetParentMaxRow();
100
171
  if (filters) {
101
172
  if (!CheckZonemap(*filters, column_ids)) {
102
173
  return false;
@@ -105,13 +176,14 @@ bool RowGroup::InitializeScanWithOffset(RowGroupScanState &state, idx_t vector_o
105
176
 
106
177
  state.row_group = this;
107
178
  state.vector_index = vector_offset;
108
- state.max_row = this->start > parent_max_row ? 0 : MinValue<idx_t>(this->count, parent_max_row - this->start);
109
- state.column_scans = unique_ptr<ColumnScanState[]>(new ColumnScanState[column_ids.size()]);
179
+ state.max_row_group_row =
180
+ this->start > state.max_row ? 0 : MinValue<idx_t>(this->count, state.max_row - this->start);
181
+ D_ASSERT(state.column_scans);
110
182
  for (idx_t i = 0; i < column_ids.size(); i++) {
111
183
  auto column = column_ids[i];
112
184
  if (column != COLUMN_IDENTIFIER_ROW_ID) {
113
- columns[column]->InitializeScanWithOffset(state.column_scans[i],
114
- start + vector_offset * STANDARD_VECTOR_SIZE);
185
+ auto &column_data = GetColumn(column);
186
+ column_data.InitializeScanWithOffset(state.column_scans[i], start + vector_offset * STANDARD_VECTOR_SIZE);
115
187
  } else {
116
188
  state.column_scans[i].current = nullptr;
117
189
  }
@@ -119,10 +191,9 @@ bool RowGroup::InitializeScanWithOffset(RowGroupScanState &state, idx_t vector_o
119
191
  return true;
120
192
  }
121
193
 
122
- bool RowGroup::InitializeScan(RowGroupScanState &state) {
194
+ bool RowGroup::InitializeScan(CollectionScanState &state) {
123
195
  auto &column_ids = state.GetColumnIds();
124
196
  auto filters = state.GetFilters();
125
- auto parent_max_row = state.GetParentMaxRow();
126
197
  if (filters) {
127
198
  if (!CheckZonemap(*filters, column_ids)) {
128
199
  return false;
@@ -130,12 +201,17 @@ bool RowGroup::InitializeScan(RowGroupScanState &state) {
130
201
  }
131
202
  state.row_group = this;
132
203
  state.vector_index = 0;
133
- state.max_row = this->start > parent_max_row ? 0 : MinValue<idx_t>(this->count, parent_max_row - this->start);
134
- state.column_scans = unique_ptr<ColumnScanState[]>(new ColumnScanState[column_ids.size()]);
204
+ state.max_row_group_row =
205
+ this->start > state.max_row ? 0 : MinValue<idx_t>(this->count, state.max_row - this->start);
206
+ if (state.max_row_group_row == 0) {
207
+ return false;
208
+ }
209
+ D_ASSERT(state.column_scans);
135
210
  for (idx_t i = 0; i < column_ids.size(); i++) {
136
211
  auto column = column_ids[i];
137
212
  if (column != COLUMN_IDENTIFIER_ROW_ID) {
138
- columns[column]->InitializeScan(state.column_scans[i]);
213
+ auto &column_data = GetColumn(column);
214
+ column_data.InitializeScan(state.column_scans[i]);
139
215
  } else {
140
216
  state.column_scans[i].current = nullptr;
141
217
  }
@@ -143,22 +219,26 @@ bool RowGroup::InitializeScan(RowGroupScanState &state) {
143
219
  return true;
144
220
  }
145
221
 
146
- unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t changed_idx,
147
- ExpressionExecutor &executor, RowGroupScanState &scan_state,
148
- DataChunk &scan_chunk) {
222
+ unique_ptr<RowGroup> RowGroup::AlterType(RowGroupCollection &new_collection, const LogicalType &target_type,
223
+ idx_t changed_idx, ExpressionExecutor &executor,
224
+ CollectionScanState &scan_state, DataChunk &scan_chunk) {
149
225
  Verify();
150
226
 
151
227
  // construct a new column data for this type
152
- auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), changed_idx, start, target_type);
228
+ auto column_data = ColumnData::CreateColumn(GetBlockManager(), GetTableInfo(), changed_idx, start, target_type);
153
229
 
154
230
  ColumnAppendState append_state;
155
231
  column_data->InitializeAppend(append_state);
156
232
 
157
233
  // scan the original table, and fill the new column with the transformed value
234
+ scan_state.Initialize(collection.GetTypes());
158
235
  InitializeScan(scan_state);
159
236
 
160
- Vector append_vector(target_type);
161
- auto altered_col_stats = make_shared<SegmentStatistics>(target_type);
237
+ DataChunk append_chunk;
238
+ vector<LogicalType> append_types;
239
+ append_types.push_back(target_type);
240
+ append_chunk.Initialize(Allocator::DefaultAllocator(), append_types);
241
+ auto &append_vector = append_chunk.data[0];
162
242
  while (true) {
163
243
  // scan the table
164
244
  scan_chunk.Reset();
@@ -167,37 +247,35 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
167
247
  break;
168
248
  }
169
249
  // execute the expression
250
+ append_chunk.Reset();
170
251
  executor.ExecuteExpression(scan_chunk, append_vector);
171
- column_data->Append(*altered_col_stats->statistics, append_state, append_vector, scan_chunk.size());
252
+ column_data->Append(append_state, append_vector, scan_chunk.size());
172
253
  }
173
254
 
174
255
  // set up the row_group based on this row_group
175
- auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
256
+ auto row_group = make_unique<RowGroup>(new_collection, this->start, this->count);
176
257
  row_group->version_info = version_info;
177
- for (idx_t i = 0; i < columns.size(); i++) {
258
+ auto &cols = GetColumns();
259
+ for (idx_t i = 0; i < cols.size(); i++) {
178
260
  if (i == changed_idx) {
179
261
  // this is the altered column: use the new column
180
262
  row_group->columns.push_back(std::move(column_data));
181
- row_group->stats.push_back(std::move(altered_col_stats));
182
263
  } else {
183
264
  // this column was not altered: use the data directly
184
- row_group->columns.push_back(columns[i]);
185
- row_group->stats.push_back(stats[i]);
265
+ row_group->columns.push_back(cols[i]);
186
266
  }
187
267
  }
188
268
  row_group->Verify();
189
269
  return row_group;
190
270
  }
191
271
 
192
- unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, ExpressionExecutor &executor,
193
- Expression *default_value, Vector &result) {
272
+ unique_ptr<RowGroup> RowGroup::AddColumn(RowGroupCollection &new_collection, ColumnDefinition &new_column,
273
+ ExpressionExecutor &executor, Expression *default_value, Vector &result) {
194
274
  Verify();
195
275
 
196
276
  // construct a new column data for the new column
197
277
  auto added_column =
198
- ColumnData::CreateColumn(block_manager, GetTableInfo(), columns.size(), start, new_column.Type());
199
- auto added_col_stats = make_shared<SegmentStatistics>(
200
- new_column.Type(), BaseStatistics::CreateEmpty(new_column.Type(), StatisticsType::LOCAL_STATS));
278
+ ColumnData::CreateColumn(GetBlockManager(), GetTableInfo(), GetColumnCount(), start, new_column.Type());
201
279
 
202
280
  idx_t rows_to_write = this->count;
203
281
  if (rows_to_write > 0) {
@@ -211,52 +289,51 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
211
289
  dummy_chunk.SetCardinality(rows_in_this_vector);
212
290
  executor.ExecuteExpression(dummy_chunk, result);
213
291
  }
214
- added_column->Append(*added_col_stats->statistics, state, result, rows_in_this_vector);
292
+ added_column->Append(state, result, rows_in_this_vector);
215
293
  }
216
294
  }
217
295
 
218
296
  // set up the row_group based on this row_group
219
- auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
297
+ auto row_group = make_unique<RowGroup>(new_collection, this->start, this->count);
220
298
  row_group->version_info = version_info;
221
- row_group->columns = columns;
222
- row_group->stats = stats;
299
+ row_group->columns = GetColumns();
223
300
  // now add the new column
224
301
  row_group->columns.push_back(std::move(added_column));
225
- row_group->stats.push_back(std::move(added_col_stats));
226
302
 
227
303
  row_group->Verify();
228
304
  return row_group;
229
305
  }
230
306
 
231
- unique_ptr<RowGroup> RowGroup::RemoveColumn(idx_t removed_column) {
307
+ unique_ptr<RowGroup> RowGroup::RemoveColumn(RowGroupCollection &new_collection, idx_t removed_column) {
232
308
  Verify();
233
309
 
234
310
  D_ASSERT(removed_column < columns.size());
235
311
 
236
- auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
312
+ auto row_group = make_unique<RowGroup>(new_collection, this->start, this->count);
237
313
  row_group->version_info = version_info;
238
- row_group->columns = columns;
239
- row_group->stats = stats;
240
- // now remove the column
241
- row_group->columns.erase(row_group->columns.begin() + removed_column);
242
- row_group->stats.erase(row_group->stats.begin() + removed_column);
314
+ // copy over all columns except for the removed one
315
+ auto &cols = GetColumns();
316
+ for (idx_t i = 0; i < cols.size(); i++) {
317
+ if (i != removed_column) {
318
+ row_group->columns.push_back(cols[i]);
319
+ }
320
+ }
243
321
 
244
322
  row_group->Verify();
245
323
  return row_group;
246
324
  }
247
325
 
248
326
  void RowGroup::CommitDrop() {
249
- for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
327
+ for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) {
250
328
  CommitDropColumn(column_idx);
251
329
  }
252
330
  }
253
331
 
254
332
  void RowGroup::CommitDropColumn(idx_t column_idx) {
255
- D_ASSERT(column_idx < columns.size());
256
- columns[column_idx]->CommitDropColumn();
333
+ GetColumn(column_idx).CommitDropColumn();
257
334
  }
258
335
 
259
- void RowGroup::NextVector(RowGroupScanState &state) {
336
+ void RowGroup::NextVector(CollectionScanState &state) {
260
337
  state.vector_index++;
261
338
  auto &column_ids = state.GetColumnIds();
262
339
  for (idx_t i = 0; i < column_ids.size(); i++) {
@@ -265,7 +342,7 @@ void RowGroup::NextVector(RowGroupScanState &state) {
265
342
  continue;
266
343
  }
267
344
  D_ASSERT(column < columns.size());
268
- columns[column]->Skip(state.column_scans[i]);
345
+ GetColumn(column).Skip(state.column_scans[i]);
269
346
  }
270
347
  }
271
348
 
@@ -274,17 +351,14 @@ bool RowGroup::CheckZonemap(TableFilterSet &filters, const vector<column_t> &col
274
351
  auto column_index = entry.first;
275
352
  auto &filter = entry.second;
276
353
  auto base_column_index = column_ids[column_index];
277
-
278
- auto propagate_result = filter->CheckStatistics(*stats[base_column_index]->statistics);
279
- if (propagate_result == FilterPropagateResult::FILTER_ALWAYS_FALSE ||
280
- propagate_result == FilterPropagateResult::FILTER_FALSE_OR_NULL) {
354
+ if (!GetColumn(base_column_index).CheckZonemap(*filter)) {
281
355
  return false;
282
356
  }
283
357
  }
284
358
  return true;
285
359
  }
286
360
 
287
- bool RowGroup::CheckZonemapSegments(RowGroupScanState &state) {
361
+ bool RowGroup::CheckZonemapSegments(CollectionScanState &state) {
288
362
  auto &column_ids = state.GetColumnIds();
289
363
  auto filters = state.GetFilters();
290
364
  if (!filters) {
@@ -294,7 +368,7 @@ bool RowGroup::CheckZonemapSegments(RowGroupScanState &state) {
294
368
  D_ASSERT(entry.first < column_ids.size());
295
369
  auto column_idx = entry.first;
296
370
  auto base_column_idx = column_ids[column_idx];
297
- bool read_segment = columns[base_column_idx]->CheckZonemap(state.column_scans[column_idx], *entry.second);
371
+ bool read_segment = GetColumn(base_column_idx).CheckZonemap(state.column_scans[column_idx], *entry.second);
298
372
  if (!read_segment) {
299
373
  idx_t target_row =
300
374
  state.column_scans[column_idx].current->start + state.column_scans[column_idx].current->count;
@@ -321,19 +395,19 @@ bool RowGroup::CheckZonemapSegments(RowGroupScanState &state) {
321
395
  }
322
396
 
323
397
  template <TableScanType TYPE>
324
- void RowGroup::TemplatedScan(TransactionData transaction, RowGroupScanState &state, DataChunk &result) {
398
+ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result) {
325
399
  const bool ALLOW_UPDATES = TYPE != TableScanType::TABLE_SCAN_COMMITTED_ROWS_DISALLOW_UPDATES &&
326
400
  TYPE != TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED;
327
401
  auto table_filters = state.GetFilters();
328
402
  auto &column_ids = state.GetColumnIds();
329
403
  auto adaptive_filter = state.GetAdaptiveFilter();
330
404
  while (true) {
331
- if (state.vector_index * STANDARD_VECTOR_SIZE >= state.max_row) {
405
+ if (state.vector_index * STANDARD_VECTOR_SIZE >= state.max_row_group_row) {
332
406
  // exceeded the amount of rows to scan
333
407
  return;
334
408
  }
335
409
  idx_t current_row = state.vector_index * STANDARD_VECTOR_SIZE;
336
- auto max_count = MinValue<idx_t>(STANDARD_VECTOR_SIZE, state.max_row - current_row);
410
+ auto max_count = MinValue<idx_t>(STANDARD_VECTOR_SIZE, state.max_row_group_row - current_row);
337
411
 
338
412
  //! first check the zonemap if we have to scan this partition
339
413
  if (!CheckZonemapSegments(state)) {
@@ -369,11 +443,12 @@ void RowGroup::TemplatedScan(TransactionData transaction, RowGroupScanState &sta
369
443
  D_ASSERT(result.data[i].GetType().InternalType() == ROW_TYPE);
370
444
  result.data[i].Sequence(this->start + current_row, 1, count);
371
445
  } else {
446
+ auto &col_data = GetColumn(column);
372
447
  if (TYPE != TableScanType::TABLE_SCAN_REGULAR) {
373
- columns[column]->ScanCommitted(state.vector_index, state.column_scans[i], result.data[i],
374
- ALLOW_UPDATES);
448
+ col_data.ScanCommitted(state.vector_index, state.column_scans[i], result.data[i],
449
+ ALLOW_UPDATES);
375
450
  } else {
376
- columns[column]->Scan(transaction, state.vector_index, state.column_scans[i], result.data[i]);
451
+ col_data.Scan(transaction, state.vector_index, state.column_scans[i], result.data[i]);
377
452
  }
378
453
  }
379
454
  }
@@ -395,9 +470,9 @@ void RowGroup::TemplatedScan(TransactionData transaction, RowGroupScanState &sta
395
470
  for (idx_t i = 0; i < table_filters->filters.size(); i++) {
396
471
  auto tf_idx = adaptive_filter->permutation[i];
397
472
  auto col_idx = column_ids[tf_idx];
398
- columns[col_idx]->Select(transaction, state.vector_index, state.column_scans[tf_idx],
399
- result.data[tf_idx], sel, approved_tuple_count,
400
- *table_filters->filters[tf_idx]);
473
+ auto &col_data = GetColumn(col_idx);
474
+ col_data.Select(transaction, state.vector_index, state.column_scans[tf_idx], result.data[tf_idx],
475
+ sel, approved_tuple_count, *table_filters->filters[tf_idx]);
401
476
  }
402
477
  for (auto &table_filter : table_filters->filters) {
403
478
  result.data[table_filter.first].Slice(sel, approved_tuple_count);
@@ -414,7 +489,8 @@ void RowGroup::TemplatedScan(TransactionData transaction, RowGroupScanState &sta
414
489
  continue;
415
490
  }
416
491
  if (table_filters->filters.find(i) == table_filters->filters.end()) {
417
- columns[col_idx]->Skip(state.column_scans[i]);
492
+ auto &col_data = GetColumn(col_idx);
493
+ col_data.Skip(state.column_scans[i]);
418
494
  }
419
495
  }
420
496
  state.vector_index++;
@@ -432,13 +508,13 @@ void RowGroup::TemplatedScan(TransactionData transaction, RowGroupScanState &sta
432
508
  result_data[sel_idx] = this->start + current_row + sel.get_index(sel_idx);
433
509
  }
434
510
  } else {
511
+ auto &col_data = GetColumn(column);
435
512
  if (TYPE == TableScanType::TABLE_SCAN_REGULAR) {
436
- columns[column]->FilterScan(transaction, state.vector_index, state.column_scans[i],
437
- result.data[i], sel, approved_tuple_count);
513
+ col_data.FilterScan(transaction, state.vector_index, state.column_scans[i], result.data[i],
514
+ sel, approved_tuple_count);
438
515
  } else {
439
- columns[column]->FilterScanCommitted(state.vector_index, state.column_scans[i],
440
- result.data[i], sel, approved_tuple_count,
441
- ALLOW_UPDATES);
516
+ col_data.FilterScanCommitted(state.vector_index, state.column_scans[i], result.data[i], sel,
517
+ approved_tuple_count, ALLOW_UPDATES);
442
518
  }
443
519
  }
444
520
  }
@@ -456,12 +532,12 @@ void RowGroup::TemplatedScan(TransactionData transaction, RowGroupScanState &sta
456
532
  }
457
533
  }
458
534
 
459
- void RowGroup::Scan(TransactionData transaction, RowGroupScanState &state, DataChunk &result) {
535
+ void RowGroup::Scan(TransactionData transaction, CollectionScanState &state, DataChunk &result) {
460
536
  TemplatedScan<TableScanType::TABLE_SCAN_REGULAR>(transaction, state, result);
461
537
  }
462
538
 
463
- void RowGroup::ScanCommitted(RowGroupScanState &state, DataChunk &result, TableScanType type) {
464
- auto &transaction_manager = DuckTransactionManager::Get(db);
539
+ void RowGroup::ScanCommitted(CollectionScanState &state, DataChunk &result, TableScanType type) {
540
+ auto &transaction_manager = DuckTransactionManager::Get(collection.GetAttached());
465
541
 
466
542
  auto lowest_active_start = transaction_manager.LowestActiveStart();
467
543
  auto lowest_active_id = transaction_manager.LowestActiveId();
@@ -534,7 +610,8 @@ void RowGroup::FetchRow(TransactionData transaction, ColumnFetchState &state, co
534
610
  data[result_idx] = row_id;
535
611
  } else {
536
612
  // regular column: fetch data from the base column
537
- columns[column]->FetchRow(transaction, state, row_id, result.data[col_idx], result_idx);
613
+ auto &col_data = GetColumn(column);
614
+ col_data.FetchRow(transaction, state, row_id, result.data[col_idx], result_idx);
538
615
  }
539
616
  }
540
617
  }
@@ -619,16 +696,18 @@ void RowGroup::InitializeAppend(RowGroupAppendState &append_state) {
619
696
  append_state.row_group = this;
620
697
  append_state.offset_in_row_group = this->count;
621
698
  // for each column, initialize the append state
622
- append_state.states = unique_ptr<ColumnAppendState[]>(new ColumnAppendState[columns.size()]);
623
- for (idx_t i = 0; i < columns.size(); i++) {
624
- columns[i]->InitializeAppend(append_state.states[i]);
699
+ append_state.states = unique_ptr<ColumnAppendState[]>(new ColumnAppendState[GetColumnCount()]);
700
+ for (idx_t i = 0; i < GetColumnCount(); i++) {
701
+ auto &col_data = GetColumn(i);
702
+ col_data.InitializeAppend(append_state.states[i]);
625
703
  }
626
704
  }
627
705
 
628
706
  void RowGroup::Append(RowGroupAppendState &state, DataChunk &chunk, idx_t append_count) {
629
707
  // append to the current row_group
630
- for (idx_t i = 0; i < columns.size(); i++) {
631
- columns[i]->Append(*stats[i]->statistics, state.states[i], chunk.data[i], append_count);
708
+ for (idx_t i = 0; i < GetColumnCount(); i++) {
709
+ auto &col_data = GetColumn(i);
710
+ col_data.Append(state.states[i], chunk.data[i], append_count);
632
711
  }
633
712
  state.offset_in_row_group += append_count;
634
713
  }
@@ -643,15 +722,16 @@ void RowGroup::Update(TransactionData transaction, DataChunk &update_chunk, row_
643
722
  for (idx_t i = 0; i < column_ids.size(); i++) {
644
723
  auto column = column_ids[i];
645
724
  D_ASSERT(column.index != COLUMN_IDENTIFIER_ROW_ID);
646
- D_ASSERT(columns[column.index]->type.id() == update_chunk.data[i].GetType().id());
725
+ auto &col_data = GetColumn(column.index);
726
+ D_ASSERT(col_data.type.id() == update_chunk.data[i].GetType().id());
647
727
  if (offset > 0) {
648
728
  Vector sliced_vector(update_chunk.data[i], offset, offset + count);
649
729
  sliced_vector.Flatten(count);
650
- columns[column.index]->Update(transaction, column.index, sliced_vector, ids + offset, count);
730
+ col_data.Update(transaction, column.index, sliced_vector, ids + offset, count);
651
731
  } else {
652
- columns[column.index]->Update(transaction, column.index, update_chunk.data[i], ids, count);
732
+ col_data.Update(transaction, column.index, update_chunk.data[i], ids, count);
653
733
  }
654
- MergeStatistics(column.index, *columns[column.index]->GetUpdateStatistics());
734
+ MergeStatistics(column.index, *col_data.GetUpdateStatistics());
655
735
  }
656
736
  }
657
737
 
@@ -663,29 +743,27 @@ void RowGroup::UpdateColumn(TransactionData transaction, DataChunk &updates, Vec
663
743
  auto primary_column_idx = column_path[0];
664
744
  D_ASSERT(primary_column_idx != COLUMN_IDENTIFIER_ROW_ID);
665
745
  D_ASSERT(primary_column_idx < columns.size());
666
- columns[primary_column_idx]->UpdateColumn(transaction, column_path, updates.data[0], ids, updates.size(), 1);
667
- MergeStatistics(primary_column_idx, *columns[primary_column_idx]->GetUpdateStatistics());
746
+ auto &col_data = GetColumn(primary_column_idx);
747
+ col_data.UpdateColumn(transaction, column_path, updates.data[0], ids, updates.size(), 1);
748
+ MergeStatistics(primary_column_idx, *col_data.GetUpdateStatistics());
668
749
  }
669
750
 
670
751
  unique_ptr<BaseStatistics> RowGroup::GetStatistics(idx_t column_idx) {
671
- D_ASSERT(column_idx < stats.size());
672
-
752
+ auto &col_data = GetColumn(column_idx);
673
753
  lock_guard<mutex> slock(stats_lock);
674
- return stats[column_idx]->statistics->Copy();
754
+ return col_data.GetStatistics();
675
755
  }
676
756
 
677
757
  void RowGroup::MergeStatistics(idx_t column_idx, const BaseStatistics &other) {
678
- D_ASSERT(column_idx < stats.size());
679
-
758
+ auto &col_data = GetColumn(column_idx);
680
759
  lock_guard<mutex> slock(stats_lock);
681
- stats[column_idx]->statistics->Merge(other);
760
+ col_data.MergeStatistics(other);
682
761
  }
683
762
 
684
763
  void RowGroup::MergeIntoStatistics(idx_t column_idx, BaseStatistics &other) {
685
- D_ASSERT(column_idx < stats.size());
686
-
764
+ auto &col_data = GetColumn(column_idx);
687
765
  lock_guard<mutex> slock(stats_lock);
688
- other.Merge(*stats[column_idx]->statistics);
766
+ col_data.MergeIntoStatistics(other);
689
767
  }
690
768
 
691
769
  RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
@@ -702,35 +780,34 @@ RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
702
780
  // Some of these columns are composite (list, struct). The data is written
703
781
  // first sequentially, and the pointers are written later, so that the
704
782
  // pointers all end up densely packed, and thus more cache-friendly.
705
- for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
706
- auto &column = columns[column_idx];
783
+ for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) {
784
+ auto &column = GetColumn(column_idx);
707
785
  ColumnCheckpointInfo checkpoint_info {compression_types[column_idx]};
708
- auto checkpoint_state = column->Checkpoint(*this, manager, checkpoint_info);
786
+ auto checkpoint_state = column.Checkpoint(*this, manager, checkpoint_info);
709
787
  D_ASSERT(checkpoint_state);
710
788
 
711
789
  auto stats = checkpoint_state->GetStatistics();
712
790
  D_ASSERT(stats);
713
791
 
714
- result.statistics.push_back(std::move(stats));
792
+ result.statistics.push_back(stats->Copy());
715
793
  result.states.push_back(std::move(checkpoint_state));
716
794
  }
717
795
  D_ASSERT(result.states.size() == result.statistics.size());
718
796
  return result;
719
797
  }
720
798
 
721
- RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats) {
799
+ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats) {
722
800
  RowGroupPointer row_group_pointer;
723
801
 
724
802
  vector<CompressionType> compression_types;
725
803
  compression_types.reserve(columns.size());
726
- for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
804
+ for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) {
727
805
  compression_types.push_back(writer.GetColumnCompressionType(column_idx));
728
806
  }
729
807
  auto result = WriteToDisk(writer.GetPartialBlockManager(), compression_types);
730
- for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
731
- global_stats[column_idx]->Merge(*result.statistics[column_idx]);
808
+ for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) {
809
+ global_stats.GetStats(column_idx).Statistics().Merge(result.statistics[column_idx]);
732
810
  }
733
- row_group_pointer.statistics = std::move(result.statistics);
734
811
 
735
812
  // construct the row group pointer and write the column meta data to disk
736
813
  D_ASSERT(result.states.size() == columns.size());
@@ -804,9 +881,6 @@ void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &main_serializer)
804
881
  writer.WriteField<uint64_t>(pointer.row_start);
805
882
  writer.WriteField<uint64_t>(pointer.tuple_count);
806
883
  auto &serializer = writer.GetSerializer();
807
- for (auto &stats : pointer.statistics) {
808
- stats->Serialize(serializer);
809
- }
810
884
  for (auto &data_pointer : pointer.data_pointers) {
811
885
  serializer.Write<block_id_t>(data_pointer.block_id);
812
886
  serializer.Write<uint64_t>(data_pointer.offset);
@@ -815,23 +889,18 @@ void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &main_serializer)
815
889
  writer.Finalize();
816
890
  }
817
891
 
818
- RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const ColumnList &columns) {
892
+ RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const vector<LogicalType> &columns) {
819
893
  RowGroupPointer result;
820
894
 
821
895
  FieldReader reader(main_source);
822
896
  result.row_start = reader.ReadRequired<uint64_t>();
823
897
  result.tuple_count = reader.ReadRequired<uint64_t>();
824
898
 
825
- auto physical_columns = columns.PhysicalColumnCount();
899
+ auto physical_columns = columns.size();
826
900
  result.data_pointers.reserve(physical_columns);
827
- result.statistics.reserve(physical_columns);
828
901
 
829
902
  auto &source = reader.GetSource();
830
- for (auto &col : columns.Physical()) {
831
- auto stats = BaseStatistics::Deserialize(source, col.Type());
832
- result.statistics.push_back(std::move(stats));
833
- }
834
- for (idx_t i = 0; i < columns.PhysicalColumnCount(); i++) {
903
+ for (idx_t i = 0; i < physical_columns; i++) {
835
904
  BlockPointer pointer;
836
905
  pointer.block_id = source.Read<block_id_t>();
837
906
  pointer.offset = source.Read<uint64_t>();
@@ -847,8 +916,9 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const ColumnLis
847
916
  // GetStorageInfo
848
917
  //===--------------------------------------------------------------------===//
849
918
  void RowGroup::GetStorageInfo(idx_t row_group_index, TableStorageInfo &result) {
850
- for (idx_t col_idx = 0; col_idx < columns.size(); col_idx++) {
851
- columns[col_idx]->GetStorageInfo(row_group_index, {col_idx}, result);
919
+ for (idx_t col_idx = 0; col_idx < GetColumnCount(); col_idx++) {
920
+ auto &col_data = GetColumn(col_idx);
921
+ col_data.GetStorageInfo(row_group_index, {col_idx}, result);
852
922
  }
853
923
  }
854
924
 
@@ -894,7 +964,7 @@ idx_t RowGroup::Delete(TransactionData transaction, DataTable *table, row_t *ids
894
964
 
895
965
  void RowGroup::Verify() {
896
966
  #ifdef DEBUG
897
- for (auto &column : columns) {
967
+ for (auto &column : GetColumns()) {
898
968
  column->Verify(*this);
899
969
  }
900
970
  #endif