duckdb 0.7.2-dev0.0 → 0.7.2-dev1034.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (590)
  1. package/binding.gyp +12 -7
  2. package/lib/duckdb.d.ts +55 -2
  3. package/lib/duckdb.js +20 -1
  4. package/package.json +1 -1
  5. package/src/connection.cpp +1 -2
  6. package/src/database.cpp +1 -1
  7. package/src/duckdb/extension/icu/icu-extension.cpp +4 -0
  8. package/src/duckdb/extension/icu/icu-list-range.cpp +207 -0
  9. package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
  10. package/src/duckdb/extension/icu/include/icu-list-range.hpp +17 -0
  11. package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
  12. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  13. package/src/duckdb/extension/json/include/json_functions.hpp +2 -0
  14. package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
  15. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
  16. package/src/duckdb/extension/json/json_functions/read_json.cpp +6 -5
  17. package/src/duckdb/extension/json/json_functions.cpp +12 -4
  18. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  19. package/src/duckdb/extension/json/json_serializer.cpp +217 -0
  20. package/src/duckdb/extension/parquet/column_reader.cpp +94 -15
  21. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  22. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  23. package/src/duckdb/extension/parquet/include/decode_utils.hpp +5 -4
  24. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  25. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +2 -1
  26. package/src/duckdb/extension/parquet/parquet-extension.cpp +12 -2
  27. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -1
  28. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  29. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +16 -6
  30. package/src/duckdb/src/catalog/catalog.cpp +34 -5
  31. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
  32. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
  33. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
  34. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +3 -3
  35. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
  36. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
  37. package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
  38. package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
  39. package/src/duckdb/src/catalog/dependency_list.cpp +12 -0
  40. package/src/duckdb/src/catalog/duck_catalog.cpp +34 -7
  41. package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
  42. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  43. package/src/duckdb/src/common/box_renderer.cpp +109 -23
  44. package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
  45. package/src/duckdb/src/common/enums/join_type.cpp +3 -22
  46. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  47. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  48. package/src/duckdb/src/common/exception.cpp +15 -1
  49. package/src/duckdb/src/common/field_writer.cpp +1 -0
  50. package/src/duckdb/src/common/operator/cast_operators.cpp +1 -1
  51. package/src/duckdb/src/common/preserved_error.cpp +7 -5
  52. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
  53. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
  54. package/src/duckdb/src/common/serializer/enum_serializer.cpp +1176 -0
  55. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  56. package/src/duckdb/src/common/sort/sorted_block.cpp +0 -1
  57. package/src/duckdb/src/common/string_util.cpp +4 -1
  58. package/src/duckdb/src/common/types/bit.cpp +166 -87
  59. package/src/duckdb/src/common/types/blob.cpp +1 -1
  60. package/src/duckdb/src/common/types/chunk_collection.cpp +2 -2
  61. package/src/duckdb/src/common/types/column_data_collection.cpp +39 -2
  62. package/src/duckdb/src/common/types/column_data_collection_segment.cpp +11 -6
  63. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  64. package/src/duckdb/src/common/types/time.cpp +13 -0
  65. package/src/duckdb/src/common/types/value.cpp +320 -154
  66. package/src/duckdb/src/common/types/vector.cpp +155 -127
  67. package/src/duckdb/src/common/types.cpp +313 -153
  68. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +2 -1
  69. package/src/duckdb/src/execution/aggregate_hashtable.cpp +10 -5
  70. package/src/duckdb/src/execution/column_binding_resolver.cpp +21 -5
  71. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  72. package/src/duckdb/src/execution/index/art/art.cpp +6 -5
  73. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  74. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +117 -26
  75. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
  76. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +5 -3
  77. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -17
  78. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +2 -2
  79. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +12 -4
  80. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -11
  81. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +3 -1
  82. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +6 -3
  83. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +6 -14
  84. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +2 -2
  85. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
  86. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +20 -5
  87. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
  88. package/src/duckdb/src/execution/partitionable_hashtable.cpp +14 -2
  89. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +21 -16
  90. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
  91. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +95 -47
  92. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
  93. package/src/duckdb/src/execution/physical_plan/plan_positional_join.cpp +14 -5
  94. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
  95. package/src/duckdb/src/execution/window_segment_tree.cpp +173 -1
  96. package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
  97. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
  98. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +269 -0
  99. package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
  100. package/src/duckdb/src/function/aggregate/distributive/count.cpp +3 -4
  101. package/src/duckdb/src/function/aggregate/distributive/first.cpp +1 -0
  102. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
  103. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +19 -16
  104. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
  105. package/src/duckdb/src/function/aggregate/holistic/approximate_quantile.cpp +5 -2
  106. package/src/duckdb/src/function/aggregate/holistic/mode.cpp +1 -1
  107. package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +16 -1
  108. package/src/duckdb/src/function/aggregate/nested/list.cpp +8 -8
  109. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +58 -16
  110. package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
  111. package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
  112. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -1
  113. package/src/duckdb/src/function/cast/enum_casts.cpp +25 -3
  114. package/src/duckdb/src/function/cast/list_casts.cpp +17 -4
  115. package/src/duckdb/src/function/cast/map_cast.cpp +5 -2
  116. package/src/duckdb/src/function/cast/string_cast.cpp +36 -10
  117. package/src/duckdb/src/function/cast/struct_cast.cpp +24 -4
  118. package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
  119. package/src/duckdb/src/function/cast/union_casts.cpp +33 -7
  120. package/src/duckdb/src/function/function_binder.cpp +1 -8
  121. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +100 -0
  122. package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
  123. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  124. package/src/duckdb/src/function/scalar/date/date_part.cpp +18 -26
  125. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  126. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  127. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  128. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +4 -146
  129. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  130. package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
  131. package/src/duckdb/src/function/scalar/list/list_concat.cpp +8 -12
  132. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  133. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +7 -3
  134. package/src/duckdb/src/function/scalar/list/list_value.cpp +6 -10
  135. package/src/duckdb/src/function/scalar/map/map.cpp +47 -1
  136. package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
  137. package/src/duckdb/src/function/scalar/map/map_extract.cpp +68 -26
  138. package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
  139. package/src/duckdb/src/function/scalar/math/numeric.cpp +101 -17
  140. package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
  141. package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
  142. package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
  143. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
  144. package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
  145. package/src/duckdb/src/function/scalar/operators/multiply.cpp +5 -6
  146. package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
  147. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  148. package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
  149. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  150. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  151. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  152. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
  153. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
  154. package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
  155. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  156. package/src/duckdb/src/function/scalar/string_functions.cpp +2 -0
  157. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +5 -10
  158. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +11 -14
  159. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +6 -7
  160. package/src/duckdb/src/function/table/arrow.cpp +5 -2
  161. package/src/duckdb/src/function/table/arrow_conversion.cpp +25 -1
  162. package/src/duckdb/src/function/table/checkpoint.cpp +5 -1
  163. package/src/duckdb/src/function/table/read_csv.cpp +55 -0
  164. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +2 -2
  165. package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -2
  166. package/src/duckdb/src/function/table/table_scan.cpp +1 -1
  167. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  168. package/src/duckdb/src/function/table_function.cpp +30 -11
  169. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +6 -0
  170. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  171. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
  172. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +3 -0
  173. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -1
  174. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +8 -2
  175. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
  176. package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
  177. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
  178. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +7 -4
  179. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  180. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
  181. package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
  182. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
  183. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  184. package/src/duckdb/src/include/duckdb/common/exception.hpp +69 -2
  185. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
  186. package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
  187. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +2 -0
  188. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +45 -0
  189. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +6 -1
  190. package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
  191. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
  192. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
  193. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
  194. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
  195. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
  196. package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
  197. package/src/duckdb/src/include/duckdb/common/string_util.hpp +25 -0
  198. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +12 -7
  199. package/src/duckdb/src/include/duckdb/common/types/time.hpp +3 -0
  200. package/src/duckdb/src/include/duckdb/common/types/value.hpp +17 -48
  201. package/src/duckdb/src/include/duckdb/common/types/value_map.hpp +1 -1
  202. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -1
  203. package/src/duckdb/src/include/duckdb/common/types.hpp +45 -8
  204. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
  205. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -0
  206. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +2 -2
  207. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  208. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +2 -0
  209. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
  211. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
  212. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -0
  213. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -3
  214. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +54 -0
  215. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
  216. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +18 -6
  217. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +84 -0
  218. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +2 -2
  219. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +28 -64
  220. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
  221. package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
  222. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +138 -0
  223. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
  224. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +59 -0
  225. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
  226. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  227. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  228. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
  229. package/src/duckdb/src/include/duckdb/function/table_function.hpp +10 -0
  230. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  231. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
  232. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  233. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
  234. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  235. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  236. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +2 -0
  237. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +2 -1
  238. package/src/duckdb/src/include/duckdb/main/relation.hpp +2 -1
  239. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  240. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  241. package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
  242. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
  243. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +4 -0
  244. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  245. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
  246. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
  247. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
  248. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
  249. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
  250. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
  251. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
  252. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  253. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  254. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -2
  255. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
  256. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
  257. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
  258. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
  259. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +4 -2
  260. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
  261. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
  262. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +5 -1
  263. package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
  264. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
  265. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
  266. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
  267. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
  268. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
  269. package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
  270. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
  271. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
  272. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +13 -2
  273. package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
  274. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
  275. package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
  276. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
  277. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
  278. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
  279. package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
  280. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
  281. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  282. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +87 -0
  283. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  284. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  286. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
  287. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +33 -0
  288. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  289. package/src/duckdb/src/include/duckdb/planner/binder.hpp +15 -4
  290. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
  291. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +64 -0
  293. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
  294. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
  295. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
  296. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +9 -38
  297. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -1
  298. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  299. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  300. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
  301. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
  302. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
  303. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +8 -2
  304. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
  305. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
  306. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  307. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  308. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  309. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  310. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  311. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  312. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  313. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  314. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  315. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -3
  316. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
  317. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
  318. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
  319. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  320. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -6
  321. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  322. package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
  323. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +114 -0
  324. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
  325. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  326. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  327. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  328. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  329. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  330. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -3
  331. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
  332. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -5
  333. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  334. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +6 -2
  335. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +10 -6
  336. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +8 -5
  337. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +37 -0
  338. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +10 -1
  339. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +4 -3
  340. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +271 -26
  341. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  342. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -1
  343. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  344. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -2
  345. package/src/duckdb/src/include/duckdb.h +50 -2
  346. package/src/duckdb/src/include/duckdb.hpp +0 -1
  347. package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
  348. package/src/duckdb/src/main/capi/result-c.cpp +27 -1
  349. package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
  350. package/src/duckdb/src/main/client_context.cpp +38 -34
  351. package/src/duckdb/src/main/client_data.cpp +7 -6
  352. package/src/duckdb/src/main/config.cpp +70 -1
  353. package/src/duckdb/src/main/database.cpp +19 -2
  354. package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
  355. package/src/duckdb/src/main/prepared_statement.cpp +4 -0
  356. package/src/duckdb/src/main/query_profiler.cpp +17 -15
  357. package/src/duckdb/src/main/relation/explain_relation.cpp +3 -3
  358. package/src/duckdb/src/main/relation.cpp +3 -2
  359. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  360. package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
  361. package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
  362. package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
  363. package/src/duckdb/src/optimizer/filter_pushdown.cpp +14 -8
  364. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +105 -71
  365. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +31 -12
  366. package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
  367. package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
  368. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +33 -5
  369. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
  370. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
  371. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +5 -12
  372. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +2 -2
  373. package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +1 -1
  374. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
  375. package/src/duckdb/src/optimizer/rule/move_constants.cpp +10 -4
  376. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
  377. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
  378. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  379. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  380. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  381. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  382. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  383. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  384. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  385. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  386. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  387. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  388. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +29 -32
  389. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +5 -5
  390. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  391. package/src/duckdb/src/optimizer/statistics_propagator.cpp +2 -1
  392. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +2 -2
  393. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -4
  394. package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
  395. package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
  396. package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
  397. package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
  398. package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
  399. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
  400. package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
  401. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +17 -0
  402. package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
  403. package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
  404. package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
  405. package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
  406. package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
  407. package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
  408. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
  409. package/src/duckdb/src/parser/expression/star_expression.cpp +26 -6
  410. package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
  411. package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
  412. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
  413. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
  414. package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
  415. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  416. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
  417. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
  418. package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
  419. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +15 -1
  420. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
  421. package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
  422. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
  423. package/src/duckdb/src/parser/query_node.cpp +51 -1
  424. package/src/duckdb/src/parser/result_modifier.cpp +78 -0
  425. package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
  426. package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
  427. package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
  428. package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
  429. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
  430. package/src/duckdb/src/parser/tableref/joinref.cpp +29 -0
  431. package/src/duckdb/src/parser/tableref/pivotref.cpp +373 -0
  432. package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
  433. package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
  434. package/src/duckdb/src/parser/tableref.cpp +49 -0
  435. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
  436. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
  437. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
  438. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +63 -42
  439. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  440. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  441. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
  442. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
  443. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
  444. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
  445. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +3 -2
  446. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
  447. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
  448. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +179 -0
  449. package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
  450. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
  451. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +2 -3
  452. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +12 -1
  453. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +121 -0
  454. package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
  455. package/src/duckdb/src/parser/transformer.cpp +15 -3
  456. package/src/duckdb/src/planner/bind_context.cpp +18 -25
  457. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +9 -7
  458. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +4 -3
  459. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +23 -12
  460. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
  461. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
  462. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
  463. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +163 -24
  464. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
  465. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +109 -94
  466. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
  467. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +9 -4
  468. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +5 -3
  469. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -2
  470. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +9 -1
  471. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  472. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +12 -8
  473. package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
  474. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +4 -2
  475. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +19 -3
  476. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +366 -0
  477. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
  478. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -0
  479. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -13
  480. package/src/duckdb/src/planner/binder.cpp +19 -24
  481. package/src/duckdb/src/planner/bound_result_modifier.cpp +27 -1
  482. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
  483. package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
  484. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  485. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +146 -0
  486. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +6 -3
  487. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -3
  488. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +1 -132
  489. package/src/duckdb/src/planner/expression_binder.cpp +10 -3
  490. package/src/duckdb/src/planner/expression_iterator.cpp +17 -10
  491. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  492. package/src/duckdb/src/planner/logical_operator.cpp +7 -2
  493. package/src/duckdb/src/planner/logical_operator_visitor.cpp +6 -0
  494. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
  495. package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
  496. package/src/duckdb/src/planner/planner.cpp +2 -1
  497. package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
  498. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
  499. package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
  500. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  501. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +4 -15
  502. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +10 -4
  503. package/src/duckdb/src/storage/checkpoint_manager.cpp +9 -3
  504. package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
  505. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
  506. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  507. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  508. package/src/duckdb/src/storage/compression/rle.cpp +19 -15
  509. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  510. package/src/duckdb/src/storage/data_table.cpp +20 -20
  511. package/src/duckdb/src/storage/index.cpp +12 -1
  512. package/src/duckdb/src/storage/local_storage.cpp +20 -23
  513. package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
  514. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  515. package/src/duckdb/src/storage/statistics/column_statistics.cpp +57 -3
  516. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +8 -9
  517. package/src/duckdb/src/storage/statistics/list_stats.cpp +121 -0
  518. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +591 -0
  519. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
  520. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  521. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  522. package/src/duckdb/src/storage/statistics/struct_stats.cpp +133 -0
  523. package/src/duckdb/src/storage/storage_info.cpp +2 -2
  524. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +4 -10
  525. package/src/duckdb/src/storage/table/column_data.cpp +45 -46
  526. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -8
  527. package/src/duckdb/src/storage/table/column_segment.cpp +13 -14
  528. package/src/duckdb/src/storage/table/list_column_data.cpp +41 -59
  529. package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
  530. package/src/duckdb/src/storage/table/row_group.cpp +38 -32
  531. package/src/duckdb/src/storage/table/row_group_collection.cpp +94 -78
  532. package/src/duckdb/src/storage/table/scan_state.cpp +22 -3
  533. package/src/duckdb/src/storage/table/standard_column_data.cpp +7 -6
  534. package/src/duckdb/src/storage/table/struct_column_data.cpp +16 -16
  535. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  536. package/src/duckdb/src/storage/table/update_segment.cpp +20 -18
  537. package/src/duckdb/src/storage/wal_replay.cpp +8 -5
  538. package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
  539. package/src/duckdb/src/transaction/commit_state.cpp +11 -7
  540. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
  541. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +35 -0
  542. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +36 -2
  543. package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
  544. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1022 -530
  545. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +8 -0
  546. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24462 -22828
  547. package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
  548. package/src/duckdb/third_party/re2/re2/re2.h +2 -0
  549. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  550. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  551. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  552. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  553. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
  554. package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
  555. package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
  556. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  557. package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
  558. package/src/duckdb/ub_src_main_capi.cpp +2 -0
  559. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  560. package/src/duckdb/ub_src_parser.cpp +2 -0
  561. package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
  562. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  563. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  564. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  565. package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
  566. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
  567. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  568. package/src/duckdb/ub_src_planner_expression_binder.cpp +2 -0
  569. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
  570. package/src/duckdb/ub_src_storage_statistics.cpp +6 -6
  571. package/src/duckdb/ub_src_storage_table.cpp +0 -2
  572. package/src/duckdb_node.hpp +2 -1
  573. package/src/statement.cpp +5 -5
  574. package/src/utils.cpp +27 -2
  575. package/test/extension.test.ts +44 -26
  576. package/test/syntax_error.test.ts +3 -1
  577. package/filelist.cache +0 -0
  578. package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
  579. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  580. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  581. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  582. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  583. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  584. package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
  585. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  586. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  587. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  588. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  589. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
  590. package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -32,10 +32,9 @@ ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
32
32
  updates = make_unique<UpdateSegment>(*other.updates, *this);
33
33
  }
34
34
  idx_t offset = 0;
35
- for (auto segment = other.data.GetRootSegment(); segment; segment = segment->Next()) {
36
- auto &other = (ColumnSegment &)*segment;
37
- this->data.AppendSegment(ColumnSegment::CreateSegment(other, start + offset));
38
- offset += segment->count;
35
+ for (auto &segment : other.data.Segments()) {
36
+ this->data.AppendSegment(ColumnSegment::CreateSegment(segment, start + offset));
37
+ offset += segment.count;
39
38
  }
40
39
  }
41
40
 
@@ -75,7 +74,8 @@ idx_t ColumnData::GetMaxEntry() {
75
74
  }
76
75
 
77
76
  void ColumnData::InitializeScan(ColumnScanState &state) {
78
- state.current = (ColumnSegment *)data.GetRootSegment();
77
+ state.current = data.GetRootSegment();
78
+ state.segment_tree = &data;
79
79
  state.row_index = state.current ? state.current->start : 0;
80
80
  state.internal_index = state.row_index;
81
81
  state.initialized = false;
@@ -84,7 +84,8 @@ void ColumnData::InitializeScan(ColumnScanState &state) {
84
84
  }
85
85
 
86
86
  void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
87
- state.current = (ColumnSegment *)data.GetSegment(row_idx);
87
+ state.current = data.GetSegment(row_idx);
88
+ state.segment_tree = &data;
88
89
  state.row_index = row_idx;
89
90
  state.internal_index = state.current->start;
90
91
  state.initialized = false;
@@ -125,11 +126,12 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
125
126
  }
126
127
 
127
128
  if (remaining > 0) {
128
- if (!state.current->next) {
129
+ auto next = data.GetNextSegment(state.current);
130
+ if (!next) {
129
131
  break;
130
132
  }
131
133
  state.previous_states.emplace_back(std::move(state.scan_state));
132
- state.current = (ColumnSegment *)state.current->Next();
134
+ state.current = next;
133
135
  state.current->InitializeScan(state);
134
136
  state.segment_checked = false;
135
137
  D_ASSERT(state.row_index >= state.current->start &&
@@ -234,14 +236,14 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
234
236
  // no segments yet, append an empty segment
235
237
  AppendTransientSegment(l, start);
236
238
  }
237
- auto segment = (ColumnSegment *)data.GetLastSegment(l);
239
+ auto segment = data.GetLastSegment(l);
238
240
  if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
239
241
  // no transient segments yet
240
242
  auto total_rows = segment->start + segment->count;
241
243
  AppendTransientSegment(l, total_rows);
242
- state.current = (ColumnSegment *)data.GetLastSegment(l);
244
+ state.current = data.GetLastSegment(l);
243
245
  } else {
244
- state.current = (ColumnSegment *)segment;
246
+ state.current = segment;
245
247
  }
246
248
 
247
249
  D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT);
@@ -254,7 +256,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni
254
256
  while (true) {
255
257
  // append the data from the vector
256
258
  idx_t copied_elements = state.current->Append(state, vdata, offset, count);
257
- stats.Merge(*state.current->stats.statistics);
259
+ stats.Merge(state.current->stats.statistics);
258
260
  if (copied_elements == count) {
259
261
  // finished copying everything
260
262
  break;
@@ -264,7 +266,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni
264
266
  {
265
267
  auto l = data.Lock();
266
268
  AppendTransientSegment(l, state.current->start + state.current->count);
267
- state.current = (ColumnSegment *)data.GetLastSegment(l);
269
+ state.current = data.GetLastSegment(l);
268
270
  state.current->InitializeAppend(state);
269
271
  }
270
272
  offset += copied_elements;
@@ -284,7 +286,7 @@ void ColumnData::RevertAppend(row_t start_row) {
284
286
  // find the segment index that the current row belongs to
285
287
  idx_t segment_index = data.GetSegmentIndex(l, start_row);
286
288
  auto segment = data.GetSegmentByIndex(l, segment_index);
287
- auto &transient = (ColumnSegment &)*segment;
289
+ auto &transient = *segment;
288
290
  D_ASSERT(transient.segment_type == ColumnSegmentType::TRANSIENT);
289
291
 
290
292
  // remove any segments AFTER this segment: they should be deleted entirely
@@ -299,14 +301,14 @@ idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) {
299
301
  D_ASSERT(idx_t(row_id) >= start);
300
302
  // perform the fetch within the segment
301
303
  state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
302
- state.current = (ColumnSegment *)data.GetSegment(state.row_index);
304
+ state.current = data.GetSegment(state.row_index);
303
305
  state.internal_index = state.current->start;
304
306
  return ScanVector(state, result, STANDARD_VECTOR_SIZE);
305
307
  }
306
308
 
307
309
  void ColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
308
310
  idx_t result_idx) {
309
- auto segment = (ColumnSegment *)data.GetSegment(row_id);
311
+ auto segment = data.GetSegment(row_id);
310
312
 
311
313
  // now perform the fetch within the segment
312
314
  segment->FetchRow(state, row_id, result, result_idx);
@@ -357,15 +359,14 @@ void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
357
359
  }
358
360
 
359
361
  void ColumnData::CommitDropColumn() {
360
- auto segment = (ColumnSegment *)data.GetRootSegment();
361
- while (segment) {
362
- if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
363
- auto block_id = segment->GetBlockId();
362
+ for (auto &segment_p : data.Segments()) {
363
+ auto &segment = segment_p;
364
+ if (segment.segment_type == ColumnSegmentType::PERSISTENT) {
365
+ auto block_id = segment.GetBlockId();
364
366
  if (block_id != INVALID_BLOCK) {
365
367
  block_manager.MarkBlockAsModified(block_id);
366
368
  }
367
369
  }
368
- segment = (ColumnSegment *)segment->Next();
369
370
  }
370
371
  }
371
372
 
@@ -389,7 +390,7 @@ unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group,
389
390
  // scan the segments of the column data
390
391
  // set up the checkpoint state
391
392
  auto checkpoint_state = CreateCheckpointState(row_group, partial_block_manager);
392
- checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type, StatisticsType::LOCAL_STATS);
393
+ checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type).ToUnique();
393
394
 
394
395
  auto l = data.Lock();
395
396
  auto nodes = data.MoveSegments(l);
@@ -414,13 +415,19 @@ void ColumnData::DeserializeColumn(Deserializer &source) {
414
415
  idx_t data_pointer_count = source.Read<idx_t>();
415
416
  for (idx_t data_ptr = 0; data_ptr < data_pointer_count; data_ptr++) {
416
417
  // read the data pointer
417
- DataPointer data_pointer;
418
- data_pointer.row_start = source.Read<idx_t>();
419
- data_pointer.tuple_count = source.Read<idx_t>();
420
- data_pointer.block_pointer.block_id = source.Read<block_id_t>();
421
- data_pointer.block_pointer.offset = source.Read<uint32_t>();
422
- data_pointer.compression_type = source.Read<CompressionType>();
423
- data_pointer.statistics = BaseStatistics::Deserialize(source, type);
418
+ auto row_start = source.Read<idx_t>();
419
+ auto tuple_count = source.Read<idx_t>();
420
+ auto block_pointer_block_id = source.Read<block_id_t>();
421
+ auto block_pointer_offset = source.Read<uint32_t>();
422
+ auto compression_type = source.Read<CompressionType>();
423
+ auto stats = BaseStatistics::Deserialize(source, type);
424
+
425
+ DataPointer data_pointer(std::move(stats));
426
+ data_pointer.row_start = row_start;
427
+ data_pointer.tuple_count = tuple_count;
428
+ data_pointer.block_pointer.block_id = block_pointer_block_id;
429
+ data_pointer.block_pointer.offset = block_pointer_offset;
430
+ data_pointer.compression_type = compression_type;
424
431
 
425
432
  // create a persistent segment
426
433
  auto segment = ColumnSegment::CreatePersistentSegment(
@@ -458,7 +465,6 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
458
465
  while (segment) {
459
466
  ColumnSegmentInfo column_info;
460
467
  column_info.row_group_index = row_group_index;
461
- ;
462
468
  column_info.column_id = col_path[0];
463
469
  column_info.column_path = col_path_str;
464
470
  column_info.segment_idx = segment_idx;
@@ -466,8 +472,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
466
472
  column_info.segment_start = segment->start;
467
473
  column_info.segment_count = segment->count;
468
474
  column_info.compression_type = CompressionTypeToString(segment->function->type);
469
- column_info.segment_stats =
470
- segment->stats.statistics ? segment->stats.statistics->ToString() : string("No Stats");
475
+ column_info.segment_stats = segment->stats.statistics.ToString();
471
476
  column_info.has_updates = updates ? true : false;
472
477
  // persistent
473
478
  // block_id
@@ -482,7 +487,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
482
487
  result.column_segments.push_back(std::move(column_info));
483
488
 
484
489
  segment_idx++;
485
- segment = (ColumnSegment *)segment->Next();
490
+ segment = (ColumnSegment *)data.GetNextSegment(segment);
486
491
  }
487
492
  }
488
493
 
@@ -490,19 +495,13 @@ void ColumnData::Verify(RowGroup &parent) {
490
495
  #ifdef DEBUG
491
496
  D_ASSERT(this->start == parent.start);
492
497
  data.Verify();
493
- auto root = data.GetRootSegment();
494
- if (root) {
495
- D_ASSERT(root != nullptr);
496
- D_ASSERT(root->start == this->start);
497
- idx_t prev_end = root->start;
498
- while (root) {
499
- D_ASSERT(prev_end == root->start);
500
- prev_end = root->start + root->count;
501
- if (!root->next) {
502
- D_ASSERT(prev_end == parent.start + parent.count);
503
- }
504
- root = root->Next();
505
- }
498
+ idx_t current_index = 0;
499
+ idx_t current_start = this->start;
500
+ for (auto &segment : data.Segments()) {
501
+ D_ASSERT(segment.index == current_index);
502
+ D_ASSERT(segment.start == current_start);
503
+ current_start += segment.count;
504
+ current_index++;
506
505
  }
507
506
  #endif
508
507
  }
@@ -38,7 +38,7 @@ ColumnCheckpointState &ColumnDataCheckpointer::GetCheckpointState() {
38
38
  void ColumnDataCheckpointer::ScanSegments(const std::function<void(Vector &, idx_t)> &callback) {
39
39
  Vector scan_vector(intermediate.GetType(), nullptr);
40
40
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
41
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
41
+ auto segment = nodes[segment_idx].node.get();
42
42
  ColumnScanState scan_state;
43
43
  scan_state.current = segment;
44
44
  segment->InitializeScan(scan_state);
@@ -163,7 +163,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
163
163
  // since the segments will be rewritten their old on disk data is no longer required
164
164
  auto &block_manager = col_data.block_manager;
165
165
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
166
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
166
+ auto segment = nodes[segment_idx].node.get();
167
167
  if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
168
168
  // persistent segment has updates: mark it as modified and rewrite the block with the merged updates
169
169
  auto block_id = segment->GetBlockId();
@@ -194,7 +194,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
194
194
 
195
195
  bool ColumnDataCheckpointer::HasChanges() {
196
196
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
197
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
197
+ auto segment = nodes[segment_idx].node.get();
198
198
  if (segment->segment_type == ColumnSegmentType::TRANSIENT) {
199
199
  // transient segment: always need to write to disk
200
200
  return true;
@@ -214,20 +214,19 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
214
214
  // all segments are persistent and there are no updates
215
215
  // we only need to write the metadata
216
216
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
217
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
217
+ auto segment = nodes[segment_idx].node.get();
218
218
  D_ASSERT(segment->segment_type == ColumnSegmentType::PERSISTENT);
219
219
 
220
220
  // set up the data pointer directly using the data from the persistent segment
221
- DataPointer pointer;
221
+ DataPointer pointer(segment->stats.statistics.Copy());
222
222
  pointer.block_pointer.block_id = segment->GetBlockId();
223
223
  pointer.block_pointer.offset = segment->GetBlockOffset();
224
224
  pointer.row_start = segment->start;
225
225
  pointer.tuple_count = segment->count;
226
226
  pointer.compression_type = segment->function->type;
227
- pointer.statistics = segment->stats.statistics->Copy();
228
227
 
229
228
  // merge the persistent stats into the global column stats
230
- state.global_stats->Merge(*segment->stats.statistics);
229
+ state.global_stats->Merge(segment->stats.statistics);
231
230
 
232
231
  // directly append the current segment to the new tree
233
232
  state.new_tree.AppendSegment(std::move(nodes[segment_idx].node));
@@ -236,7 +235,7 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
236
235
  }
237
236
  }
238
237
 
239
- void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode> nodes) {
238
+ void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode<ColumnSegment>> nodes) {
240
239
  D_ASSERT(!nodes.empty());
241
240
  this->nodes = std::move(nodes);
242
241
  // first check if any of the segments have changes
@@ -19,7 +19,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
19
19
  block_id_t block_id, idx_t offset,
20
20
  const LogicalType &type, idx_t start, idx_t count,
21
21
  CompressionType compression_type,
22
- unique_ptr<BaseStatistics> statistics) {
22
+ BaseStatistics statistics) {
23
23
  auto &config = DBConfig::GetConfig(db);
24
24
  CompressionFunction *function;
25
25
  shared_ptr<BlockHandle> block;
@@ -48,7 +48,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance
48
48
  buffer_manager.Allocate(segment_size, false, &block);
49
49
  }
50
50
  return make_unique<ColumnSegment>(db, std::move(block), type, ColumnSegmentType::TRANSIENT, start, 0, function,
51
- nullptr, INVALID_BLOCK, 0, segment_size);
51
+ BaseStatistics::CreateEmpty(type), INVALID_BLOCK, 0, segment_size);
52
52
  }
53
53
 
54
54
  unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx_t start) {
@@ -57,11 +57,11 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
57
57
 
58
58
  ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
59
59
  ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
60
- unique_ptr<BaseStatistics> statistics, block_id_t block_id_p, idx_t offset_p,
61
- idx_t segment_size_p)
62
- : SegmentBase(start, count), db(db), type(std::move(type_p)), type_size(GetTypeIdSize(type.InternalType())),
63
- segment_type(segment_type), function(function_p), stats(type, std::move(statistics)), block(std::move(block)),
64
- block_id(block_id_p), offset(offset_p), segment_size(segment_size_p) {
60
+ BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p)
61
+ : SegmentBase<ColumnSegment>(start, count), db(db), type(std::move(type_p)),
62
+ type_size(GetTypeIdSize(type.InternalType())), segment_type(segment_type), function(function_p),
63
+ stats(std::move(statistics)), block(std::move(block)), block_id(block_id_p), offset(offset_p),
64
+ segment_size(segment_size_p) {
65
65
  D_ASSERT(function);
66
66
  if (function->init_segment) {
67
67
  segment_state = function->init_segment(*this, block_id);
@@ -69,10 +69,10 @@ ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block
69
69
  }
70
70
 
71
71
  ColumnSegment::ColumnSegment(ColumnSegment &other, idx_t start)
72
- : SegmentBase(start, other.count), db(other.db), type(std::move(other.type)), type_size(other.type_size),
73
- segment_type(other.segment_type), function(other.function), stats(std::move(other.stats)),
74
- block(std::move(other.block)), block_id(other.block_id), offset(other.offset), segment_size(other.segment_size),
75
- segment_state(std::move(other.segment_state)) {
72
+ : SegmentBase<ColumnSegment>(start, other.count.load()), db(other.db), type(std::move(other.type)),
73
+ type_size(other.type_size), segment_type(other.segment_type), function(other.function),
74
+ stats(std::move(other.stats)), block(std::move(other.block)), block_id(other.block_id), offset(other.offset),
75
+ segment_size(other.segment_size), segment_state(std::move(other.segment_state)) {
76
76
  }
77
77
 
78
78
  ColumnSegment::~ColumnSegment() {
@@ -181,13 +181,12 @@ void ColumnSegment::ConvertToPersistent(BlockManager *block_manager, block_id_t
181
181
  block_id = block_id_p;
182
182
  offset = 0;
183
183
 
184
- D_ASSERT(stats.statistics);
185
184
  if (block_id == INVALID_BLOCK) {
186
185
  // constant block: reset the block buffer
187
- D_ASSERT(stats.statistics->IsConstant());
186
+ D_ASSERT(stats.statistics.IsConstant());
188
187
  block.reset();
189
188
  } else {
190
- D_ASSERT(!stats.statistics->IsConstant());
189
+ D_ASSERT(!stats.statistics.IsConstant());
191
190
  // non-constant block: write the block to disk
192
191
  // the data for the block already exists in-memory of our block
193
192
  // instead of copying the data we alter some metadata so the buffer points to an on-disk block
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/storage/table/list_column_data.hpp"
2
- #include "duckdb/storage/statistics/list_statistics.hpp"
2
+ #include "duckdb/storage/statistics/list_stats.hpp"
3
3
  #include "duckdb/transaction/transaction.hpp"
4
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -39,15 +40,14 @@ void ListColumnData::InitializeScan(ColumnScanState &state) {
39
40
  state.child_states.push_back(std::move(child_state));
40
41
  }
41
42
 
42
- list_entry_t ListColumnData::FetchListEntry(idx_t row_idx) {
43
- auto segment = (ColumnSegment *)data.GetSegment(row_idx);
43
+ uint64_t ListColumnData::FetchListOffset(idx_t row_idx) {
44
+ auto segment = data.GetSegment(row_idx);
44
45
  ColumnFetchState fetch_state;
45
46
  Vector result(type, 1);
46
47
  segment->FetchRow(fetch_state, row_idx, result, 0);
47
48
 
48
49
  // initialize the child scan with the required offset
49
- auto list_data = FlatVector::GetData<list_entry_t>(result);
50
- return list_data[0];
50
+ return FlatVector::GetData<uint64_t>(result)[0];
51
51
  }
52
52
 
53
53
  void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
@@ -63,8 +63,7 @@ void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_
63
63
  state.child_states.push_back(std::move(validity_state));
64
64
 
65
65
  // we need to read the list at position row_idx to get the correct row offset of the child
66
- auto list_entry = FetchListEntry(row_idx);
67
- auto child_offset = list_entry.offset;
66
+ auto child_offset = row_idx == start ? 0 : FetchListOffset(row_idx - 1);
68
67
 
69
68
  D_ASSERT(child_offset <= child_column->GetMaxEntry());
70
69
  ColumnScanState child_state;
@@ -89,26 +88,26 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
89
88
  // updates not supported for lists
90
89
  D_ASSERT(!updates);
91
90
 
92
- idx_t scan_count = ScanVector(state, result, count);
91
+ Vector offset_vector(LogicalType::UBIGINT, count);
92
+ idx_t scan_count = ScanVector(state, offset_vector, count);
93
93
  D_ASSERT(scan_count > 0);
94
94
  validity.ScanCount(state.child_states[0], result, count);
95
95
 
96
- auto data = FlatVector::GetData<list_entry_t>(result);
97
- auto first_entry = data[0];
96
+ auto data = FlatVector::GetData<uint64_t>(offset_vector);
98
97
  auto last_entry = data[scan_count - 1];
99
98
 
100
- #ifdef DEBUG
101
- for (idx_t i = 1; i < scan_count; i++) {
102
- D_ASSERT(data[i].offset == data[i - 1].offset + data[i - 1].length);
103
- }
104
- #endif
105
99
  // shift all offsets so they are 0 at the first entry
100
+ auto result_data = FlatVector::GetData<list_entry_t>(result);
101
+ auto base_offset = state.last_offset;
102
+ idx_t current_offset = 0;
106
103
  for (idx_t i = 0; i < scan_count; i++) {
107
- data[i].offset -= first_entry.offset;
104
+ result_data[i].offset = current_offset;
105
+ result_data[i].length = data[i] - current_offset - base_offset;
106
+ current_offset += result_data[i].length;
108
107
  }
109
108
 
110
- D_ASSERT(last_entry.offset >= first_entry.offset);
111
- idx_t child_scan_count = last_entry.offset + last_entry.length - first_entry.offset;
109
+ D_ASSERT(last_entry >= base_offset);
110
+ idx_t child_scan_count = last_entry - base_offset;
112
111
  ListVector::Reserve(result, child_scan_count);
113
112
 
114
113
  if (child_scan_count > 0) {
@@ -118,6 +117,7 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
118
117
  child_column->start + child_column->GetMaxEntry());
119
118
  child_column->ScanCount(state.child_states[1], child_entry, child_scan_count);
120
119
  }
120
+ state.last_offset = last_entry;
121
121
 
122
122
  ListVector::SetListSize(result, child_scan_count);
123
123
  return scan_count;
@@ -130,19 +130,19 @@ void ListColumnData::Skip(ColumnScanState &state, idx_t count) {
130
130
  // we need to read the list entries/offsets to figure out how much to skip
131
131
  // note that we only need to read the first and last entry
132
132
  // however, let's just read all "count" entries for now
133
- auto data = unique_ptr<list_entry_t[]>(new list_entry_t[count]);
134
- Vector result(type, (data_ptr_t)data.get());
133
+ Vector result(LogicalType::UBIGINT, count);
135
134
  idx_t scan_count = ScanVector(state, result, count);
136
135
  if (scan_count == 0) {
137
136
  return;
138
137
  }
139
138
 
140
- auto &first_entry = data[0];
141
- auto &last_entry = data[scan_count - 1];
142
- idx_t child_scan_count = last_entry.offset + last_entry.length - first_entry.offset;
139
+ auto data = FlatVector::GetData<uint64_t>(result);
140
+ auto last_entry = data[scan_count - 1];
141
+ idx_t child_scan_count = last_entry - state.last_offset;
143
142
  if (child_scan_count == 0) {
144
143
  return;
145
144
  }
145
+ state.last_offset = last_entry;
146
146
 
147
147
  // skip the child state forward by the child_scan_count
148
148
  child_column->Skip(state.child_states[1], child_scan_count);
@@ -163,10 +163,8 @@ void ListColumnData::InitializeAppend(ColumnAppendState &state) {
163
163
  state.child_appends.push_back(std::move(child_append_state));
164
164
  }
165
165
 
166
- void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, Vector &vector, idx_t count) {
166
+ void ListColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) {
167
167
  D_ASSERT(count > 0);
168
- auto &stats = (ListStatistics &)stats_p;
169
-
170
168
  UnifiedVectorFormat list_data;
171
169
  vector.ToUnifiedFormat(count, list_data);
172
170
  auto &list_validity = list_data.validity;
@@ -177,8 +175,8 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
177
175
  idx_t child_count = 0;
178
176
 
179
177
  ValidityMask append_mask(count);
180
- auto append_offsets = unique_ptr<list_entry_t[]>(new list_entry_t[count]);
181
- bool child_contiguous = false;
178
+ auto append_offsets = unique_ptr<uint64_t[]>(new uint64_t[count]);
179
+ bool child_contiguous = true;
182
180
  for (idx_t i = 0; i < count; i++) {
183
181
  auto input_idx = list_data.sel->get_index(i);
184
182
  if (list_validity.RowIsValid(input_idx)) {
@@ -186,17 +184,11 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
186
184
  if (input_list.offset != child_count) {
187
185
  child_contiguous = false;
188
186
  }
189
- append_offsets[i].offset = start_offset + child_count;
190
- append_offsets[i].length = input_list.length;
187
+ append_offsets[i] = start_offset + child_count + input_list.length;
191
188
  child_count += input_list.length;
192
189
  } else {
193
190
  append_mask.SetInvalid(i);
194
- if (i > 0) {
195
- append_offsets[i].offset = append_offsets[i - 1].offset + append_offsets[i - 1].length;
196
- } else {
197
- append_offsets[i].offset = start_offset;
198
- }
199
- append_offsets[i].length = 0;
191
+ append_offsets[i] = start_offset + child_count;
200
192
  }
201
193
  }
202
194
  auto &list_child = ListVector::GetEntry(vector);
@@ -218,27 +210,19 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
218
210
  D_ASSERT(current_count == child_count);
219
211
  child_vector.Slice(list_child, child_sel, child_count);
220
212
  }
221
- #ifdef DEBUG
222
- D_ASSERT(append_offsets[0].offset == start_offset);
223
- for (idx_t i = 1; i < count; i++) {
224
- D_ASSERT(append_offsets[i].offset == append_offsets[i - 1].offset + append_offsets[i - 1].length);
225
- }
226
- D_ASSERT(append_offsets[count - 1].offset + append_offsets[count - 1].length - append_offsets[0].offset ==
227
- child_count);
228
- #endif
229
213
 
230
214
  UnifiedVectorFormat vdata;
231
- vdata.validity = append_mask;
232
215
  vdata.sel = FlatVector::IncrementalSelectionVector();
233
216
  vdata.data = (data_ptr_t)append_offsets.get();
234
217
 
235
218
  // append the list offsets
236
219
  ColumnData::AppendData(stats, state, vdata, count);
237
220
  // append the validity data
238
- validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count);
221
+ vdata.validity = append_mask;
222
+ validity.AppendData(stats, state.child_appends[0], vdata, count);
239
223
  // append the child vector
240
224
  if (child_count > 0) {
241
- child_column->Append(*stats.child_stats, state.child_appends[1], child_vector, child_count);
225
+ child_column->Append(ListStats::GetChildStats(stats), state.child_appends[1], child_vector, child_count);
242
226
  }
243
227
  }
244
228
 
@@ -248,8 +232,8 @@ void ListColumnData::RevertAppend(row_t start_row) {
248
232
  auto column_count = GetMaxEntry();
249
233
  if (column_count > start) {
250
234
  // revert append in the child column
251
- auto list_entry = FetchListEntry(column_count - 1);
252
- child_column->RevertAppend(list_entry.offset + list_entry.length);
235
+ auto list_offset = FetchListOffset(column_count - 1);
236
+ child_column->RevertAppend(list_offset);
253
237
  }
254
238
  }
255
239
 
@@ -281,19 +265,18 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta
281
265
  auto child_state = make_unique<ColumnFetchState>();
282
266
  state.child_states.push_back(std::move(child_state));
283
267
  }
284
- // fetch the list_entry_t and the validity mask for that list
285
- auto segment = (ColumnSegment *)data.GetSegment(row_id);
286
268
 
287
269
  // now perform the fetch within the segment
288
- segment->FetchRow(state, row_id, result, result_idx);
270
+ auto start_offset = idx_t(row_id) == this->start ? 0 : FetchListOffset(row_id - 1);
271
+ auto end_offset = FetchListOffset(row_id);
289
272
  validity.FetchRow(transaction, *state.child_states[0], row_id, result, result_idx);
290
273
 
291
274
  auto &validity = FlatVector::Validity(result);
292
275
  auto list_data = FlatVector::GetData<list_entry_t>(result);
293
276
  auto &list_entry = list_data[result_idx];
294
- auto original_offset = list_entry.offset;
295
277
  // set the list entry offset to the size of the current list
296
278
  list_entry.offset = ListVector::GetListSize(result);
279
+ list_entry.length = end_offset - start_offset;
297
280
  if (!validity.RowIsValid(result_idx)) {
298
281
  // the list is NULL! no need to fetch the child
299
282
  D_ASSERT(list_entry.length == 0);
@@ -307,7 +290,7 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta
307
290
  auto &child_type = ListType::GetChildType(result.GetType());
308
291
  Vector child_scan(child_type, child_scan_count);
309
292
  // seek the scan towards the specified position and read [length] entries
310
- child_column->InitializeScanWithOffset(*child_state, start + original_offset);
293
+ child_column->InitializeScanWithOffset(*child_state, start + start_offset);
311
294
  D_ASSERT(child_type.InternalType() == PhysicalType::STRUCT ||
312
295
  child_state->row_index + child_scan_count - this->start <= child_column->GetMaxEntry());
313
296
  child_column->ScanCount(*child_state, child_scan, child_scan_count);
@@ -324,7 +307,7 @@ void ListColumnData::CommitDropColumn() {
324
307
  struct ListColumnCheckpointState : public ColumnCheckpointState {
325
308
  ListColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, PartialBlockManager &partial_block_manager)
326
309
  : ColumnCheckpointState(row_group, column_data, partial_block_manager) {
327
- global_stats = make_unique<ListStatistics>(column_data.type);
310
+ global_stats = ListStats::CreateEmpty(column_data.type).ToUnique();
328
311
  }
329
312
 
330
313
  unique_ptr<ColumnCheckpointState> validity_state;
@@ -333,10 +316,8 @@ struct ListColumnCheckpointState : public ColumnCheckpointState {
333
316
  public:
334
317
  unique_ptr<BaseStatistics> GetStatistics() override {
335
318
  auto stats = global_stats->Copy();
336
- auto &list_stats = (ListStatistics &)*stats;
337
- stats->validity_stats = validity_state->GetStatistics();
338
- list_stats.child_stats = child_state->GetStatistics();
339
- return stats;
319
+ ListStats::SetChildStats(stats, child_state->GetStatistics());
320
+ return stats.ToUnique();
340
321
  }
341
322
 
342
323
  void WriteDataPointers(RowGroupWriter &writer) override {
@@ -376,6 +357,7 @@ void ListColumnData::DeserializeColumn(Deserializer &source) {
376
357
  }
377
358
 
378
359
  void ListColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result) {
360
+ ColumnData::GetStorageInfo(row_group_index, col_path, result);
379
361
  col_path.push_back(0);
380
362
  validity.GetStorageInfo(row_group_index, col_path, result);
381
363
  col_path.back() = 1;
@@ -3,7 +3,8 @@
3
3
 
4
4
  namespace duckdb {
5
5
 
6
- PersistentTableData::PersistentTableData(idx_t column_count) {
6
+ PersistentTableData::PersistentTableData(idx_t column_count)
7
+ : total_rows(0), row_group_count(0), block_id(INVALID_BLOCK), offset(0) {
7
8
  }
8
9
 
9
10
  PersistentTableData::~PersistentTableData() {