duckdb 0.9.3-dev0.0 → 0.9.3-dev14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1215) hide show
  1. package/LICENSE +1 -1
  2. package/binding.gyp +32 -7
  3. package/package.json +1 -1
  4. package/src/connection.cpp +6 -6
  5. package/src/database.cpp +12 -10
  6. package/src/duckdb/extension/icu/icu-datefunc.cpp +22 -10
  7. package/src/duckdb/extension/icu/icu-datepart.cpp +42 -22
  8. package/src/duckdb/extension/icu/icu-datetrunc.cpp +40 -7
  9. package/src/duckdb/extension/icu/icu-strptime.cpp +14 -8
  10. package/src/duckdb/extension/icu/icu-table-range.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-timezone.cpp +43 -16
  12. package/src/duckdb/extension/icu/icu_extension.cpp +1 -1
  13. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -0
  14. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  15. package/src/duckdb/extension/json/buffered_json_reader.cpp +78 -62
  16. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +11 -7
  17. package/src/duckdb/extension/json/include/json_common.hpp +0 -14
  18. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
  19. package/src/duckdb/extension/json/include/json_functions.hpp +1 -0
  20. package/src/duckdb/extension/json/include/json_scan.hpp +19 -5
  21. package/src/duckdb/extension/json/include/json_serializer.hpp +2 -1
  22. package/src/duckdb/extension/json/include/json_structure.hpp +12 -10
  23. package/src/duckdb/extension/json/json_common.cpp +1 -0
  24. package/src/duckdb/extension/json/json_deserializer.cpp +13 -0
  25. package/src/duckdb/extension/json/json_extension.cpp +3 -3
  26. package/src/duckdb/extension/json/json_functions/copy_json.cpp +8 -4
  27. package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
  28. package/src/duckdb/extension/json/json_functions/json_contains.cpp +3 -3
  29. package/src/duckdb/extension/json/json_functions/json_create.cpp +53 -8
  30. package/src/duckdb/extension/json/json_functions/json_extract.cpp +10 -6
  31. package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
  32. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -3
  33. package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +210 -0
  34. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +22 -19
  35. package/src/duckdb/extension/json/json_functions/json_structure.cpp +71 -43
  36. package/src/duckdb/extension/json/json_functions/json_transform.cpp +105 -8
  37. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  38. package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
  39. package/src/duckdb/extension/json/json_functions/read_json.cpp +43 -18
  40. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +1 -1
  41. package/src/duckdb/extension/json/json_functions.cpp +9 -5
  42. package/src/duckdb/extension/json/json_scan.cpp +147 -125
  43. package/src/duckdb/extension/json/json_serializer.cpp +9 -0
  44. package/src/duckdb/extension/json/serialize_json.cpp +6 -0
  45. package/src/duckdb/extension/parquet/column_reader.cpp +53 -18
  46. package/src/duckdb/extension/parquet/column_writer.cpp +29 -6
  47. package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -1
  48. package/src/duckdb/extension/parquet/include/decode_utils.hpp +2 -2
  49. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +87 -0
  50. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +4 -3
  51. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +16 -3
  52. package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +10 -0
  53. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +34 -6
  54. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +3 -2
  55. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +2 -0
  56. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -1
  57. package/src/duckdb/extension/parquet/parquet_crypto.cpp +370 -0
  58. package/src/duckdb/extension/parquet/parquet_extension.cpp +254 -24
  59. package/src/duckdb/extension/parquet/parquet_metadata.cpp +204 -16
  60. package/src/duckdb/extension/parquet/parquet_reader.cpp +108 -34
  61. package/src/duckdb/extension/parquet/parquet_statistics.cpp +75 -30
  62. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +15 -8
  63. package/src/duckdb/extension/parquet/parquet_writer.cpp +62 -10
  64. package/src/duckdb/extension/parquet/serialize_parquet.cpp +60 -0
  65. package/src/duckdb/src/catalog/catalog.cpp +23 -25
  66. package/src/duckdb/src/catalog/catalog_entry/column_dependency_manager.cpp +1 -0
  67. package/src/duckdb/src/catalog/catalog_entry/dependency/dependency_dependent_entry.cpp +31 -0
  68. package/src/duckdb/src/catalog/catalog_entry/dependency/dependency_entry.cpp +44 -0
  69. package/src/duckdb/src/catalog/catalog_entry/dependency/dependency_subject_entry.cpp +31 -0
  70. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +35 -10
  71. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +22 -6
  72. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +110 -33
  73. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +33 -17
  74. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +16 -0
  75. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +7 -6
  76. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +88 -14
  77. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +6 -15
  78. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +20 -20
  79. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +4 -0
  80. package/src/duckdb/src/catalog/catalog_entry.cpp +29 -0
  81. package/src/duckdb/src/catalog/catalog_set.cpp +358 -343
  82. package/src/duckdb/src/catalog/catalog_transaction.cpp +4 -0
  83. package/src/duckdb/src/catalog/default/default_functions.cpp +13 -4
  84. package/src/duckdb/src/catalog/default/default_schemas.cpp +5 -1
  85. package/src/duckdb/src/catalog/default/default_views.cpp +6 -2
  86. package/src/duckdb/src/catalog/dependency_catalog_set.cpp +51 -0
  87. package/src/duckdb/src/catalog/dependency_manager.cpp +510 -114
  88. package/src/duckdb/src/catalog/duck_catalog.cpp +4 -4
  89. package/src/duckdb/src/common/adbc/adbc.cpp +73 -53
  90. package/src/duckdb/src/common/adbc/driver_manager.cpp +1101 -268
  91. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -9
  92. package/src/duckdb/src/common/bind_helpers.cpp +1 -0
  93. package/src/duckdb/src/common/box_renderer.cpp +52 -1
  94. package/src/duckdb/src/common/compressed_file_system.cpp +1 -0
  95. package/src/duckdb/src/common/constants.cpp +0 -1
  96. package/src/duckdb/src/common/enum_util.cpp +522 -107
  97. package/src/duckdb/src/common/enums/catalog_type.cpp +64 -1
  98. package/src/duckdb/src/common/enums/compression_type.cpp +14 -0
  99. package/src/duckdb/src/common/enums/date_part_specifier.cpp +1 -0
  100. package/src/duckdb/src/common/enums/expression_type.cpp +4 -0
  101. package/src/duckdb/src/common/enums/file_compression_type.cpp +1 -0
  102. package/src/duckdb/src/common/enums/join_type.cpp +33 -0
  103. package/src/duckdb/src/common/enums/logical_operator_type.cpp +5 -3
  104. package/src/duckdb/src/common/enums/optimizer_type.cpp +9 -1
  105. package/src/duckdb/src/common/enums/physical_operator_type.cpp +8 -4
  106. package/src/duckdb/src/common/enums/statement_type.cpp +2 -2
  107. package/src/duckdb/src/common/error_data.cpp +113 -0
  108. package/src/duckdb/src/common/exception/binder_exception.cpp +47 -0
  109. package/src/duckdb/src/common/exception/catalog_exception.cpp +55 -0
  110. package/src/duckdb/src/common/exception/parser_exception.cpp +19 -0
  111. package/src/duckdb/src/common/exception.cpp +110 -121
  112. package/src/duckdb/src/common/exception_format_value.cpp +9 -1
  113. package/src/duckdb/src/common/extra_type_info.cpp +48 -0
  114. package/src/duckdb/src/common/file_system.cpp +12 -7
  115. package/src/duckdb/src/common/gzip_file_system.cpp +18 -18
  116. package/src/duckdb/src/common/hive_partitioning.cpp +5 -1
  117. package/src/duckdb/src/common/http_state.cpp +20 -3
  118. package/src/duckdb/src/common/local_file_system.cpp +214 -15
  119. package/src/duckdb/src/common/multi_file_reader.cpp +20 -7
  120. package/src/duckdb/src/common/operator/cast_operators.cpp +397 -414
  121. package/src/duckdb/src/common/operator/convert_to_string.cpp +4 -0
  122. package/src/duckdb/src/common/operator/string_cast.cpp +5 -0
  123. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +61 -12
  124. package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +13 -4
  125. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  126. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +2 -1
  127. package/src/duckdb/src/common/row_operations/row_gather.cpp +7 -1
  128. package/src/duckdb/src/common/row_operations/row_heap_gather.cpp +78 -12
  129. package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +222 -61
  130. package/src/duckdb/src/common/row_operations/row_matcher.cpp +6 -1
  131. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +51 -0
  132. package/src/duckdb/src/common/row_operations/row_scatter.cpp +8 -1
  133. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +6 -0
  134. package/src/duckdb/src/common/serializer/binary_serializer.cpp +5 -0
  135. package/src/duckdb/src/common/serializer/serializer.cpp +19 -0
  136. package/src/duckdb/src/common/sort/comparators.cpp +126 -0
  137. package/src/duckdb/src/common/sort/partition_state.cpp +17 -17
  138. package/src/duckdb/src/common/sort/radix_sort.cpp +2 -1
  139. package/src/duckdb/src/common/sort/sort_state.cpp +10 -5
  140. package/src/duckdb/src/common/sort/sorted_block.cpp +7 -6
  141. package/src/duckdb/src/common/string_util.cpp +302 -24
  142. package/src/duckdb/src/common/tree_renderer.cpp +8 -6
  143. package/src/duckdb/src/common/types/cast_helpers.cpp +6 -0
  144. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
  145. package/src/duckdb/src/common/types/column/column_data_collection.cpp +58 -0
  146. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +8 -1
  147. package/src/duckdb/src/common/types/data_chunk.cpp +9 -0
  148. package/src/duckdb/src/common/types/date.cpp +2 -2
  149. package/src/duckdb/src/common/types/hash.cpp +9 -1
  150. package/src/duckdb/src/common/types/hugeint.cpp +229 -51
  151. package/src/duckdb/src/common/types/hyperloglog.cpp +10 -3
  152. package/src/duckdb/src/common/types/interval.cpp +67 -12
  153. package/src/duckdb/src/common/types/list_segment.cpp +98 -4
  154. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +11 -1
  155. package/src/duckdb/src/common/types/row/row_data_collection.cpp +1 -1
  156. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +2 -2
  157. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -2
  158. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +63 -3
  159. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +331 -127
  160. package/src/duckdb/src/common/types/time.cpp +47 -75
  161. package/src/duckdb/src/common/types/timestamp.cpp +16 -3
  162. package/src/duckdb/src/common/types/uhugeint.cpp +746 -0
  163. package/src/duckdb/src/common/types/validity_mask.cpp +6 -2
  164. package/src/duckdb/src/common/types/value.cpp +183 -27
  165. package/src/duckdb/src/common/types/vector.cpp +331 -30
  166. package/src/duckdb/src/common/types/vector_buffer.cpp +29 -1
  167. package/src/duckdb/src/common/types/vector_cache.cpp +22 -1
  168. package/src/duckdb/src/common/types.cpp +606 -90
  169. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +21 -1
  170. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +5 -0
  171. package/src/duckdb/src/common/vector_operations/generators.cpp +2 -2
  172. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +131 -2
  173. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +26 -4
  174. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +41 -0
  175. package/src/duckdb/src/common/vector_operations/vector_storage.cpp +7 -0
  176. package/src/duckdb/src/common/virtual_file_system.cpp +0 -1
  177. package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +2 -1
  178. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +144 -56
  179. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +2 -0
  180. package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +27 -0
  181. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +4 -3
  182. package/src/duckdb/src/core_functions/aggregate/distributive/kurtosis.cpp +25 -5
  183. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +100 -3
  184. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +2 -1
  185. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +9 -1
  186. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +83 -52
  187. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +485 -289
  188. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +3 -3
  189. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +24 -26
  190. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +34 -37
  191. package/src/duckdb/src/core_functions/function_list.cpp +30 -1
  192. package/src/duckdb/src/core_functions/lambda_functions.cpp +416 -0
  193. package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +294 -0
  194. package/src/duckdb/src/core_functions/scalar/array/array_value.cpp +87 -0
  195. package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +686 -0
  196. package/src/duckdb/src/core_functions/scalar/blob/encode.cpp +1 -0
  197. package/src/duckdb/src/core_functions/scalar/date/current.cpp +3 -3
  198. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +295 -20
  199. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +1 -0
  200. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +8 -7
  201. package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +84 -23
  202. package/src/duckdb/src/core_functions/scalar/generic/error.cpp +4 -4
  203. package/src/duckdb/src/core_functions/scalar/generic/least.cpp +7 -8
  204. package/src/duckdb/src/core_functions/scalar/generic/stats.cpp +1 -1
  205. package/src/duckdb/src/core_functions/scalar/generic/system_functions.cpp +17 -6
  206. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +8 -0
  207. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +28 -14
  208. package/src/duckdb/src/core_functions/scalar/list/list_filter.cpp +49 -0
  209. package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +230 -0
  210. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +85 -16
  211. package/src/duckdb/src/core_functions/scalar/list/list_transform.cpp +41 -0
  212. package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +21 -2
  213. package/src/duckdb/src/core_functions/scalar/map/map.cpp +6 -5
  214. package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
  215. package/src/duckdb/src/core_functions/scalar/map/map_from_entries.cpp +1 -2
  216. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +24 -4
  217. package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +6 -0
  218. package/src/duckdb/src/core_functions/scalar/random/random.cpp +2 -2
  219. package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +2 -2
  220. package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +28 -0
  221. package/src/duckdb/src/core_functions/scalar/string/bar.cpp +9 -4
  222. package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +7 -2
  223. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +63 -4
  224. package/src/duckdb/src/core_functions/scalar/string/pad.cpp +2 -2
  225. package/src/duckdb/src/core_functions/scalar/string/parse_path.cpp +348 -0
  226. package/src/duckdb/src/core_functions/scalar/string/regexp_escape.cpp +22 -0
  227. package/src/duckdb/src/core_functions/scalar/string/string_split.cpp +6 -5
  228. package/src/duckdb/src/core_functions/scalar/struct/struct_insert.cpp +3 -3
  229. package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -1
  230. package/src/duckdb/src/execution/aggregate_hashtable.cpp +9 -2
  231. package/src/duckdb/src/execution/column_binding_resolver.cpp +44 -10
  232. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +4 -0
  233. package/src/duckdb/src/execution/expression_executor/execute_case.cpp +4 -0
  234. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +4 -0
  235. package/src/duckdb/src/execution/expression_executor.cpp +2 -1
  236. package/src/duckdb/src/execution/index/art/art.cpp +202 -53
  237. package/src/duckdb/src/execution/index/art/art_key.cpp +20 -27
  238. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +52 -17
  239. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -8
  240. package/src/duckdb/src/execution/index/index_type_set.cpp +32 -0
  241. package/src/duckdb/src/execution/index/unknown_index.cpp +65 -0
  242. package/src/duckdb/src/execution/join_hashtable.cpp +151 -174
  243. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp +4 -0
  244. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +4 -0
  245. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +2 -1
  246. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +82 -36
  247. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +58 -32
  248. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +35 -19
  249. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +90 -0
  250. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +124 -0
  251. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +97 -0
  252. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +71 -0
  253. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +98 -0
  254. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +105 -0
  255. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +63 -0
  256. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +1091 -0
  257. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +124 -26
  258. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +117 -129
  259. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +46 -22
  260. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +83 -199
  261. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +21 -122
  262. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +18 -17
  263. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +22 -0
  264. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +201 -0
  265. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +221 -0
  266. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +204 -0
  267. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +186 -0
  268. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +532 -0
  269. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +85 -0
  270. package/src/duckdb/src/execution/operator/helper/physical_create_secret.cpp +21 -0
  271. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +1 -1
  272. package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +2 -2
  273. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +34 -9
  274. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +10 -0
  275. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +1 -0
  276. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +25 -10
  277. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +7 -8
  278. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  279. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -2
  280. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  281. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -127
  282. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +221 -61
  283. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +18 -21
  284. package/src/duckdb/src/execution/operator/join/physical_join.cpp +10 -5
  285. package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +137 -0
  286. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +11 -4
  287. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +2 -2
  288. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +19 -4
  289. package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +121 -0
  290. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
  291. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +59 -0
  292. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +132 -92
  293. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +54 -54
  294. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  295. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +4 -0
  296. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +21 -3
  297. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +1 -0
  298. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +61 -43
  299. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +15 -13
  300. package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +1 -0
  301. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +10 -0
  302. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +44 -90
  303. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -0
  304. package/src/duckdb/src/execution/operator/set/physical_union.cpp +8 -4
  305. package/src/duckdb/src/execution/physical_operator.cpp +3 -1
  306. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +30 -143
  307. package/src/duckdb/src/execution/physical_plan/plan_copy_database.cpp +12 -0
  308. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +11 -4
  309. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +8 -8
  310. package/src/duckdb/src/execution/physical_plan/plan_create_secret.cpp +11 -0
  311. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +9 -8
  312. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +5 -3
  313. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +15 -6
  314. package/src/duckdb/src/execution/physical_plan/plan_positional_join.cpp +1 -0
  315. package/src/duckdb/src/execution/physical_plan/plan_pragma.cpp +1 -1
  316. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -1
  317. package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +90 -12
  318. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +67 -22
  319. package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -3
  320. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +163 -32
  321. package/src/duckdb/src/execution/reservoir_sample.cpp +112 -32
  322. package/src/duckdb/src/execution/window_executor.cpp +291 -26
  323. package/src/duckdb/src/execution/window_segment_tree.cpp +958 -114
  324. package/src/duckdb/src/function/aggregate/distributive/count.cpp +18 -16
  325. package/src/duckdb/src/function/aggregate/distributive/first.cpp +11 -4
  326. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +303 -131
  327. package/src/duckdb/src/function/cast/array_casts.cpp +226 -0
  328. package/src/duckdb/src/function/cast/bit_cast.cpp +2 -0
  329. package/src/duckdb/src/function/cast/cast_function_set.cpp +13 -2
  330. package/src/duckdb/src/function/cast/decimal_cast.cpp +2 -0
  331. package/src/duckdb/src/function/cast/default_casts.cpp +4 -1
  332. package/src/duckdb/src/function/cast/list_casts.cpp +151 -6
  333. package/src/duckdb/src/function/cast/numeric_casts.cpp +4 -0
  334. package/src/duckdb/src/function/cast/string_cast.cpp +95 -5
  335. package/src/duckdb/src/function/cast/struct_cast.cpp +53 -19
  336. package/src/duckdb/src/function/cast/time_casts.cpp +23 -1
  337. package/src/duckdb/src/function/cast/union/from_struct.cpp +1 -0
  338. package/src/duckdb/src/function/cast/union_casts.cpp +4 -3
  339. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -4
  340. package/src/duckdb/src/function/cast_rules.cpp +197 -31
  341. package/src/duckdb/src/function/compression_config.cpp +4 -0
  342. package/src/duckdb/src/function/function.cpp +15 -9
  343. package/src/duckdb/src/function/function_binder.cpp +80 -29
  344. package/src/duckdb/src/function/function_set.cpp +6 -6
  345. package/src/duckdb/src/function/pragma/pragma_functions.cpp +10 -8
  346. package/src/duckdb/src/function/pragma/pragma_queries.cpp +34 -38
  347. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +12 -0
  348. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +10 -1
  349. package/src/duckdb/src/function/scalar/list/list_concat.cpp +11 -1
  350. package/src/duckdb/src/function/scalar/list/list_extract.cpp +14 -3
  351. package/src/duckdb/src/function/scalar/list/list_resize.cpp +4 -0
  352. package/src/duckdb/src/function/scalar/list/list_select.cpp +176 -0
  353. package/src/duckdb/src/function/scalar/list/list_zip.cpp +165 -0
  354. package/src/duckdb/src/function/scalar/nested_functions.cpp +33 -0
  355. package/src/duckdb/src/function/scalar/operators/add.cpp +53 -6
  356. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +48 -14
  357. package/src/duckdb/src/function/scalar/operators/multiply.cpp +9 -1
  358. package/src/duckdb/src/function/scalar/operators/subtract.cpp +19 -4
  359. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +28 -55
  360. package/src/duckdb/src/function/scalar/strftime_format.cpp +242 -19
  361. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
  362. package/src/duckdb/src/function/scalar/string/concat.cpp +16 -6
  363. package/src/duckdb/src/function/scalar/string/length.cpp +124 -24
  364. package/src/duckdb/src/function/scalar/string/regexp.cpp +27 -27
  365. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +64 -15
  366. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +4 -3
  367. package/src/duckdb/src/function/scalar_function.cpp +8 -7
  368. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +12 -0
  369. package/src/duckdb/src/function/table/arrow.cpp +20 -27
  370. package/src/duckdb/src/function/table/arrow_conversion.cpp +433 -150
  371. package/src/duckdb/src/function/table/copy_csv.cpp +62 -62
  372. package/src/duckdb/src/function/table/range.cpp +6 -3
  373. package/src/duckdb/src/function/table/read_csv.cpp +107 -759
  374. package/src/duckdb/src/function/table/read_file.cpp +242 -0
  375. package/src/duckdb/src/function/table/sniff_csv.cpp +275 -0
  376. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +15 -3
  377. package/src/duckdb/src/function/table/system/duckdb_databases.cpp +5 -0
  378. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +9 -13
  379. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +67 -14
  380. package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +12 -15
  381. package/src/duckdb/src/function/table/system/duckdb_memory.cpp +63 -0
  382. package/src/duckdb/src/function/table/system/duckdb_optimizers.cpp +57 -0
  383. package/src/duckdb/src/function/table/system/duckdb_schemas.cpp +5 -0
  384. package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +128 -0
  385. package/src/duckdb/src/function/table/system/duckdb_sequences.cpp +12 -6
  386. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +0 -1
  387. package/src/duckdb/src/function/table/system/duckdb_tables.cpp +5 -0
  388. package/src/duckdb/src/function/table/system/duckdb_types.cpp +6 -0
  389. package/src/duckdb/src/function/table/system/duckdb_views.cpp +5 -0
  390. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +166 -64
  391. package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -1
  392. package/src/duckdb/src/function/table/system_functions.cpp +3 -2
  393. package/src/duckdb/src/function/table/table_scan.cpp +50 -110
  394. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -44
  395. package/src/duckdb/src/function/table_function.cpp +2 -2
  396. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +4 -3
  397. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/dependency/dependency_dependent_entry.hpp +27 -0
  398. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/dependency/dependency_entry.hpp +66 -0
  399. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/dependency/dependency_subject_entry.hpp +27 -0
  400. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +25 -5
  401. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -1
  402. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +2 -1
  403. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +19 -5
  404. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/list.hpp +1 -0
  405. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp +2 -0
  406. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +1 -1
  407. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +26 -11
  408. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +1 -0
  409. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp +2 -0
  410. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -0
  411. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -0
  412. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +15 -0
  413. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +41 -49
  414. package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +1 -0
  415. package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +3 -1
  416. package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +120 -8
  417. package/src/duckdb/src/include/duckdb/catalog/dependency_catalog_set.hpp +32 -0
  418. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +2 -0
  419. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +92 -12
  420. package/src/duckdb/src/include/duckdb/catalog/standard_entry.hpp +1 -1
  421. package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +4 -2
  422. package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1153 -12
  423. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +6 -6
  424. package/src/duckdb/src/include/duckdb/common/adbc/driver_manager.h +0 -2
  425. package/src/duckdb/src/include/duckdb/common/adbc/options.h +64 -0
  426. package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +8 -0
  427. package/src/duckdb/src/include/duckdb/common/arrow/arrow.hpp +25 -6
  428. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +3 -3
  429. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +1 -1
  430. package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +30 -0
  431. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +6 -6
  432. package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +10 -0
  433. package/src/duckdb/src/include/duckdb/common/constants.hpp +1 -0
  434. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +75 -19
  435. package/src/duckdb/src/include/duckdb/common/enums/catalog_type.hpp +11 -1
  436. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +3 -0
  437. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -0
  438. package/src/duckdb/src/include/duckdb/common/enums/index_constraint_type.hpp +35 -0
  439. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +16 -3
  440. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +6 -5
  441. package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +32 -0
  442. package/src/duckdb/src/include/duckdb/common/enums/on_create_conflict.hpp +26 -0
  443. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  444. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +7 -1
  445. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +9 -3
  446. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +6 -5
  447. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +3 -2
  448. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
  449. package/src/duckdb/src/include/duckdb/common/error_data.hpp +72 -0
  450. package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +47 -0
  451. package/src/duckdb/src/include/duckdb/common/exception/catalog_exception.hpp +39 -0
  452. package/src/duckdb/src/include/duckdb/common/exception/conversion_exception.hpp +27 -0
  453. package/src/duckdb/src/include/duckdb/common/exception/http_exception.hpp +62 -0
  454. package/src/duckdb/src/include/duckdb/common/exception/list.hpp +6 -0
  455. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +29 -0
  456. package/src/duckdb/src/include/duckdb/common/exception/transaction_exception.hpp +25 -0
  457. package/src/duckdb/src/include/duckdb/common/exception.hpp +52 -166
  458. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +7 -4
  459. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +53 -1
  460. package/src/duckdb/src/include/duckdb/common/helper.hpp +13 -3
  461. package/src/duckdb/src/include/duckdb/common/http_state.hpp +18 -4
  462. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +5 -1
  463. package/src/duckdb/src/include/duckdb/common/limits.hpp +19 -1
  464. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +1 -0
  465. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +7 -2
  466. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +16 -3
  467. package/src/duckdb/src/include/duckdb/common/operator/add.hpp +13 -2
  468. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +114 -5
  469. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -4
  470. package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +2 -0
  471. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +29 -0
  472. package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +52 -0
  473. package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +459 -0
  474. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +2 -0
  475. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +136 -0
  476. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
  477. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +7 -1
  478. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +7 -5
  479. package/src/duckdb/src/include/duckdb/common/platform.h +53 -0
  480. package/src/duckdb/src/include/duckdb/common/progress_bar/display/terminal_progress_bar_display.hpp +5 -5
  481. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +22 -6
  482. package/src/duckdb/src/include/duckdb/common/radix.hpp +6 -0
  483. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +20 -6
  484. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +1 -0
  485. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +1 -0
  486. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +43 -4
  487. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
  488. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +15 -0
  489. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +2 -0
  490. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -2
  491. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +81 -0
  492. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +0 -78
  493. package/src/duckdb/src/include/duckdb/common/string_util.hpp +23 -1
  494. package/src/duckdb/src/include/duckdb/common/type_util.hpp +5 -1
  495. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +26 -3
  496. package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +8 -0
  497. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +3 -0
  498. package/src/duckdb/src/include/duckdb/common/types/date.hpp +3 -0
  499. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -3
  500. package/src/duckdb/src/include/duckdb/common/types/hash.hpp +2 -0
  501. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +81 -15
  502. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +57 -29
  503. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +1 -1
  504. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +4 -2
  505. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -2
  506. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +26 -22
  507. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +7 -0
  508. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +7 -5
  509. package/src/duckdb/src/include/duckdb/common/types/time.hpp +6 -2
  510. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +2 -0
  511. package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +216 -0
  512. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +18 -10
  513. package/src/duckdb/src/include/duckdb/common/types/value.hpp +31 -0
  514. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +18 -2
  515. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +22 -1
  516. package/src/duckdb/src/include/duckdb/common/types.hpp +151 -49
  517. package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +81 -0
  518. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +70 -6
  519. package/src/duckdb/src/include/duckdb/common/vector_size.hpp +6 -6
  520. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +29 -2
  521. package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +131 -0
  522. package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +69 -0
  523. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +9 -0
  524. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -4
  525. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +42 -0
  526. package/src/duckdb/src/include/duckdb/core_functions/scalar/secret_functions.hpp +27 -0
  527. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +56 -2
  528. package/src/duckdb/src/include/duckdb/core_functions/to_interval.hpp +29 -0
  529. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -2
  530. package/src/duckdb/src/include/duckdb/execution/executor.hpp +11 -13
  531. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +34 -19
  532. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +18 -14
  533. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -1
  534. package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +57 -0
  535. package/src/duckdb/src/include/duckdb/execution/index/index_type_set.hpp +29 -0
  536. package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +65 -0
  537. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +35 -24
  538. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +630 -0
  539. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +2 -0
  540. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +3 -2
  541. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +103 -0
  542. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +74 -0
  543. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +60 -0
  544. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +155 -0
  545. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +163 -0
  546. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +35 -0
  547. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +228 -0
  548. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +70 -0
  549. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +93 -0
  550. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +60 -0
  551. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +197 -0
  552. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +189 -0
  553. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +21 -0
  554. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +30 -0
  555. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +99 -0
  556. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +87 -0
  557. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +70 -0
  558. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +80 -0
  559. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +137 -0
  560. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +104 -0
  561. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +0 -4
  562. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +37 -0
  563. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_create_secret.hpp +38 -0
  564. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +4 -7
  565. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +6 -2
  566. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -2
  567. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -18
  568. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +14 -5
  569. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -2
  570. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +2 -1
  571. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +37 -0
  572. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +1 -2
  573. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  574. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  575. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +37 -0
  576. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +0 -1
  577. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +0 -1
  578. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_database.hpp +35 -0
  579. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -3
  580. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +14 -0
  581. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -4
  582. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_expression_scan.hpp +0 -1
  583. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +17 -12
  584. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +4 -0
  585. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_union.hpp +3 -1
  586. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +2 -1
  587. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
  588. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +3 -1
  589. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +32 -18
  590. package/src/duckdb/src/include/duckdb/execution/task_error_manager.hpp +57 -0
  591. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +2 -0
  592. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +101 -19
  593. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +46 -14
  594. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +6 -2
  595. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  596. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +26 -1
  597. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +3 -0
  598. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -0
  599. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +7 -1
  600. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -0
  601. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +16 -4
  602. package/src/duckdb/src/include/duckdb/function/function.hpp +12 -7
  603. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +15 -12
  604. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +4 -3
  605. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -3
  606. package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +4 -1
  607. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +3 -0
  608. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +21 -1
  609. package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +22 -0
  610. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +16 -2
  611. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +22 -8
  612. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +6 -0
  613. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +35 -3
  614. package/src/duckdb/src/include/duckdb/function/table/list.hpp +1 -0
  615. package/src/duckdb/src/include/duckdb/function/table/range.hpp +12 -0
  616. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +14 -16
  617. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +12 -8
  618. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +0 -2
  619. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -3
  620. package/src/duckdb/src/include/duckdb/main/appender.hpp +3 -1
  621. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +3 -2
  622. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +89 -0
  623. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +53 -0
  624. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -1
  625. package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +2 -0
  626. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +4 -4
  627. package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +3 -3
  628. package/src/duckdb/src/include/duckdb/main/client_config.hpp +8 -10
  629. package/src/duckdb/src/include/duckdb/main/client_context.hpp +22 -23
  630. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +37 -0
  631. package/src/duckdb/src/include/duckdb/main/client_data.hpp +1 -7
  632. package/src/duckdb/src/include/duckdb/main/config.hpp +24 -7
  633. package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -1
  634. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +17 -26
  635. package/src/duckdb/src/include/duckdb/main/database.hpp +4 -2
  636. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +39 -7
  637. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +5 -4
  638. package/src/duckdb/src/include/duckdb/main/error_manager.hpp +4 -1
  639. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +203 -197
  640. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +42 -16
  641. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +8 -1
  642. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  643. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -2
  644. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +4 -4
  645. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -0
  646. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +0 -32
  647. package/src/duckdb/src/include/duckdb/main/query_result.hpp +13 -12
  648. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  649. package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +3 -1
  650. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +206 -0
  651. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +211 -0
  652. package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +164 -0
  653. package/src/duckdb/src/include/duckdb/main/settings.hpp +52 -13
  654. package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +9 -1
  655. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +4 -1
  656. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  657. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +1 -1
  658. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +2 -0
  659. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +3 -0
  660. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +1 -0
  661. package/src/duckdb/src/include/duckdb/parallel/event.hpp +2 -2
  662. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +14 -22
  663. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +20 -0
  664. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -5
  665. package/src/duckdb/src/include/duckdb/parallel/task.hpp +7 -0
  666. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -4
  667. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +3 -0
  668. package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +8 -1
  669. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +11 -4
  670. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +16 -9
  671. package/src/duckdb/src/include/duckdb/parser/expression/lambdaref_expression.hpp +47 -0
  672. package/src/duckdb/src/include/duckdb/parser/expression/list.hpp +1 -0
  673. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +29 -2
  674. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
  675. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +47 -2
  676. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -0
  677. package/src/duckdb/src/include/duckdb/parser/parsed_data/bound_pragma_info.hpp +29 -0
  678. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +45 -0
  679. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +21 -22
  680. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +5 -11
  681. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +23 -0
  682. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp +47 -0
  683. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -1
  684. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +2 -0
  685. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +4 -0
  686. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +6 -1
  687. package/src/duckdb/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp +66 -0
  688. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +3 -1
  689. package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +8 -4
  690. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +1 -3
  691. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -0
  692. package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +1 -1
  693. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +5 -22
  694. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +2 -0
  695. package/src/duckdb/src/include/duckdb/parser/statement/copy_database_statement.hpp +40 -0
  696. package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -1
  697. package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +4 -3
  698. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -2
  699. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  700. package/src/duckdb/src/include/duckdb/parser/tableref/showref.hpp +47 -0
  701. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +2 -1
  702. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -1
  703. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +20 -3
  704. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +5 -1
  705. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -27
  706. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -0
  707. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +4 -0
  708. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +4 -1
  709. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +2 -1
  710. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +4 -2
  711. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +5 -6
  712. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +11 -0
  713. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -1
  714. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -2
  715. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +3 -2
  716. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +2 -1
  717. package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +2 -1
  718. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +65 -22
  719. package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +41 -0
  720. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +2 -1
  721. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  722. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
  723. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_database.hpp +45 -0
  724. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -0
  725. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_secret.hpp +43 -0
  726. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +3 -2
  727. package/src/duckdb/src/include/duckdb/planner/operator/logical_pragma.hpp +5 -10
  728. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +2 -0
  729. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +11 -4
  730. package/src/duckdb/src/include/duckdb/planner/parsed_data/bound_create_table_info.hpp +3 -3
  731. package/src/duckdb/src/include/duckdb/planner/pragma_handler.hpp +1 -1
  732. package/src/duckdb/src/include/duckdb/planner/query_node/bound_set_operation_node.hpp +2 -0
  733. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +3 -0
  734. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +29 -0
  735. package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +9 -7
  736. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +2 -1
  737. package/src/duckdb/src/include/duckdb/planner/tableref/bound_dummytableref.hpp +3 -2
  738. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +9 -5
  739. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +23 -6
  740. package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +7 -0
  741. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +11 -4
  742. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -3
  743. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +1 -1
  744. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +7 -7
  745. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  746. package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +408 -0
  747. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +173 -0
  748. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +283 -0
  749. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +134 -0
  750. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_fetch.hpp +42 -0
  751. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +244 -0
  752. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +103 -0
  753. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +235 -0
  754. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +134 -0
  755. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +301 -0
  756. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_constants.hpp +35 -0
  757. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_fetch.hpp +41 -0
  758. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +252 -0
  759. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp +7 -103
  760. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +5 -234
  761. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +2 -2
  762. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_analyze.hpp +7 -107
  763. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +5 -184
  764. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +12 -17
  765. package/src/duckdb/src/include/duckdb/storage/database_size.hpp +1 -0
  766. package/src/duckdb/src/include/duckdb/storage/index.hpp +40 -42
  767. package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +77 -0
  768. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -2
  769. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
  770. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +12 -6
  771. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +8 -3
  772. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +17 -17
  773. package/src/duckdb/src/include/duckdb/storage/statistics/array_stats.hpp +40 -0
  774. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +12 -3
  775. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +3 -0
  776. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +3 -2
  777. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +49 -24
  778. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +8 -4
  779. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +67 -0
  780. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +3 -3
  781. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +1 -0
  782. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -2
  783. package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +8 -3
  784. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  785. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -2
  786. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -0
  787. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -1
  788. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
  789. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +1 -1
  790. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +16 -12
  791. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +4 -0
  792. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +6 -9
  793. package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +119 -0
  794. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +13 -55
  795. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -1
  796. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +4 -5
  797. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -2
  798. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -3
  799. package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +4 -4
  800. package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +3 -3
  801. package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +25 -0
  802. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +5 -0
  803. package/src/duckdb/src/include/duckdb.h +571 -143
  804. package/src/duckdb/src/main/appender.cpp +17 -2
  805. package/src/duckdb/src/main/attached_database.cpp +24 -12
  806. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +96 -0
  807. package/src/duckdb/src/main/capi/appender-c.cpp +42 -3
  808. package/src/duckdb/src/main/capi/arrow-c.cpp +32 -9
  809. package/src/duckdb/src/main/capi/datetime-c.cpp +22 -0
  810. package/src/duckdb/src/main/capi/duckdb-c.cpp +14 -4
  811. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +66 -2
  812. package/src/duckdb/src/main/capi/helper-c.cpp +76 -2
  813. package/src/duckdb/src/main/capi/hugeint-c.cpp +23 -0
  814. package/src/duckdb/src/main/capi/logical_types-c.cpp +27 -3
  815. package/src/duckdb/src/main/capi/pending-c.cpp +43 -9
  816. package/src/duckdb/src/main/capi/prepared-c.cpp +38 -2
  817. package/src/duckdb/src/main/capi/result-c.cpp +54 -3
  818. package/src/duckdb/src/main/capi/table_function-c.cpp +4 -4
  819. package/src/duckdb/src/main/capi/value-c.cpp +10 -0
  820. package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +3 -3
  821. package/src/duckdb/src/main/client_context.cpp +259 -250
  822. package/src/duckdb/src/main/client_data.cpp +0 -1
  823. package/src/duckdb/src/main/client_verify.cpp +26 -8
  824. package/src/duckdb/src/main/config.cpp +34 -13
  825. package/src/duckdb/src/main/connection.cpp +27 -6
  826. package/src/duckdb/src/main/connection_manager.cpp +54 -0
  827. package/src/duckdb/src/main/database.cpp +44 -39
  828. package/src/duckdb/src/main/database_manager.cpp +106 -8
  829. package/src/duckdb/src/main/database_path_and_type.cpp +27 -8
  830. package/src/duckdb/src/main/db_instance_cache.cpp +4 -4
  831. package/src/duckdb/src/main/error_manager.cpp +12 -3
  832. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -2
  833. package/src/duckdb/src/main/extension/extension_helper.cpp +15 -16
  834. package/src/duckdb/src/main/extension/extension_install.cpp +33 -24
  835. package/src/duckdb/src/main/extension/extension_load.cpp +22 -21
  836. package/src/duckdb/src/main/extension/extension_util.cpp +12 -0
  837. package/src/duckdb/src/main/materialized_query_result.cpp +1 -1
  838. package/src/duckdb/src/main/pending_query_result.cpp +25 -8
  839. package/src/duckdb/src/main/prepared_statement.cpp +5 -5
  840. package/src/duckdb/src/main/prepared_statement_data.cpp +8 -1
  841. package/src/duckdb/src/main/query_profiler.cpp +11 -11
  842. package/src/duckdb/src/main/query_result.cpp +32 -6
  843. package/src/duckdb/src/main/relation/cross_product_relation.cpp +1 -1
  844. package/src/duckdb/src/main/relation/join_relation.cpp +2 -2
  845. package/src/duckdb/src/main/relation/read_csv_relation.cpp +38 -32
  846. package/src/duckdb/src/main/relation/setop_relation.cpp +5 -3
  847. package/src/duckdb/src/main/relation.cpp +5 -5
  848. package/src/duckdb/src/main/secret/secret.cpp +135 -0
  849. package/src/duckdb/src/main/secret/secret_manager.cpp +634 -0
  850. package/src/duckdb/src/main/secret/secret_storage.cpp +233 -0
  851. package/src/duckdb/src/main/settings/settings.cpp +133 -38
  852. package/src/duckdb/src/main/stream_query_result.cpp +53 -14
  853. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +6 -0
  854. package/src/duckdb/src/optimizer/cse_optimizer.cpp +1 -1
  855. package/src/duckdb/src/optimizer/deliminator.cpp +136 -14
  856. package/src/duckdb/src/optimizer/filter_combiner.cpp +72 -26
  857. package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -0
  858. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -1
  859. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +0 -1
  860. package/src/duckdb/src/optimizer/join_order/join_node.cpp +4 -0
  861. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +3 -6
  862. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +20 -0
  863. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +71 -40
  864. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +12 -3
  865. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +77 -3
  866. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +7 -7
  867. package/src/duckdb/src/optimizer/pushdown/pushdown_semi_anti_join.cpp +56 -0
  868. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -0
  869. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +2 -2
  870. package/src/duckdb/src/optimizer/rule/move_constants.cpp +15 -10
  871. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +70 -0
  872. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +17 -5
  873. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +1 -0
  874. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -0
  875. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +1 -2
  876. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +22 -9
  877. package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +28 -4
  878. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +12 -7
  879. package/src/duckdb/src/parallel/event.cpp +2 -2
  880. package/src/duckdb/src/parallel/executor.cpp +114 -81
  881. package/src/duckdb/src/parallel/executor_task.cpp +2 -4
  882. package/src/duckdb/src/parallel/meta_pipeline.cpp +28 -29
  883. package/src/duckdb/src/parallel/pipeline.cpp +41 -41
  884. package/src/duckdb/src/parallel/pipeline_event.cpp +2 -4
  885. package/src/duckdb/src/parallel/pipeline_executor.cpp +13 -75
  886. package/src/duckdb/src/parallel/task_scheduler.cpp +22 -13
  887. package/src/duckdb/src/parser/column_definition.cpp +22 -4
  888. package/src/duckdb/src/parser/column_list.cpp +2 -1
  889. package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
  890. package/src/duckdb/src/parser/expression/lambda_expression.cpp +51 -0
  891. package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +59 -0
  892. package/src/duckdb/src/parser/expression/window_expression.cpp +9 -1
  893. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +40 -0
  894. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +1 -0
  895. package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +19 -0
  896. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +11 -9
  897. package/src/duckdb/src/parser/parsed_data/create_info.cpp +1 -0
  898. package/src/duckdb/src/parser/parsed_data/create_secret_info.cpp +22 -0
  899. package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +17 -0
  900. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +19 -0
  901. package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +13 -9
  902. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +8 -9
  903. package/src/duckdb/src/parser/parsed_data/extra_drop_info.cpp +16 -0
  904. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +3 -1
  905. package/src/duckdb/src/parser/parser.cpp +14 -8
  906. package/src/duckdb/src/parser/query_error_context.cpp +12 -13
  907. package/src/duckdb/src/parser/query_node/select_node.cpp +5 -1
  908. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +8 -13
  909. package/src/duckdb/src/parser/statement/copy_database_statement.cpp +41 -0
  910. package/src/duckdb/src/parser/statement/set_statement.cpp +5 -1
  911. package/src/duckdb/src/parser/tableref/basetableref.cpp +1 -0
  912. package/src/duckdb/src/parser/tableref/showref.cpp +47 -0
  913. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +6 -2
  914. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -0
  915. package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +30 -15
  916. package/src/duckdb/src/parser/transform/expression/transform_case.cpp +1 -0
  917. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +3 -2
  918. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +34 -4
  919. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +26 -12
  920. package/src/duckdb/src/parser/transform/expression/transform_grouping_function.cpp +1 -1
  921. package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +34 -5
  922. package/src/duckdb/src/parser/transform/expression/transform_is_null.cpp +3 -1
  923. package/src/duckdb/src/parser/transform/expression/transform_lambda.cpp +3 -1
  924. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +3 -3
  925. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +3 -3
  926. package/src/duckdb/src/parser/transform/expression/transform_positional_reference.cpp +1 -1
  927. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +13 -13
  928. package/src/duckdb/src/parser/transform/helpers/nodetype_to_string.cpp +2 -0
  929. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  930. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +32 -1
  931. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +1 -1
  932. package/src/duckdb/src/parser/transform/statement/transform_attach.cpp +1 -0
  933. package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +108 -0
  934. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +45 -37
  935. package/src/duckdb/src/parser/transform/statement/transform_copy_database.cpp +29 -0
  936. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +5 -14
  937. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +0 -1
  938. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +1 -1
  939. package/src/duckdb/src/parser/transform/statement/transform_drop.cpp +25 -6
  940. package/src/duckdb/src/parser/transform/statement/transform_import.cpp +2 -1
  941. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +6 -14
  942. package/src/duckdb/src/parser/transform/statement/transform_secret.cpp +103 -0
  943. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -1
  944. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +4 -8
  945. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +18 -5
  946. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +14 -41
  947. package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +13 -6
  948. package/src/duckdb/src/parser/transform/statement/transform_use.cpp +3 -1
  949. package/src/duckdb/src/parser/transform/tableref/transform_base_tableref.cpp +1 -1
  950. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +1 -2
  951. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +6 -5
  952. package/src/duckdb/src/parser/transform/tableref/transform_table_function.cpp +1 -1
  953. package/src/duckdb/src/parser/transformer.cpp +22 -0
  954. package/src/duckdb/src/planner/bind_context.cpp +23 -14
  955. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +15 -14
  956. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +20 -9
  957. package/src/duckdb/src/planner/binder/expression/bind_case_expression.cpp +12 -7
  958. package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +4 -4
  959. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -3
  960. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +279 -195
  961. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +103 -17
  962. package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +3 -3
  963. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +91 -68
  964. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +116 -84
  965. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +85 -15
  966. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +68 -31
  967. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +7 -7
  968. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +11 -7
  969. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +42 -19
  970. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +38 -16
  971. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +16 -7
  972. package/src/duckdb/src/planner/binder/query_node/bind_recursive_cte_node.cpp +3 -0
  973. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +83 -12
  974. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +28 -37
  975. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +3 -4
  976. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +2 -2
  977. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +3 -3
  978. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +7 -5
  979. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +10 -0
  980. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +48 -50
  981. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +187 -0
  982. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +38 -22
  983. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +8 -15
  984. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +6 -1
  985. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +2 -2
  986. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +12 -10
  987. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +37 -13
  988. package/src/duckdb/src/planner/binder/statement/bind_set.cpp +8 -2
  989. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +2 -0
  990. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +29 -14
  991. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +1 -1
  992. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +16 -6
  993. package/src/duckdb/src/planner/binder/tableref/bind_expressionlistref.cpp +11 -4
  994. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +1 -1
  995. package/src/duckdb/src/planner/binder/tableref/bind_named_parameters.cpp +2 -2
  996. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +18 -17
  997. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +85 -0
  998. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -17
  999. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -9
  1000. package/src/duckdb/src/planner/binder.cpp +31 -26
  1001. package/src/duckdb/src/planner/bound_result_modifier.cpp +24 -0
  1002. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  1003. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +10 -1
  1004. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +20 -4
  1005. package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +9 -10
  1006. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +65 -3
  1007. package/src/duckdb/src/planner/expression.cpp +15 -5
  1008. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +5 -6
  1009. package/src/duckdb/src/planner/expression_binder/check_binder.cpp +9 -8
  1010. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +10 -7
  1011. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +9 -4
  1012. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +0 -25
  1013. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -11
  1014. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -2
  1015. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +15 -8
  1016. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +3 -4
  1017. package/src/duckdb/src/planner/expression_binder.cpp +51 -25
  1018. package/src/duckdb/src/planner/expression_iterator.cpp +2 -1
  1019. package/src/duckdb/src/planner/filter/constant_filter.cpp +1 -0
  1020. package/src/duckdb/src/planner/filter/struct_filter.cpp +33 -0
  1021. package/src/duckdb/src/planner/joinside.cpp +1 -1
  1022. package/src/duckdb/src/planner/logical_operator.cpp +2 -1
  1023. package/src/duckdb/src/planner/operator/logical_copy_database.cpp +32 -0
  1024. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +13 -4
  1025. package/src/duckdb/src/planner/operator/logical_create_table.cpp +2 -0
  1026. package/src/duckdb/src/planner/operator/logical_get.cpp +4 -1
  1027. package/src/duckdb/src/planner/operator/logical_join.cpp +8 -0
  1028. package/src/duckdb/src/planner/planner.cpp +24 -23
  1029. package/src/duckdb/src/planner/pragma_handler.cpp +10 -19
  1030. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +99 -6
  1031. package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +36 -0
  1032. package/src/duckdb/src/planner/table_binding.cpp +14 -12
  1033. package/src/duckdb/src/storage/buffer/block_handle.cpp +12 -10
  1034. package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -1
  1035. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +25 -9
  1036. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +4 -3
  1037. package/src/duckdb/src/storage/buffer_manager.cpp +14 -3
  1038. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -8
  1039. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +15 -7
  1040. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  1041. package/src/duckdb/src/storage/checkpoint_manager.cpp +94 -41
  1042. package/src/duckdb/src/storage/compression/alp/alp.cpp +57 -0
  1043. package/src/duckdb/src/storage/compression/alp/alp_constants.cpp +13 -0
  1044. package/src/duckdb/src/storage/compression/alprd.cpp +57 -0
  1045. package/src/duckdb/src/storage/compression/bitpacking.cpp +86 -55
  1046. package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +41 -41
  1047. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -3
  1048. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -0
  1049. package/src/duckdb/src/storage/compression/fsst.cpp +4 -4
  1050. package/src/duckdb/src/storage/compression/numeric_constant.cpp +3 -0
  1051. package/src/duckdb/src/storage/compression/rle.cpp +6 -4
  1052. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +7 -7
  1053. package/src/duckdb/src/storage/compression/uncompressed.cpp +1 -0
  1054. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +6 -6
  1055. package/src/duckdb/src/storage/data_table.cpp +32 -96
  1056. package/src/duckdb/src/storage/index.cpp +23 -11
  1057. package/src/duckdb/src/storage/local_storage.cpp +36 -19
  1058. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
  1059. package/src/duckdb/src/storage/partial_block_manager.cpp +1 -1
  1060. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +9 -4
  1061. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -3
  1062. package/src/duckdb/src/storage/serialization/serialize_extra_drop_info.cpp +42 -0
  1063. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -17
  1064. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  1065. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +84 -77
  1066. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +63 -4
  1067. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +23 -0
  1068. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -0
  1069. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +35 -0
  1070. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +19 -0
  1071. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +22 -1
  1072. package/src/duckdb/src/storage/serialization/serialize_types.cpp +50 -0
  1073. package/src/duckdb/src/storage/single_file_block_manager.cpp +46 -7
  1074. package/src/duckdb/src/storage/standard_buffer_manager.cpp +57 -28
  1075. package/src/duckdb/src/storage/statistics/array_stats.cpp +131 -0
  1076. package/src/duckdb/src/storage/statistics/base_statistics.cpp +62 -4
  1077. package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -0
  1078. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +3 -1
  1079. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +21 -0
  1080. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +5 -0
  1081. package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
  1082. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  1083. package/src/duckdb/src/storage/storage_manager.cpp +47 -22
  1084. package/src/duckdb/src/storage/table/array_column_data.cpp +241 -0
  1085. package/src/duckdb/src/storage/table/chunk_info.cpp +2 -1
  1086. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -1
  1087. package/src/duckdb/src/storage/table/column_data.cpp +41 -18
  1088. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +12 -3
  1089. package/src/duckdb/src/storage/table/column_segment.cpp +40 -6
  1090. package/src/duckdb/src/storage/table/list_column_data.cpp +18 -15
  1091. package/src/duckdb/src/storage/table/row_group.cpp +73 -21
  1092. package/src/duckdb/src/storage/table/row_group_collection.cpp +395 -20
  1093. package/src/duckdb/src/storage/table/row_version_manager.cpp +2 -1
  1094. package/src/duckdb/src/storage/table/scan_state.cpp +4 -0
  1095. package/src/duckdb/src/storage/table/standard_column_data.cpp +11 -5
  1096. package/src/duckdb/src/storage/table/struct_column_data.cpp +30 -10
  1097. package/src/duckdb/src/storage/table/table_statistics.cpp +7 -1
  1098. package/src/duckdb/src/storage/table/update_segment.cpp +18 -2
  1099. package/src/duckdb/src/storage/table_index_list.cpp +73 -7
  1100. package/src/duckdb/src/storage/temporary_memory_manager.cpp +148 -0
  1101. package/src/duckdb/src/storage/wal_replay.cpp +329 -152
  1102. package/src/duckdb/src/storage/write_ahead_log.cpp +157 -137
  1103. package/src/duckdb/src/transaction/cleanup_state.cpp +3 -2
  1104. package/src/duckdb/src/transaction/commit_state.cpp +89 -63
  1105. package/src/duckdb/src/transaction/duck_transaction.cpp +5 -3
  1106. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +26 -54
  1107. package/src/duckdb/src/transaction/meta_transaction.cpp +37 -23
  1108. package/src/duckdb/src/transaction/transaction_context.cpp +23 -4
  1109. package/src/duckdb/src/transaction/undo_buffer.cpp +16 -2
  1110. package/src/duckdb/src/verification/fetch_row_verifier.cpp +13 -0
  1111. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +5 -7
  1112. package/src/duckdb/src/verification/statement_verifier.cpp +6 -5
  1113. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +100 -29
  1114. package/src/duckdb/third_party/fmt/include/fmt/format-inl.h +1 -1
  1115. package/src/duckdb/third_party/fmt/include/fmt/format.h +4 -2
  1116. package/src/duckdb/third_party/fmt/include/fmt/printf.h +5 -5
  1117. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +4 -0
  1118. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +82 -21
  1119. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +551 -1004
  1120. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +17 -3
  1121. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24861 -23465
  1122. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +420 -389
  1123. package/src/duckdb/third_party/mbedtls/include/mbedtls/aes.h +640 -1
  1124. package/src/duckdb/third_party/mbedtls/include/mbedtls/aes_alt.h +1 -0
  1125. package/src/duckdb/third_party/mbedtls/include/mbedtls/aria.h +358 -0
  1126. package/src/duckdb/third_party/mbedtls/include/mbedtls/aria_alt.h +1 -0
  1127. package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia.h +316 -0
  1128. package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia_alt.h +1 -0
  1129. package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm.h +1 -1
  1130. package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm_alt.h +1 -0
  1131. package/src/duckdb/third_party/mbedtls/include/mbedtls/chacha20.h +1 -0
  1132. package/src/duckdb/third_party/mbedtls/include/mbedtls/chachapoly.h +1 -0
  1133. package/src/duckdb/third_party/mbedtls/include/mbedtls/cipher.h +124 -124
  1134. package/src/duckdb/third_party/mbedtls/include/mbedtls/cmac.h +1 -0
  1135. package/src/duckdb/third_party/mbedtls/include/mbedtls/entropy.h +293 -0
  1136. package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm.h +383 -0
  1137. package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm_alt.h +1 -0
  1138. package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +9 -0
  1139. package/src/duckdb/third_party/mbedtls/include/mbedtls/nist_kw.h +1 -0
  1140. package/src/duckdb/third_party/mbedtls/include/mbedtls/timing.h +1 -0
  1141. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +35 -6
  1142. package/src/duckdb/third_party/mbedtls/library/aes.cpp +2171 -0
  1143. package/src/duckdb/third_party/mbedtls/library/aesni.h +1 -0
  1144. package/src/duckdb/third_party/mbedtls/library/aria.cpp +1058 -0
  1145. package/src/duckdb/third_party/mbedtls/library/camellia.cpp +1087 -0
  1146. package/src/duckdb/third_party/mbedtls/library/cipher.cpp +1633 -0
  1147. package/src/duckdb/third_party/mbedtls/library/cipher_wrap.cpp +2270 -0
  1148. package/src/duckdb/third_party/mbedtls/library/cipher_wrap.h +146 -0
  1149. package/src/duckdb/third_party/mbedtls/library/entropy.cpp +701 -0
  1150. package/src/duckdb/third_party/mbedtls/library/entropy_poll.cpp +237 -0
  1151. package/src/duckdb/third_party/mbedtls/library/entropy_poll.h +76 -0
  1152. package/src/duckdb/third_party/mbedtls/library/gcm.cpp +1161 -0
  1153. package/src/duckdb/third_party/mbedtls/library/padlock.h +1 -0
  1154. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +132 -24
  1155. package/src/duckdb/third_party/pcg/pcg_uint128.hpp +1 -1
  1156. package/src/duckdb/third_party/skiplist/HeadNode.h +934 -0
  1157. package/src/duckdb/third_party/skiplist/IntegrityEnums.h +62 -0
  1158. package/src/duckdb/third_party/skiplist/Node.h +641 -0
  1159. package/src/duckdb/third_party/skiplist/NodeRefs.h +251 -0
  1160. package/src/duckdb/third_party/skiplist/RollingMedian.h +202 -0
  1161. package/src/duckdb/third_party/skiplist/SkipList.cpp +40 -0
  1162. package/src/duckdb/third_party/skiplist/SkipList.h +549 -0
  1163. package/src/duckdb/third_party/thrift/thrift/thrift-config.h +1 -1
  1164. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  1165. package/src/duckdb/ub_src_catalog.cpp +3 -1
  1166. package/src/duckdb/ub_src_catalog_catalog_entry_dependency.cpp +6 -0
  1167. package/src/duckdb/ub_src_common.cpp +1 -1
  1168. package/src/duckdb/ub_src_common_exception.cpp +6 -0
  1169. package/src/duckdb/ub_src_common_types.cpp +2 -2
  1170. package/src/duckdb/ub_src_core_functions.cpp +2 -0
  1171. package/src/duckdb/ub_src_core_functions_scalar_array.cpp +4 -0
  1172. package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +2 -0
  1173. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +7 -3
  1174. package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +2 -0
  1175. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
  1176. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  1177. package/src/duckdb/ub_src_execution_operator_csv_scanner_buffer_manager.cpp +6 -0
  1178. package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +10 -0
  1179. package/src/duckdb/ub_src_execution_operator_csv_scanner_state_machine.cpp +4 -0
  1180. package/src/duckdb/ub_src_execution_operator_csv_scanner_table_function.cpp +4 -0
  1181. package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +4 -0
  1182. package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
  1183. package/src/duckdb/ub_src_execution_operator_join.cpp +4 -2
  1184. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  1185. package/src/duckdb/ub_src_execution_physical_plan.cpp +4 -2
  1186. package/src/duckdb/ub_src_function_cast.cpp +2 -0
  1187. package/src/duckdb/ub_src_function_scalar_list.cpp +4 -0
  1188. package/src/duckdb/ub_src_function_table.cpp +4 -4
  1189. package/src/duckdb/ub_src_function_table_system.cpp +6 -0
  1190. package/src/duckdb/ub_src_main.cpp +2 -0
  1191. package/src/duckdb/ub_src_main_buffered_data.cpp +2 -0
  1192. package/src/duckdb/ub_src_main_secret.cpp +6 -0
  1193. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1194. package/src/duckdb/ub_src_parser_expression.cpp +2 -0
  1195. package/src/duckdb/ub_src_parser_parsed_data.cpp +6 -0
  1196. package/src/duckdb/ub_src_parser_statement.cpp +2 -2
  1197. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  1198. package/src/duckdb/ub_src_parser_transform_statement.cpp +6 -0
  1199. package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -2
  1200. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  1201. package/src/duckdb/ub_src_planner_filter.cpp +2 -0
  1202. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
  1203. package/src/duckdb/ub_src_planner_subquery.cpp +2 -0
  1204. package/src/duckdb/ub_src_storage.cpp +2 -0
  1205. package/src/duckdb/ub_src_storage_compression.cpp +2 -0
  1206. package/src/duckdb/ub_src_storage_compression_alp.cpp +4 -0
  1207. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  1208. package/src/duckdb/ub_src_storage_statistics.cpp +2 -0
  1209. package/src/duckdb/ub_src_storage_table.cpp +2 -0
  1210. package/src/duckdb_node.hpp +1 -1
  1211. package/src/statement.cpp +18 -8
  1212. package/src/utils.cpp +1 -15
  1213. package/test/columns.test.ts +2 -1
  1214. package/test/config.test.ts +30 -0
  1215. package/test/test_all_types.test.ts +9 -4
@@ -4,9 +4,26 @@
4
4
  #include "duckdb/common/types/hugeint.hpp"
5
5
  #include "duckdb/common/types/arrow_aux_data.hpp"
6
6
  #include "duckdb/function/scalar/nested_functions.hpp"
7
+ #include "duckdb/common/exception/conversion_exception.hpp"
7
8
 
8
9
  namespace duckdb {
9
10
 
11
+ namespace {
12
+
13
+ enum class ArrowArrayPhysicalType : uint8_t { DICTIONARY_ENCODED, RUN_END_ENCODED, DEFAULT };
14
+
15
+ ArrowArrayPhysicalType GetArrowArrayPhysicalType(const ArrowType &type) {
16
+ if (type.HasDictionary()) {
17
+ return ArrowArrayPhysicalType::DICTIONARY_ENCODED;
18
+ }
19
+ if (type.RunEndEncoded()) {
20
+ return ArrowArrayPhysicalType::RUN_END_ENCODED;
21
+ }
22
+ return ArrowArrayPhysicalType::DEFAULT;
23
+ }
24
+
25
+ } // namespace
26
+
10
27
  static void ShiftRight(unsigned char *ar, int size, int shift) {
11
28
  int carry = 0;
12
29
  while (shift--) {
@@ -18,13 +35,26 @@ static void ShiftRight(unsigned char *ar, int size, int shift) {
18
35
  }
19
36
  }
20
37
 
38
+ idx_t GetEffectiveOffset(ArrowArray &array, int64_t parent_offset, const ArrowScanLocalState &state,
39
+ int64_t nested_offset = -1) {
40
+ if (nested_offset != -1) {
41
+ // The parent of this array is a list
42
+ // We just ignore the parent offset, it's already applied to the list
43
+ return array.offset + nested_offset;
44
+ }
45
+ // Parent offset is set in the case of a struct, it applies to all child arrays
46
+ // 'chunk_offset' is how much of the chunk we've already scanned, in case the chunk size exceeds
47
+ // STANDARD_VECTOR_SIZE
48
+ return array.offset + parent_offset + state.chunk_offset;
49
+ }
50
+
21
51
  template <class T>
22
52
  T *ArrowBufferData(ArrowArray &array, idx_t buffer_idx) {
23
53
  return (T *)array.buffers[buffer_idx]; // NOLINT
24
54
  }
25
55
 
26
- static void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
27
- int64_t nested_offset = -1, bool add_null = false) {
56
+ static void GetValidityMask(ValidityMask &mask, ArrowArray &array, const ArrowScanLocalState &scan_state, idx_t size,
57
+ int64_t parent_offset, int64_t nested_offset = -1, bool add_null = false) {
28
58
  // In certain cases we don't need to or cannot copy arrow's validity mask to duckdb.
29
59
  //
30
60
  // The conditions where we do want to copy arrow's mask to duckdb are:
@@ -32,10 +62,7 @@ static void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLoca
32
62
  // 2. n_buffers > 0, meaning the array's arrow type is not `null`
33
63
  // 3. the validity buffer (the first buffer) is not a nullptr
34
64
  if (array.null_count != 0 && array.n_buffers > 0 && array.buffers[0]) {
35
- auto bit_offset = scan_state.chunk_offset + array.offset;
36
- if (nested_offset != -1) {
37
- bit_offset = nested_offset;
38
- }
65
+ auto bit_offset = GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
39
66
  mask.EnsureWritable();
40
67
  #if STANDARD_VECTOR_SIZE > 64
41
68
  auto n_bitmask_bytes = (size + 8 - 1) / 8;
@@ -73,13 +100,17 @@ static void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLoca
73
100
  }
74
101
  }
75
102
 
76
- static void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
77
- int64_t nested_offset, bool add_null = false) {
103
+ static void SetValidityMask(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state, idx_t size,
104
+ int64_t parent_offset, int64_t nested_offset, bool add_null = false) {
78
105
  D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR);
79
106
  auto &mask = FlatVector::Validity(vector);
80
- GetValidityMask(mask, array, scan_state, size, nested_offset, add_null);
107
+ GetValidityMask(mask, array, scan_state, size, parent_offset, nested_offset, add_null);
81
108
  }
82
109
 
110
+ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state,
111
+ idx_t size, const ArrowType &arrow_type, int64_t nested_offset = -1,
112
+ ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0);
113
+
83
114
  static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
84
115
  const ArrowType &arrow_type, int64_t nested_offset = -1,
85
116
  ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0);
@@ -89,21 +120,19 @@ static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, Arr
89
120
  ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0);
90
121
 
91
122
  static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
92
- const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask) {
123
+ const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask,
124
+ int64_t parent_offset) {
93
125
  auto size_type = arrow_type.GetSizeType();
94
126
  idx_t list_size = 0;
95
127
  auto &scan_state = array_state.state;
96
128
 
97
- SetValidityMask(vector, array, scan_state, size, nested_offset);
129
+ SetValidityMask(vector, array, scan_state, size, parent_offset, nested_offset);
98
130
  idx_t start_offset = 0;
99
131
  idx_t cur_offset = 0;
100
132
  if (size_type == ArrowVariableSizeType::FIXED_SIZE) {
101
133
  auto fixed_size = arrow_type.FixedSize();
102
134
  //! Have to check validity mask before setting this up
103
- idx_t offset = (scan_state.chunk_offset + array.offset) * fixed_size;
104
- if (nested_offset != -1) {
105
- offset = fixed_size * nested_offset;
106
- }
135
+ idx_t offset = GetEffectiveOffset(array, parent_offset, scan_state, nested_offset) * fixed_size;
107
136
  start_offset = offset;
108
137
  auto list_data = FlatVector::GetData<list_entry_t>(vector);
109
138
  for (idx_t i = 0; i < size; i++) {
@@ -114,10 +143,8 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanS
114
143
  }
115
144
  list_size = start_offset + cur_offset;
116
145
  } else if (size_type == ArrowVariableSizeType::NORMAL) {
117
- auto offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + scan_state.chunk_offset;
118
- if (nested_offset != -1) {
119
- offsets = ArrowBufferData<uint32_t>(array, 1) + nested_offset;
120
- }
146
+ auto offsets =
147
+ ArrowBufferData<uint32_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
121
148
  start_offset = offsets[0];
122
149
  auto list_data = FlatVector::GetData<list_entry_t>(vector);
123
150
  for (idx_t i = 0; i < size; i++) {
@@ -128,10 +155,8 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanS
128
155
  }
129
156
  list_size = offsets[size];
130
157
  } else {
131
- auto offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + scan_state.chunk_offset;
132
- if (nested_offset != -1) {
133
- offsets = ArrowBufferData<uint64_t>(array, 1) + nested_offset;
134
- }
158
+ auto offsets =
159
+ ArrowBufferData<uint64_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
135
160
  start_offset = offsets[0];
136
161
  auto list_data = FlatVector::GetData<list_entry_t>(vector);
137
162
  for (idx_t i = 0; i < size; i++) {
@@ -146,7 +171,7 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanS
146
171
  ListVector::Reserve(vector, list_size);
147
172
  ListVector::SetListSize(vector, list_size);
148
173
  auto &child_vector = ListVector::GetEntry(vector);
149
- SetValidityMask(child_vector, *array.children[0], scan_state, list_size, start_offset);
174
+ SetValidityMask(child_vector, *array.children[0], scan_state, list_size, array.offset, start_offset);
150
175
  auto &list_mask = FlatVector::Validity(vector);
151
176
  if (parent_mask) {
152
177
  //! Since this List is owned by a struct we must guarantee their validity map matches on Null
@@ -164,27 +189,34 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanS
164
189
  if (list_size == 0 && start_offset == 0) {
165
190
  D_ASSERT(!child_array.dictionary);
166
191
  ColumnArrowToDuckDB(child_vector, child_array, child_state, list_size, child_type, -1);
167
- } else {
168
- if (child_array.dictionary) {
169
- // TODO: add support for offsets
170
- ColumnArrowToDuckDBDictionary(child_vector, child_array, child_state, list_size, child_type, start_offset);
171
- } else {
172
- ColumnArrowToDuckDB(child_vector, child_array, child_state, list_size, child_type, start_offset);
173
- }
192
+ return;
193
+ }
194
+
195
+ auto array_physical_type = GetArrowArrayPhysicalType(child_type);
196
+ switch (array_physical_type) {
197
+ case ArrowArrayPhysicalType::DICTIONARY_ENCODED:
198
+ // TODO: add support for offsets
199
+ ColumnArrowToDuckDBDictionary(child_vector, child_array, child_state, list_size, child_type, start_offset);
200
+ break;
201
+ case ArrowArrayPhysicalType::RUN_END_ENCODED:
202
+ ColumnArrowToDuckDBRunEndEncoded(child_vector, child_array, child_state, list_size, child_type, start_offset);
203
+ break;
204
+ case ArrowArrayPhysicalType::DEFAULT:
205
+ ColumnArrowToDuckDB(child_vector, child_array, child_state, list_size, child_type, start_offset);
206
+ break;
207
+ default:
208
+ throw NotImplementedException("ArrowArrayPhysicalType not recognized");
174
209
  }
175
210
  }
176
211
 
177
- static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
178
- const ArrowType &arrow_type, int64_t nested_offset) {
212
+ static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state, idx_t size,
213
+ const ArrowType &arrow_type, int64_t nested_offset, int64_t parent_offset) {
179
214
  auto size_type = arrow_type.GetSizeType();
180
- SetValidityMask(vector, array, scan_state, size, nested_offset);
215
+ SetValidityMask(vector, array, scan_state, size, parent_offset, nested_offset);
181
216
  if (size_type == ArrowVariableSizeType::FIXED_SIZE) {
182
217
  auto fixed_size = arrow_type.FixedSize();
183
218
  //! Have to check validity mask before setting this up
184
- idx_t offset = (scan_state.chunk_offset + array.offset) * fixed_size;
185
- if (nested_offset != -1) {
186
- offset = fixed_size * nested_offset;
187
- }
219
+ idx_t offset = GetEffectiveOffset(array, parent_offset, scan_state, nested_offset) * fixed_size;
188
220
  auto cdata = ArrowBufferData<char>(array, 1);
189
221
  for (idx_t row_idx = 0; row_idx < size; row_idx++) {
190
222
  if (FlatVector::IsNull(vector, row_idx)) {
@@ -196,10 +228,8 @@ static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalS
196
228
  offset += blob_len;
197
229
  }
198
230
  } else if (size_type == ArrowVariableSizeType::NORMAL) {
199
- auto offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + scan_state.chunk_offset;
200
- if (nested_offset != -1) {
201
- offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + nested_offset;
202
- }
231
+ auto offsets =
232
+ ArrowBufferData<uint32_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
203
233
  auto cdata = ArrowBufferData<char>(array, 2);
204
234
  for (idx_t row_idx = 0; row_idx < size; row_idx++) {
205
235
  if (FlatVector::IsNull(vector, row_idx)) {
@@ -214,10 +244,8 @@ static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalS
214
244
  if (ArrowBufferData<uint64_t>(array, 1)[array.length] > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
215
245
  throw ConversionException("DuckDB does not support Blobs over 4GB");
216
246
  } // LCOV_EXCL_STOP
217
- auto offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + scan_state.chunk_offset;
218
- if (nested_offset != -1) {
219
- offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + nested_offset;
220
- }
247
+ auto offsets =
248
+ ArrowBufferData<uint64_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
221
249
  auto cdata = ArrowBufferData<char>(array, 2);
222
250
  for (idx_t row_idx = 0; row_idx < size; row_idx++) {
223
251
  if (FlatVector::IsNull(vector, row_idx)) {
@@ -266,26 +294,20 @@ static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets)
266
294
  }
267
295
  }
268
296
 
269
- static void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
270
- uint64_t parent_offset) {
297
+ static void DirectConversion(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state,
298
+ int64_t nested_offset, uint64_t parent_offset) {
271
299
  auto internal_type = GetTypeIdSize(vector.GetType().InternalType());
272
- auto data_ptr =
273
- ArrowBufferData<data_t>(array, 1) + internal_type * (scan_state.chunk_offset + array.offset + parent_offset);
274
- if (nested_offset != -1) {
275
- data_ptr = ArrowBufferData<data_t>(array, 1) + internal_type * (array.offset + nested_offset + parent_offset);
276
- }
300
+ auto data_ptr = ArrowBufferData<data_t>(array, 1) +
301
+ internal_type * GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
277
302
  FlatVector::SetData(vector, data_ptr);
278
303
  }
279
304
 
280
305
  template <class T>
281
- static void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
282
- idx_t size, int64_t conversion) {
306
+ static void TimeConversion(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state,
307
+ int64_t nested_offset, int64_t parent_offset, idx_t size, int64_t conversion) {
283
308
  auto tgt_ptr = FlatVector::GetData<dtime_t>(vector);
284
309
  auto &validity_mask = FlatVector::Validity(vector);
285
- auto src_ptr = (T *)array.buffers[1] + scan_state.chunk_offset + array.offset;
286
- if (nested_offset != -1) {
287
- src_ptr = (T *)array.buffers[1] + nested_offset + array.offset;
288
- }
310
+ auto src_ptr = (T *)array.buffers[1] + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
289
311
  for (idx_t row = 0; row < size; row++) {
290
312
  if (!validity_mask.RowIsValid(row)) {
291
313
  continue;
@@ -296,14 +318,12 @@ static void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalStat
296
318
  }
297
319
  }
298
320
 
299
- static void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
300
- int64_t nested_offset, idx_t size, int64_t conversion) {
321
+ static void TimestampTZConversion(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state,
322
+ int64_t nested_offset, int64_t parent_offset, idx_t size, int64_t conversion) {
301
323
  auto tgt_ptr = FlatVector::GetData<timestamp_t>(vector);
302
324
  auto &validity_mask = FlatVector::Validity(vector);
303
- auto src_ptr = ArrowBufferData<int64_t>(array, 1) + scan_state.chunk_offset + array.offset;
304
- if (nested_offset != -1) {
305
- src_ptr = ArrowBufferData<int64_t>(array, 1) + nested_offset + array.offset;
306
- }
325
+ auto src_ptr =
326
+ ArrowBufferData<int64_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
307
327
  for (idx_t row = 0; row < size; row++) {
308
328
  if (!validity_mask.RowIsValid(row)) {
309
329
  continue;
@@ -314,13 +334,11 @@ static void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLo
314
334
  }
315
335
  }
316
336
 
317
- static void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
318
- int64_t nested_offset, idx_t size, int64_t conversion) {
337
+ static void IntervalConversionUs(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state,
338
+ int64_t nested_offset, int64_t parent_offset, idx_t size, int64_t conversion) {
319
339
  auto tgt_ptr = FlatVector::GetData<interval_t>(vector);
320
- auto src_ptr = ArrowBufferData<int64_t>(array, 1) + scan_state.chunk_offset + array.offset;
321
- if (nested_offset != -1) {
322
- src_ptr = ArrowBufferData<int64_t>(array, 1) + nested_offset + array.offset;
323
- }
340
+ auto src_ptr =
341
+ ArrowBufferData<int64_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
324
342
  for (idx_t row = 0; row < size; row++) {
325
343
  tgt_ptr[row].days = 0;
326
344
  tgt_ptr[row].months = 0;
@@ -330,13 +348,11 @@ static void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLoc
330
348
  }
331
349
  }
332
350
 
333
- static void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
334
- int64_t nested_offset, idx_t size) {
351
+ static void IntervalConversionMonths(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state,
352
+ int64_t nested_offset, int64_t parent_offset, idx_t size) {
335
353
  auto tgt_ptr = FlatVector::GetData<interval_t>(vector);
336
- auto src_ptr = ArrowBufferData<int32_t>(array, 1) + scan_state.chunk_offset + array.offset;
337
- if (nested_offset != -1) {
338
- src_ptr = ArrowBufferData<int32_t>(array, 1) + nested_offset + array.offset;
339
- }
354
+ auto src_ptr =
355
+ ArrowBufferData<int32_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
340
356
  for (idx_t row = 0; row < size; row++) {
341
357
  tgt_ptr[row].days = 0;
342
358
  tgt_ptr[row].micros = 0;
@@ -344,13 +360,11 @@ static void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowSca
344
360
  }
345
361
  }
346
362
 
347
- static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
348
- int64_t nested_offset, idx_t size) {
363
+ static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state,
364
+ int64_t nested_offset, int64_t parent_offset, idx_t size) {
349
365
  auto tgt_ptr = FlatVector::GetData<interval_t>(vector);
350
- auto src_ptr = ArrowBufferData<ArrowInterval>(array, 1) + scan_state.chunk_offset + array.offset;
351
- if (nested_offset != -1) {
352
- src_ptr = ArrowBufferData<ArrowInterval>(array, 1) + nested_offset + array.offset;
353
- }
366
+ auto src_ptr =
367
+ ArrowBufferData<ArrowInterval>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
354
368
  for (idx_t row = 0; row < size; row++) {
355
369
  tgt_ptr[row].days = src_ptr[row].days;
356
370
  tgt_ptr[row].micros = src_ptr[row].nanoseconds / Interval::NANOS_PER_MICRO;
@@ -358,11 +372,228 @@ static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, A
358
372
  }
359
373
  }
360
374
 
375
+ // Find the index of the first run-end that is strictly greater than the offset.
376
+ // count is returned if no such run-end is found.
377
+ template <class RUN_END_TYPE>
378
+ static idx_t FindRunIndex(const RUN_END_TYPE *run_ends, idx_t count, idx_t offset) {
379
+ // Binary-search within the [0, count) range. For example:
380
+ // [0, 0, 0, 1, 1, 2] encoded as
381
+ // run_ends: [3, 5, 6]:
382
+ // 0, 1, 2 -> 0
383
+ // 3, 4 -> 1
384
+ // 5 -> 2
385
+ // 6, 7 .. -> 3 (3 == count [not found])
386
+ idx_t begin = 0;
387
+ idx_t end = count;
388
+ while (begin < end) {
389
+ idx_t middle = (begin + end) / 2;
390
+ // begin < end implies middle < end
391
+ if (offset >= static_cast<idx_t>(run_ends[middle])) {
392
+ // keep searching in [middle + 1, end)
393
+ begin = middle + 1;
394
+ } else {
395
+ // offset < run_ends[middle], so keep searching in [begin, middle)
396
+ end = middle;
397
+ }
398
+ }
399
+ return begin;
400
+ }
401
+
402
+ template <class RUN_END_TYPE, class VALUE_TYPE>
403
+ static void FlattenRunEnds(Vector &result, ArrowRunEndEncodingState &run_end_encoding, idx_t compressed_size,
404
+ idx_t scan_offset, idx_t count) {
405
+ auto &runs = *run_end_encoding.run_ends;
406
+ auto &values = *run_end_encoding.values;
407
+
408
+ UnifiedVectorFormat run_end_format;
409
+ UnifiedVectorFormat value_format;
410
+ runs.ToUnifiedFormat(compressed_size, run_end_format);
411
+ values.ToUnifiedFormat(compressed_size, value_format);
412
+ auto run_ends_data = run_end_format.GetData<RUN_END_TYPE>(run_end_format);
413
+ auto values_data = value_format.GetData<VALUE_TYPE>(value_format);
414
+ auto result_data = FlatVector::GetData<VALUE_TYPE>(result);
415
+ auto &validity = FlatVector::Validity(result);
416
+
417
+ // According to the arrow spec, the 'run_ends' array is always valid
418
+ // so we will assume this is true and not check the validity map
419
+
420
+ // Now construct the result vector from the run_ends and the values
421
+
422
+ auto run = FindRunIndex(run_ends_data, compressed_size, scan_offset);
423
+ idx_t logical_index = scan_offset;
424
+ idx_t index = 0;
425
+ if (value_format.validity.AllValid()) {
426
+ // None of the compressed values are NULL
427
+ for (; run < compressed_size; run++) {
428
+ auto run_end_index = run_end_format.sel->get_index(run);
429
+ auto value_index = value_format.sel->get_index(run);
430
+ auto &value = values_data[value_index];
431
+ auto run_end = static_cast<idx_t>(run_ends_data[run_end_index]);
432
+
433
+ D_ASSERT(run_end > (logical_index + index));
434
+ auto to_scan = run_end - (logical_index + index);
435
+ // Cap the amount to scan so we don't go over size
436
+ to_scan = MinValue<idx_t>(to_scan, (count - index));
437
+
438
+ for (idx_t i = 0; i < to_scan; i++) {
439
+ result_data[index + i] = value;
440
+ }
441
+ index += to_scan;
442
+ if (index >= count) {
443
+ if (logical_index + index >= run_end) {
444
+ // The last run was completed, forward the run index
445
+ run++;
446
+ }
447
+ break;
448
+ }
449
+ }
450
+ } else {
451
+ for (; run < compressed_size; run++) {
452
+ auto run_end_index = run_end_format.sel->get_index(run);
453
+ auto value_index = value_format.sel->get_index(run);
454
+ auto run_end = static_cast<idx_t>(run_ends_data[run_end_index]);
455
+
456
+ D_ASSERT(run_end > (logical_index + index));
457
+ auto to_scan = run_end - (logical_index + index);
458
+ // Cap the amount to scan so we don't go over size
459
+ to_scan = MinValue<idx_t>(to_scan, (count - index));
460
+
461
+ if (value_format.validity.RowIsValidUnsafe(value_index)) {
462
+ auto &value = values_data[value_index];
463
+ for (idx_t i = 0; i < to_scan; i++) {
464
+ result_data[index + i] = value;
465
+ validity.SetValid(index + i);
466
+ }
467
+ } else {
468
+ for (idx_t i = 0; i < to_scan; i++) {
469
+ validity.SetInvalid(index + i);
470
+ }
471
+ }
472
+ index += to_scan;
473
+ if (index >= count) {
474
+ if (logical_index + index >= run_end) {
475
+ // The last run was completed, forward the run index
476
+ run++;
477
+ }
478
+ break;
479
+ }
480
+ }
481
+ }
482
+ }
483
+
484
+ template <class RUN_END_TYPE>
485
+ static void FlattenRunEndsSwitch(Vector &result, ArrowRunEndEncodingState &run_end_encoding, idx_t compressed_size,
486
+ idx_t scan_offset, idx_t size) {
487
+ auto &values = *run_end_encoding.values;
488
+ auto physical_type = values.GetType().InternalType();
489
+
490
+ switch (physical_type) {
491
+ case PhysicalType::INT8:
492
+ FlattenRunEnds<RUN_END_TYPE, int8_t>(result, run_end_encoding, compressed_size, scan_offset, size);
493
+ break;
494
+ case PhysicalType::INT16:
495
+ FlattenRunEnds<RUN_END_TYPE, int16_t>(result, run_end_encoding, compressed_size, scan_offset, size);
496
+ break;
497
+ case PhysicalType::INT32:
498
+ FlattenRunEnds<RUN_END_TYPE, int32_t>(result, run_end_encoding, compressed_size, scan_offset, size);
499
+ break;
500
+ case PhysicalType::INT64:
501
+ FlattenRunEnds<RUN_END_TYPE, int64_t>(result, run_end_encoding, compressed_size, scan_offset, size);
502
+ break;
503
+ case PhysicalType::INT128:
504
+ FlattenRunEnds<RUN_END_TYPE, hugeint_t>(result, run_end_encoding, compressed_size, scan_offset, size);
505
+ break;
506
+ case PhysicalType::UINT8:
507
+ FlattenRunEnds<RUN_END_TYPE, uint8_t>(result, run_end_encoding, compressed_size, scan_offset, size);
508
+ break;
509
+ case PhysicalType::UINT16:
510
+ FlattenRunEnds<RUN_END_TYPE, uint16_t>(result, run_end_encoding, compressed_size, scan_offset, size);
511
+ break;
512
+ case PhysicalType::UINT32:
513
+ FlattenRunEnds<RUN_END_TYPE, uint32_t>(result, run_end_encoding, compressed_size, scan_offset, size);
514
+ break;
515
+ case PhysicalType::UINT64:
516
+ FlattenRunEnds<RUN_END_TYPE, uint64_t>(result, run_end_encoding, compressed_size, scan_offset, size);
517
+ break;
518
+ case PhysicalType::BOOL:
519
+ FlattenRunEnds<RUN_END_TYPE, bool>(result, run_end_encoding, compressed_size, scan_offset, size);
520
+ break;
521
+ case PhysicalType::FLOAT:
522
+ FlattenRunEnds<RUN_END_TYPE, float>(result, run_end_encoding, compressed_size, scan_offset, size);
523
+ break;
524
+ case PhysicalType::DOUBLE:
525
+ FlattenRunEnds<RUN_END_TYPE, double>(result, run_end_encoding, compressed_size, scan_offset, size);
526
+ break;
527
+ case PhysicalType::INTERVAL:
528
+ FlattenRunEnds<RUN_END_TYPE, interval_t>(result, run_end_encoding, compressed_size, scan_offset, size);
529
+ break;
530
+ case PhysicalType::VARCHAR: {
531
+ // Share the string heap, we don't need to allocate new strings, we just reference the existing ones
532
+ result.SetAuxiliary(values.GetAuxiliary());
533
+ FlattenRunEnds<RUN_END_TYPE, string_t>(result, run_end_encoding, compressed_size, scan_offset, size);
534
+ break;
535
+ }
536
+ default:
537
+ throw NotImplementedException("RunEndEncoded value type '%s' not supported yet", TypeIdToString(physical_type));
538
+ }
539
+ }
540
+
541
+ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state,
542
+ idx_t size, const ArrowType &arrow_type, int64_t nested_offset,
543
+ ValidityMask *parent_mask, uint64_t parent_offset) {
544
+ // Scan the 'run_ends' array
545
+ D_ASSERT(array.n_children == 2);
546
+ auto &run_ends_array = *array.children[0];
547
+ auto &values_array = *array.children[1];
548
+
549
+ auto &run_ends_type = arrow_type[0];
550
+ auto &values_type = arrow_type[1];
551
+ D_ASSERT(vector.GetType() == values_type.GetDuckType());
552
+
553
+ auto &scan_state = array_state.state;
554
+
555
+ D_ASSERT(run_ends_array.length == values_array.length);
556
+ auto compressed_size = run_ends_array.length;
557
+ // Create a vector for the run ends and the values
558
+ auto &run_end_encoding = array_state.RunEndEncoding();
559
+ if (!run_end_encoding.run_ends) {
560
+ // The run ends and values have not been scanned yet for this array
561
+ D_ASSERT(!run_end_encoding.values);
562
+ run_end_encoding.run_ends = make_uniq<Vector>(run_ends_type.GetDuckType(), compressed_size);
563
+ run_end_encoding.values = make_uniq<Vector>(values_type.GetDuckType(), compressed_size);
564
+
565
+ ColumnArrowToDuckDB(*run_end_encoding.run_ends, run_ends_array, array_state, compressed_size, run_ends_type);
566
+ auto &values = *run_end_encoding.values;
567
+ SetValidityMask(values, values_array, scan_state, compressed_size, parent_offset, nested_offset);
568
+ ColumnArrowToDuckDB(values, values_array, array_state, compressed_size, values_type);
569
+ }
570
+
571
+ idx_t scan_offset = GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
572
+ auto physical_type = run_ends_type.GetDuckType().InternalType();
573
+ switch (physical_type) {
574
+ case PhysicalType::INT16:
575
+ FlattenRunEndsSwitch<int16_t>(vector, run_end_encoding, compressed_size, scan_offset, size);
576
+ break;
577
+ case PhysicalType::INT32:
578
+ FlattenRunEndsSwitch<int32_t>(vector, run_end_encoding, compressed_size, scan_offset, size);
579
+ break;
580
+ case PhysicalType::INT64:
581
+ FlattenRunEndsSwitch<int32_t>(vector, run_end_encoding, compressed_size, scan_offset, size);
582
+ break;
583
+ default:
584
+ throw NotImplementedException("Type '%s' not implemented for RunEndEncoding", TypeIdToString(physical_type));
585
+ }
586
+ }
587
+
361
588
  static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
362
589
  const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask,
363
590
  uint64_t parent_offset) {
591
+ if (parent_offset != 0) {
592
+ (void)array_state;
593
+ }
364
594
  auto &scan_state = array_state.state;
365
595
  D_ASSERT(!array.dictionary);
596
+
366
597
  switch (vector.GetType().id()) {
367
598
  case LogicalTypeId::SQLNULL:
368
599
  vector.Reference(Value());
@@ -370,11 +601,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
370
601
  case LogicalTypeId::BOOLEAN: {
371
602
  //! Arrow bit-packs boolean values
372
603
  //! Lets first figure out where we are in the source array
373
- auto src_ptr = ArrowBufferData<uint8_t>(array, 1) + (scan_state.chunk_offset + array.offset) / 8;
374
-
375
- if (nested_offset != -1) {
376
- src_ptr = ArrowBufferData<uint8_t>(array, 1) + (nested_offset + array.offset) / 8;
377
- }
604
+ auto src_ptr = ArrowBufferData<uint8_t>(array, 1) +
605
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset) / 8;
378
606
  auto tgt_ptr = (uint8_t *)FlatVector::GetData(vector);
379
607
  int src_pos = 0;
380
608
  idx_t cur_bit = scan_state.chunk_offset % 8;
@@ -406,6 +634,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
406
634
  case LogicalTypeId::UBIGINT:
407
635
  case LogicalTypeId::BIGINT:
408
636
  case LogicalTypeId::HUGEINT:
637
+ case LogicalTypeId::UHUGEINT:
409
638
  case LogicalTypeId::TIMESTAMP:
410
639
  case LogicalTypeId::TIMESTAMP_SEC:
411
640
  case LogicalTypeId::TIMESTAMP_MS:
@@ -417,16 +646,12 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
417
646
  auto size_type = arrow_type.GetSizeType();
418
647
  auto cdata = ArrowBufferData<char>(array, 2);
419
648
  if (size_type == ArrowVariableSizeType::SUPER_SIZE) {
420
- auto offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + scan_state.chunk_offset;
421
- if (nested_offset != -1) {
422
- offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + nested_offset;
423
- }
649
+ auto offsets = ArrowBufferData<uint64_t>(array, 1) +
650
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
424
651
  SetVectorString(vector, size, cdata, offsets);
425
652
  } else {
426
- auto offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + scan_state.chunk_offset;
427
- if (nested_offset != -1) {
428
- offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + nested_offset;
429
- }
653
+ auto offsets = ArrowBufferData<uint32_t>(array, 1) +
654
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
430
655
  SetVectorString(vector, size, cdata, offsets);
431
656
  }
432
657
  break;
@@ -441,10 +666,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
441
666
  }
442
667
  case ArrowDateTimeType::MILLISECONDS: {
443
668
  //! convert date from milliseconds to days
444
- auto src_ptr = ArrowBufferData<uint64_t>(array, 1) + scan_state.chunk_offset + array.offset;
445
- if (nested_offset != -1) {
446
- src_ptr = ArrowBufferData<uint64_t>(array, 1) + nested_offset + array.offset;
447
- }
669
+ auto src_ptr = ArrowBufferData<uint64_t>(array, 1) +
670
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
448
671
  auto tgt_ptr = FlatVector::GetData<date_t>(vector);
449
672
  for (idx_t row = 0; row < size; row++) {
450
673
  tgt_ptr[row] = date_t(int64_t(src_ptr[row]) / static_cast<int64_t>(1000 * 60 * 60 * 24));
@@ -460,23 +683,21 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
460
683
  auto precision = arrow_type.GetDateTimeType();
461
684
  switch (precision) {
462
685
  case ArrowDateTimeType::SECONDS: {
463
- TimeConversion<int32_t>(vector, array, scan_state, nested_offset, size, 1000000);
686
+ TimeConversion<int32_t>(vector, array, scan_state, nested_offset, parent_offset, size, 1000000);
464
687
  break;
465
688
  }
466
689
  case ArrowDateTimeType::MILLISECONDS: {
467
- TimeConversion<int32_t>(vector, array, scan_state, nested_offset, size, 1000);
690
+ TimeConversion<int32_t>(vector, array, scan_state, nested_offset, parent_offset, size, 1000);
468
691
  break;
469
692
  }
470
693
  case ArrowDateTimeType::MICROSECONDS: {
471
- TimeConversion<int64_t>(vector, array, scan_state, nested_offset, size, 1);
694
+ TimeConversion<int64_t>(vector, array, scan_state, nested_offset, parent_offset, size, 1);
472
695
  break;
473
696
  }
474
697
  case ArrowDateTimeType::NANOSECONDS: {
475
698
  auto tgt_ptr = FlatVector::GetData<dtime_t>(vector);
476
- auto src_ptr = ArrowBufferData<int64_t>(array, 1) + scan_state.chunk_offset + array.offset;
477
- if (nested_offset != -1) {
478
- src_ptr = ArrowBufferData<int64_t>(array, 1) + nested_offset + array.offset;
479
- }
699
+ auto src_ptr = ArrowBufferData<int64_t>(array, 1) +
700
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
480
701
  for (idx_t row = 0; row < size; row++) {
481
702
  tgt_ptr[row].micros = src_ptr[row] / 1000;
482
703
  }
@@ -491,11 +712,11 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
491
712
  auto precision = arrow_type.GetDateTimeType();
492
713
  switch (precision) {
493
714
  case ArrowDateTimeType::SECONDS: {
494
- TimestampTZConversion(vector, array, scan_state, nested_offset, size, 1000000);
715
+ TimestampTZConversion(vector, array, scan_state, nested_offset, parent_offset, size, 1000000);
495
716
  break;
496
717
  }
497
718
  case ArrowDateTimeType::MILLISECONDS: {
498
- TimestampTZConversion(vector, array, scan_state, nested_offset, size, 1000);
719
+ TimestampTZConversion(vector, array, scan_state, nested_offset, parent_offset, size, 1000);
499
720
  break;
500
721
  }
501
722
  case ArrowDateTimeType::MICROSECONDS: {
@@ -504,10 +725,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
504
725
  }
505
726
  case ArrowDateTimeType::NANOSECONDS: {
506
727
  auto tgt_ptr = FlatVector::GetData<timestamp_t>(vector);
507
- auto src_ptr = ArrowBufferData<int64_t>(array, 1) + scan_state.chunk_offset + array.offset;
508
- if (nested_offset != -1) {
509
- src_ptr = ArrowBufferData<int64_t>(array, 1) + nested_offset + array.offset;
510
- }
728
+ auto src_ptr = ArrowBufferData<int64_t>(array, 1) +
729
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
511
730
  for (idx_t row = 0; row < size; row++) {
512
731
  tgt_ptr[row].value = src_ptr[row] / 1000;
513
732
  }
@@ -522,24 +741,22 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
522
741
  auto precision = arrow_type.GetDateTimeType();
523
742
  switch (precision) {
524
743
  case ArrowDateTimeType::SECONDS: {
525
- IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000000);
744
+ IntervalConversionUs(vector, array, scan_state, nested_offset, parent_offset, size, 1000000);
526
745
  break;
527
746
  }
528
747
  case ArrowDateTimeType::DAYS:
529
748
  case ArrowDateTimeType::MILLISECONDS: {
530
- IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000);
749
+ IntervalConversionUs(vector, array, scan_state, nested_offset, parent_offset, size, 1000);
531
750
  break;
532
751
  }
533
752
  case ArrowDateTimeType::MICROSECONDS: {
534
- IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1);
753
+ IntervalConversionUs(vector, array, scan_state, nested_offset, parent_offset, size, 1);
535
754
  break;
536
755
  }
537
756
  case ArrowDateTimeType::NANOSECONDS: {
538
757
  auto tgt_ptr = FlatVector::GetData<interval_t>(vector);
539
- auto src_ptr = ArrowBufferData<int64_t>(array, 1) + scan_state.chunk_offset + array.offset;
540
- if (nested_offset != -1) {
541
- src_ptr = ArrowBufferData<int64_t>(array, 1) + nested_offset + array.offset;
542
- }
758
+ auto src_ptr = ArrowBufferData<int64_t>(array, 1) +
759
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
543
760
  for (idx_t row = 0; row < size; row++) {
544
761
  tgt_ptr[row].micros = src_ptr[row] / 1000;
545
762
  tgt_ptr[row].days = 0;
@@ -548,11 +765,11 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
548
765
  break;
549
766
  }
550
767
  case ArrowDateTimeType::MONTHS: {
551
- IntervalConversionMonths(vector, array, scan_state, nested_offset, size);
768
+ IntervalConversionMonths(vector, array, scan_state, nested_offset, parent_offset, size);
552
769
  break;
553
770
  }
554
771
  case ArrowDateTimeType::MONTH_DAY_NANO: {
555
- IntervalConversionMonthDayNanos(vector, array, scan_state, nested_offset, size);
772
+ IntervalConversionMonthDayNanos(vector, array, scan_state, nested_offset, parent_offset, size);
556
773
  break;
557
774
  }
558
775
  default:
@@ -563,10 +780,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
563
780
  case LogicalTypeId::DECIMAL: {
564
781
  auto val_mask = FlatVector::Validity(vector);
565
782
  //! We have to convert from INT128
566
- auto src_ptr = ArrowBufferData<hugeint_t>(array, 1) + scan_state.chunk_offset + array.offset;
567
- if (nested_offset != -1) {
568
- src_ptr = ArrowBufferData<hugeint_t>(array, 1) + nested_offset + array.offset;
569
- }
783
+ auto src_ptr =
784
+ ArrowBufferData<hugeint_t>(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
570
785
  switch (vector.GetType().InternalType()) {
571
786
  case PhysicalType::INT16: {
572
787
  auto tgt_ptr = FlatVector::GetData<int16_t>(vector);
@@ -602,9 +817,9 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
602
817
  break;
603
818
  }
604
819
  case PhysicalType::INT128: {
605
- FlatVector::SetData(vector,
606
- ArrowBufferData<data_t>(array, 1) + GetTypeIdSize(vector.GetType().InternalType()) *
607
- (scan_state.chunk_offset + array.offset));
820
+ FlatVector::SetData(vector, ArrowBufferData<data_t>(array, 1) +
821
+ GetTypeIdSize(vector.GetType().InternalType()) *
822
+ GetEffectiveOffset(array, parent_offset, scan_state, nested_offset));
608
823
  break;
609
824
  }
610
825
  default:
@@ -614,15 +829,15 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
614
829
  break;
615
830
  }
616
831
  case LogicalTypeId::BLOB: {
617
- ArrowToDuckDBBlob(vector, array, scan_state, size, arrow_type, nested_offset);
832
+ ArrowToDuckDBBlob(vector, array, scan_state, size, arrow_type, nested_offset, parent_offset);
618
833
  break;
619
834
  }
620
835
  case LogicalTypeId::LIST: {
621
- ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask);
836
+ ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask, parent_offset);
622
837
  break;
623
838
  }
624
839
  case LogicalTypeId::MAP: {
625
- ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask);
840
+ ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask, parent_offset);
626
841
  ArrowToDuckDBMapVerify(vector, size);
627
842
  break;
628
843
  }
@@ -636,7 +851,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
636
851
  auto &child_type = arrow_type[child_idx];
637
852
  auto &child_state = array_state.GetChild(child_idx);
638
853
 
639
- SetValidityMask(child_entry, child_array, scan_state, size, nested_offset);
854
+ SetValidityMask(child_entry, child_array, scan_state, size, array.offset, nested_offset);
640
855
  if (!struct_validity_mask.AllValid()) {
641
856
  auto &child_validity_mark = FlatVector::Validity(child_entry);
642
857
  for (idx_t i = 0; i < size; i++) {
@@ -645,13 +860,23 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
645
860
  }
646
861
  }
647
862
  }
648
- if (child_array.dictionary) {
649
- // TODO: add support for offsets
863
+
864
+ auto array_physical_type = GetArrowArrayPhysicalType(child_type);
865
+ switch (array_physical_type) {
866
+ case ArrowArrayPhysicalType::DICTIONARY_ENCODED:
650
867
  ColumnArrowToDuckDBDictionary(child_entry, child_array, child_state, size, child_type, nested_offset,
651
868
  &struct_validity_mask, array.offset);
652
- } else {
869
+ break;
870
+ case ArrowArrayPhysicalType::RUN_END_ENCODED:
871
+ ColumnArrowToDuckDBRunEndEncoded(child_entry, child_array, child_state, size, child_type, nested_offset,
872
+ &struct_validity_mask, array.offset);
873
+ break;
874
+ case ArrowArrayPhysicalType::DEFAULT:
653
875
  ColumnArrowToDuckDB(child_entry, child_array, child_state, size, child_type, nested_offset,
654
876
  &struct_validity_mask, array.offset);
877
+ break;
878
+ default:
879
+ throw NotImplementedException("ArrowArrayPhysicalType not recognized");
655
880
  }
656
881
  }
657
882
  break;
@@ -670,12 +895,21 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
670
895
  auto &child_state = array_state.GetChild(child_idx);
671
896
  auto &child_type = arrow_type[child_idx];
672
897
 
673
- SetValidityMask(child, child_array, scan_state, size, nested_offset);
898
+ SetValidityMask(child, child_array, scan_state, size, parent_offset, nested_offset);
899
+ auto array_physical_type = GetArrowArrayPhysicalType(child_type);
674
900
 
675
- if (child_array.dictionary) {
901
+ switch (array_physical_type) {
902
+ case ArrowArrayPhysicalType::DICTIONARY_ENCODED:
676
903
  ColumnArrowToDuckDBDictionary(child, child_array, child_state, size, child_type);
677
- } else {
904
+ break;
905
+ case ArrowArrayPhysicalType::RUN_END_ENCODED:
906
+ ColumnArrowToDuckDBRunEndEncoded(child, child_array, child_state, size, child_type);
907
+ break;
908
+ case ArrowArrayPhysicalType::DEFAULT:
678
909
  ColumnArrowToDuckDB(child, child_array, child_state, size, child_type, nested_offset, &validity_mask);
910
+ break;
911
+ default:
912
+ throw NotImplementedException("ArrowArrayPhysicalType not recognized");
679
913
  }
680
914
 
681
915
  children.push_back(std::move(child));
@@ -823,26 +1057,64 @@ static void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, Logic
823
1057
  }
824
1058
  }
825
1059
 
1060
+ static bool CanContainNull(ArrowArray &array, ValidityMask *parent_mask) {
1061
+ if (array.null_count > 0) {
1062
+ return true;
1063
+ }
1064
+ if (!parent_mask) {
1065
+ return false;
1066
+ }
1067
+ return !parent_mask->AllValid();
1068
+ }
1069
+
826
1070
  static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state,
827
1071
  idx_t size, const ArrowType &arrow_type, int64_t nested_offset,
828
1072
  ValidityMask *parent_mask, uint64_t parent_offset) {
829
- SelectionVector sel;
830
1073
  auto &scan_state = array_state.state;
1074
+
1075
+ const bool has_nulls = CanContainNull(array, parent_mask);
831
1076
  if (!array_state.HasDictionary()) {
832
1077
  //! We need to set the dictionary data for this column
833
1078
  auto base_vector = make_uniq<Vector>(vector.GetType(), array.dictionary->length);
834
- SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0, array.null_count > 0);
835
- ColumnArrowToDuckDB(*base_vector, *array.dictionary, array_state, array.dictionary->length,
836
- arrow_type.GetDictionary());
1079
+ SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0, 0, has_nulls);
1080
+ auto &dictionary_type = arrow_type.GetDictionary();
1081
+ auto arrow_physical_type = GetArrowArrayPhysicalType(dictionary_type);
1082
+ switch (arrow_physical_type) {
1083
+ case ArrowArrayPhysicalType::DICTIONARY_ENCODED:
1084
+ ColumnArrowToDuckDBDictionary(*base_vector, *array.dictionary, array_state, array.dictionary->length,
1085
+ dictionary_type);
1086
+ break;
1087
+ case ArrowArrayPhysicalType::RUN_END_ENCODED:
1088
+ ColumnArrowToDuckDBRunEndEncoded(*base_vector, *array.dictionary, array_state, array.dictionary->length,
1089
+ dictionary_type);
1090
+ break;
1091
+ case ArrowArrayPhysicalType::DEFAULT:
1092
+ ColumnArrowToDuckDB(*base_vector, *array.dictionary, array_state, array.dictionary->length,
1093
+ dictionary_type);
1094
+ break;
1095
+ default:
1096
+ throw NotImplementedException("ArrowArrayPhysicalType not recognized");
1097
+ };
837
1098
  array_state.AddDictionary(std::move(base_vector));
838
1099
  }
839
1100
  auto offset_type = arrow_type.GetDuckType();
840
1101
  //! Get Pointer to Indices of Dictionary
841
- auto indices = ArrowBufferData<data_t>(array, 1) +
842
- GetTypeIdSize(offset_type.InternalType()) * (scan_state.chunk_offset + array.offset);
843
- if (array.null_count > 0) {
1102
+ auto indices =
1103
+ ArrowBufferData<data_t>(array, 1) +
1104
+ GetTypeIdSize(offset_type.InternalType()) * GetEffectiveOffset(array, parent_offset, scan_state, nested_offset);
1105
+
1106
+ SelectionVector sel;
1107
+ if (has_nulls) {
844
1108
  ValidityMask indices_validity;
845
- GetValidityMask(indices_validity, array, scan_state, size);
1109
+ GetValidityMask(indices_validity, array, scan_state, size, parent_offset);
1110
+ if (parent_mask && !parent_mask->AllValid()) {
1111
+ auto &struct_validity_mask = *parent_mask;
1112
+ for (idx_t i = 0; i < size; i++) {
1113
+ if (!struct_validity_mask.RowIsValid(i)) {
1114
+ indices_validity.SetInvalid(i);
1115
+ }
1116
+ }
1117
+ }
846
1118
  SetSelectionVector(sel, indices, offset_type, size, &indices_validity, array.dictionary->length);
847
1119
  } else {
848
1120
  SetSelectionVector(sel, indices, offset_type, size);
@@ -864,6 +1136,7 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
864
1136
  continue;
865
1137
  }
866
1138
 
1139
+ auto &parent_array = scan_state.chunk->arrow_array;
867
1140
  auto &array = *scan_state.chunk->arrow_array.children[arrow_array_idx];
868
1141
  if (!array.release) {
869
1142
  throw InvalidInputException("arrow_scan: released array passed");
@@ -885,11 +1158,21 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
885
1158
  auto &arrow_type = *arrow_convert_data.at(col_idx);
886
1159
  auto &array_state = scan_state.GetState(col_idx);
887
1160
 
888
- if (array.dictionary) {
1161
+ auto array_physical_type = GetArrowArrayPhysicalType(arrow_type);
1162
+
1163
+ switch (array_physical_type) {
1164
+ case ArrowArrayPhysicalType::DICTIONARY_ENCODED:
889
1165
  ColumnArrowToDuckDBDictionary(output.data[idx], array, array_state, output.size(), arrow_type);
890
- } else {
891
- SetValidityMask(output.data[idx], array, scan_state, output.size(), -1);
1166
+ break;
1167
+ case ArrowArrayPhysicalType::RUN_END_ENCODED:
1168
+ ColumnArrowToDuckDBRunEndEncoded(output.data[idx], array, array_state, output.size(), arrow_type);
1169
+ break;
1170
+ case ArrowArrayPhysicalType::DEFAULT:
1171
+ SetValidityMask(output.data[idx], array, scan_state, output.size(), parent_array.offset, -1);
892
1172
  ColumnArrowToDuckDB(output.data[idx], array, array_state, output.size(), arrow_type);
1173
+ break;
1174
+ default:
1175
+ throw NotImplementedException("ArrowArrayPhysicalType not recognized");
893
1176
  }
894
1177
  }
895
1178
  }