duckdb 1.0.1-dev21.0 → 1.0.1-dev27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1390) hide show
  1. package/.github/workflows/HighPriorityIssues.yml +2 -2
  2. package/.github/workflows/NodeJS.yml +1 -1
  3. package/binding.gyp +41 -0
  4. package/package.json +1 -1
  5. package/src/duckdb/extension/icu/icu-dateadd.cpp +4 -2
  6. package/src/duckdb/extension/icu/icu-datefunc.cpp +6 -2
  7. package/src/duckdb/extension/icu/icu-datesub.cpp +13 -2
  8. package/src/duckdb/extension/icu/icu-strptime.cpp +6 -6
  9. package/src/duckdb/extension/icu/icu-table-range.cpp +92 -73
  10. package/src/duckdb/extension/icu/icu-timebucket.cpp +12 -2
  11. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -3
  12. package/src/duckdb/extension/icu/icu_extension.cpp +61 -9
  13. package/src/duckdb/extension/json/include/json_executors.hpp +20 -23
  14. package/src/duckdb/extension/json/include/json_functions.hpp +4 -0
  15. package/src/duckdb/extension/json/include/json_scan.hpp +6 -2
  16. package/src/duckdb/extension/json/include/json_structure.hpp +12 -9
  17. package/src/duckdb/extension/json/json_common.cpp +66 -10
  18. package/src/duckdb/extension/json/json_extension.cpp +13 -5
  19. package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
  20. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -4
  21. package/src/duckdb/extension/json/json_functions/json_exists.cpp +32 -0
  22. package/src/duckdb/extension/json/json_functions/json_extract.cpp +2 -2
  23. package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
  24. package/src/duckdb/extension/json/json_functions/json_pretty.cpp +32 -0
  25. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +5 -1
  26. package/src/duckdb/extension/json/json_functions/json_structure.cpp +305 -94
  27. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  28. package/src/duckdb/extension/json/json_functions/json_type.cpp +3 -3
  29. package/src/duckdb/extension/json/json_functions/json_value.cpp +42 -0
  30. package/src/duckdb/extension/json/json_functions/read_json.cpp +16 -2
  31. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -2
  32. package/src/duckdb/extension/json/json_functions.cpp +5 -1
  33. package/src/duckdb/extension/json/json_scan.cpp +13 -12
  34. package/src/duckdb/extension/json/serialize_json.cpp +5 -3
  35. package/src/duckdb/extension/parquet/column_reader.cpp +206 -43
  36. package/src/duckdb/extension/parquet/column_writer.cpp +133 -62
  37. package/src/duckdb/extension/parquet/geo_parquet.cpp +391 -0
  38. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +16 -5
  39. package/src/duckdb/extension/parquet/include/column_reader.hpp +37 -12
  40. package/src/duckdb/extension/parquet/include/column_writer.hpp +10 -11
  41. package/src/duckdb/extension/parquet/include/expression_column_reader.hpp +52 -0
  42. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +139 -0
  43. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +13 -8
  44. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -0
  45. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +7 -3
  46. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +55 -8
  47. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +3 -3
  48. package/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp +1 -1
  49. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +8 -0
  50. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -7
  51. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +33 -11
  52. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +5 -2
  53. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +48 -14
  54. package/src/duckdb/extension/parquet/parquet_crypto.cpp +109 -61
  55. package/src/duckdb/extension/parquet/parquet_extension.cpp +305 -72
  56. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -4
  57. package/src/duckdb/extension/parquet/parquet_reader.cpp +151 -40
  58. package/src/duckdb/extension/parquet/parquet_statistics.cpp +50 -16
  59. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +42 -1
  60. package/src/duckdb/extension/parquet/parquet_writer.cpp +67 -75
  61. package/src/duckdb/extension/parquet/serialize_parquet.cpp +3 -1
  62. package/src/duckdb/extension/parquet/zstd_file_system.cpp +5 -1
  63. package/src/duckdb/src/catalog/catalog.cpp +14 -16
  64. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +14 -11
  65. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +39 -19
  66. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +92 -78
  67. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +10 -2
  68. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +10 -3
  69. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +3 -3
  70. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +7 -7
  71. package/src/duckdb/src/catalog/catalog_entry.cpp +6 -3
  72. package/src/duckdb/src/catalog/catalog_set.cpp +14 -19
  73. package/src/duckdb/src/catalog/default/default_functions.cpp +179 -166
  74. package/src/duckdb/src/catalog/default/default_generator.cpp +24 -0
  75. package/src/duckdb/src/catalog/default/default_schemas.cpp +4 -3
  76. package/src/duckdb/src/catalog/default/default_table_functions.cpp +148 -0
  77. package/src/duckdb/src/catalog/default/default_views.cpp +7 -3
  78. package/src/duckdb/src/catalog/duck_catalog.cpp +7 -1
  79. package/src/duckdb/src/common/adbc/adbc.cpp +120 -58
  80. package/src/duckdb/src/common/allocator.cpp +71 -6
  81. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +8 -7
  82. package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +1 -1
  83. package/src/duckdb/src/common/arrow/appender/union_data.cpp +4 -5
  84. package/src/duckdb/src/common/arrow/arrow_appender.cpp +55 -21
  85. package/src/duckdb/src/common/arrow/arrow_converter.cpp +85 -10
  86. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +142 -0
  87. package/src/duckdb/src/common/arrow/arrow_query_result.cpp +56 -0
  88. package/src/duckdb/src/common/arrow/physical_arrow_batch_collector.cpp +37 -0
  89. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +128 -0
  90. package/src/duckdb/src/common/arrow/schema_metadata.cpp +101 -0
  91. package/src/duckdb/src/common/cgroups.cpp +189 -0
  92. package/src/duckdb/src/common/compressed_file_system.cpp +6 -3
  93. package/src/duckdb/src/common/encryption_state.cpp +38 -0
  94. package/src/duckdb/src/common/enum_util.cpp +682 -14
  95. package/src/duckdb/src/common/enums/file_compression_type.cpp +24 -0
  96. package/src/duckdb/src/common/enums/metric_type.cpp +208 -0
  97. package/src/duckdb/src/common/enums/optimizer_type.cpp +8 -2
  98. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  99. package/src/duckdb/src/common/enums/relation_type.cpp +4 -0
  100. package/src/duckdb/src/common/enums/statement_type.cpp +15 -0
  101. package/src/duckdb/src/common/error_data.cpp +22 -20
  102. package/src/duckdb/src/common/exception/binder_exception.cpp +5 -0
  103. package/src/duckdb/src/common/exception.cpp +11 -1
  104. package/src/duckdb/src/common/extra_type_info.cpp +3 -0
  105. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  106. package/src/duckdb/src/common/file_system.cpp +25 -3
  107. package/src/duckdb/src/common/filename_pattern.cpp +1 -0
  108. package/src/duckdb/src/common/fsst.cpp +15 -14
  109. package/src/duckdb/src/common/gzip_file_system.cpp +3 -1
  110. package/src/duckdb/src/common/hive_partitioning.cpp +103 -43
  111. package/src/duckdb/src/common/http_util.cpp +25 -0
  112. package/src/duckdb/src/common/local_file_system.cpp +48 -27
  113. package/src/duckdb/src/common/multi_file_list.cpp +113 -22
  114. package/src/duckdb/src/common/multi_file_reader.cpp +59 -58
  115. package/src/duckdb/src/common/operator/cast_operators.cpp +133 -34
  116. package/src/duckdb/src/common/operator/string_cast.cpp +42 -11
  117. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +2 -2
  118. package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +1 -1
  119. package/src/duckdb/src/common/radix_partitioning.cpp +31 -21
  120. package/src/duckdb/src/common/random_engine.cpp +4 -0
  121. package/src/duckdb/src/common/re2_regex.cpp +47 -12
  122. package/src/duckdb/src/common/render_tree.cpp +243 -0
  123. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +1 -1
  124. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  125. package/src/duckdb/src/common/row_operations/row_matcher.cpp +58 -5
  126. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +79 -43
  127. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  128. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +6 -4
  129. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +18 -9
  130. package/src/duckdb/src/common/serializer/memory_stream.cpp +1 -0
  131. package/src/duckdb/src/common/sort/partition_state.cpp +33 -18
  132. package/src/duckdb/src/common/sort/radix_sort.cpp +22 -15
  133. package/src/duckdb/src/common/sort/sort_state.cpp +19 -16
  134. package/src/duckdb/src/common/sort/sorted_block.cpp +11 -10
  135. package/src/duckdb/src/common/string_util.cpp +167 -10
  136. package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +108 -0
  137. package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +267 -0
  138. package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +116 -0
  139. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +482 -0
  140. package/src/duckdb/src/common/tree_renderer/tree_renderer.cpp +12 -0
  141. package/src/duckdb/src/common/tree_renderer.cpp +16 -508
  142. package/src/duckdb/src/common/types/batched_data_collection.cpp +78 -9
  143. package/src/duckdb/src/common/types/bit.cpp +24 -22
  144. package/src/duckdb/src/common/types/blob.cpp +15 -11
  145. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -9
  146. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  147. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +3 -4
  148. package/src/duckdb/src/common/types/column/column_data_consumer.cpp +2 -2
  149. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +70 -21
  150. package/src/duckdb/src/common/types/data_chunk.cpp +10 -1
  151. package/src/duckdb/src/common/types/date.cpp +8 -19
  152. package/src/duckdb/src/common/types/decimal.cpp +3 -2
  153. package/src/duckdb/src/common/types/hugeint.cpp +11 -3
  154. package/src/duckdb/src/common/types/hyperloglog.cpp +212 -227
  155. package/src/duckdb/src/common/types/interval.cpp +1 -1
  156. package/src/duckdb/src/common/types/list_segment.cpp +83 -49
  157. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +22 -83
  158. package/src/duckdb/src/common/types/row/row_data_collection.cpp +2 -2
  159. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +20 -4
  160. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +28 -7
  161. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +29 -14
  162. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +152 -102
  163. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +4 -1
  164. package/src/duckdb/src/common/types/selection_vector.cpp +17 -1
  165. package/src/duckdb/src/common/types/time.cpp +62 -31
  166. package/src/duckdb/src/common/types/timestamp.cpp +70 -12
  167. package/src/duckdb/src/common/types/uuid.cpp +1 -1
  168. package/src/duckdb/src/common/types/validity_mask.cpp +40 -5
  169. package/src/duckdb/src/common/types/value.cpp +50 -8
  170. package/src/duckdb/src/common/types/varint.cpp +295 -0
  171. package/src/duckdb/src/common/types/vector.cpp +165 -54
  172. package/src/duckdb/src/common/types/vector_buffer.cpp +5 -4
  173. package/src/duckdb/src/common/types.cpp +106 -26
  174. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +13 -25
  175. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +6 -0
  176. package/src/duckdb/src/common/virtual_file_system.cpp +3 -3
  177. package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +35 -82
  178. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +283 -46
  179. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +4 -4
  180. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +3 -2
  181. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +226 -338
  182. package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +2 -0
  183. package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +388 -0
  184. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +63 -21
  185. package/src/duckdb/src/core_functions/aggregate/holistic/mad.cpp +330 -0
  186. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +136 -97
  187. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +601 -1485
  188. package/src/duckdb/src/core_functions/aggregate/nested/binned_histogram.cpp +405 -0
  189. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +136 -165
  190. package/src/duckdb/src/core_functions/function_list.cpp +35 -8
  191. package/src/duckdb/src/core_functions/lambda_functions.cpp +5 -7
  192. package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +172 -198
  193. package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +341 -54
  194. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -2
  195. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +89 -29
  196. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +1 -1
  197. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +2 -2
  198. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +133 -71
  199. package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +1 -1
  200. package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +1 -1
  201. package/src/duckdb/src/core_functions/scalar/generic/can_implicitly_cast.cpp +40 -0
  202. package/src/duckdb/src/core_functions/scalar/generic/error.cpp +1 -1
  203. package/src/duckdb/src/core_functions/scalar/generic/least.cpp +161 -58
  204. package/src/duckdb/src/core_functions/scalar/generic/typeof.cpp +13 -0
  205. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
  206. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +59 -75
  207. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +93 -40
  208. package/src/duckdb/src/core_functions/scalar/list/list_has_any_or_all.cpp +227 -0
  209. package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +20 -19
  210. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +0 -2
  211. package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +106 -8
  212. package/src/duckdb/src/core_functions/scalar/map/map_contains.cpp +56 -0
  213. package/src/duckdb/src/core_functions/scalar/map/map_extract.cpp +73 -118
  214. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +98 -2
  215. package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +1 -2
  216. package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +1 -1
  217. package/src/duckdb/src/core_functions/scalar/string/bar.cpp +1 -1
  218. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +5 -1
  219. package/src/duckdb/src/core_functions/scalar/string/md5.cpp +10 -37
  220. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +18 -2
  221. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +45 -0
  222. package/src/duckdb/src/core_functions/scalar/string/reverse.cpp +4 -5
  223. package/src/duckdb/src/core_functions/scalar/string/sha1.cpp +35 -0
  224. package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +5 -2
  225. package/src/duckdb/src/core_functions/scalar/string/url_encode.cpp +49 -0
  226. package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -2
  227. package/src/duckdb/src/core_functions/scalar/union/union_extract.cpp +4 -2
  228. package/src/duckdb/src/execution/adaptive_filter.cpp +30 -11
  229. package/src/duckdb/src/execution/aggregate_hashtable.cpp +13 -18
  230. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +4 -9
  231. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  232. package/src/duckdb/src/execution/index/art/art.cpp +683 -670
  233. package/src/duckdb/src/execution/index/art/art_key.cpp +121 -38
  234. package/src/duckdb/src/execution/index/art/base_leaf.cpp +168 -0
  235. package/src/duckdb/src/execution/index/art/base_node.cpp +163 -0
  236. package/src/duckdb/src/execution/index/art/iterator.cpp +148 -77
  237. package/src/duckdb/src/execution/index/art/leaf.cpp +159 -263
  238. package/src/duckdb/src/execution/index/art/node.cpp +493 -247
  239. package/src/duckdb/src/execution/index/art/node256.cpp +31 -91
  240. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +71 -0
  241. package/src/duckdb/src/execution/index/art/node48.cpp +75 -143
  242. package/src/duckdb/src/execution/index/art/prefix.cpp +424 -244
  243. package/src/duckdb/src/execution/index/bound_index.cpp +7 -1
  244. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +22 -18
  245. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +22 -73
  246. package/src/duckdb/src/execution/join_hashtable.cpp +637 -179
  247. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +4 -4
  248. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +15 -10
  249. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +13 -8
  250. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +525 -132
  251. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +147 -138
  252. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +531 -312
  253. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
  254. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -3
  255. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +9 -2
  256. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +13 -17
  257. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +60 -16
  258. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +105 -0
  259. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +24 -24
  260. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +25 -2
  261. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +275 -112
  262. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +106 -11
  263. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +253 -115
  264. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +93 -52
  265. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +116 -76
  266. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +29 -14
  267. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
  268. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +70 -26
  269. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +81 -60
  270. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +88 -50
  271. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +161 -51
  272. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +59 -17
  273. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +5 -5
  274. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +0 -21
  275. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +109 -0
  276. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +5 -13
  277. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +1 -1
  278. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +12 -4
  279. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +0 -16
  280. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +4 -2
  281. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +5 -0
  282. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +3 -1
  283. package/src/duckdb/src/execution/operator/helper/physical_set_variable.cpp +39 -0
  284. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +4 -2
  285. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +16 -5
  286. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  287. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  288. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  289. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -4
  290. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +59 -21
  291. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +7 -4
  292. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +333 -176
  293. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +57 -34
  294. package/src/duckdb/src/execution/operator/join/physical_join.cpp +16 -8
  295. package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +10 -4
  296. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +2 -5
  297. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +3 -3
  298. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +5 -5
  299. package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +7 -2
  300. package/src/duckdb/src/execution/operator/order/physical_order.cpp +17 -12
  301. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +12 -9
  302. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +35 -17
  303. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +17 -11
  304. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +5 -1
  305. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +156 -47
  306. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -2
  307. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +1 -3
  308. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -2
  309. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +13 -6
  310. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +22 -3
  311. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +19 -3
  312. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +37 -22
  313. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +77 -21
  314. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +27 -55
  315. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +41 -44
  316. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +4 -6
  317. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +4 -6
  318. package/src/duckdb/src/execution/operator/set/physical_union.cpp +18 -4
  319. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +3 -2
  320. package/src/duckdb/src/execution/physical_operator.cpp +45 -4
  321. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -7
  322. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +8 -3
  323. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +13 -6
  324. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +3 -3
  325. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +111 -19
  326. package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +19 -2
  327. package/src/duckdb/src/execution/physical_plan/plan_set.cpp +9 -0
  328. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +3 -1
  329. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -3
  330. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -49
  331. package/src/duckdb/src/execution/reservoir_sample.cpp +2 -2
  332. package/src/duckdb/src/execution/window_executor.cpp +556 -318
  333. package/src/duckdb/src/execution/window_segment_tree.cpp +1058 -485
  334. package/src/duckdb/src/function/aggregate/distributive/count.cpp +5 -5
  335. package/src/duckdb/src/function/aggregate/distributive/first.cpp +92 -95
  336. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +10 -9
  337. package/src/duckdb/src/function/aggregate_function.cpp +8 -0
  338. package/src/duckdb/src/function/cast/cast_function_set.cpp +10 -1
  339. package/src/duckdb/src/function/cast/decimal_cast.cpp +10 -1
  340. package/src/duckdb/src/function/cast/default_casts.cpp +2 -0
  341. package/src/duckdb/src/function/cast/numeric_casts.cpp +3 -0
  342. package/src/duckdb/src/function/cast/string_cast.cpp +8 -5
  343. package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
  344. package/src/duckdb/src/function/cast/union_casts.cpp +1 -1
  345. package/src/duckdb/src/function/cast/varint_casts.cpp +283 -0
  346. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -1
  347. package/src/duckdb/src/function/cast_rules.cpp +104 -15
  348. package/src/duckdb/src/function/compression_config.cpp +35 -33
  349. package/src/duckdb/src/function/copy_function.cpp +27 -0
  350. package/src/duckdb/src/function/function_binder.cpp +39 -11
  351. package/src/duckdb/src/function/macro_function.cpp +75 -32
  352. package/src/duckdb/src/function/pragma/pragma_queries.cpp +10 -0
  353. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -0
  354. package/src/duckdb/src/function/scalar/generic/binning.cpp +507 -0
  355. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +58 -0
  356. package/src/duckdb/src/function/scalar/generic_functions.cpp +1 -0
  357. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +33 -47
  358. package/src/duckdb/src/function/scalar/list/list_extract.cpp +70 -143
  359. package/src/duckdb/src/function/scalar/list/list_resize.cpp +93 -84
  360. package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -0
  361. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +24 -11
  362. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +4 -4
  363. package/src/duckdb/src/function/scalar/strftime_format.cpp +196 -57
  364. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +9 -7
  365. package/src/duckdb/src/function/scalar/string/concat.cpp +239 -123
  366. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +149 -0
  367. package/src/duckdb/src/function/scalar/string/contains.cpp +18 -7
  368. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  369. package/src/duckdb/src/function/scalar/string/substring.cpp +6 -11
  370. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  371. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +7 -3
  372. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  373. package/src/duckdb/src/function/scalar_function.cpp +5 -2
  374. package/src/duckdb/src/function/scalar_macro_function.cpp +2 -2
  375. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +20 -39
  376. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +135 -0
  377. package/src/duckdb/src/function/table/arrow.cpp +194 -52
  378. package/src/duckdb/src/function/table/arrow_conversion.cpp +212 -69
  379. package/src/duckdb/src/function/table/copy_csv.cpp +43 -14
  380. package/src/duckdb/src/function/table/query_function.cpp +80 -0
  381. package/src/duckdb/src/function/table/range.cpp +222 -142
  382. package/src/duckdb/src/function/table/read_csv.cpp +25 -13
  383. package/src/duckdb/src/function/table/sniff_csv.cpp +55 -35
  384. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +141 -129
  385. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +25 -14
  386. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +20 -14
  387. package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +15 -1
  388. package/src/duckdb/src/function/table/system/duckdb_variables.cpp +84 -0
  389. package/src/duckdb/src/function/table/system/test_all_types.cpp +1 -0
  390. package/src/duckdb/src/function/table/system/test_vector_types.cpp +33 -3
  391. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  392. package/src/duckdb/src/function/table/table_scan.cpp +45 -22
  393. package/src/duckdb/src/function/table/unnest.cpp +2 -2
  394. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
  395. package/src/duckdb/src/function/table_function.cpp +5 -4
  396. package/src/duckdb/src/function/table_macro_function.cpp +2 -2
  397. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +8 -4
  398. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +5 -2
  399. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -0
  400. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +2 -2
  401. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -4
  402. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +5 -5
  403. package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +2 -1
  404. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +4 -5
  405. package/src/duckdb/src/include/duckdb/catalog/default/default_generator.hpp +4 -5
  406. package/src/duckdb/src/include/duckdb/catalog/default/default_schemas.hpp +2 -1
  407. package/src/duckdb/src/include/duckdb/catalog/default/default_table_functions.hpp +47 -0
  408. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -0
  409. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +2 -2
  410. package/src/duckdb/src/include/duckdb/common/allocator.hpp +9 -1
  411. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +120 -0
  412. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +37 -11
  413. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +9 -8
  414. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
  415. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +6 -4
  416. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +92 -0
  417. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +2 -2
  418. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +26 -4
  419. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +90 -11
  420. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +6 -6
  421. package/src/duckdb/src/include/duckdb/common/arrow/arrow_buffer.hpp +8 -1
  422. package/src/duckdb/src/include/duckdb/common/arrow/arrow_merge_event.hpp +62 -0
  423. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +52 -0
  424. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +42 -0
  425. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_batch_collector.hpp +30 -0
  426. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_collector.hpp +65 -0
  427. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +43 -0
  428. package/src/duckdb/src/include/duckdb/common/bswap.hpp +18 -16
  429. package/src/duckdb/src/include/duckdb/common/cgroups.hpp +30 -0
  430. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -0
  431. package/src/duckdb/src/include/duckdb/common/dl.hpp +8 -1
  432. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +48 -0
  433. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +88 -0
  434. package/src/duckdb/src/include/duckdb/common/enums/checkpoint_type.hpp +2 -2
  435. package/src/duckdb/src/include/duckdb/common/enums/copy_overwrite_mode.hpp +6 -1
  436. package/src/duckdb/src/include/duckdb/common/enums/destroy_buffer_upon.hpp +21 -0
  437. package/src/duckdb/src/include/duckdb/common/enums/explain_format.hpp +17 -0
  438. package/src/duckdb/src/include/duckdb/common/enums/file_compression_type.hpp +4 -0
  439. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +2 -2
  440. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +88 -0
  441. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +6 -1
  442. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +2 -1
  443. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  444. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  445. package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +3 -1
  446. package/src/duckdb/src/include/duckdb/common/enums/set_scope.hpp +2 -1
  447. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +23 -2
  448. package/src/duckdb/src/include/duckdb/common/enums/stream_execution_result.hpp +25 -0
  449. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  450. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
  451. package/src/duckdb/src/include/duckdb/common/error_data.hpp +5 -2
  452. package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +1 -0
  453. package/src/duckdb/src/include/duckdb/common/exception.hpp +20 -2
  454. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +12 -0
  455. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -0
  456. package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +16 -0
  457. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +18 -0
  458. package/src/duckdb/src/include/duckdb/common/file_system.hpp +3 -0
  459. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -0
  460. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +160 -96
  461. package/src/duckdb/src/include/duckdb/common/fsst.hpp +9 -2
  462. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -8
  463. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +16 -7
  464. package/src/duckdb/src/include/duckdb/common/http_util.hpp +19 -0
  465. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +19 -6
  466. package/src/duckdb/src/include/duckdb/common/limits.hpp +9 -2
  467. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +38 -6
  468. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +9 -2
  469. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +5 -1
  470. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +82 -50
  471. package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +11 -0
  472. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +7 -3
  473. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +23 -1
  474. package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +2 -1
  475. package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +1 -1
  476. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +4 -0
  477. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
  478. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +10 -5
  479. package/src/duckdb/src/include/duckdb/common/optionally_owned_ptr.hpp +1 -0
  480. package/src/duckdb/src/include/duckdb/common/owning_string_map.hpp +155 -0
  481. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -3
  482. package/src/duckdb/src/include/duckdb/common/platform.hpp +58 -0
  483. package/src/duckdb/src/include/duckdb/common/radix.hpp +172 -27
  484. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +5 -1
  485. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -0
  486. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  487. package/src/duckdb/src/include/duckdb/common/render_tree.hpp +77 -0
  488. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +12 -0
  489. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +6 -2
  490. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +5 -3
  491. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +15 -7
  492. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +3 -1
  493. package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +245 -0
  494. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +10 -0
  495. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +10 -11
  496. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -6
  497. package/src/duckdb/src/include/duckdb/common/string_util.hpp +37 -7
  498. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +44 -0
  499. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +44 -0
  500. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +44 -0
  501. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +119 -0
  502. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +9 -123
  503. package/src/duckdb/src/include/duckdb/common/type_visitor.hpp +96 -0
  504. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +11 -1
  505. package/src/duckdb/src/include/duckdb/common/types/arrow_string_view_type.hpp +84 -0
  506. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +36 -1
  507. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
  508. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +2 -2
  509. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +4 -2
  510. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +52 -0
  511. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -0
  512. package/src/duckdb/src/include/duckdb/common/types/date.hpp +0 -3
  513. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +65 -0
  514. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -2
  515. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +49 -40
  516. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +5 -1
  517. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +2 -1
  518. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  519. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +4 -3
  520. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +3 -1
  521. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  522. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  523. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +1 -1
  524. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +4 -0
  525. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +4 -1
  526. package/src/duckdb/src/include/duckdb/common/types/time.hpp +11 -6
  527. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +13 -3
  528. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +103 -12
  529. package/src/duckdb/src/include/duckdb/common/types/value.hpp +12 -3
  530. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +107 -0
  531. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +5 -1
  532. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +7 -2
  533. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -39
  534. package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +42 -10
  535. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +29 -0
  536. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +0 -7
  537. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +2 -0
  538. package/src/duckdb/src/include/duckdb/common/winapi.hpp +8 -0
  539. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +8 -4
  540. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/stddev.hpp +8 -4
  541. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  542. package/src/duckdb/src/include/duckdb/core_functions/aggregate/histogram_helpers.hpp +99 -0
  543. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +16 -7
  544. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +396 -0
  545. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +10 -0
  546. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_helpers.hpp +65 -0
  547. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp +349 -0
  548. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_state.hpp +300 -0
  549. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +1 -1
  550. package/src/duckdb/src/include/duckdb/core_functions/aggregate/sort_key_helpers.hpp +55 -0
  551. package/src/duckdb/src/include/duckdb/core_functions/array_kernels.hpp +107 -0
  552. package/src/duckdb/src/include/duckdb/core_functions/create_sort_key.hpp +55 -0
  553. package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +1 -2
  554. package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +24 -0
  555. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +9 -0
  556. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +27 -0
  557. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +80 -8
  558. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +9 -0
  559. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +54 -0
  560. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +30 -21
  561. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +25 -14
  562. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -48
  563. package/src/duckdb/src/include/duckdb/execution/executor.hpp +25 -2
  564. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +102 -0
  565. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +94 -101
  566. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +43 -25
  567. package/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp +109 -0
  568. package/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp +140 -0
  569. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +43 -24
  570. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +41 -52
  571. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +133 -74
  572. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +46 -29
  573. package/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp +53 -0
  574. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +52 -35
  575. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +96 -57
  576. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +9 -4
  577. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +48 -10
  578. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +0 -2
  579. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +4 -2
  580. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +114 -36
  581. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +158 -67
  582. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +1 -1
  583. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  584. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  585. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +19 -2
  586. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -1
  587. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +1 -1
  588. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +75 -0
  589. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +81 -23
  590. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +27 -8
  591. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +2 -1
  592. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +31 -22
  593. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +4 -2
  594. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +48 -5
  595. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +7 -3
  596. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +22 -12
  597. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +35 -0
  598. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +81 -39
  599. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +2 -1
  600. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +18 -1
  601. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +9 -7
  602. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +5 -4
  603. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/header_value.hpp +26 -0
  604. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +6 -9
  605. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +3 -0
  606. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -3
  607. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +36 -19
  608. package/src/duckdb/src/include/duckdb/execution/operator/filter/physical_filter.hpp +1 -1
  609. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +21 -0
  610. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +53 -0
  611. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +3 -0
  612. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +6 -2
  613. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +18 -0
  614. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  615. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +6 -0
  616. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +2 -2
  617. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set_variable.hpp +43 -0
  618. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +1 -1
  619. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +59 -0
  620. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  621. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_comparison_join.hpp +8 -1
  622. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -2
  623. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +4 -2
  624. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -0
  625. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +1 -1
  626. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +3 -1
  627. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +4 -1
  628. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +3 -1
  629. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +1 -1
  630. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +1 -1
  631. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +5 -37
  632. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +5 -4
  633. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -2
  634. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +1 -1
  635. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -0
  636. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +9 -3
  637. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +8 -6
  638. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +2 -2
  639. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +1 -1
  640. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  641. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +21 -6
  642. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +3 -2
  643. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  644. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +137 -110
  645. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +57 -126
  646. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +21 -4
  647. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -1
  648. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -10
  649. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +37 -7
  650. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +24 -11
  651. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +4 -4
  652. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +41 -1
  653. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +15 -5
  654. package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +1 -0
  655. package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -4
  656. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +6 -0
  657. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +77 -109
  658. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
  659. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +6 -3
  660. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +25 -12
  661. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +9 -8
  662. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +38 -4
  663. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +1 -1
  664. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -57
  665. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +142 -0
  666. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_datetime_type.hpp +18 -0
  667. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +7 -0
  668. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp +10 -0
  669. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  670. package/src/duckdb/src/include/duckdb/function/table/range.hpp +4 -0
  671. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -1
  672. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  673. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +5 -5
  674. package/src/duckdb/src/include/duckdb/function/table_function.hpp +14 -2
  675. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +1 -1
  676. package/src/duckdb/src/include/duckdb/main/appender.hpp +14 -4
  677. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +25 -7
  678. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +79 -0
  679. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +10 -20
  680. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +11 -12
  681. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  682. package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +1 -1
  683. package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +2 -2
  684. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +809 -0
  685. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/batched_data_collection.hpp +35 -0
  686. package/src/duckdb/src/include/duckdb/main/client_config.hpp +68 -2
  687. package/src/duckdb/src/include/duckdb/main/client_context.hpp +30 -22
  688. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +79 -1
  689. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +9 -3
  690. package/src/duckdb/src/include/duckdb/main/config.hpp +55 -7
  691. package/src/duckdb/src/include/duckdb/main/connection.hpp +5 -1
  692. package/src/duckdb/src/include/duckdb/main/database.hpp +16 -5
  693. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +9 -8
  694. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +21 -6
  695. package/src/duckdb/src/include/duckdb/main/extension.hpp +20 -0
  696. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -0
  697. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +29 -23
  698. package/src/duckdb/src/include/duckdb/main/extension_install_info.hpp +6 -0
  699. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +3 -0
  700. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +4 -2
  701. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +5 -6
  702. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -5
  703. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +87 -0
  704. package/src/duckdb/src/include/duckdb/main/profiling_node.hpp +60 -0
  705. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +72 -34
  706. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  707. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +2 -1
  708. package/src/duckdb/src/include/duckdb/main/relation/delim_get_relation.hpp +30 -0
  709. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +3 -1
  710. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +3 -0
  711. package/src/duckdb/src/include/duckdb/main/relation/materialized_relation.hpp +1 -4
  712. package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +4 -1
  713. package/src/duckdb/src/include/duckdb/main/relation/read_json_relation.hpp +6 -0
  714. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  715. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +2 -0
  716. package/src/duckdb/src/include/duckdb/main/relation.hpp +7 -4
  717. package/src/duckdb/src/include/duckdb/main/secret/default_secrets.hpp +36 -0
  718. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +108 -0
  719. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +14 -4
  720. package/src/duckdb/src/include/duckdb/main/settings.hpp +227 -3
  721. package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +8 -0
  722. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +51 -0
  723. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +7 -0
  724. package/src/duckdb/src/include/duckdb/optimizer/cte_filter_pusher.hpp +46 -0
  725. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
  726. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  727. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +31 -0
  728. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +51 -10
  729. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -0
  730. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +17 -5
  731. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
  732. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +15 -13
  733. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +9 -4
  734. package/src/duckdb/src/include/duckdb/optimizer/limit_pushdown.hpp +25 -0
  735. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +1 -0
  736. package/src/duckdb/src/include/duckdb/optimizer/rule/join_dependent_filter.hpp +37 -0
  737. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +6 -1
  738. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +54 -2
  739. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +27 -8
  740. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -0
  741. package/src/duckdb/src/include/duckdb/parallel/pipeline_prepare_finish_event.hpp +25 -0
  742. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +63 -0
  743. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -1
  744. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  745. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +5 -0
  746. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +5 -0
  747. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +5 -0
  748. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +2 -0
  749. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +11 -1
  750. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +9 -0
  751. package/src/duckdb/src/include/duckdb/parser/parsed_expression_iterator.hpp +13 -6
  752. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +1 -1
  753. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -3
  754. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +2 -0
  755. package/src/duckdb/src/include/duckdb/parser/statement/explain_statement.hpp +5 -1
  756. package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +2 -2
  757. package/src/duckdb/src/include/duckdb/parser/statement/transaction_statement.hpp +1 -1
  758. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +0 -2
  759. package/src/duckdb/src/include/duckdb/parser/tableref/column_data_ref.hpp +9 -7
  760. package/src/duckdb/src/include/duckdb/parser/tableref/delimgetref.hpp +37 -0
  761. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +4 -0
  762. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +0 -2
  763. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +0 -2
  764. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +0 -1
  765. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  766. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +17 -9
  767. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -14
  768. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +44 -0
  769. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +1 -1
  770. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
  772. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
  773. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -1
  774. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +1 -1
  775. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +1 -1
  776. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +1 -1
  777. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
  778. package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +1 -1
  779. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -1
  780. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +1 -1
  781. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +1 -1
  782. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +1 -1
  783. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +2 -0
  784. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
  785. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +1 -1
  786. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +2 -2
  787. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +1 -1
  788. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +1 -1
  789. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -2
  790. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -0
  791. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -0
  792. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -5
  793. package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +1 -0
  794. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +19 -11
  795. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  796. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  797. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  798. package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +2 -0
  799. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +7 -2
  800. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +2 -1
  801. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +1 -1
  803. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +6 -1
  804. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +10 -2
  805. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  806. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
  807. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +1 -1
  808. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -1
  809. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -2
  810. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +15 -5
  811. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +1 -0
  812. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +1 -1
  813. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -1
  814. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +24 -2
  815. package/src/duckdb/src/include/duckdb/planner/tableref/bound_delimgetref.hpp +26 -0
  816. package/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +6 -0
  817. package/src/duckdb/src/include/duckdb/planner/tableref/bound_subqueryref.hpp +1 -1
  818. package/src/duckdb/src/include/duckdb/planner/tableref/bound_table_function.hpp +2 -0
  819. package/src/duckdb/src/include/duckdb/planner/tableref/list.hpp +2 -0
  820. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  821. package/src/duckdb/src/include/duckdb/storage/block.hpp +4 -2
  822. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +48 -3
  823. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +21 -7
  824. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +65 -51
  825. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +14 -5
  826. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -4
  827. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +3 -2
  828. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +1 -0
  829. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +3 -4
  830. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +2 -0
  831. package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +4 -4
  832. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +6 -4
  833. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +19 -17
  834. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +2 -2
  835. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +3 -4
  836. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +3 -2
  837. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -2
  838. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +13 -11
  839. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +19 -19
  840. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -4
  841. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  842. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  843. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +10 -2
  844. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -2
  845. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +15 -0
  846. package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +14 -10
  847. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -8
  848. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +7 -4
  849. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -7
  850. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +29 -4
  851. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +22 -7
  852. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +15 -2
  853. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -2
  854. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +5 -16
  855. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +51 -13
  856. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +6 -3
  857. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +29 -19
  858. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +23 -7
  859. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +27 -18
  860. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +6 -3
  861. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +5 -2
  862. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  863. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +5 -1
  864. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +77 -6
  865. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +23 -11
  866. package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -0
  867. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +5 -2
  868. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +18 -4
  869. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -1
  870. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  871. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +89 -14
  872. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +4 -2
  873. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +4 -2
  874. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +2 -2
  875. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -1
  876. package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +33 -15
  877. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +9 -9
  878. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +3 -1
  879. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +4 -16
  880. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +27 -4
  881. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -0
  882. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -2
  883. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +5 -5
  884. package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +6 -2
  885. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +5 -3
  886. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +48 -0
  887. package/src/duckdb/src/include/duckdb.h +1779 -739
  888. package/src/duckdb/src/include/duckdb_extension.h +921 -0
  889. package/src/duckdb/src/main/appender.cpp +53 -7
  890. package/src/duckdb/src/main/attached_database.cpp +87 -17
  891. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +226 -0
  892. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +35 -0
  893. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +48 -23
  894. package/src/duckdb/src/main/capi/aggregate_function-c.cpp +327 -0
  895. package/src/duckdb/src/main/capi/appender-c.cpp +18 -0
  896. package/src/duckdb/src/main/capi/cast/utils-c.cpp +2 -2
  897. package/src/duckdb/src/main/capi/cast_function-c.cpp +210 -0
  898. package/src/duckdb/src/main/capi/config-c.cpp +3 -3
  899. package/src/duckdb/src/main/capi/data_chunk-c.cpp +18 -7
  900. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +223 -24
  901. package/src/duckdb/src/main/capi/helper-c.cpp +51 -11
  902. package/src/duckdb/src/main/capi/logical_types-c.cpp +105 -46
  903. package/src/duckdb/src/main/capi/pending-c.cpp +7 -6
  904. package/src/duckdb/src/main/capi/prepared-c.cpp +18 -7
  905. package/src/duckdb/src/main/capi/profiling_info-c.cpp +84 -0
  906. package/src/duckdb/src/main/capi/result-c.cpp +139 -37
  907. package/src/duckdb/src/main/capi/scalar_function-c.cpp +269 -0
  908. package/src/duckdb/src/main/capi/table_description-c.cpp +82 -0
  909. package/src/duckdb/src/main/capi/table_function-c.cpp +161 -95
  910. package/src/duckdb/src/main/capi/value-c.cpp +2 -2
  911. package/src/duckdb/src/main/chunk_scan_state/batched_data_collection.cpp +57 -0
  912. package/src/duckdb/src/main/client_config.cpp +17 -0
  913. package/src/duckdb/src/main/client_context.cpp +67 -52
  914. package/src/duckdb/src/main/client_data.cpp +3 -3
  915. package/src/duckdb/src/main/config.cpp +120 -62
  916. package/src/duckdb/src/main/connection.cpp +14 -2
  917. package/src/duckdb/src/main/database.cpp +96 -35
  918. package/src/duckdb/src/main/database_manager.cpp +25 -23
  919. package/src/duckdb/src/main/database_path_and_type.cpp +2 -2
  920. package/src/duckdb/src/main/db_instance_cache.cpp +54 -19
  921. package/src/duckdb/src/main/extension/extension_helper.cpp +47 -42
  922. package/src/duckdb/src/main/extension/extension_install.cpp +155 -87
  923. package/src/duckdb/src/main/extension/extension_load.cpp +180 -26
  924. package/src/duckdb/src/main/extension/extension_util.cpp +8 -0
  925. package/src/duckdb/src/main/extension.cpp +72 -5
  926. package/src/duckdb/src/main/pending_query_result.cpp +20 -12
  927. package/src/duckdb/src/main/prepared_statement.cpp +6 -6
  928. package/src/duckdb/src/main/prepared_statement_data.cpp +28 -17
  929. package/src/duckdb/src/main/profiling_info.cpp +196 -0
  930. package/src/duckdb/src/main/query_profiler.cpp +413 -224
  931. package/src/duckdb/src/main/query_result.cpp +1 -1
  932. package/src/duckdb/src/main/relation/create_table_relation.cpp +4 -2
  933. package/src/duckdb/src/main/relation/create_view_relation.cpp +0 -6
  934. package/src/duckdb/src/main/relation/delim_get_relation.cpp +44 -0
  935. package/src/duckdb/src/main/relation/explain_relation.cpp +4 -3
  936. package/src/duckdb/src/main/relation/join_relation.cpp +5 -0
  937. package/src/duckdb/src/main/relation/limit_relation.cpp +1 -1
  938. package/src/duckdb/src/main/relation/materialized_relation.cpp +3 -3
  939. package/src/duckdb/src/main/relation/query_relation.cpp +42 -15
  940. package/src/duckdb/src/main/relation/read_csv_relation.cpp +7 -14
  941. package/src/duckdb/src/main/relation/read_json_relation.cpp +20 -0
  942. package/src/duckdb/src/main/relation/setop_relation.cpp +1 -1
  943. package/src/duckdb/src/main/relation/table_function_relation.cpp +6 -0
  944. package/src/duckdb/src/main/relation/view_relation.cpp +10 -0
  945. package/src/duckdb/src/main/relation.cpp +12 -8
  946. package/src/duckdb/src/main/secret/default_secrets.cpp +108 -0
  947. package/src/duckdb/src/main/secret/secret.cpp +145 -2
  948. package/src/duckdb/src/main/secret/secret_manager.cpp +85 -35
  949. package/src/duckdb/src/main/secret/secret_storage.cpp +29 -17
  950. package/src/duckdb/src/main/settings/settings.cpp +503 -11
  951. package/src/duckdb/src/main/stream_query_result.cpp +75 -2
  952. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +248 -0
  953. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +28 -6
  954. package/src/duckdb/src/optimizer/compressed_materialization/compress_comparison_join.cpp +152 -0
  955. package/src/duckdb/src/optimizer/compressed_materialization.cpp +11 -1
  956. package/src/duckdb/src/optimizer/cse_optimizer.cpp +3 -0
  957. package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +117 -0
  958. package/src/duckdb/src/optimizer/filter_combiner.cpp +30 -9
  959. package/src/duckdb/src/optimizer/filter_pullup.cpp +54 -2
  960. package/src/duckdb/src/optimizer/filter_pushdown.cpp +71 -3
  961. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +154 -0
  962. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +245 -114
  963. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +42 -20
  964. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +6 -2
  965. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +32 -10
  966. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +97 -131
  967. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +265 -51
  968. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +21 -17
  969. package/src/duckdb/src/optimizer/limit_pushdown.cpp +42 -0
  970. package/src/duckdb/src/optimizer/optimizer.cpp +51 -8
  971. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +17 -17
  972. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +22 -4
  973. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +1 -18
  974. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +6 -0
  975. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +4 -2
  976. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +91 -0
  977. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -25
  978. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +1 -0
  979. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +3 -0
  980. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +2 -2
  981. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +8 -2
  982. package/src/duckdb/src/optimizer/rule/join_dependent_filter.cpp +135 -0
  983. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +1 -1
  984. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
  985. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +6 -1
  986. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +7 -6
  987. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
  988. package/src/duckdb/src/optimizer/topn_optimizer.cpp +46 -7
  989. package/src/duckdb/src/parallel/executor.cpp +129 -51
  990. package/src/duckdb/src/parallel/executor_task.cpp +16 -3
  991. package/src/duckdb/src/parallel/meta_pipeline.cpp +98 -29
  992. package/src/duckdb/src/parallel/pipeline.cpp +17 -3
  993. package/src/duckdb/src/parallel/pipeline_executor.cpp +14 -2
  994. package/src/duckdb/src/parallel/pipeline_prepare_finish_event.cpp +34 -0
  995. package/src/duckdb/src/parallel/task_executor.cpp +84 -0
  996. package/src/duckdb/src/parallel/task_scheduler.cpp +94 -16
  997. package/src/duckdb/src/parallel/thread_context.cpp +1 -1
  998. package/src/duckdb/src/parser/expression/function_expression.cpp +14 -0
  999. package/src/duckdb/src/parser/expression/star_expression.cpp +35 -2
  1000. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +5 -1
  1001. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +17 -0
  1002. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +37 -28
  1003. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +44 -2
  1004. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +21 -1
  1005. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +29 -25
  1006. package/src/duckdb/src/parser/parser.cpp +41 -1
  1007. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +1 -0
  1008. package/src/duckdb/src/parser/statement/explain_statement.cpp +28 -13
  1009. package/src/duckdb/src/parser/statement/relation_statement.cpp +5 -0
  1010. package/src/duckdb/src/parser/statement/set_statement.cpp +4 -2
  1011. package/src/duckdb/src/parser/statement/transaction_statement.cpp +3 -3
  1012. package/src/duckdb/src/parser/tableref/column_data_ref.cpp +1 -27
  1013. package/src/duckdb/src/parser/tableref/delimgetref.cpp +30 -0
  1014. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  1015. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +35 -29
  1016. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +32 -32
  1017. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +2 -1
  1018. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +17 -0
  1019. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +5 -0
  1020. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +36 -34
  1021. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +30 -14
  1022. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  1023. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +2 -1
  1024. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +27 -19
  1025. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +31 -28
  1026. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +25 -27
  1027. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +1 -1
  1028. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +53 -42
  1029. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +6 -6
  1030. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +1 -1
  1031. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  1032. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1033. package/src/duckdb/src/parser/transform/statement/transform_explain.cpp +38 -3
  1034. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +1 -2
  1035. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +1 -1
  1036. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +1 -1
  1037. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +26 -21
  1038. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +8 -8
  1039. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +5 -2
  1040. package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +6 -4
  1041. package/src/duckdb/src/parser/transform/statement/transform_transaction.cpp +27 -6
  1042. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +8 -9
  1043. package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +11 -12
  1044. package/src/duckdb/src/parser/transform/statement/transform_vacuum.cpp +3 -3
  1045. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -10
  1046. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +1 -1
  1047. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  1048. package/src/duckdb/src/parser/transformer.cpp +11 -7
  1049. package/src/duckdb/src/planner/bind_context.cpp +3 -3
  1050. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +22 -7
  1051. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +3 -3
  1052. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -2
  1053. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +11 -4
  1054. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +9 -54
  1055. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -5
  1056. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -27
  1057. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +7 -7
  1058. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -2
  1059. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +26 -7
  1060. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +5 -0
  1061. package/src/duckdb/src/planner/binder/expression/bind_unpacked_star_expression.cpp +91 -0
  1062. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
  1063. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -8
  1064. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +1 -1
  1065. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +6 -10
  1066. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +14 -10
  1067. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +3 -3
  1068. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +46 -7
  1069. package/src/duckdb/src/planner/binder/statement/bind_call.cpp +13 -20
  1070. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +105 -13
  1071. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -3
  1072. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +75 -55
  1073. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
  1074. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +5 -4
  1075. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
  1076. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +24 -8
  1077. package/src/duckdb/src/planner/binder/statement/bind_explain.cpp +2 -2
  1078. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +5 -105
  1079. package/src/duckdb/src/planner/binder/statement/bind_extension.cpp +2 -2
  1080. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +109 -41
  1081. package/src/duckdb/src/planner/binder/statement/bind_set.cpp +23 -7
  1082. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +4 -1
  1083. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +17 -3
  1084. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -4
  1085. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +8 -6
  1086. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +55 -42
  1087. package/src/duckdb/src/planner/binder/tableref/bind_column_data_ref.cpp +3 -2
  1088. package/src/duckdb/src/planner/binder/tableref/bind_delimgetref.cpp +16 -0
  1089. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +31 -1
  1090. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +6 -0
  1091. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +2 -0
  1092. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +106 -46
  1093. package/src/duckdb/src/planner/binder/tableref/plan_delimgetref.cpp +11 -0
  1094. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +15 -2
  1095. package/src/duckdb/src/planner/binder/tableref/plan_table_function.cpp +4 -0
  1096. package/src/duckdb/src/planner/binder.cpp +172 -15
  1097. package/src/duckdb/src/planner/collation_binding.cpp +99 -0
  1098. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -4
  1099. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +1 -1
  1100. package/src/duckdb/src/planner/expression/bound_case_expression.cpp +1 -1
  1101. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +14 -12
  1102. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
  1103. package/src/duckdb/src/planner/expression/bound_comparison_expression.cpp +1 -1
  1104. package/src/duckdb/src/planner/expression/bound_conjunction_expression.cpp +1 -1
  1105. package/src/duckdb/src/planner/expression/bound_constant_expression.cpp +1 -1
  1106. package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +1 -1
  1107. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -2
  1108. package/src/duckdb/src/planner/expression/bound_lambda_expression.cpp +1 -1
  1109. package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +1 -1
  1110. package/src/duckdb/src/planner/expression/bound_operator_expression.cpp +1 -1
  1111. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +1 -1
  1112. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
  1113. package/src/duckdb/src/planner/expression/bound_subquery_expression.cpp +1 -1
  1114. package/src/duckdb/src/planner/expression/bound_unnest_expression.cpp +1 -1
  1115. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +6 -6
  1116. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1117. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
  1118. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +1 -1
  1119. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +7 -0
  1120. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
  1121. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +26 -22
  1122. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +7 -1
  1123. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
  1124. package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
  1125. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +2 -2
  1126. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +61 -43
  1127. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +2 -2
  1128. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +4 -4
  1129. package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +3 -2
  1130. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +10 -3
  1131. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1132. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +9 -2
  1133. package/src/duckdb/src/planner/expression_binder.cpp +121 -21
  1134. package/src/duckdb/src/planner/expression_iterator.cpp +26 -1
  1135. package/src/duckdb/src/planner/filter/conjunction_filter.cpp +33 -0
  1136. package/src/duckdb/src/planner/filter/constant_filter.cpp +15 -0
  1137. package/src/duckdb/src/planner/filter/null_filter.cpp +22 -0
  1138. package/src/duckdb/src/planner/filter/struct_filter.cpp +16 -0
  1139. package/src/duckdb/src/planner/logical_operator.cpp +24 -7
  1140. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -7
  1141. package/src/duckdb/src/planner/operator/logical_any_join.cpp +5 -2
  1142. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +13 -5
  1143. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +64 -8
  1144. package/src/duckdb/src/planner/operator/logical_cteref.cpp +7 -0
  1145. package/src/duckdb/src/planner/operator/logical_distinct.cpp +6 -5
  1146. package/src/duckdb/src/planner/operator/logical_get.cpp +60 -18
  1147. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +7 -0
  1148. package/src/duckdb/src/planner/operator/logical_order.cpp +7 -4
  1149. package/src/duckdb/src/planner/operator/logical_top_n.cpp +2 -2
  1150. package/src/duckdb/src/planner/operator/logical_vacuum.cpp +1 -1
  1151. package/src/duckdb/src/planner/planner.cpp +2 -3
  1152. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +27 -10
  1153. package/src/duckdb/src/planner/table_filter.cpp +51 -0
  1154. package/src/duckdb/src/storage/arena_allocator.cpp +28 -10
  1155. package/src/duckdb/src/storage/block.cpp +3 -2
  1156. package/src/duckdb/src/storage/buffer/block_handle.cpp +29 -14
  1157. package/src/duckdb/src/storage/buffer/block_manager.cpp +6 -5
  1158. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -1
  1159. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +264 -125
  1160. package/src/duckdb/src/storage/buffer_manager.cpp +5 -1
  1161. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -6
  1162. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +26 -3
  1163. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +21 -9
  1164. package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -24
  1165. package/src/duckdb/src/storage/compression/alp/alp.cpp +6 -11
  1166. package/src/duckdb/src/storage/compression/alprd.cpp +5 -9
  1167. package/src/duckdb/src/storage/compression/bitpacking.cpp +35 -31
  1168. package/src/duckdb/src/storage/compression/chimp/chimp.cpp +6 -8
  1169. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +71 -58
  1170. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +15 -13
  1171. package/src/duckdb/src/storage/compression/fsst.cpp +66 -53
  1172. package/src/duckdb/src/storage/compression/numeric_constant.cpp +4 -5
  1173. package/src/duckdb/src/storage/compression/patas.cpp +6 -17
  1174. package/src/duckdb/src/storage/compression/rle.cpp +20 -18
  1175. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +71 -52
  1176. package/src/duckdb/src/storage/compression/uncompressed.cpp +2 -2
  1177. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +8 -7
  1178. package/src/duckdb/src/storage/data_pointer.cpp +22 -0
  1179. package/src/duckdb/src/storage/data_table.cpp +41 -12
  1180. package/src/duckdb/src/storage/local_storage.cpp +22 -8
  1181. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +33 -17
  1182. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +4 -4
  1183. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +3 -3
  1184. package/src/duckdb/src/storage/partial_block_manager.cpp +19 -8
  1185. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +11 -8
  1186. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +1 -1
  1187. package/src/duckdb/src/storage/serialization/serialize_extension_install_info.cpp +2 -0
  1188. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +3 -3
  1189. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +19 -5
  1190. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +21 -1
  1191. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +4 -2
  1192. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -2
  1193. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +2 -0
  1194. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +8 -4
  1195. package/src/duckdb/src/storage/serialization/serialize_types.cpp +4 -4
  1196. package/src/duckdb/src/storage/single_file_block_manager.cpp +170 -34
  1197. package/src/duckdb/src/storage/standard_buffer_manager.cpp +221 -64
  1198. package/src/duckdb/src/storage/statistics/column_statistics.cpp +4 -3
  1199. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +36 -26
  1200. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +4 -15
  1201. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -8
  1202. package/src/duckdb/src/storage/statistics/struct_stats.cpp +2 -1
  1203. package/src/duckdb/src/storage/storage_info.cpp +34 -9
  1204. package/src/duckdb/src/storage/storage_manager.cpp +147 -74
  1205. package/src/duckdb/src/storage/table/array_column_data.cpp +37 -17
  1206. package/src/duckdb/src/storage/table/chunk_info.cpp +38 -0
  1207. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -6
  1208. package/src/duckdb/src/storage/table/column_data.cpp +252 -31
  1209. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -12
  1210. package/src/duckdb/src/storage/table/column_segment.cpp +63 -34
  1211. package/src/duckdb/src/storage/table/list_column_data.cpp +34 -15
  1212. package/src/duckdb/src/storage/table/row_group.cpp +228 -120
  1213. package/src/duckdb/src/storage/table/row_group_collection.cpp +122 -120
  1214. package/src/duckdb/src/storage/table/row_version_manager.cpp +27 -1
  1215. package/src/duckdb/src/storage/table/scan_state.cpp +101 -18
  1216. package/src/duckdb/src/storage/table/standard_column_data.cpp +20 -34
  1217. package/src/duckdb/src/storage/table/struct_column_data.cpp +39 -42
  1218. package/src/duckdb/src/storage/table/table_statistics.cpp +2 -1
  1219. package/src/duckdb/src/storage/table/update_segment.cpp +9 -8
  1220. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -2
  1221. package/src/duckdb/src/storage/table_index_list.cpp +8 -7
  1222. package/src/duckdb/src/storage/temporary_file_manager.cpp +11 -9
  1223. package/src/duckdb/src/storage/temporary_memory_manager.cpp +227 -39
  1224. package/src/duckdb/src/storage/wal_replay.cpp +68 -28
  1225. package/src/duckdb/src/storage/write_ahead_log.cpp +56 -47
  1226. package/src/duckdb/src/transaction/cleanup_state.cpp +9 -1
  1227. package/src/duckdb/src/transaction/commit_state.cpp +7 -170
  1228. package/src/duckdb/src/transaction/duck_transaction.cpp +87 -19
  1229. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +65 -10
  1230. package/src/duckdb/src/transaction/meta_transaction.cpp +18 -3
  1231. package/src/duckdb/src/transaction/transaction_context.cpp +21 -17
  1232. package/src/duckdb/src/transaction/undo_buffer.cpp +20 -14
  1233. package/src/duckdb/src/transaction/wal_write_state.cpp +292 -0
  1234. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +0 -1
  1235. package/src/duckdb/third_party/brotli/common/brotli_constants.h +204 -0
  1236. package/src/duckdb/third_party/brotli/common/brotli_platform.h +543 -0
  1237. package/src/duckdb/third_party/brotli/common/constants.cpp +17 -0
  1238. package/src/duckdb/third_party/brotli/common/context.cpp +156 -0
  1239. package/src/duckdb/third_party/brotli/common/context.h +110 -0
  1240. package/src/duckdb/third_party/brotli/common/dictionary.cpp +5912 -0
  1241. package/src/duckdb/third_party/brotli/common/dictionary.h +60 -0
  1242. package/src/duckdb/third_party/brotli/common/platform.cpp +24 -0
  1243. package/src/duckdb/third_party/brotli/common/shared_dictionary.cpp +517 -0
  1244. package/src/duckdb/third_party/brotli/common/shared_dictionary_internal.h +71 -0
  1245. package/src/duckdb/third_party/brotli/common/transform.cpp +287 -0
  1246. package/src/duckdb/third_party/brotli/common/transform.h +77 -0
  1247. package/src/duckdb/third_party/brotli/common/version.h +51 -0
  1248. package/src/duckdb/third_party/brotli/dec/bit_reader.cpp +74 -0
  1249. package/src/duckdb/third_party/brotli/dec/bit_reader.h +419 -0
  1250. package/src/duckdb/third_party/brotli/dec/decode.cpp +2758 -0
  1251. package/src/duckdb/third_party/brotli/dec/huffman.cpp +338 -0
  1252. package/src/duckdb/third_party/brotli/dec/huffman.h +118 -0
  1253. package/src/duckdb/third_party/brotli/dec/prefix.h +733 -0
  1254. package/src/duckdb/third_party/brotli/dec/state.cpp +178 -0
  1255. package/src/duckdb/third_party/brotli/dec/state.h +386 -0
  1256. package/src/duckdb/third_party/brotli/enc/backward_references.cpp +3775 -0
  1257. package/src/duckdb/third_party/brotli/enc/backward_references.h +36 -0
  1258. package/src/duckdb/third_party/brotli/enc/backward_references_hq.cpp +935 -0
  1259. package/src/duckdb/third_party/brotli/enc/backward_references_hq.h +92 -0
  1260. package/src/duckdb/third_party/brotli/enc/bit_cost.cpp +410 -0
  1261. package/src/duckdb/third_party/brotli/enc/bit_cost.h +60 -0
  1262. package/src/duckdb/third_party/brotli/enc/block_splitter.cpp +1653 -0
  1263. package/src/duckdb/third_party/brotli/enc/block_splitter.h +48 -0
  1264. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp +1431 -0
  1265. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.h +85 -0
  1266. package/src/duckdb/third_party/brotli/enc/brotli_hash.h +4352 -0
  1267. package/src/duckdb/third_party/brotli/enc/brotli_params.h +47 -0
  1268. package/src/duckdb/third_party/brotli/enc/cluster.cpp +1025 -0
  1269. package/src/duckdb/third_party/brotli/enc/cluster.h +1017 -0
  1270. package/src/duckdb/third_party/brotli/enc/command.cpp +24 -0
  1271. package/src/duckdb/third_party/brotli/enc/command.h +187 -0
  1272. package/src/duckdb/third_party/brotli/enc/compound_dictionary.cpp +209 -0
  1273. package/src/duckdb/third_party/brotli/enc/compound_dictionary.h +75 -0
  1274. package/src/duckdb/third_party/brotli/enc/compress_fragment.cpp +796 -0
  1275. package/src/duckdb/third_party/brotli/enc/compress_fragment.h +82 -0
  1276. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp +653 -0
  1277. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h +68 -0
  1278. package/src/duckdb/third_party/brotli/enc/dictionary_hash.cpp +1844 -0
  1279. package/src/duckdb/third_party/brotli/enc/dictionary_hash.h +21 -0
  1280. package/src/duckdb/third_party/brotli/enc/encode.cpp +1990 -0
  1281. package/src/duckdb/third_party/brotli/enc/encoder_dict.cpp +636 -0
  1282. package/src/duckdb/third_party/brotli/enc/encoder_dict.h +153 -0
  1283. package/src/duckdb/third_party/brotli/enc/entropy_encode.cpp +500 -0
  1284. package/src/duckdb/third_party/brotli/enc/entropy_encode.h +119 -0
  1285. package/src/duckdb/third_party/brotli/enc/entropy_encode_static.h +538 -0
  1286. package/src/duckdb/third_party/brotli/enc/fast_log.cpp +101 -0
  1287. package/src/duckdb/third_party/brotli/enc/fast_log.h +63 -0
  1288. package/src/duckdb/third_party/brotli/enc/find_match_length.h +68 -0
  1289. package/src/duckdb/third_party/brotli/enc/histogram.cpp +96 -0
  1290. package/src/duckdb/third_party/brotli/enc/histogram.h +210 -0
  1291. package/src/duckdb/third_party/brotli/enc/literal_cost.cpp +176 -0
  1292. package/src/duckdb/third_party/brotli/enc/literal_cost.h +28 -0
  1293. package/src/duckdb/third_party/brotli/enc/memory.cpp +190 -0
  1294. package/src/duckdb/third_party/brotli/enc/memory.h +127 -0
  1295. package/src/duckdb/third_party/brotli/enc/metablock.cpp +1225 -0
  1296. package/src/duckdb/third_party/brotli/enc/metablock.h +102 -0
  1297. package/src/duckdb/third_party/brotli/enc/prefix.h +50 -0
  1298. package/src/duckdb/third_party/brotli/enc/quality.h +202 -0
  1299. package/src/duckdb/third_party/brotli/enc/ringbuffer.h +164 -0
  1300. package/src/duckdb/third_party/brotli/enc/state.h +106 -0
  1301. package/src/duckdb/third_party/brotli/enc/static_dict.cpp +538 -0
  1302. package/src/duckdb/third_party/brotli/enc/static_dict.h +37 -0
  1303. package/src/duckdb/third_party/brotli/enc/static_dict_lut.h +5862 -0
  1304. package/src/duckdb/third_party/brotli/enc/utf8_util.cpp +81 -0
  1305. package/src/duckdb/third_party/brotli/enc/utf8_util.h +29 -0
  1306. package/src/duckdb/third_party/brotli/enc/write_bits.h +84 -0
  1307. package/src/duckdb/third_party/brotli/include/brotli/decode.h +405 -0
  1308. package/src/duckdb/third_party/brotli/include/brotli/encode.h +489 -0
  1309. package/src/duckdb/third_party/brotli/include/brotli/port.h +238 -0
  1310. package/src/duckdb/third_party/brotli/include/brotli/shared_dictionary.h +96 -0
  1311. package/src/duckdb/third_party/brotli/include/brotli/types.h +83 -0
  1312. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +20 -4
  1313. package/src/duckdb/third_party/fmt/include/fmt/format.h +54 -10
  1314. package/src/duckdb/third_party/fsst/fsst.h +2 -2
  1315. package/src/duckdb/third_party/fsst/libfsst.hpp +2 -2
  1316. package/src/duckdb/third_party/httplib/httplib.hpp +6763 -5580
  1317. package/src/duckdb/third_party/hyperloglog/hyperloglog.cpp +13 -30
  1318. package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +8 -2
  1319. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
  1320. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +22 -9
  1321. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1041 -554
  1322. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  1323. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +2 -1
  1324. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +21605 -21752
  1325. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +538 -299
  1326. package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +1 -0
  1327. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +36 -12
  1328. package/src/duckdb/third_party/mbedtls/library/md.cpp +6 -6
  1329. package/src/duckdb/third_party/mbedtls/library/sha1.cpp +2 -0
  1330. package/src/duckdb/third_party/mbedtls/library/sha256.cpp +3 -0
  1331. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +99 -47
  1332. package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
  1333. package/src/duckdb/third_party/re2/re2/prog.cc +2 -2
  1334. package/src/duckdb/third_party/snappy/snappy-internal.h +398 -0
  1335. package/src/duckdb/third_party/snappy/snappy-sinksource.cc +111 -9
  1336. package/src/duckdb/third_party/snappy/snappy-sinksource.h +158 -0
  1337. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +523 -3
  1338. package/src/duckdb/third_party/snappy/snappy-stubs-public.h +34 -1
  1339. package/src/duckdb/third_party/snappy/snappy.cc +2626 -0
  1340. package/src/duckdb/third_party/snappy/snappy.h +223 -0
  1341. package/src/duckdb/third_party/snappy/snappy_version.hpp +11 -0
  1342. package/src/duckdb/third_party/utf8proc/include/utf8proc.hpp +69 -101
  1343. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +53 -0
  1344. package/src/duckdb/third_party/utf8proc/utf8proc.cpp +627 -678
  1345. package/src/duckdb/third_party/utf8proc/utf8proc_data.cpp +15008 -12868
  1346. package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +185 -29
  1347. package/src/duckdb/ub_extension_json_json_functions.cpp +6 -0
  1348. package/src/duckdb/ub_src_catalog_default.cpp +4 -0
  1349. package/src/duckdb/ub_src_common.cpp +7 -1
  1350. package/src/duckdb/ub_src_common_arrow.cpp +10 -0
  1351. package/src/duckdb/ub_src_common_enums.cpp +2 -0
  1352. package/src/duckdb/ub_src_common_tree_renderer.cpp +10 -0
  1353. package/src/duckdb/ub_src_common_types.cpp +2 -0
  1354. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +4 -0
  1355. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +2 -0
  1356. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +2 -0
  1357. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +2 -4
  1358. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +2 -0
  1359. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
  1360. package/src/duckdb/ub_src_execution_index_art.cpp +5 -3
  1361. package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +2 -0
  1362. package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
  1363. package/src/duckdb/ub_src_function.cpp +4 -0
  1364. package/src/duckdb/ub_src_function_cast.cpp +2 -0
  1365. package/src/duckdb/ub_src_function_scalar_generic.cpp +4 -0
  1366. package/src/duckdb/ub_src_function_scalar_list.cpp +0 -2
  1367. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  1368. package/src/duckdb/ub_src_function_table.cpp +2 -0
  1369. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  1370. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  1371. package/src/duckdb/ub_src_main.cpp +4 -0
  1372. package/src/duckdb/ub_src_main_buffered_data.cpp +4 -0
  1373. package/src/duckdb/ub_src_main_capi.cpp +10 -0
  1374. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  1375. package/src/duckdb/ub_src_main_relation.cpp +2 -0
  1376. package/src/duckdb/ub_src_main_secret.cpp +2 -0
  1377. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1378. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +2 -0
  1379. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1380. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1381. package/src/duckdb/ub_src_parallel.cpp +4 -0
  1382. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  1383. package/src/duckdb/ub_src_planner.cpp +2 -0
  1384. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
  1385. package/src/duckdb/ub_src_planner_binder_tableref.cpp +4 -0
  1386. package/src/duckdb/ub_src_storage_statistics.cpp +0 -2
  1387. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1388. package/test/columns.test.ts +1 -1
  1389. package/test/prepare.test.ts +1 -1
  1390. package/test/test_all_types.test.ts +1 -1
@@ -14,18 +14,18 @@
14
14
  #include <algorithm>
15
15
 
16
16
  namespace duckdb {
17
-
18
17
  StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_machine,
19
18
  const shared_ptr<CSVBufferHandle> &buffer_handle, Allocator &buffer_allocator,
20
- bool figure_out_new_line_p, idx_t buffer_position, CSVErrorHandler &error_hander_p,
19
+ idx_t result_size_p, idx_t buffer_position, CSVErrorHandler &error_hander_p,
21
20
  CSVIterator &iterator_p, bool store_line_size_p,
22
- shared_ptr<CSVFileScan> csv_file_scan_p, idx_t &lines_read_p, bool sniffing_p)
23
- : ScannerResult(states, state_machine),
21
+ shared_ptr<CSVFileScan> csv_file_scan_p, idx_t &lines_read_p, bool sniffing_p,
22
+ string path_p)
23
+ : ScannerResult(states, state_machine, result_size_p),
24
24
  number_of_columns(NumericCast<uint32_t>(state_machine.dialect_options.num_cols)),
25
25
  null_padding(state_machine.options.null_padding), ignore_errors(state_machine.options.ignore_errors.GetValue()),
26
- figure_out_new_line(figure_out_new_line_p), error_handler(error_hander_p), iterator(iterator_p),
27
- store_line_size(store_line_size_p), csv_file_scan(std::move(csv_file_scan_p)), lines_read(lines_read_p),
28
- current_errors(state_machine.options.IgnoreErrors()), sniffing(sniffing_p) {
26
+ error_handler(error_hander_p), iterator(iterator_p), store_line_size(store_line_size_p),
27
+ csv_file_scan(std::move(csv_file_scan_p)), lines_read(lines_read_p),
28
+ current_errors(state_machine.options.IgnoreErrors()), sniffing(sniffing_p), path(std::move(path_p)) {
29
29
  // Vector information
30
30
  D_ASSERT(number_of_columns > 0);
31
31
  buffer_handles[buffer_handle->buffer_idx] = buffer_handle;
@@ -34,8 +34,6 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
34
34
  buffer_size = buffer_handle->actual_size;
35
35
  last_position = {buffer_handle->buffer_idx, buffer_position, buffer_size};
36
36
  requested_size = buffer_handle->requested_size;
37
- result_size = figure_out_new_line ? 1 : STANDARD_VECTOR_SIZE;
38
-
39
37
  // Current Result information
40
38
  current_line_position.begin = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_handle->actual_size};
41
39
  current_line_position.end = current_line_position.begin;
@@ -56,9 +54,13 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
56
54
  "Mismatch between the number of columns (%d) in the CSV file and what is expected in the scanner (%d).",
57
55
  number_of_columns, csv_file_scan->file_types.size());
58
56
  }
57
+ bool icu_loaded = csv_file_scan->buffer_manager->context.db->ExtensionIsLoaded("icu");
59
58
  for (idx_t i = 0; i < csv_file_scan->file_types.size(); i++) {
60
59
  auto &type = csv_file_scan->file_types[i];
61
- if (StringValueScanner::CanDirectlyCast(type)) {
60
+ if (type.IsJSONType()) {
61
+ type = LogicalType::VARCHAR;
62
+ }
63
+ if (StringValueScanner::CanDirectlyCast(type, icu_loaded)) {
62
64
  parse_types[i] = ParseTypeInfo(type, true);
63
65
  logical_types.emplace_back(type);
64
66
  } else {
@@ -98,8 +100,8 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
98
100
 
99
101
  // Setup the NullStr information
100
102
  null_str_count = state_machine.options.null_str.size();
101
- null_str_ptr = make_unsafe_uniq_array<const char *>(null_str_count);
102
- null_str_size = make_unsafe_uniq_array<idx_t>(null_str_count);
103
+ null_str_ptr = make_unsafe_uniq_array_uninitialized<const char *>(null_str_count);
104
+ null_str_size = make_unsafe_uniq_array_uninitialized<idx_t>(null_str_count);
103
105
  for (idx_t i = 0; i < null_str_count; i++) {
104
106
  null_str_ptr[i] = state_machine.options.null_str[i].c_str();
105
107
  null_str_size[i] = state_machine.options.null_str[i].size();
@@ -107,6 +109,14 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
107
109
  date_format = state_machine.options.dialect_options.date_format.at(LogicalTypeId::DATE).GetValue();
108
110
  timestamp_format = state_machine.options.dialect_options.date_format.at(LogicalTypeId::TIMESTAMP).GetValue();
109
111
  decimal_separator = state_machine.options.decimal_separator[0];
112
+
113
+ if (iterator.first_one) {
114
+ lines_read +=
115
+ state_machine.dialect_options.skip_rows.GetValue() + state_machine.dialect_options.header.GetValue();
116
+ if (lines_read == 0) {
117
+ SkipBOM();
118
+ }
119
+ }
110
120
  }
111
121
 
112
122
  StringValueResult::~StringValueResult() {
@@ -141,7 +151,7 @@ bool StringValueResult::HandleTooManyColumnsError(const char *value_ptr, const i
141
151
  }
142
152
  if (error) {
143
153
  // We error pointing to the current value error.
144
- current_errors.Insert(CSVErrorType::TOO_MANY_COLUMNS, cur_col_id, chunk_col_id, last_position);
154
+ current_errors.Insert(TOO_MANY_COLUMNS, cur_col_id, chunk_col_id, last_position);
145
155
  cur_col_id++;
146
156
  }
147
157
  // We had an error
@@ -149,6 +159,53 @@ bool StringValueResult::HandleTooManyColumnsError(const char *value_ptr, const i
149
159
  }
150
160
  return false;
151
161
  }
162
+
163
+ void StringValueResult::SetComment(StringValueResult &result, idx_t buffer_pos) {
164
+ if (!result.comment) {
165
+ result.position_before_comment = buffer_pos;
166
+ result.comment = true;
167
+ }
168
+ }
169
+
170
+ bool StringValueResult::UnsetComment(StringValueResult &result, idx_t buffer_pos) {
171
+ bool done = false;
172
+ if (result.last_position.buffer_pos < result.position_before_comment) {
173
+ bool all_empty = true;
174
+ for (idx_t i = result.last_position.buffer_pos; i < result.position_before_comment; i++) {
175
+ if (result.buffer_ptr[i] != ' ') {
176
+ all_empty = false;
177
+ break;
178
+ }
179
+ }
180
+ if (!all_empty) {
181
+ done = AddRow(result, result.position_before_comment);
182
+ }
183
+ } else {
184
+ if (result.cur_col_id != 0) {
185
+ done = AddRow(result, result.position_before_comment);
186
+ }
187
+ }
188
+ if (result.number_of_rows == 0) {
189
+ result.first_line_is_comment = true;
190
+ }
191
+ result.comment = false;
192
+ if (result.state_machine.dialect_options.state_machine_options.new_line.GetValue() != NewLineIdentifier::CARRY_ON) {
193
+ result.last_position.buffer_pos = buffer_pos + 1;
194
+ } else {
195
+ result.last_position.buffer_pos = buffer_pos + 2;
196
+ }
197
+ result.cur_col_id = 0;
198
+ result.chunk_col_id = 0;
199
+ return done;
200
+ }
201
+
202
+ static void SanitizeError(string &value) {
203
+ std::vector<char> char_array(value.begin(), value.end());
204
+ char_array.push_back('\0'); // Null-terminate the character array
205
+ Utf8Proc::MakeValid(&char_array[0], char_array.size());
206
+ value = {char_array.begin(), char_array.end() - 1};
207
+ }
208
+
152
209
  void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size, bool allocate) {
153
210
  if (HandleTooManyColumnsError(value_ptr, size)) {
154
211
  return;
@@ -165,7 +222,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
165
222
  }
166
223
  if (error) {
167
224
  // We error pointing to the current value error.
168
- current_errors.Insert(CSVErrorType::TOO_MANY_COLUMNS, cur_col_id, chunk_col_id, last_position);
225
+ current_errors.Insert(TOO_MANY_COLUMNS, cur_col_id, chunk_col_id, last_position);
169
226
  cur_col_id++;
170
227
  }
171
228
  return;
@@ -188,7 +245,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
188
245
  if (empty) {
189
246
  if (parse_types[chunk_col_id].type_id != LogicalTypeId::VARCHAR) {
190
247
  // If it is not a varchar, empty values are not accepted, we must error.
191
- current_errors.Insert(CSVErrorType::CAST_ERROR, cur_col_id, chunk_col_id, last_position);
248
+ current_errors.Insert(CAST_ERROR, cur_col_id, chunk_col_id, last_position);
192
249
  }
193
250
  static_cast<string_t *>(vector_ptr[chunk_col_id])[number_of_rows] = string_t();
194
251
  } else {
@@ -266,7 +323,8 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
266
323
  static_cast<dtime_t *>(vector_ptr[chunk_col_id])[number_of_rows], false);
267
324
  break;
268
325
  }
269
- case LogicalTypeId::TIMESTAMP: {
326
+ case LogicalTypeId::TIMESTAMP:
327
+ case LogicalTypeId::TIMESTAMP_TZ: {
270
328
  if (!timestamp_format.Empty()) {
271
329
  success = timestamp_format.TryParseTimestamp(
272
330
  value_ptr, size, static_cast<timestamp_t *>(vector_ptr[chunk_col_id])[number_of_rows]);
@@ -347,7 +405,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
347
405
  HandleUnicodeError(cur_col_id, last_position);
348
406
  }
349
407
  // If we got here, we are ingoring errors, hence we must ignore this line.
350
- current_errors.Insert(CSVErrorType::INVALID_UNICODE, cur_col_id, chunk_col_id, last_position);
408
+ current_errors.Insert(INVALID_UNICODE, cur_col_id, chunk_col_id, last_position);
351
409
  break;
352
410
  }
353
411
  if (allocate) {
@@ -362,14 +420,17 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
362
420
  }
363
421
  }
364
422
  if (!success) {
365
- current_errors.Insert(CSVErrorType::CAST_ERROR, cur_col_id, chunk_col_id, last_position);
423
+ current_errors.Insert(CAST_ERROR, cur_col_id, chunk_col_id, last_position);
366
424
  if (!state_machine.options.IgnoreErrors()) {
367
425
  // We have to write the cast error message.
368
426
  std::ostringstream error;
369
427
  // Casting Error Message
370
428
  error << "Could not convert string \"" << std::string(value_ptr, size) << "\" to \'"
371
429
  << LogicalTypeIdToString(parse_types[chunk_col_id].type_id) << "\'";
372
- current_errors.ModifyErrorMessageOfLastError(error.str());
430
+ auto error_string = error.str();
431
+ SanitizeError(error_string);
432
+
433
+ current_errors.ModifyErrorMessageOfLastError(error_string);
373
434
  }
374
435
  }
375
436
  cur_col_id++;
@@ -417,11 +478,32 @@ void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t bu
417
478
  if (!result.HandleTooManyColumnsError(result.buffer_ptr + result.quoted_position + 1,
418
479
  buffer_pos - result.quoted_position - 2)) {
419
480
  // If it's an escaped value we have to remove all the escapes, this is not really great
420
- auto value = StringValueScanner::RemoveEscape(
421
- result.buffer_ptr + result.quoted_position + 1, buffer_pos - result.quoted_position - 2,
422
- result.state_machine.dialect_options.state_machine_options.escape.GetValue(),
423
- result.parse_chunk.data[result.chunk_col_id]);
424
- result.AddValueToVector(value.GetData(), value.GetSize());
481
+ // If we are going to escape, this vector must be a varchar vector
482
+ if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) {
483
+ result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id, result.last_position);
484
+ if (!result.state_machine.options.IgnoreErrors()) {
485
+ // We have to write the cast error message.
486
+ std::ostringstream error;
487
+ // Casting Error Message
488
+
489
+ error << "Could not convert string \""
490
+ << std::string(result.buffer_ptr + result.quoted_position + 1,
491
+ buffer_pos - result.quoted_position - 2)
492
+ << "\" to \'" << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id)
493
+ << "\'";
494
+ auto error_string = error.str();
495
+ SanitizeError(error_string);
496
+ result.current_errors.ModifyErrorMessageOfLastError(error_string);
497
+ }
498
+ result.cur_col_id++;
499
+ result.chunk_col_id++;
500
+ } else {
501
+ auto value = StringValueScanner::RemoveEscape(
502
+ result.buffer_ptr + result.quoted_position + 1, buffer_pos - result.quoted_position - 2,
503
+ result.state_machine.dialect_options.state_machine_options.escape.GetValue(),
504
+ result.parse_chunk.data[result.chunk_col_id]);
505
+ result.AddValueToVector(value.GetData(), value.GetSize());
506
+ }
425
507
  }
426
508
  } else {
427
509
  if (buffer_pos < result.last_position.buffer_pos + 2) {
@@ -453,100 +535,108 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p
453
535
  void StringValueResult::HandleUnicodeError(idx_t col_idx, LinePosition &error_position) {
454
536
 
455
537
  bool first_nl;
456
- auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
538
+ auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles, PrintErrorLine());
457
539
  LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
458
540
  if (current_line_position.begin == error_position) {
459
541
  auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
460
542
  current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
461
- error_position.GetGlobalPosition(requested_size, first_nl));
543
+ error_position.GetGlobalPosition(requested_size, first_nl), path);
462
544
  error_handler.Error(csv_error, true);
463
545
  } else {
464
546
  auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
465
547
  current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
466
- error_position.GetGlobalPosition(requested_size));
548
+ error_position.GetGlobalPosition(requested_size), path);
467
549
  error_handler.Error(csv_error, true);
468
550
  }
469
551
  }
470
552
 
471
553
  bool LineError::HandleErrors(StringValueResult &result) {
472
- if (ignore_errors && is_error_in_line && !result.figure_out_new_line) {
473
- result.cur_col_id = 0;
474
- result.chunk_col_id = 0;
475
- result.number_of_rows--;
554
+ bool skip_sniffing = false;
555
+ for (auto &cur_error : current_errors) {
556
+ if (cur_error.type == CSVErrorType::INVALID_UNICODE) {
557
+ skip_sniffing = true;
558
+ }
559
+ }
560
+ skip_sniffing = result.sniffing && skip_sniffing;
561
+
562
+ if ((ignore_errors || skip_sniffing) && is_error_in_line && !result.figure_out_new_line) {
563
+ result.RemoveLastLine();
476
564
  Reset();
477
565
  return true;
478
566
  }
479
567
  // Reconstruct CSV Line
480
568
  for (auto &cur_error : current_errors) {
481
569
  LinesPerBoundary lines_per_batch(result.iterator.GetBoundaryIdx(), result.lines_read);
482
- bool first_nl;
483
- auto borked_line = result.current_line_position.ReconstructCurrentLine(first_nl, result.buffer_handles);
570
+ bool first_nl = false;
571
+ auto borked_line = result.current_line_position.ReconstructCurrentLine(first_nl, result.buffer_handles,
572
+ result.PrintErrorLine());
484
573
  CSVError csv_error;
485
574
  auto col_idx = cur_error.col_idx;
486
575
  auto &line_pos = cur_error.error_position;
487
576
 
488
577
  switch (cur_error.type) {
489
- case CSVErrorType::TOO_MANY_COLUMNS:
490
- case CSVErrorType::TOO_FEW_COLUMNS:
578
+ case TOO_MANY_COLUMNS:
579
+ case TOO_FEW_COLUMNS:
491
580
  if (result.current_line_position.begin == line_pos) {
492
581
  csv_error = CSVError::IncorrectColumnAmountError(
493
582
  result.state_machine.options, col_idx, lines_per_batch, borked_line,
494
583
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
495
- line_pos.GetGlobalPosition(result.requested_size, first_nl));
584
+ line_pos.GetGlobalPosition(result.requested_size, first_nl), result.path);
496
585
  } else {
497
586
  csv_error = CSVError::IncorrectColumnAmountError(
498
587
  result.state_machine.options, col_idx, lines_per_batch, borked_line,
499
588
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
500
- line_pos.GetGlobalPosition(result.requested_size));
589
+ line_pos.GetGlobalPosition(result.requested_size), result.path);
501
590
  }
502
591
  break;
503
- case CSVErrorType::INVALID_UNICODE: {
592
+ case INVALID_UNICODE: {
504
593
  if (result.current_line_position.begin == line_pos) {
505
594
  csv_error = CSVError::InvalidUTF8(
506
595
  result.state_machine.options, col_idx, lines_per_batch, borked_line,
507
596
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
508
- line_pos.GetGlobalPosition(result.requested_size, first_nl));
597
+ line_pos.GetGlobalPosition(result.requested_size, first_nl), result.path);
509
598
  } else {
510
599
  csv_error = CSVError::InvalidUTF8(
511
600
  result.state_machine.options, col_idx, lines_per_batch, borked_line,
512
601
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
513
- line_pos.GetGlobalPosition(result.requested_size));
602
+ line_pos.GetGlobalPosition(result.requested_size), result.path);
514
603
  }
515
604
  break;
516
605
  }
517
- case CSVErrorType::UNTERMINATED_QUOTES:
606
+ case UNTERMINATED_QUOTES:
518
607
  if (result.current_line_position.begin == line_pos) {
519
608
  csv_error = CSVError::UnterminatedQuotesError(
520
609
  result.state_machine.options, col_idx, lines_per_batch, borked_line,
521
610
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
522
- line_pos.GetGlobalPosition(result.requested_size, first_nl));
611
+ line_pos.GetGlobalPosition(result.requested_size, first_nl), result.path);
523
612
  } else {
524
613
  csv_error = CSVError::UnterminatedQuotesError(
525
614
  result.state_machine.options, col_idx, lines_per_batch, borked_line,
526
615
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
527
- line_pos.GetGlobalPosition(result.requested_size));
616
+ line_pos.GetGlobalPosition(result.requested_size), result.path);
528
617
  }
529
618
  break;
530
- case CSVErrorType::CAST_ERROR:
619
+ case CAST_ERROR:
531
620
  if (result.current_line_position.begin == line_pos) {
532
621
  csv_error = CSVError::CastError(
533
622
  result.state_machine.options, result.names[cur_error.col_idx], cur_error.error_message,
534
623
  cur_error.col_idx, borked_line, lines_per_batch,
535
624
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
536
625
  line_pos.GetGlobalPosition(result.requested_size, first_nl),
537
- result.parse_types[cur_error.chunk_idx].type_id);
626
+ result.parse_types[cur_error.chunk_idx].type_id, result.path);
538
627
  } else {
539
628
  csv_error = CSVError::CastError(
540
629
  result.state_machine.options, result.names[cur_error.col_idx], cur_error.error_message,
541
630
  cur_error.col_idx, borked_line, lines_per_batch,
542
631
  result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
543
- line_pos.GetGlobalPosition(result.requested_size), result.parse_types[cur_error.chunk_idx].type_id);
632
+ line_pos.GetGlobalPosition(result.requested_size), result.parse_types[cur_error.chunk_idx].type_id,
633
+ result.path);
544
634
  }
545
635
  break;
546
- case CSVErrorType::MAXIMUM_LINE_SIZE:
636
+ case MAXIMUM_LINE_SIZE:
547
637
  csv_error = CSVError::LineSizeError(
548
638
  result.state_machine.options, cur_error.current_line_size, lines_per_batch, borked_line,
549
- result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl));
639
+ result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl), result.path);
550
640
  break;
551
641
  default:
552
642
  throw InvalidInputException("CSV Error not allowed when inserting row");
@@ -567,19 +657,23 @@ void StringValueResult::QuotedNewLine(StringValueResult &result) {
567
657
  result.quoted_new_line = true;
568
658
  }
569
659
 
570
- void StringValueResult::NullPaddingQuotedNewlineCheck() {
660
+ void StringValueResult::NullPaddingQuotedNewlineCheck() const {
571
661
  // We do some checks for null_padding correctness
572
662
  if (state_machine.options.null_padding && iterator.IsBoundarySet() && quoted_new_line) {
573
663
  // If we have null_padding set, we found a quoted new line, we are scanning the file in parallel; We error.
574
664
  LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
575
- auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch);
665
+ auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch, path);
576
666
  error_handler.Error(csv_error);
577
667
  }
578
668
  }
579
669
 
580
670
  //! Reconstructs the current line to be used in error messages
581
671
  string FullLinePosition::ReconstructCurrentLine(bool &first_char_nl,
582
- unordered_map<idx_t, shared_ptr<CSVBufferHandle>> &buffer_handles) {
672
+ unordered_map<idx_t, shared_ptr<CSVBufferHandle>> &buffer_handles,
673
+ bool reconstruct_line) const {
674
+ if (!reconstruct_line) {
675
+ return {};
676
+ }
583
677
  string result;
584
678
  if (end.buffer_idx == begin.buffer_idx) {
585
679
  if (buffer_handles.find(end.buffer_idx) == buffer_handles.end()) {
@@ -609,10 +703,7 @@ string FullLinePosition::ReconstructCurrentLine(bool &first_char_nl,
609
703
  }
610
704
  }
611
705
  // sanitize borked line
612
- std::vector<char> char_array(result.begin(), result.end());
613
- char_array.push_back('\0'); // Null-terminate the character array
614
- Utf8Proc::MakeValid(&char_array[0], char_array.size());
615
- result = {char_array.begin(), char_array.end() - 1};
706
+ SanitizeError(result);
616
707
  return result;
617
708
  }
618
709
 
@@ -625,11 +716,11 @@ bool StringValueResult::AddRowInternal() {
625
716
  current_line_position.begin = current_line_position.end;
626
717
  current_line_position.end = current_line_start;
627
718
  if (current_line_size > state_machine.options.maximum_line_size) {
628
- current_errors.Insert(CSVErrorType::MAXIMUM_LINE_SIZE, 1, chunk_col_id, last_position, current_line_size);
719
+ current_errors.Insert(MAXIMUM_LINE_SIZE, 1, chunk_col_id, last_position, current_line_size);
629
720
  }
630
721
  if (!state_machine.options.null_padding) {
631
722
  for (idx_t col_idx = cur_col_id; col_idx < number_of_columns; col_idx++) {
632
- current_errors.Insert(CSVErrorType::TOO_FEW_COLUMNS, col_idx - 1, chunk_col_id, last_position);
723
+ current_errors.Insert(TOO_FEW_COLUMNS, col_idx - 1, chunk_col_id, last_position);
633
724
  }
634
725
  }
635
726
 
@@ -672,24 +763,25 @@ bool StringValueResult::AddRowInternal() {
672
763
  // If we are not null-padding this is an error
673
764
  if (!state_machine.options.IgnoreErrors()) {
674
765
  bool first_nl;
675
- auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
766
+ auto borked_line =
767
+ current_line_position.ReconstructCurrentLine(first_nl, buffer_handles, PrintErrorLine());
676
768
  LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
677
769
  if (current_line_position.begin == last_position) {
678
770
  auto csv_error = CSVError::IncorrectColumnAmountError(
679
771
  state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
680
772
  current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
681
- last_position.GetGlobalPosition(requested_size, first_nl));
773
+ last_position.GetGlobalPosition(requested_size, first_nl), path);
682
774
  error_handler.Error(csv_error);
683
775
  } else {
684
776
  auto csv_error = CSVError::IncorrectColumnAmountError(
685
777
  state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
686
778
  current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
687
- last_position.GetGlobalPosition(requested_size));
779
+ last_position.GetGlobalPosition(requested_size), path);
688
780
  error_handler.Error(csv_error);
689
781
  }
690
782
  }
691
783
  // If we are here we ignore_errors, so we delete this line
692
- number_of_rows--;
784
+ RemoveLastLine();
693
785
  }
694
786
  }
695
787
  line_positions_per_row[number_of_rows] = current_line_position;
@@ -707,7 +799,7 @@ bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos
707
799
  if (result.last_position.buffer_pos <= buffer_pos) {
708
800
  // We add the value
709
801
  if (result.quoted) {
710
- StringValueResult::AddQuotedValue(result, buffer_pos);
802
+ AddQuotedValue(result, buffer_pos);
711
803
  } else {
712
804
  result.AddValueToVector(result.buffer_ptr + result.last_position.buffer_pos,
713
805
  buffer_pos - result.last_position.buffer_pos);
@@ -734,8 +826,7 @@ void StringValueResult::InvalidState(StringValueResult &result) {
734
826
  if (force_error) {
735
827
  result.HandleUnicodeError(result.cur_col_id, result.last_position);
736
828
  }
737
- result.current_errors.Insert(CSVErrorType::UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
738
- result.last_position);
829
+ result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id, result.last_position);
739
830
  }
740
831
 
741
832
  bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_pos) {
@@ -772,21 +863,24 @@ StringValueScanner::StringValueScanner(idx_t scanner_idx_p, const shared_ptr<CSV
772
863
  const shared_ptr<CSVStateMachine> &state_machine,
773
864
  const shared_ptr<CSVErrorHandler> &error_handler,
774
865
  const shared_ptr<CSVFileScan> &csv_file_scan, bool sniffing,
775
- CSVIterator boundary, bool figure_out_nl)
866
+ const CSVIterator &boundary, idx_t result_size)
776
867
  : BaseScanner(buffer_manager, state_machine, error_handler, sniffing, csv_file_scan, boundary),
777
868
  scanner_idx(scanner_idx_p),
778
- result(states, *state_machine, cur_buffer_handle, BufferAllocator::Get(buffer_manager->context), figure_out_nl,
869
+ result(states, *state_machine, cur_buffer_handle, BufferAllocator::Get(buffer_manager->context), result_size,
779
870
  iterator.pos.buffer_pos, *error_handler, iterator,
780
- buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan, lines_read, sniffing) {
871
+ buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan, lines_read, sniffing,
872
+ buffer_manager->GetFilePath()) {
781
873
  }
782
874
 
783
875
  StringValueScanner::StringValueScanner(const shared_ptr<CSVBufferManager> &buffer_manager,
784
876
  const shared_ptr<CSVStateMachine> &state_machine,
785
- const shared_ptr<CSVErrorHandler> &error_handler)
786
- : BaseScanner(buffer_manager, state_machine, error_handler, false, nullptr, {}), scanner_idx(0),
787
- result(states, *state_machine, cur_buffer_handle, Allocator::DefaultAllocator(), false, iterator.pos.buffer_pos,
788
- *error_handler, iterator, buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan,
789
- lines_read, sniffing) {
877
+ const shared_ptr<CSVErrorHandler> &error_handler, idx_t result_size,
878
+ const CSVIterator &boundary)
879
+ : BaseScanner(buffer_manager, state_machine, error_handler, false, nullptr, boundary), scanner_idx(0),
880
+ result(states, *state_machine, cur_buffer_handle, Allocator::DefaultAllocator(), result_size,
881
+ iterator.pos.buffer_pos, *error_handler, iterator,
882
+ buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan, lines_read, sniffing,
883
+ buffer_manager->GetFilePath()) {
790
884
  }
791
885
 
792
886
  unique_ptr<StringValueScanner> StringValueScanner::GetCSVScanner(ClientContext &context, CSVReaderOptions &options) {
@@ -796,13 +890,18 @@ unique_ptr<StringValueScanner> StringValueScanner::GetCSVScanner(ClientContext &
796
890
  state_machine->dialect_options.num_cols = options.dialect_options.num_cols;
797
891
  state_machine->dialect_options.header = options.dialect_options.header;
798
892
  auto buffer_manager = make_shared_ptr<CSVBufferManager>(context, options, options.file_path, 0);
799
- auto scanner = make_uniq<StringValueScanner>(buffer_manager, state_machine, make_shared_ptr<CSVErrorHandler>());
893
+ idx_t rows_to_skip = state_machine->options.GetSkipRows() + state_machine->options.GetHeader();
894
+ rows_to_skip = std::max(rows_to_skip, state_machine->dialect_options.rows_until_header +
895
+ state_machine->dialect_options.header.GetValue());
896
+ auto it = BaseScanner::SkipCSVRows(buffer_manager, state_machine, rows_to_skip);
897
+ auto scanner = make_uniq<StringValueScanner>(buffer_manager, state_machine, make_shared_ptr<CSVErrorHandler>(),
898
+ STANDARD_VECTOR_SIZE, it);
800
899
  scanner->csv_file_scan = make_shared_ptr<CSVFileScan>(context, options.file_path, options);
801
900
  scanner->csv_file_scan->InitializeProjection();
802
901
  return scanner;
803
902
  }
804
903
 
805
- bool StringValueScanner::FinishedIterator() {
904
+ bool StringValueScanner::FinishedIterator() const {
806
905
  return iterator.done;
807
906
  }
808
907
 
@@ -842,7 +941,8 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
842
941
  auto &result_vector = insert_chunk.data[result_idx];
843
942
  auto &type = result_vector.GetType();
844
943
  auto &parse_type = parse_vector.GetType();
845
- if (type == LogicalType::VARCHAR || (type != LogicalType::VARCHAR && parse_type != LogicalType::VARCHAR)) {
944
+ if (!type.IsJSONType() &&
945
+ (type == LogicalType::VARCHAR || (type != LogicalType::VARCHAR && parse_type != LogicalType::VARCHAR))) {
846
946
  // reinterpret rather than reference
847
947
  result_vector.Reinterpret(parse_vector);
848
948
  } else {
@@ -865,9 +965,9 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
865
965
  }
866
966
  }
867
967
  {
868
- vector<Value> row;
869
968
 
870
969
  if (state_machine->options.ignore_errors.GetValue()) {
970
+ vector<Value> row;
871
971
  for (idx_t col = 0; col < parse_chunk.ColumnCount(); col++) {
872
972
  row.push_back(parse_chunk.GetValue(col, line_error));
873
973
  }
@@ -877,16 +977,17 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
877
977
  lines_read - parse_chunk.size() + line_error);
878
978
  bool first_nl;
879
979
  auto borked_line = result.line_positions_per_row[line_error].ReconstructCurrentLine(
880
- first_nl, result.buffer_handles);
980
+ first_nl, result.buffer_handles, result.PrintErrorLine());
881
981
  std::ostringstream error;
882
982
  error << "Could not convert string \"" << parse_vector.GetValue(line_error) << "\" to \'"
883
- << LogicalTypeIdToString(type.id()) << "\'";
983
+ << type.ToString() << "\'";
884
984
  string error_msg = error.str();
985
+ SanitizeError(error_msg);
885
986
  auto csv_error = CSVError::CastError(
886
987
  state_machine->options, csv_file_scan->names[col_idx], error_msg, col_idx, borked_line,
887
988
  lines_per_batch,
888
989
  result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl),
889
- optional_idx::Invalid(), result_vector.GetType().id());
990
+ optional_idx::Invalid(), result_vector.GetType().id(), result.path);
890
991
  error_handler->Error(csv_error);
891
992
  }
892
993
  }
@@ -906,18 +1007,19 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
906
1007
  lines_read - parse_chunk.size() + line_error);
907
1008
  bool first_nl;
908
1009
  auto borked_line = result.line_positions_per_row[line_error].ReconstructCurrentLine(
909
- first_nl, result.buffer_handles);
1010
+ first_nl, result.buffer_handles, result.PrintErrorLine());
910
1011
  std::ostringstream error;
911
1012
  // Casting Error Message
912
1013
  error << "Could not convert string \"" << parse_vector.GetValue(line_error) << "\" to \'"
913
1014
  << LogicalTypeIdToString(type.id()) << "\'";
914
1015
  string error_msg = error.str();
1016
+ SanitizeError(error_msg);
915
1017
  auto csv_error =
916
1018
  CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], error_msg,
917
1019
  col_idx, borked_line, lines_per_batch,
918
1020
  result.line_positions_per_row[line_error].begin.GetGlobalPosition(
919
1021
  result.result_size, first_nl),
920
- optional_idx::Invalid(), result_vector.GetType().id());
1022
+ optional_idx::Invalid(), result_vector.GetType().id(), result.path);
921
1023
  error_handler->Error(csv_error);
922
1024
  }
923
1025
  }
@@ -940,7 +1042,6 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
940
1042
 
941
1043
  void StringValueScanner::Initialize() {
942
1044
  states.Initialize();
943
-
944
1045
  if (result.result_size != 1 && !(sniffing && state_machine->options.null_padding &&
945
1046
  !state_machine->options.dialect_options.skip_rows.IsSetByUser())) {
946
1047
  SetStart();
@@ -968,7 +1069,11 @@ void StringValueScanner::ProcessExtraRow() {
968
1069
  lines_read++;
969
1070
  return;
970
1071
  } else if (states.states[0] != CSVState::CARRIAGE_RETURN) {
971
- result.AddRow(result, iterator.pos.buffer_pos);
1072
+ if (result.IsCommentSet(result)) {
1073
+ result.UnsetComment(result, iterator.pos.buffer_pos);
1074
+ } else {
1075
+ result.AddRow(result, iterator.pos.buffer_pos);
1076
+ }
972
1077
  iterator.pos.buffer_pos++;
973
1078
  lines_read++;
974
1079
  return;
@@ -978,7 +1083,11 @@ void StringValueScanner::ProcessExtraRow() {
978
1083
  break;
979
1084
  case CSVState::CARRIAGE_RETURN:
980
1085
  if (states.states[0] != CSVState::RECORD_SEPARATOR) {
981
- result.AddRow(result, iterator.pos.buffer_pos);
1086
+ if (result.IsCommentSet(result)) {
1087
+ result.UnsetComment(result, iterator.pos.buffer_pos);
1088
+ } else {
1089
+ result.AddRow(result, iterator.pos.buffer_pos);
1090
+ }
982
1091
  iterator.pos.buffer_pos++;
983
1092
  lines_read++;
984
1093
  return;
@@ -988,6 +1097,7 @@ void StringValueScanner::ProcessExtraRow() {
988
1097
  lines_read++;
989
1098
  return;
990
1099
  }
1100
+ break;
991
1101
  case CSVState::DELIMITER:
992
1102
  result.AddValue(result, iterator.pos.buffer_pos);
993
1103
  iterator.pos.buffer_pos++;
@@ -1016,6 +1126,15 @@ void StringValueScanner::ProcessExtraRow() {
1016
1126
  iterator.pos.buffer_pos++;
1017
1127
  }
1018
1128
  break;
1129
+ case CSVState::COMMENT:
1130
+ result.SetComment(result, iterator.pos.buffer_pos);
1131
+ iterator.pos.buffer_pos++;
1132
+ while (state_machine->transition_array
1133
+ .skip_comment[static_cast<uint8_t>(buffer_handle_ptr[iterator.pos.buffer_pos])] &&
1134
+ iterator.pos.buffer_pos < to_pos - 1) {
1135
+ iterator.pos.buffer_pos++;
1136
+ }
1137
+ break;
1019
1138
  case CSVState::QUOTED_NEW_LINE:
1020
1139
  result.quoted_new_line = true;
1021
1140
  result.NullPaddingQuotedNewlineCheck();
@@ -1077,7 +1196,9 @@ void StringValueScanner::ProcessOverbufferValue() {
1077
1196
  if (states.NewRow() || states.NewValue()) {
1078
1197
  break;
1079
1198
  } else {
1080
- overbuffer_string += previous_buffer[i];
1199
+ if (!result.comment) {
1200
+ overbuffer_string += previous_buffer[i];
1201
+ }
1081
1202
  }
1082
1203
  if (states.IsQuoted()) {
1083
1204
  result.SetQuoted(result, j);
@@ -1085,6 +1206,9 @@ void StringValueScanner::ProcessOverbufferValue() {
1085
1206
  if (states.IsEscaped()) {
1086
1207
  result.escaped = true;
1087
1208
  }
1209
+ if (states.IsComment()) {
1210
+ result.comment = true;
1211
+ }
1088
1212
  if (states.IsInvalid()) {
1089
1213
  result.InvalidState(result);
1090
1214
  }
@@ -1109,11 +1233,16 @@ void StringValueScanner::ProcessOverbufferValue() {
1109
1233
  if (states.NewRow() || states.NewValue()) {
1110
1234
  break;
1111
1235
  } else {
1112
- overbuffer_string += buffer_handle_ptr[iterator.pos.buffer_pos];
1236
+ if (!result.comment && !states.IsComment()) {
1237
+ overbuffer_string += buffer_handle_ptr[iterator.pos.buffer_pos];
1238
+ }
1113
1239
  }
1114
1240
  if (states.IsQuoted()) {
1115
1241
  result.SetQuoted(result, j);
1116
1242
  }
1243
+ if (states.IsComment()) {
1244
+ result.comment = true;
1245
+ }
1117
1246
  if (states.IsEscaped()) {
1118
1247
  result.escaped = true;
1119
1248
  }
@@ -1124,7 +1253,7 @@ void StringValueScanner::ProcessOverbufferValue() {
1124
1253
  }
1125
1254
  bool skip_value = false;
1126
1255
  if (result.projecting_columns) {
1127
- if (!result.projected_columns[result.cur_col_id]) {
1256
+ if (!result.projected_columns[result.cur_col_id] && result.cur_col_id != result.number_of_columns) {
1128
1257
  result.cur_col_id++;
1129
1258
  skip_value = true;
1130
1259
  }
@@ -1135,18 +1264,17 @@ void StringValueScanner::ProcessOverbufferValue() {
1135
1264
  value = string_t(overbuffer_string.c_str() + result.quoted_position,
1136
1265
  UnsafeNumericCast<uint32_t>(overbuffer_string.size() - 1 - result.quoted_position));
1137
1266
  if (result.escaped) {
1138
- const auto str_ptr = static_cast<const char *>(overbuffer_string.c_str() + result.quoted_position);
1139
- value = StringValueScanner::RemoveEscape(
1140
- str_ptr, overbuffer_string.size() - 2,
1141
- state_machine->dialect_options.state_machine_options.escape.GetValue(),
1142
- result.parse_chunk.data[result.chunk_col_id]);
1267
+ const auto str_ptr = overbuffer_string.c_str() + result.quoted_position;
1268
+ value = RemoveEscape(str_ptr, overbuffer_string.size() - 2,
1269
+ state_machine->dialect_options.state_machine_options.escape.GetValue(),
1270
+ result.parse_chunk.data[result.chunk_col_id]);
1143
1271
  }
1144
1272
  } else {
1145
1273
  value = string_t(overbuffer_string.c_str(), UnsafeNumericCast<uint32_t>(overbuffer_string.size()));
1146
1274
  }
1147
1275
  if (states.EmptyLine() && state_machine->dialect_options.num_cols == 1) {
1148
1276
  result.EmptyLine(result, iterator.pos.buffer_pos);
1149
- } else if (!states.IsNotSet()) {
1277
+ } else if (!states.IsNotSet() && (!result.comment || !value.Empty())) {
1150
1278
  result.AddValueToVector(value.GetData(), value.GetSize(), true);
1151
1279
  }
1152
1280
  } else {
@@ -1156,7 +1284,11 @@ void StringValueScanner::ProcessOverbufferValue() {
1156
1284
  }
1157
1285
 
1158
1286
  if (states.NewRow() && !states.IsNotSet()) {
1159
- result.AddRowInternal();
1287
+ if (result.IsCommentSet(result)) {
1288
+ result.UnsetComment(result, iterator.pos.buffer_pos);
1289
+ } else {
1290
+ result.AddRowInternal();
1291
+ }
1160
1292
  lines_read++;
1161
1293
  }
1162
1294
 
@@ -1196,7 +1328,11 @@ bool StringValueScanner::MoveToNextBuffer() {
1196
1328
  // we add the value
1197
1329
  result.AddValue(result, previous_buffer_handle->actual_size);
1198
1330
  // And an extra empty value to represent what comes after the delimiter
1199
- result.AddRow(result, previous_buffer_handle->actual_size);
1331
+ if (result.IsCommentSet(result)) {
1332
+ result.UnsetComment(result, iterator.pos.buffer_pos);
1333
+ } else {
1334
+ result.AddRow(result, previous_buffer_handle->actual_size);
1335
+ }
1200
1336
  lines_read++;
1201
1337
  } else if (states.IsQuotedCurrent()) {
1202
1338
  // Unterminated quote
@@ -1206,7 +1342,11 @@ bool StringValueScanner::MoveToNextBuffer() {
1206
1342
  result.current_line_position.end = current_line_start;
1207
1343
  result.InvalidState(result);
1208
1344
  } else {
1209
- result.AddRow(result, previous_buffer_handle->actual_size);
1345
+ if (result.IsCommentSet(result)) {
1346
+ result.UnsetComment(result, iterator.pos.buffer_pos);
1347
+ } else {
1348
+ result.AddRow(result, previous_buffer_handle->actual_size);
1349
+ }
1210
1350
  lines_read++;
1211
1351
  }
1212
1352
  return false;
@@ -1224,13 +1364,31 @@ bool StringValueScanner::MoveToNextBuffer() {
1224
1364
  return false;
1225
1365
  }
1226
1366
 
1227
- void StringValueScanner::SkipBOM() {
1228
- if (cur_buffer_handle->actual_size >= 3 && result.buffer_ptr[0] == '\xEF' && result.buffer_ptr[1] == '\xBB' &&
1229
- result.buffer_ptr[2] == '\xBF') {
1367
+ void StringValueResult::SkipBOM() const {
1368
+ if (buffer_size >= 3 && buffer_ptr[0] == '\xEF' && buffer_ptr[1] == '\xBB' && buffer_ptr[2] == '\xBF' &&
1369
+ iterator.pos.buffer_pos == 0) {
1230
1370
  iterator.pos.buffer_pos = 3;
1231
1371
  }
1232
1372
  }
1233
1373
 
1374
+ void StringValueResult::RemoveLastLine() {
1375
+ // potentially de-nullify values
1376
+ for (idx_t i = 0; i < chunk_col_id; i++) {
1377
+ validity_mask[i]->SetValid(number_of_rows);
1378
+ }
1379
+ // reset column trackers
1380
+ cur_col_id = 0;
1381
+ chunk_col_id = 0;
1382
+ // decrement row counter
1383
+ number_of_rows--;
1384
+ }
1385
+ bool StringValueResult::PrintErrorLine() const {
1386
+ // To print a lint, result size must be different, than one (i.e., this is a SetStart() trying to figure out new
1387
+ // lines) And must either not be ignoring errors OR must be storing them in a rejects table.
1388
+ return result_size != 1 &&
1389
+ (state_machine.options.store_rejects.GetValue() || !state_machine.options.ignore_errors.GetValue());
1390
+ }
1391
+
1234
1392
  void StringValueScanner::SkipUntilNewLine() {
1235
1393
  // Now skip until next newline
1236
1394
  if (state_machine->options.dialect_options.state_machine_options.new_line.GetValue() ==
@@ -1261,7 +1419,7 @@ void StringValueScanner::SkipUntilNewLine() {
1261
1419
  }
1262
1420
  }
1263
1421
 
1264
- bool StringValueScanner::CanDirectlyCast(const LogicalType &type) {
1422
+ bool StringValueScanner::CanDirectlyCast(const LogicalType &type, bool icu_loaded) {
1265
1423
 
1266
1424
  switch (type.id()) {
1267
1425
  case LogicalTypeId::TINYINT:
@@ -1280,29 +1438,33 @@ bool StringValueScanner::CanDirectlyCast(const LogicalType &type) {
1280
1438
  case LogicalTypeId::DECIMAL:
1281
1439
  case LogicalType::VARCHAR:
1282
1440
  return true;
1441
+ case LogicalType::TIMESTAMP_TZ:
1442
+ // We only try to do direct cast of timestamp tz if the ICU extension is not loaded, otherwise, it needs to go
1443
+ // through string -> timestamp_tz casting
1444
+ return !icu_loaded;
1283
1445
  default:
1284
1446
  return false;
1285
1447
  }
1286
1448
  }
1287
1449
 
1288
1450
  void StringValueScanner::SetStart() {
1289
- if (iterator.pos.buffer_idx == 0 && iterator.pos.buffer_pos == 0) {
1290
- // This means this is the very first buffer
1291
- // This CSV is not from auto-detect, so we don't know where exactly it starts
1292
- // Hence we potentially have to skip empty lines and headers.
1293
- SkipBOM();
1294
- SkipCSVRows(state_machine->dialect_options.skip_rows.GetValue() +
1295
- state_machine->dialect_options.header.GetValue());
1451
+ if (iterator.first_one) {
1296
1452
  if (result.store_line_size) {
1297
1453
  result.error_handler.NewMaxLineSize(iterator.pos.buffer_pos);
1298
1454
  }
1299
1455
  return;
1300
1456
  }
1457
+ if (state_machine->options.IgnoreErrors()) {
1458
+ // If we are ignoring errors we don't really need to figure out a line.
1459
+ return;
1460
+ }
1461
+ // The result size of the data after skipping the row is one line
1301
1462
  // We have to look for a new line that fits our schema
1302
1463
  // 1. We walk until the next new line
1303
1464
  bool line_found;
1304
1465
  unique_ptr<StringValueScanner> scan_finder;
1305
1466
  do {
1467
+ constexpr idx_t result_size = 1;
1306
1468
  SkipUntilNewLine();
1307
1469
  if (state_machine->options.null_padding) {
1308
1470
  // When Null Padding, we assume we start from the correct new-line
@@ -1310,11 +1472,12 @@ void StringValueScanner::SetStart() {
1310
1472
  }
1311
1473
  scan_finder =
1312
1474
  make_uniq<StringValueScanner>(0U, buffer_manager, state_machine, make_shared_ptr<CSVErrorHandler>(true),
1313
- csv_file_scan, false, iterator, true);
1475
+ csv_file_scan, false, iterator, result_size);
1314
1476
  auto &tuples = scan_finder->ParseChunk();
1315
1477
  line_found = true;
1316
1478
  if (tuples.number_of_rows != 1 ||
1317
- (!tuples.borked_rows.empty() && !state_machine->options.ignore_errors.GetValue())) {
1479
+ (!tuples.borked_rows.empty() && !state_machine->options.ignore_errors.GetValue()) ||
1480
+ tuples.first_line_is_comment) {
1318
1481
  line_found = false;
1319
1482
  // If no tuples were parsed, this is not the correct start, we need to skip until the next new line
1320
1483
  // Or if columns don't match, this is not the correct start, we need to skip until the next new line
@@ -1358,7 +1521,7 @@ void StringValueScanner::FinalizeChunkProcess() {
1358
1521
  // If we are not done we have two options.
1359
1522
  // 1) If a boundary is set.
1360
1523
  if (iterator.IsBoundarySet()) {
1361
- if (!result.current_errors.HasErrorType(CSVErrorType::UNTERMINATED_QUOTES)) {
1524
+ if (!result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
1362
1525
  iterator.done = true;
1363
1526
  }
1364
1527
  // We read until the next line or until we have nothing else to read.
@@ -1368,7 +1531,7 @@ void StringValueScanner::FinalizeChunkProcess() {
1368
1531
  }
1369
1532
  bool moved = MoveToNextBuffer();
1370
1533
  if (cur_buffer_handle) {
1371
- if (moved && result.cur_col_id < result.number_of_columns && result.cur_col_id > 0) {
1534
+ if (moved && result.cur_col_id > 0) {
1372
1535
  ProcessExtraRow();
1373
1536
  } else if (!moved) {
1374
1537
  ProcessExtraRow();
@@ -1398,7 +1561,7 @@ void StringValueScanner::FinalizeChunkProcess() {
1398
1561
  }
1399
1562
  }
1400
1563
  iterator.done = FinishedFile();
1401
- if (result.null_padding && result.number_of_rows < STANDARD_VECTOR_SIZE) {
1564
+ if (result.null_padding && result.number_of_rows < STANDARD_VECTOR_SIZE && result.chunk_col_id > 0) {
1402
1565
  while (result.chunk_col_id < result.parse_chunk.ColumnCount()) {
1403
1566
  result.validity_mask[result.chunk_col_id++]->SetInvalid(result.number_of_rows);
1404
1567
  result.cur_col_id++;