duckdb 1.0.1-dev22.0 → 1.0.1-dev27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1389)
  1. package/.github/workflows/NodeJS.yml +1 -1
  2. package/binding.gyp +41 -0
  3. package/package.json +1 -1
  4. package/src/duckdb/extension/icu/icu-dateadd.cpp +4 -2
  5. package/src/duckdb/extension/icu/icu-datefunc.cpp +6 -2
  6. package/src/duckdb/extension/icu/icu-datesub.cpp +13 -2
  7. package/src/duckdb/extension/icu/icu-strptime.cpp +6 -6
  8. package/src/duckdb/extension/icu/icu-table-range.cpp +92 -73
  9. package/src/duckdb/extension/icu/icu-timebucket.cpp +12 -2
  10. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -3
  11. package/src/duckdb/extension/icu/icu_extension.cpp +61 -9
  12. package/src/duckdb/extension/json/include/json_executors.hpp +20 -23
  13. package/src/duckdb/extension/json/include/json_functions.hpp +4 -0
  14. package/src/duckdb/extension/json/include/json_scan.hpp +6 -2
  15. package/src/duckdb/extension/json/include/json_structure.hpp +12 -9
  16. package/src/duckdb/extension/json/json_common.cpp +66 -10
  17. package/src/duckdb/extension/json/json_extension.cpp +13 -5
  18. package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
  19. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -4
  20. package/src/duckdb/extension/json/json_functions/json_exists.cpp +32 -0
  21. package/src/duckdb/extension/json/json_functions/json_extract.cpp +2 -2
  22. package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_pretty.cpp +32 -0
  24. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +5 -1
  25. package/src/duckdb/extension/json/json_functions/json_structure.cpp +305 -94
  26. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  27. package/src/duckdb/extension/json/json_functions/json_type.cpp +3 -3
  28. package/src/duckdb/extension/json/json_functions/json_value.cpp +42 -0
  29. package/src/duckdb/extension/json/json_functions/read_json.cpp +16 -2
  30. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -2
  31. package/src/duckdb/extension/json/json_functions.cpp +5 -1
  32. package/src/duckdb/extension/json/json_scan.cpp +13 -12
  33. package/src/duckdb/extension/json/serialize_json.cpp +5 -3
  34. package/src/duckdb/extension/parquet/column_reader.cpp +206 -43
  35. package/src/duckdb/extension/parquet/column_writer.cpp +133 -62
  36. package/src/duckdb/extension/parquet/geo_parquet.cpp +391 -0
  37. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +16 -5
  38. package/src/duckdb/extension/parquet/include/column_reader.hpp +37 -12
  39. package/src/duckdb/extension/parquet/include/column_writer.hpp +10 -11
  40. package/src/duckdb/extension/parquet/include/expression_column_reader.hpp +52 -0
  41. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +139 -0
  42. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +13 -8
  43. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -0
  44. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +7 -3
  45. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +55 -8
  46. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +3 -3
  47. package/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp +1 -1
  48. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +8 -0
  49. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -7
  50. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +33 -11
  51. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +5 -2
  52. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +48 -14
  53. package/src/duckdb/extension/parquet/parquet_crypto.cpp +109 -61
  54. package/src/duckdb/extension/parquet/parquet_extension.cpp +305 -72
  55. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -4
  56. package/src/duckdb/extension/parquet/parquet_reader.cpp +151 -40
  57. package/src/duckdb/extension/parquet/parquet_statistics.cpp +50 -16
  58. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +42 -1
  59. package/src/duckdb/extension/parquet/parquet_writer.cpp +67 -75
  60. package/src/duckdb/extension/parquet/serialize_parquet.cpp +3 -1
  61. package/src/duckdb/extension/parquet/zstd_file_system.cpp +5 -1
  62. package/src/duckdb/src/catalog/catalog.cpp +14 -16
  63. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +14 -11
  64. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +39 -19
  65. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +92 -78
  66. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +10 -2
  67. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +10 -3
  68. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +3 -3
  69. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +7 -7
  70. package/src/duckdb/src/catalog/catalog_entry.cpp +6 -3
  71. package/src/duckdb/src/catalog/catalog_set.cpp +14 -19
  72. package/src/duckdb/src/catalog/default/default_functions.cpp +179 -166
  73. package/src/duckdb/src/catalog/default/default_generator.cpp +24 -0
  74. package/src/duckdb/src/catalog/default/default_schemas.cpp +4 -3
  75. package/src/duckdb/src/catalog/default/default_table_functions.cpp +148 -0
  76. package/src/duckdb/src/catalog/default/default_views.cpp +7 -3
  77. package/src/duckdb/src/catalog/duck_catalog.cpp +7 -1
  78. package/src/duckdb/src/common/adbc/adbc.cpp +120 -58
  79. package/src/duckdb/src/common/allocator.cpp +71 -6
  80. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +8 -7
  81. package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +1 -1
  82. package/src/duckdb/src/common/arrow/appender/union_data.cpp +4 -5
  83. package/src/duckdb/src/common/arrow/arrow_appender.cpp +55 -21
  84. package/src/duckdb/src/common/arrow/arrow_converter.cpp +85 -10
  85. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +142 -0
  86. package/src/duckdb/src/common/arrow/arrow_query_result.cpp +56 -0
  87. package/src/duckdb/src/common/arrow/physical_arrow_batch_collector.cpp +37 -0
  88. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +128 -0
  89. package/src/duckdb/src/common/arrow/schema_metadata.cpp +101 -0
  90. package/src/duckdb/src/common/cgroups.cpp +189 -0
  91. package/src/duckdb/src/common/compressed_file_system.cpp +6 -3
  92. package/src/duckdb/src/common/encryption_state.cpp +38 -0
  93. package/src/duckdb/src/common/enum_util.cpp +682 -14
  94. package/src/duckdb/src/common/enums/file_compression_type.cpp +24 -0
  95. package/src/duckdb/src/common/enums/metric_type.cpp +208 -0
  96. package/src/duckdb/src/common/enums/optimizer_type.cpp +8 -2
  97. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  98. package/src/duckdb/src/common/enums/relation_type.cpp +4 -0
  99. package/src/duckdb/src/common/enums/statement_type.cpp +15 -0
  100. package/src/duckdb/src/common/error_data.cpp +22 -20
  101. package/src/duckdb/src/common/exception/binder_exception.cpp +5 -0
  102. package/src/duckdb/src/common/exception.cpp +11 -1
  103. package/src/duckdb/src/common/extra_type_info.cpp +3 -0
  104. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  105. package/src/duckdb/src/common/file_system.cpp +25 -3
  106. package/src/duckdb/src/common/filename_pattern.cpp +1 -0
  107. package/src/duckdb/src/common/fsst.cpp +15 -14
  108. package/src/duckdb/src/common/gzip_file_system.cpp +3 -1
  109. package/src/duckdb/src/common/hive_partitioning.cpp +103 -43
  110. package/src/duckdb/src/common/http_util.cpp +25 -0
  111. package/src/duckdb/src/common/local_file_system.cpp +48 -27
  112. package/src/duckdb/src/common/multi_file_list.cpp +113 -22
  113. package/src/duckdb/src/common/multi_file_reader.cpp +59 -58
  114. package/src/duckdb/src/common/operator/cast_operators.cpp +133 -34
  115. package/src/duckdb/src/common/operator/string_cast.cpp +42 -11
  116. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +2 -2
  117. package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +1 -1
  118. package/src/duckdb/src/common/radix_partitioning.cpp +31 -21
  119. package/src/duckdb/src/common/random_engine.cpp +4 -0
  120. package/src/duckdb/src/common/re2_regex.cpp +47 -12
  121. package/src/duckdb/src/common/render_tree.cpp +243 -0
  122. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +1 -1
  123. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  124. package/src/duckdb/src/common/row_operations/row_matcher.cpp +58 -5
  125. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +79 -43
  126. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  127. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +6 -4
  128. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +18 -9
  129. package/src/duckdb/src/common/serializer/memory_stream.cpp +1 -0
  130. package/src/duckdb/src/common/sort/partition_state.cpp +33 -18
  131. package/src/duckdb/src/common/sort/radix_sort.cpp +22 -15
  132. package/src/duckdb/src/common/sort/sort_state.cpp +19 -16
  133. package/src/duckdb/src/common/sort/sorted_block.cpp +11 -10
  134. package/src/duckdb/src/common/string_util.cpp +167 -10
  135. package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +108 -0
  136. package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +267 -0
  137. package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +116 -0
  138. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +482 -0
  139. package/src/duckdb/src/common/tree_renderer/tree_renderer.cpp +12 -0
  140. package/src/duckdb/src/common/tree_renderer.cpp +16 -508
  141. package/src/duckdb/src/common/types/batched_data_collection.cpp +78 -9
  142. package/src/duckdb/src/common/types/bit.cpp +24 -22
  143. package/src/duckdb/src/common/types/blob.cpp +15 -11
  144. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -9
  145. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  146. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +3 -4
  147. package/src/duckdb/src/common/types/column/column_data_consumer.cpp +2 -2
  148. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +70 -21
  149. package/src/duckdb/src/common/types/data_chunk.cpp +10 -1
  150. package/src/duckdb/src/common/types/date.cpp +8 -19
  151. package/src/duckdb/src/common/types/decimal.cpp +3 -2
  152. package/src/duckdb/src/common/types/hugeint.cpp +11 -3
  153. package/src/duckdb/src/common/types/hyperloglog.cpp +212 -227
  154. package/src/duckdb/src/common/types/interval.cpp +1 -1
  155. package/src/duckdb/src/common/types/list_segment.cpp +83 -49
  156. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +22 -83
  157. package/src/duckdb/src/common/types/row/row_data_collection.cpp +2 -2
  158. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +20 -4
  159. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +28 -7
  160. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +29 -14
  161. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +152 -102
  162. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +4 -1
  163. package/src/duckdb/src/common/types/selection_vector.cpp +17 -1
  164. package/src/duckdb/src/common/types/time.cpp +62 -31
  165. package/src/duckdb/src/common/types/timestamp.cpp +70 -12
  166. package/src/duckdb/src/common/types/uuid.cpp +1 -1
  167. package/src/duckdb/src/common/types/validity_mask.cpp +40 -5
  168. package/src/duckdb/src/common/types/value.cpp +50 -8
  169. package/src/duckdb/src/common/types/varint.cpp +295 -0
  170. package/src/duckdb/src/common/types/vector.cpp +165 -54
  171. package/src/duckdb/src/common/types/vector_buffer.cpp +5 -4
  172. package/src/duckdb/src/common/types.cpp +106 -26
  173. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +13 -25
  174. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +6 -0
  175. package/src/duckdb/src/common/virtual_file_system.cpp +3 -3
  176. package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +35 -82
  177. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +283 -46
  178. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +4 -4
  179. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +3 -2
  180. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +226 -338
  181. package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +2 -0
  182. package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +388 -0
  183. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +63 -21
  184. package/src/duckdb/src/core_functions/aggregate/holistic/mad.cpp +330 -0
  185. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +136 -97
  186. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +601 -1485
  187. package/src/duckdb/src/core_functions/aggregate/nested/binned_histogram.cpp +405 -0
  188. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +136 -165
  189. package/src/duckdb/src/core_functions/function_list.cpp +35 -8
  190. package/src/duckdb/src/core_functions/lambda_functions.cpp +5 -7
  191. package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +172 -198
  192. package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +341 -54
  193. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -2
  194. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +89 -29
  195. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +1 -1
  196. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +2 -2
  197. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +133 -71
  198. package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +1 -1
  199. package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +1 -1
  200. package/src/duckdb/src/core_functions/scalar/generic/can_implicitly_cast.cpp +40 -0
  201. package/src/duckdb/src/core_functions/scalar/generic/error.cpp +1 -1
  202. package/src/duckdb/src/core_functions/scalar/generic/least.cpp +161 -58
  203. package/src/duckdb/src/core_functions/scalar/generic/typeof.cpp +13 -0
  204. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
  205. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +59 -75
  206. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +93 -40
  207. package/src/duckdb/src/core_functions/scalar/list/list_has_any_or_all.cpp +227 -0
  208. package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +20 -19
  209. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +0 -2
  210. package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +106 -8
  211. package/src/duckdb/src/core_functions/scalar/map/map_contains.cpp +56 -0
  212. package/src/duckdb/src/core_functions/scalar/map/map_extract.cpp +73 -118
  213. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +98 -2
  214. package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +1 -2
  215. package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +1 -1
  216. package/src/duckdb/src/core_functions/scalar/string/bar.cpp +1 -1
  217. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +5 -1
  218. package/src/duckdb/src/core_functions/scalar/string/md5.cpp +10 -37
  219. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +18 -2
  220. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +45 -0
  221. package/src/duckdb/src/core_functions/scalar/string/reverse.cpp +4 -5
  222. package/src/duckdb/src/core_functions/scalar/string/sha1.cpp +35 -0
  223. package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +5 -2
  224. package/src/duckdb/src/core_functions/scalar/string/url_encode.cpp +49 -0
  225. package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -2
  226. package/src/duckdb/src/core_functions/scalar/union/union_extract.cpp +4 -2
  227. package/src/duckdb/src/execution/adaptive_filter.cpp +30 -11
  228. package/src/duckdb/src/execution/aggregate_hashtable.cpp +13 -18
  229. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +4 -9
  230. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  231. package/src/duckdb/src/execution/index/art/art.cpp +683 -670
  232. package/src/duckdb/src/execution/index/art/art_key.cpp +121 -38
  233. package/src/duckdb/src/execution/index/art/base_leaf.cpp +168 -0
  234. package/src/duckdb/src/execution/index/art/base_node.cpp +163 -0
  235. package/src/duckdb/src/execution/index/art/iterator.cpp +148 -77
  236. package/src/duckdb/src/execution/index/art/leaf.cpp +159 -263
  237. package/src/duckdb/src/execution/index/art/node.cpp +493 -247
  238. package/src/duckdb/src/execution/index/art/node256.cpp +31 -91
  239. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +71 -0
  240. package/src/duckdb/src/execution/index/art/node48.cpp +75 -143
  241. package/src/duckdb/src/execution/index/art/prefix.cpp +424 -244
  242. package/src/duckdb/src/execution/index/bound_index.cpp +7 -1
  243. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +22 -18
  244. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +22 -73
  245. package/src/duckdb/src/execution/join_hashtable.cpp +637 -179
  246. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +4 -4
  247. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +15 -10
  248. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +13 -8
  249. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +525 -132
  250. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +147 -138
  251. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +531 -312
  252. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
  253. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -3
  254. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +9 -2
  255. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +13 -17
  256. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +60 -16
  257. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +105 -0
  258. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +24 -24
  259. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +25 -2
  260. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +275 -112
  261. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +106 -11
  262. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +253 -115
  263. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +93 -52
  264. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +116 -76
  265. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +29 -14
  266. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
  267. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +70 -26
  268. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +81 -60
  269. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +88 -50
  270. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +161 -51
  271. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +59 -17
  272. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +5 -5
  273. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +0 -21
  274. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +109 -0
  275. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +5 -13
  276. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +1 -1
  277. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +12 -4
  278. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +0 -16
  279. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +4 -2
  280. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +5 -0
  281. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +3 -1
  282. package/src/duckdb/src/execution/operator/helper/physical_set_variable.cpp +39 -0
  283. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +4 -2
  284. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +16 -5
  285. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  286. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  287. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  288. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -4
  289. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +59 -21
  290. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +7 -4
  291. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +333 -176
  292. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +57 -34
  293. package/src/duckdb/src/execution/operator/join/physical_join.cpp +16 -8
  294. package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +10 -4
  295. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +2 -5
  296. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +3 -3
  297. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +5 -5
  298. package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +7 -2
  299. package/src/duckdb/src/execution/operator/order/physical_order.cpp +17 -12
  300. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +12 -9
  301. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +35 -17
  302. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +17 -11
  303. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +5 -1
  304. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +156 -47
  305. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -2
  306. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +1 -3
  307. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -2
  308. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +13 -6
  309. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +22 -3
  310. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +19 -3
  311. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +37 -22
  312. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +77 -21
  313. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +27 -55
  314. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +41 -44
  315. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +4 -6
  316. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +4 -6
  317. package/src/duckdb/src/execution/operator/set/physical_union.cpp +18 -4
  318. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +3 -2
  319. package/src/duckdb/src/execution/physical_operator.cpp +45 -4
  320. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -7
  321. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +8 -3
  322. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +13 -6
  323. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +3 -3
  324. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +111 -19
  325. package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +19 -2
  326. package/src/duckdb/src/execution/physical_plan/plan_set.cpp +9 -0
  327. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +3 -1
  328. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -3
  329. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -49
  330. package/src/duckdb/src/execution/reservoir_sample.cpp +2 -2
  331. package/src/duckdb/src/execution/window_executor.cpp +556 -318
  332. package/src/duckdb/src/execution/window_segment_tree.cpp +1058 -485
  333. package/src/duckdb/src/function/aggregate/distributive/count.cpp +5 -5
  334. package/src/duckdb/src/function/aggregate/distributive/first.cpp +92 -95
  335. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +10 -9
  336. package/src/duckdb/src/function/aggregate_function.cpp +8 -0
  337. package/src/duckdb/src/function/cast/cast_function_set.cpp +10 -1
  338. package/src/duckdb/src/function/cast/decimal_cast.cpp +10 -1
  339. package/src/duckdb/src/function/cast/default_casts.cpp +2 -0
  340. package/src/duckdb/src/function/cast/numeric_casts.cpp +3 -0
  341. package/src/duckdb/src/function/cast/string_cast.cpp +8 -5
  342. package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
  343. package/src/duckdb/src/function/cast/union_casts.cpp +1 -1
  344. package/src/duckdb/src/function/cast/varint_casts.cpp +283 -0
  345. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -1
  346. package/src/duckdb/src/function/cast_rules.cpp +104 -15
  347. package/src/duckdb/src/function/compression_config.cpp +35 -33
  348. package/src/duckdb/src/function/copy_function.cpp +27 -0
  349. package/src/duckdb/src/function/function_binder.cpp +39 -11
  350. package/src/duckdb/src/function/macro_function.cpp +75 -32
  351. package/src/duckdb/src/function/pragma/pragma_queries.cpp +10 -0
  352. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -0
  353. package/src/duckdb/src/function/scalar/generic/binning.cpp +507 -0
  354. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +58 -0
  355. package/src/duckdb/src/function/scalar/generic_functions.cpp +1 -0
  356. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +33 -47
  357. package/src/duckdb/src/function/scalar/list/list_extract.cpp +70 -143
  358. package/src/duckdb/src/function/scalar/list/list_resize.cpp +93 -84
  359. package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -0
  360. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +24 -11
  361. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +4 -4
  362. package/src/duckdb/src/function/scalar/strftime_format.cpp +196 -57
  363. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +9 -7
  364. package/src/duckdb/src/function/scalar/string/concat.cpp +239 -123
  365. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +149 -0
  366. package/src/duckdb/src/function/scalar/string/contains.cpp +18 -7
  367. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  368. package/src/duckdb/src/function/scalar/string/substring.cpp +6 -11
  369. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  370. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +7 -3
  371. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  372. package/src/duckdb/src/function/scalar_function.cpp +5 -2
  373. package/src/duckdb/src/function/scalar_macro_function.cpp +2 -2
  374. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +20 -39
  375. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +135 -0
  376. package/src/duckdb/src/function/table/arrow.cpp +194 -52
  377. package/src/duckdb/src/function/table/arrow_conversion.cpp +212 -69
  378. package/src/duckdb/src/function/table/copy_csv.cpp +43 -14
  379. package/src/duckdb/src/function/table/query_function.cpp +80 -0
  380. package/src/duckdb/src/function/table/range.cpp +222 -142
  381. package/src/duckdb/src/function/table/read_csv.cpp +25 -13
  382. package/src/duckdb/src/function/table/sniff_csv.cpp +55 -35
  383. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +141 -129
  384. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +25 -14
  385. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +20 -14
  386. package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +15 -1
  387. package/src/duckdb/src/function/table/system/duckdb_variables.cpp +84 -0
  388. package/src/duckdb/src/function/table/system/test_all_types.cpp +1 -0
  389. package/src/duckdb/src/function/table/system/test_vector_types.cpp +33 -3
  390. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  391. package/src/duckdb/src/function/table/table_scan.cpp +45 -22
  392. package/src/duckdb/src/function/table/unnest.cpp +2 -2
  393. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
  394. package/src/duckdb/src/function/table_function.cpp +5 -4
  395. package/src/duckdb/src/function/table_macro_function.cpp +2 -2
  396. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +8 -4
  397. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +5 -2
  398. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -0
  399. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +2 -2
  400. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -4
  401. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +5 -5
  402. package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +2 -1
  403. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +4 -5
  404. package/src/duckdb/src/include/duckdb/catalog/default/default_generator.hpp +4 -5
  405. package/src/duckdb/src/include/duckdb/catalog/default/default_schemas.hpp +2 -1
  406. package/src/duckdb/src/include/duckdb/catalog/default/default_table_functions.hpp +47 -0
  407. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -0
  408. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +2 -2
  409. package/src/duckdb/src/include/duckdb/common/allocator.hpp +9 -1
  410. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +120 -0
  411. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +37 -11
  412. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +9 -8
  413. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
  414. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +6 -4
  415. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +92 -0
  416. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +2 -2
  417. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +26 -4
  418. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +90 -11
  419. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +6 -6
  420. package/src/duckdb/src/include/duckdb/common/arrow/arrow_buffer.hpp +8 -1
  421. package/src/duckdb/src/include/duckdb/common/arrow/arrow_merge_event.hpp +62 -0
  422. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +52 -0
  423. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +42 -0
  424. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_batch_collector.hpp +30 -0
  425. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_collector.hpp +65 -0
  426. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +43 -0
  427. package/src/duckdb/src/include/duckdb/common/bswap.hpp +18 -16
  428. package/src/duckdb/src/include/duckdb/common/cgroups.hpp +30 -0
  429. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -0
  430. package/src/duckdb/src/include/duckdb/common/dl.hpp +8 -1
  431. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +48 -0
  432. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +88 -0
  433. package/src/duckdb/src/include/duckdb/common/enums/checkpoint_type.hpp +2 -2
  434. package/src/duckdb/src/include/duckdb/common/enums/copy_overwrite_mode.hpp +6 -1
  435. package/src/duckdb/src/include/duckdb/common/enums/destroy_buffer_upon.hpp +21 -0
  436. package/src/duckdb/src/include/duckdb/common/enums/explain_format.hpp +17 -0
  437. package/src/duckdb/src/include/duckdb/common/enums/file_compression_type.hpp +4 -0
  438. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +2 -2
  439. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +88 -0
  440. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +6 -1
  441. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +2 -1
  442. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  443. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  444. package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +3 -1
  445. package/src/duckdb/src/include/duckdb/common/enums/set_scope.hpp +2 -1
  446. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +23 -2
  447. package/src/duckdb/src/include/duckdb/common/enums/stream_execution_result.hpp +25 -0
  448. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  449. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
  450. package/src/duckdb/src/include/duckdb/common/error_data.hpp +5 -2
  451. package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +1 -0
  452. package/src/duckdb/src/include/duckdb/common/exception.hpp +20 -2
  453. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +12 -0
  454. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -0
  455. package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +16 -0
  456. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +18 -0
  457. package/src/duckdb/src/include/duckdb/common/file_system.hpp +3 -0
  458. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -0
  459. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +160 -96
  460. package/src/duckdb/src/include/duckdb/common/fsst.hpp +9 -2
  461. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -8
  462. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +16 -7
  463. package/src/duckdb/src/include/duckdb/common/http_util.hpp +19 -0
  464. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +19 -6
  465. package/src/duckdb/src/include/duckdb/common/limits.hpp +9 -2
  466. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +38 -6
  467. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +9 -2
  468. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +5 -1
  469. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +82 -50
  470. package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +11 -0
  471. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +7 -3
  472. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +23 -1
  473. package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +2 -1
  474. package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +1 -1
  475. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +4 -0
  476. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
  477. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +10 -5
  478. package/src/duckdb/src/include/duckdb/common/optionally_owned_ptr.hpp +1 -0
  479. package/src/duckdb/src/include/duckdb/common/owning_string_map.hpp +155 -0
  480. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -3
  481. package/src/duckdb/src/include/duckdb/common/platform.hpp +58 -0
  482. package/src/duckdb/src/include/duckdb/common/radix.hpp +172 -27
  483. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +5 -1
  484. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -0
  485. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/common/render_tree.hpp +77 -0
  487. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +12 -0
  488. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +6 -2
  489. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +5 -3
  490. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +15 -7
  491. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +3 -1
  492. package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +245 -0
  493. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +10 -0
  494. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +10 -11
  495. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -6
  496. package/src/duckdb/src/include/duckdb/common/string_util.hpp +37 -7
  497. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +44 -0
  498. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +44 -0
  499. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +44 -0
  500. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +119 -0
  501. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +9 -123
  502. package/src/duckdb/src/include/duckdb/common/type_visitor.hpp +96 -0
  503. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +11 -1
  504. package/src/duckdb/src/include/duckdb/common/types/arrow_string_view_type.hpp +84 -0
  505. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +36 -1
  506. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
  507. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +2 -2
  508. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +4 -2
  509. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +52 -0
  510. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -0
  511. package/src/duckdb/src/include/duckdb/common/types/date.hpp +0 -3
  512. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +65 -0
  513. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -2
  514. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +49 -40
  515. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +5 -1
  516. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +2 -1
  517. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  518. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +4 -3
  519. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +3 -1
  520. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  521. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  522. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +1 -1
  523. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +4 -0
  524. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +4 -1
  525. package/src/duckdb/src/include/duckdb/common/types/time.hpp +11 -6
  526. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +13 -3
  527. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +103 -12
  528. package/src/duckdb/src/include/duckdb/common/types/value.hpp +12 -3
  529. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +107 -0
  530. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +5 -1
  531. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +7 -2
  532. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -39
  533. package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +42 -10
  534. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +29 -0
  535. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +0 -7
  536. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +2 -0
  537. package/src/duckdb/src/include/duckdb/common/winapi.hpp +8 -0
  538. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +8 -4
  539. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/stddev.hpp +8 -4
  540. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  541. package/src/duckdb/src/include/duckdb/core_functions/aggregate/histogram_helpers.hpp +99 -0
  542. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +16 -7
  543. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +396 -0
  544. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +10 -0
  545. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_helpers.hpp +65 -0
  546. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp +349 -0
  547. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_state.hpp +300 -0
  548. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +1 -1
  549. package/src/duckdb/src/include/duckdb/core_functions/aggregate/sort_key_helpers.hpp +55 -0
  550. package/src/duckdb/src/include/duckdb/core_functions/array_kernels.hpp +107 -0
  551. package/src/duckdb/src/include/duckdb/core_functions/create_sort_key.hpp +55 -0
  552. package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +1 -2
  553. package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +24 -0
  554. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +9 -0
  555. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +27 -0
  556. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +80 -8
  557. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +9 -0
  558. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +54 -0
  559. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +30 -21
  560. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +25 -14
  561. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -48
  562. package/src/duckdb/src/include/duckdb/execution/executor.hpp +25 -2
  563. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +102 -0
  564. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +94 -101
  565. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +43 -25
  566. package/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp +109 -0
  567. package/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp +140 -0
  568. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +43 -24
  569. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +41 -52
  570. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +133 -74
  571. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +46 -29
  572. package/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp +53 -0
  573. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +52 -35
  574. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +96 -57
  575. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +9 -4
  576. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +48 -10
  577. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +0 -2
  578. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +4 -2
  579. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +114 -36
  580. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +158 -67
  581. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +1 -1
  582. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  583. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  584. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +19 -2
  585. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -1
  586. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +1 -1
  587. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +75 -0
  588. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +81 -23
  589. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +27 -8
  590. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +2 -1
  591. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +31 -22
  592. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +4 -2
  593. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +48 -5
  594. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +7 -3
  595. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +22 -12
  596. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +35 -0
  597. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +81 -39
  598. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +2 -1
  599. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +18 -1
  600. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +9 -7
  601. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +5 -4
  602. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/header_value.hpp +26 -0
  603. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +6 -9
  604. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +3 -0
  605. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -3
  606. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +36 -19
  607. package/src/duckdb/src/include/duckdb/execution/operator/filter/physical_filter.hpp +1 -1
  608. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +21 -0
  609. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +53 -0
  610. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +3 -0
  611. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +6 -2
  612. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +18 -0
  613. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  614. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +6 -0
  615. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +2 -2
  616. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set_variable.hpp +43 -0
  617. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +1 -1
  618. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +59 -0
  619. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  620. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_comparison_join.hpp +8 -1
  621. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -2
  622. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +4 -2
  623. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -0
  624. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +1 -1
  625. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +3 -1
  626. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +4 -1
  627. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +3 -1
  628. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +1 -1
  629. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +1 -1
  630. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +5 -37
  631. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +5 -4
  632. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -2
  633. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +1 -1
  634. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -0
  635. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +9 -3
  636. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +8 -6
  637. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +2 -2
  638. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +1 -1
  639. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  640. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +21 -6
  641. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +3 -2
  642. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  643. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +137 -110
  644. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +57 -126
  645. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +21 -4
  646. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -1
  647. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -10
  648. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +37 -7
  649. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +24 -11
  650. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +4 -4
  651. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +41 -1
  652. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +15 -5
  653. package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +1 -0
  654. package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -4
  655. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +6 -0
  656. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +77 -109
  657. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
  658. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +6 -3
  659. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +25 -12
  660. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +9 -8
  661. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +38 -4
  662. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +1 -1
  663. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -57
  664. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +142 -0
  665. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_datetime_type.hpp +18 -0
  666. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +7 -0
  667. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp +10 -0
  668. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  669. package/src/duckdb/src/include/duckdb/function/table/range.hpp +4 -0
  670. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -1
  671. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  672. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +5 -5
  673. package/src/duckdb/src/include/duckdb/function/table_function.hpp +14 -2
  674. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +1 -1
  675. package/src/duckdb/src/include/duckdb/main/appender.hpp +14 -4
  676. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +25 -7
  677. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +79 -0
  678. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +10 -20
  679. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +11 -12
  680. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  681. package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +1 -1
  682. package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +2 -2
  683. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +809 -0
  684. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/batched_data_collection.hpp +35 -0
  685. package/src/duckdb/src/include/duckdb/main/client_config.hpp +68 -2
  686. package/src/duckdb/src/include/duckdb/main/client_context.hpp +30 -22
  687. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +79 -1
  688. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +9 -3
  689. package/src/duckdb/src/include/duckdb/main/config.hpp +55 -7
  690. package/src/duckdb/src/include/duckdb/main/connection.hpp +5 -1
  691. package/src/duckdb/src/include/duckdb/main/database.hpp +16 -5
  692. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +9 -8
  693. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +21 -6
  694. package/src/duckdb/src/include/duckdb/main/extension.hpp +20 -0
  695. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -0
  696. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +29 -23
  697. package/src/duckdb/src/include/duckdb/main/extension_install_info.hpp +6 -0
  698. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +3 -0
  699. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +4 -2
  700. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +5 -6
  701. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -5
  702. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +87 -0
  703. package/src/duckdb/src/include/duckdb/main/profiling_node.hpp +60 -0
  704. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +72 -34
  705. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  706. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +2 -1
  707. package/src/duckdb/src/include/duckdb/main/relation/delim_get_relation.hpp +30 -0
  708. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +3 -1
  709. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +3 -0
  710. package/src/duckdb/src/include/duckdb/main/relation/materialized_relation.hpp +1 -4
  711. package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +4 -1
  712. package/src/duckdb/src/include/duckdb/main/relation/read_json_relation.hpp +6 -0
  713. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  714. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +2 -0
  715. package/src/duckdb/src/include/duckdb/main/relation.hpp +7 -4
  716. package/src/duckdb/src/include/duckdb/main/secret/default_secrets.hpp +36 -0
  717. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +108 -0
  718. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +14 -4
  719. package/src/duckdb/src/include/duckdb/main/settings.hpp +227 -3
  720. package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +8 -0
  721. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +51 -0
  722. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +7 -0
  723. package/src/duckdb/src/include/duckdb/optimizer/cte_filter_pusher.hpp +46 -0
  724. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
  725. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  726. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +31 -0
  727. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +51 -10
  728. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -0
  729. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +17 -5
  730. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
  731. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +15 -13
  732. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +9 -4
  733. package/src/duckdb/src/include/duckdb/optimizer/limit_pushdown.hpp +25 -0
  734. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +1 -0
  735. package/src/duckdb/src/include/duckdb/optimizer/rule/join_dependent_filter.hpp +37 -0
  736. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +6 -1
  737. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +54 -2
  738. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +27 -8
  739. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -0
  740. package/src/duckdb/src/include/duckdb/parallel/pipeline_prepare_finish_event.hpp +25 -0
  741. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +63 -0
  742. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -1
  743. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  744. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +5 -0
  745. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +5 -0
  746. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +5 -0
  747. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +2 -0
  748. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +11 -1
  749. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +9 -0
  750. package/src/duckdb/src/include/duckdb/parser/parsed_expression_iterator.hpp +13 -6
  751. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +1 -1
  752. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -3
  753. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +2 -0
  754. package/src/duckdb/src/include/duckdb/parser/statement/explain_statement.hpp +5 -1
  755. package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +2 -2
  756. package/src/duckdb/src/include/duckdb/parser/statement/transaction_statement.hpp +1 -1
  757. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +0 -2
  758. package/src/duckdb/src/include/duckdb/parser/tableref/column_data_ref.hpp +9 -7
  759. package/src/duckdb/src/include/duckdb/parser/tableref/delimgetref.hpp +37 -0
  760. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +4 -0
  761. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +0 -2
  762. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +0 -2
  763. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +0 -1
  764. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  765. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +17 -9
  766. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -14
  767. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +44 -0
  768. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +1 -1
  769. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +1 -1
  770. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
  772. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -1
  773. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +1 -1
  774. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +1 -1
  775. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +1 -1
  776. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
  777. package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +1 -1
  778. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -1
  779. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +1 -1
  780. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +1 -1
  781. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +1 -1
  782. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +2 -0
  783. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
  784. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +1 -1
  785. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +2 -2
  786. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +1 -1
  787. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +1 -1
  788. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -2
  789. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -0
  790. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -0
  791. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -5
  792. package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +1 -0
  793. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +19 -11
  794. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  795. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  796. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  797. package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +2 -0
  798. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +7 -2
  799. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +2 -1
  800. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
  801. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +6 -1
  803. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +10 -2
  804. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  805. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
  806. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +1 -1
  807. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -1
  808. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -2
  809. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +15 -5
  810. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +1 -0
  811. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +1 -1
  812. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -1
  813. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +24 -2
  814. package/src/duckdb/src/include/duckdb/planner/tableref/bound_delimgetref.hpp +26 -0
  815. package/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +6 -0
  816. package/src/duckdb/src/include/duckdb/planner/tableref/bound_subqueryref.hpp +1 -1
  817. package/src/duckdb/src/include/duckdb/planner/tableref/bound_table_function.hpp +2 -0
  818. package/src/duckdb/src/include/duckdb/planner/tableref/list.hpp +2 -0
  819. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  820. package/src/duckdb/src/include/duckdb/storage/block.hpp +4 -2
  821. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +48 -3
  822. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +21 -7
  823. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +65 -51
  824. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +14 -5
  825. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -4
  826. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +3 -2
  827. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +1 -0
  828. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +3 -4
  829. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +2 -0
  830. package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +4 -4
  831. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +6 -4
  832. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +19 -17
  833. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +2 -2
  834. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +3 -4
  835. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +3 -2
  836. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -2
  837. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +13 -11
  838. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +19 -19
  839. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -4
  840. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  841. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  842. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +10 -2
  843. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -2
  844. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +15 -0
  845. package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +14 -10
  846. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -8
  847. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +7 -4
  848. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -7
  849. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +29 -4
  850. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +22 -7
  851. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +15 -2
  852. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -2
  853. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +5 -16
  854. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +51 -13
  855. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +6 -3
  856. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +29 -19
  857. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +23 -7
  858. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +27 -18
  859. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +6 -3
  860. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +5 -2
  861. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  862. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +5 -1
  863. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +77 -6
  864. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +23 -11
  865. package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -0
  866. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +5 -2
  867. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +18 -4
  868. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -1
  869. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  870. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +89 -14
  871. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +4 -2
  872. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +4 -2
  873. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +2 -2
  874. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -1
  875. package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +33 -15
  876. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +9 -9
  877. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +3 -1
  878. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +4 -16
  879. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +27 -4
  880. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -0
  881. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -2
  882. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +5 -5
  883. package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +6 -2
  884. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +5 -3
  885. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +48 -0
  886. package/src/duckdb/src/include/duckdb.h +1779 -739
  887. package/src/duckdb/src/include/duckdb_extension.h +921 -0
  888. package/src/duckdb/src/main/appender.cpp +53 -7
  889. package/src/duckdb/src/main/attached_database.cpp +87 -17
  890. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +226 -0
  891. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +35 -0
  892. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +48 -23
  893. package/src/duckdb/src/main/capi/aggregate_function-c.cpp +327 -0
  894. package/src/duckdb/src/main/capi/appender-c.cpp +18 -0
  895. package/src/duckdb/src/main/capi/cast/utils-c.cpp +2 -2
  896. package/src/duckdb/src/main/capi/cast_function-c.cpp +210 -0
  897. package/src/duckdb/src/main/capi/config-c.cpp +3 -3
  898. package/src/duckdb/src/main/capi/data_chunk-c.cpp +18 -7
  899. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +223 -24
  900. package/src/duckdb/src/main/capi/helper-c.cpp +51 -11
  901. package/src/duckdb/src/main/capi/logical_types-c.cpp +105 -46
  902. package/src/duckdb/src/main/capi/pending-c.cpp +7 -6
  903. package/src/duckdb/src/main/capi/prepared-c.cpp +18 -7
  904. package/src/duckdb/src/main/capi/profiling_info-c.cpp +84 -0
  905. package/src/duckdb/src/main/capi/result-c.cpp +139 -37
  906. package/src/duckdb/src/main/capi/scalar_function-c.cpp +269 -0
  907. package/src/duckdb/src/main/capi/table_description-c.cpp +82 -0
  908. package/src/duckdb/src/main/capi/table_function-c.cpp +161 -95
  909. package/src/duckdb/src/main/capi/value-c.cpp +2 -2
  910. package/src/duckdb/src/main/chunk_scan_state/batched_data_collection.cpp +57 -0
  911. package/src/duckdb/src/main/client_config.cpp +17 -0
  912. package/src/duckdb/src/main/client_context.cpp +67 -52
  913. package/src/duckdb/src/main/client_data.cpp +3 -3
  914. package/src/duckdb/src/main/config.cpp +120 -62
  915. package/src/duckdb/src/main/connection.cpp +14 -2
  916. package/src/duckdb/src/main/database.cpp +96 -35
  917. package/src/duckdb/src/main/database_manager.cpp +25 -23
  918. package/src/duckdb/src/main/database_path_and_type.cpp +2 -2
  919. package/src/duckdb/src/main/db_instance_cache.cpp +54 -19
  920. package/src/duckdb/src/main/extension/extension_helper.cpp +47 -42
  921. package/src/duckdb/src/main/extension/extension_install.cpp +155 -87
  922. package/src/duckdb/src/main/extension/extension_load.cpp +180 -26
  923. package/src/duckdb/src/main/extension/extension_util.cpp +8 -0
  924. package/src/duckdb/src/main/extension.cpp +72 -5
  925. package/src/duckdb/src/main/pending_query_result.cpp +20 -12
  926. package/src/duckdb/src/main/prepared_statement.cpp +6 -6
  927. package/src/duckdb/src/main/prepared_statement_data.cpp +28 -17
  928. package/src/duckdb/src/main/profiling_info.cpp +196 -0
  929. package/src/duckdb/src/main/query_profiler.cpp +413 -224
  930. package/src/duckdb/src/main/query_result.cpp +1 -1
  931. package/src/duckdb/src/main/relation/create_table_relation.cpp +4 -2
  932. package/src/duckdb/src/main/relation/create_view_relation.cpp +0 -6
  933. package/src/duckdb/src/main/relation/delim_get_relation.cpp +44 -0
  934. package/src/duckdb/src/main/relation/explain_relation.cpp +4 -3
  935. package/src/duckdb/src/main/relation/join_relation.cpp +5 -0
  936. package/src/duckdb/src/main/relation/limit_relation.cpp +1 -1
  937. package/src/duckdb/src/main/relation/materialized_relation.cpp +3 -3
  938. package/src/duckdb/src/main/relation/query_relation.cpp +42 -15
  939. package/src/duckdb/src/main/relation/read_csv_relation.cpp +7 -14
  940. package/src/duckdb/src/main/relation/read_json_relation.cpp +20 -0
  941. package/src/duckdb/src/main/relation/setop_relation.cpp +1 -1
  942. package/src/duckdb/src/main/relation/table_function_relation.cpp +6 -0
  943. package/src/duckdb/src/main/relation/view_relation.cpp +10 -0
  944. package/src/duckdb/src/main/relation.cpp +12 -8
  945. package/src/duckdb/src/main/secret/default_secrets.cpp +108 -0
  946. package/src/duckdb/src/main/secret/secret.cpp +145 -2
  947. package/src/duckdb/src/main/secret/secret_manager.cpp +85 -35
  948. package/src/duckdb/src/main/secret/secret_storage.cpp +29 -17
  949. package/src/duckdb/src/main/settings/settings.cpp +503 -11
  950. package/src/duckdb/src/main/stream_query_result.cpp +75 -2
  951. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +248 -0
  952. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +28 -6
  953. package/src/duckdb/src/optimizer/compressed_materialization/compress_comparison_join.cpp +152 -0
  954. package/src/duckdb/src/optimizer/compressed_materialization.cpp +11 -1
  955. package/src/duckdb/src/optimizer/cse_optimizer.cpp +3 -0
  956. package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +117 -0
  957. package/src/duckdb/src/optimizer/filter_combiner.cpp +30 -9
  958. package/src/duckdb/src/optimizer/filter_pullup.cpp +54 -2
  959. package/src/duckdb/src/optimizer/filter_pushdown.cpp +71 -3
  960. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +154 -0
  961. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +245 -114
  962. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +42 -20
  963. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +6 -2
  964. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +32 -10
  965. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +97 -131
  966. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +265 -51
  967. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +21 -17
  968. package/src/duckdb/src/optimizer/limit_pushdown.cpp +42 -0
  969. package/src/duckdb/src/optimizer/optimizer.cpp +51 -8
  970. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +17 -17
  971. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +22 -4
  972. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +1 -18
  973. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +6 -0
  974. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +4 -2
  975. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +91 -0
  976. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -25
  977. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +1 -0
  978. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +3 -0
  979. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +2 -2
  980. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +8 -2
  981. package/src/duckdb/src/optimizer/rule/join_dependent_filter.cpp +135 -0
  982. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +1 -1
  983. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
  984. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +6 -1
  985. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +7 -6
  986. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
  987. package/src/duckdb/src/optimizer/topn_optimizer.cpp +46 -7
  988. package/src/duckdb/src/parallel/executor.cpp +129 -51
  989. package/src/duckdb/src/parallel/executor_task.cpp +16 -3
  990. package/src/duckdb/src/parallel/meta_pipeline.cpp +98 -29
  991. package/src/duckdb/src/parallel/pipeline.cpp +17 -3
  992. package/src/duckdb/src/parallel/pipeline_executor.cpp +14 -2
  993. package/src/duckdb/src/parallel/pipeline_prepare_finish_event.cpp +34 -0
  994. package/src/duckdb/src/parallel/task_executor.cpp +84 -0
  995. package/src/duckdb/src/parallel/task_scheduler.cpp +94 -16
  996. package/src/duckdb/src/parallel/thread_context.cpp +1 -1
  997. package/src/duckdb/src/parser/expression/function_expression.cpp +14 -0
  998. package/src/duckdb/src/parser/expression/star_expression.cpp +35 -2
  999. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +5 -1
  1000. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +17 -0
  1001. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +37 -28
  1002. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +44 -2
  1003. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +21 -1
  1004. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +29 -25
  1005. package/src/duckdb/src/parser/parser.cpp +41 -1
  1006. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +1 -0
  1007. package/src/duckdb/src/parser/statement/explain_statement.cpp +28 -13
  1008. package/src/duckdb/src/parser/statement/relation_statement.cpp +5 -0
  1009. package/src/duckdb/src/parser/statement/set_statement.cpp +4 -2
  1010. package/src/duckdb/src/parser/statement/transaction_statement.cpp +3 -3
  1011. package/src/duckdb/src/parser/tableref/column_data_ref.cpp +1 -27
  1012. package/src/duckdb/src/parser/tableref/delimgetref.cpp +30 -0
  1013. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  1014. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +35 -29
  1015. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +32 -32
  1016. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +2 -1
  1017. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +17 -0
  1018. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +5 -0
  1019. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +36 -34
  1020. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +30 -14
  1021. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  1022. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +2 -1
  1023. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +27 -19
  1024. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +31 -28
  1025. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +25 -27
  1026. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +1 -1
  1027. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +53 -42
  1028. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +6 -6
  1029. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +1 -1
  1030. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  1031. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1032. package/src/duckdb/src/parser/transform/statement/transform_explain.cpp +38 -3
  1033. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +1 -2
  1034. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +1 -1
  1035. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +1 -1
  1036. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +26 -21
  1037. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +8 -8
  1038. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +5 -2
  1039. package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +6 -4
  1040. package/src/duckdb/src/parser/transform/statement/transform_transaction.cpp +27 -6
  1041. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +8 -9
  1042. package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +11 -12
  1043. package/src/duckdb/src/parser/transform/statement/transform_vacuum.cpp +3 -3
  1044. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -10
  1045. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +1 -1
  1046. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  1047. package/src/duckdb/src/parser/transformer.cpp +11 -7
  1048. package/src/duckdb/src/planner/bind_context.cpp +3 -3
  1049. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +22 -7
  1050. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +3 -3
  1051. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -2
  1052. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +11 -4
  1053. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +9 -54
  1054. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -5
  1055. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -27
  1056. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +7 -7
  1057. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -2
  1058. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +26 -7
  1059. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +5 -0
  1060. package/src/duckdb/src/planner/binder/expression/bind_unpacked_star_expression.cpp +91 -0
  1061. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
  1062. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -8
  1063. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +1 -1
  1064. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +6 -10
  1065. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +14 -10
  1066. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +3 -3
  1067. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +46 -7
  1068. package/src/duckdb/src/planner/binder/statement/bind_call.cpp +13 -20
  1069. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +105 -13
  1070. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -3
  1071. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +75 -55
  1072. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
  1073. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +5 -4
  1074. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
  1075. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +24 -8
  1076. package/src/duckdb/src/planner/binder/statement/bind_explain.cpp +2 -2
  1077. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +5 -105
  1078. package/src/duckdb/src/planner/binder/statement/bind_extension.cpp +2 -2
  1079. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +109 -41
  1080. package/src/duckdb/src/planner/binder/statement/bind_set.cpp +23 -7
  1081. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +4 -1
  1082. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +17 -3
  1083. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -4
  1084. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +8 -6
  1085. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +55 -42
  1086. package/src/duckdb/src/planner/binder/tableref/bind_column_data_ref.cpp +3 -2
  1087. package/src/duckdb/src/planner/binder/tableref/bind_delimgetref.cpp +16 -0
  1088. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +31 -1
  1089. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +6 -0
  1090. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +2 -0
  1091. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +106 -46
  1092. package/src/duckdb/src/planner/binder/tableref/plan_delimgetref.cpp +11 -0
  1093. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +15 -2
  1094. package/src/duckdb/src/planner/binder/tableref/plan_table_function.cpp +4 -0
  1095. package/src/duckdb/src/planner/binder.cpp +172 -15
  1096. package/src/duckdb/src/planner/collation_binding.cpp +99 -0
  1097. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -4
  1098. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +1 -1
  1099. package/src/duckdb/src/planner/expression/bound_case_expression.cpp +1 -1
  1100. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +14 -12
  1101. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
  1102. package/src/duckdb/src/planner/expression/bound_comparison_expression.cpp +1 -1
  1103. package/src/duckdb/src/planner/expression/bound_conjunction_expression.cpp +1 -1
  1104. package/src/duckdb/src/planner/expression/bound_constant_expression.cpp +1 -1
  1105. package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +1 -1
  1106. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -2
  1107. package/src/duckdb/src/planner/expression/bound_lambda_expression.cpp +1 -1
  1108. package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +1 -1
  1109. package/src/duckdb/src/planner/expression/bound_operator_expression.cpp +1 -1
  1110. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +1 -1
  1111. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
  1112. package/src/duckdb/src/planner/expression/bound_subquery_expression.cpp +1 -1
  1113. package/src/duckdb/src/planner/expression/bound_unnest_expression.cpp +1 -1
  1114. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +6 -6
  1115. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1116. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
  1117. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +1 -1
  1118. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +7 -0
  1119. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
  1120. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +26 -22
  1121. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +7 -1
  1122. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
  1123. package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
  1124. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +2 -2
  1125. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +61 -43
  1126. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +2 -2
  1127. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +4 -4
  1128. package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +3 -2
  1129. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +10 -3
  1130. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1131. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +9 -2
  1132. package/src/duckdb/src/planner/expression_binder.cpp +121 -21
  1133. package/src/duckdb/src/planner/expression_iterator.cpp +26 -1
  1134. package/src/duckdb/src/planner/filter/conjunction_filter.cpp +33 -0
  1135. package/src/duckdb/src/planner/filter/constant_filter.cpp +15 -0
  1136. package/src/duckdb/src/planner/filter/null_filter.cpp +22 -0
  1137. package/src/duckdb/src/planner/filter/struct_filter.cpp +16 -0
  1138. package/src/duckdb/src/planner/logical_operator.cpp +24 -7
  1139. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -7
  1140. package/src/duckdb/src/planner/operator/logical_any_join.cpp +5 -2
  1141. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +13 -5
  1142. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +64 -8
  1143. package/src/duckdb/src/planner/operator/logical_cteref.cpp +7 -0
  1144. package/src/duckdb/src/planner/operator/logical_distinct.cpp +6 -5
  1145. package/src/duckdb/src/planner/operator/logical_get.cpp +60 -18
  1146. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +7 -0
  1147. package/src/duckdb/src/planner/operator/logical_order.cpp +7 -4
  1148. package/src/duckdb/src/planner/operator/logical_top_n.cpp +2 -2
  1149. package/src/duckdb/src/planner/operator/logical_vacuum.cpp +1 -1
  1150. package/src/duckdb/src/planner/planner.cpp +2 -3
  1151. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +27 -10
  1152. package/src/duckdb/src/planner/table_filter.cpp +51 -0
  1153. package/src/duckdb/src/storage/arena_allocator.cpp +28 -10
  1154. package/src/duckdb/src/storage/block.cpp +3 -2
  1155. package/src/duckdb/src/storage/buffer/block_handle.cpp +29 -14
  1156. package/src/duckdb/src/storage/buffer/block_manager.cpp +6 -5
  1157. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -1
  1158. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +264 -125
  1159. package/src/duckdb/src/storage/buffer_manager.cpp +5 -1
  1160. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -6
  1161. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +26 -3
  1162. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +21 -9
  1163. package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -24
  1164. package/src/duckdb/src/storage/compression/alp/alp.cpp +6 -11
  1165. package/src/duckdb/src/storage/compression/alprd.cpp +5 -9
  1166. package/src/duckdb/src/storage/compression/bitpacking.cpp +35 -31
  1167. package/src/duckdb/src/storage/compression/chimp/chimp.cpp +6 -8
  1168. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +71 -58
  1169. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +15 -13
  1170. package/src/duckdb/src/storage/compression/fsst.cpp +66 -53
  1171. package/src/duckdb/src/storage/compression/numeric_constant.cpp +4 -5
  1172. package/src/duckdb/src/storage/compression/patas.cpp +6 -17
  1173. package/src/duckdb/src/storage/compression/rle.cpp +20 -18
  1174. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +71 -52
  1175. package/src/duckdb/src/storage/compression/uncompressed.cpp +2 -2
  1176. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +8 -7
  1177. package/src/duckdb/src/storage/data_pointer.cpp +22 -0
  1178. package/src/duckdb/src/storage/data_table.cpp +41 -12
  1179. package/src/duckdb/src/storage/local_storage.cpp +22 -8
  1180. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +33 -17
  1181. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +4 -4
  1182. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +3 -3
  1183. package/src/duckdb/src/storage/partial_block_manager.cpp +19 -8
  1184. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +11 -8
  1185. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +1 -1
  1186. package/src/duckdb/src/storage/serialization/serialize_extension_install_info.cpp +2 -0
  1187. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +3 -3
  1188. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +19 -5
  1189. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +21 -1
  1190. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +4 -2
  1191. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -2
  1192. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +2 -0
  1193. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +8 -4
  1194. package/src/duckdb/src/storage/serialization/serialize_types.cpp +4 -4
  1195. package/src/duckdb/src/storage/single_file_block_manager.cpp +170 -34
  1196. package/src/duckdb/src/storage/standard_buffer_manager.cpp +221 -64
  1197. package/src/duckdb/src/storage/statistics/column_statistics.cpp +4 -3
  1198. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +36 -26
  1199. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +4 -15
  1200. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -8
  1201. package/src/duckdb/src/storage/statistics/struct_stats.cpp +2 -1
  1202. package/src/duckdb/src/storage/storage_info.cpp +34 -9
  1203. package/src/duckdb/src/storage/storage_manager.cpp +147 -74
  1204. package/src/duckdb/src/storage/table/array_column_data.cpp +37 -17
  1205. package/src/duckdb/src/storage/table/chunk_info.cpp +38 -0
  1206. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -6
  1207. package/src/duckdb/src/storage/table/column_data.cpp +252 -31
  1208. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -12
  1209. package/src/duckdb/src/storage/table/column_segment.cpp +63 -34
  1210. package/src/duckdb/src/storage/table/list_column_data.cpp +34 -15
  1211. package/src/duckdb/src/storage/table/row_group.cpp +228 -120
  1212. package/src/duckdb/src/storage/table/row_group_collection.cpp +122 -120
  1213. package/src/duckdb/src/storage/table/row_version_manager.cpp +27 -1
  1214. package/src/duckdb/src/storage/table/scan_state.cpp +101 -18
  1215. package/src/duckdb/src/storage/table/standard_column_data.cpp +20 -34
  1216. package/src/duckdb/src/storage/table/struct_column_data.cpp +39 -42
  1217. package/src/duckdb/src/storage/table/table_statistics.cpp +2 -1
  1218. package/src/duckdb/src/storage/table/update_segment.cpp +9 -8
  1219. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -2
  1220. package/src/duckdb/src/storage/table_index_list.cpp +8 -7
  1221. package/src/duckdb/src/storage/temporary_file_manager.cpp +11 -9
  1222. package/src/duckdb/src/storage/temporary_memory_manager.cpp +227 -39
  1223. package/src/duckdb/src/storage/wal_replay.cpp +68 -28
  1224. package/src/duckdb/src/storage/write_ahead_log.cpp +56 -47
  1225. package/src/duckdb/src/transaction/cleanup_state.cpp +9 -1
  1226. package/src/duckdb/src/transaction/commit_state.cpp +7 -170
  1227. package/src/duckdb/src/transaction/duck_transaction.cpp +87 -19
  1228. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +65 -10
  1229. package/src/duckdb/src/transaction/meta_transaction.cpp +18 -3
  1230. package/src/duckdb/src/transaction/transaction_context.cpp +21 -17
  1231. package/src/duckdb/src/transaction/undo_buffer.cpp +20 -14
  1232. package/src/duckdb/src/transaction/wal_write_state.cpp +292 -0
  1233. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +0 -1
  1234. package/src/duckdb/third_party/brotli/common/brotli_constants.h +204 -0
  1235. package/src/duckdb/third_party/brotli/common/brotli_platform.h +543 -0
  1236. package/src/duckdb/third_party/brotli/common/constants.cpp +17 -0
  1237. package/src/duckdb/third_party/brotli/common/context.cpp +156 -0
  1238. package/src/duckdb/third_party/brotli/common/context.h +110 -0
  1239. package/src/duckdb/third_party/brotli/common/dictionary.cpp +5912 -0
  1240. package/src/duckdb/third_party/brotli/common/dictionary.h +60 -0
  1241. package/src/duckdb/third_party/brotli/common/platform.cpp +24 -0
  1242. package/src/duckdb/third_party/brotli/common/shared_dictionary.cpp +517 -0
  1243. package/src/duckdb/third_party/brotli/common/shared_dictionary_internal.h +71 -0
  1244. package/src/duckdb/third_party/brotli/common/transform.cpp +287 -0
  1245. package/src/duckdb/third_party/brotli/common/transform.h +77 -0
  1246. package/src/duckdb/third_party/brotli/common/version.h +51 -0
  1247. package/src/duckdb/third_party/brotli/dec/bit_reader.cpp +74 -0
  1248. package/src/duckdb/third_party/brotli/dec/bit_reader.h +419 -0
  1249. package/src/duckdb/third_party/brotli/dec/decode.cpp +2758 -0
  1250. package/src/duckdb/third_party/brotli/dec/huffman.cpp +338 -0
  1251. package/src/duckdb/third_party/brotli/dec/huffman.h +118 -0
  1252. package/src/duckdb/third_party/brotli/dec/prefix.h +733 -0
  1253. package/src/duckdb/third_party/brotli/dec/state.cpp +178 -0
  1254. package/src/duckdb/third_party/brotli/dec/state.h +386 -0
  1255. package/src/duckdb/third_party/brotli/enc/backward_references.cpp +3775 -0
  1256. package/src/duckdb/third_party/brotli/enc/backward_references.h +36 -0
  1257. package/src/duckdb/third_party/brotli/enc/backward_references_hq.cpp +935 -0
  1258. package/src/duckdb/third_party/brotli/enc/backward_references_hq.h +92 -0
  1259. package/src/duckdb/third_party/brotli/enc/bit_cost.cpp +410 -0
  1260. package/src/duckdb/third_party/brotli/enc/bit_cost.h +60 -0
  1261. package/src/duckdb/third_party/brotli/enc/block_splitter.cpp +1653 -0
  1262. package/src/duckdb/third_party/brotli/enc/block_splitter.h +48 -0
  1263. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp +1431 -0
  1264. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.h +85 -0
  1265. package/src/duckdb/third_party/brotli/enc/brotli_hash.h +4352 -0
  1266. package/src/duckdb/third_party/brotli/enc/brotli_params.h +47 -0
  1267. package/src/duckdb/third_party/brotli/enc/cluster.cpp +1025 -0
  1268. package/src/duckdb/third_party/brotli/enc/cluster.h +1017 -0
  1269. package/src/duckdb/third_party/brotli/enc/command.cpp +24 -0
  1270. package/src/duckdb/third_party/brotli/enc/command.h +187 -0
  1271. package/src/duckdb/third_party/brotli/enc/compound_dictionary.cpp +209 -0
  1272. package/src/duckdb/third_party/brotli/enc/compound_dictionary.h +75 -0
  1273. package/src/duckdb/third_party/brotli/enc/compress_fragment.cpp +796 -0
  1274. package/src/duckdb/third_party/brotli/enc/compress_fragment.h +82 -0
  1275. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp +653 -0
  1276. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h +68 -0
  1277. package/src/duckdb/third_party/brotli/enc/dictionary_hash.cpp +1844 -0
  1278. package/src/duckdb/third_party/brotli/enc/dictionary_hash.h +21 -0
  1279. package/src/duckdb/third_party/brotli/enc/encode.cpp +1990 -0
  1280. package/src/duckdb/third_party/brotli/enc/encoder_dict.cpp +636 -0
  1281. package/src/duckdb/third_party/brotli/enc/encoder_dict.h +153 -0
  1282. package/src/duckdb/third_party/brotli/enc/entropy_encode.cpp +500 -0
  1283. package/src/duckdb/third_party/brotli/enc/entropy_encode.h +119 -0
  1284. package/src/duckdb/third_party/brotli/enc/entropy_encode_static.h +538 -0
  1285. package/src/duckdb/third_party/brotli/enc/fast_log.cpp +101 -0
  1286. package/src/duckdb/third_party/brotli/enc/fast_log.h +63 -0
  1287. package/src/duckdb/third_party/brotli/enc/find_match_length.h +68 -0
  1288. package/src/duckdb/third_party/brotli/enc/histogram.cpp +96 -0
  1289. package/src/duckdb/third_party/brotli/enc/histogram.h +210 -0
  1290. package/src/duckdb/third_party/brotli/enc/literal_cost.cpp +176 -0
  1291. package/src/duckdb/third_party/brotli/enc/literal_cost.h +28 -0
  1292. package/src/duckdb/third_party/brotli/enc/memory.cpp +190 -0
  1293. package/src/duckdb/third_party/brotli/enc/memory.h +127 -0
  1294. package/src/duckdb/third_party/brotli/enc/metablock.cpp +1225 -0
  1295. package/src/duckdb/third_party/brotli/enc/metablock.h +102 -0
  1296. package/src/duckdb/third_party/brotli/enc/prefix.h +50 -0
  1297. package/src/duckdb/third_party/brotli/enc/quality.h +202 -0
  1298. package/src/duckdb/third_party/brotli/enc/ringbuffer.h +164 -0
  1299. package/src/duckdb/third_party/brotli/enc/state.h +106 -0
  1300. package/src/duckdb/third_party/brotli/enc/static_dict.cpp +538 -0
  1301. package/src/duckdb/third_party/brotli/enc/static_dict.h +37 -0
  1302. package/src/duckdb/third_party/brotli/enc/static_dict_lut.h +5862 -0
  1303. package/src/duckdb/third_party/brotli/enc/utf8_util.cpp +81 -0
  1304. package/src/duckdb/third_party/brotli/enc/utf8_util.h +29 -0
  1305. package/src/duckdb/third_party/brotli/enc/write_bits.h +84 -0
  1306. package/src/duckdb/third_party/brotli/include/brotli/decode.h +405 -0
  1307. package/src/duckdb/third_party/brotli/include/brotli/encode.h +489 -0
  1308. package/src/duckdb/third_party/brotli/include/brotli/port.h +238 -0
  1309. package/src/duckdb/third_party/brotli/include/brotli/shared_dictionary.h +96 -0
  1310. package/src/duckdb/third_party/brotli/include/brotli/types.h +83 -0
  1311. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +20 -4
  1312. package/src/duckdb/third_party/fmt/include/fmt/format.h +54 -10
  1313. package/src/duckdb/third_party/fsst/fsst.h +2 -2
  1314. package/src/duckdb/third_party/fsst/libfsst.hpp +2 -2
  1315. package/src/duckdb/third_party/httplib/httplib.hpp +6763 -5580
  1316. package/src/duckdb/third_party/hyperloglog/hyperloglog.cpp +13 -30
  1317. package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +8 -2
  1318. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
  1319. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +22 -9
  1320. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1041 -554
  1321. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  1322. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +2 -1
  1323. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +21605 -21752
  1324. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +538 -299
  1325. package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +1 -0
  1326. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +36 -12
  1327. package/src/duckdb/third_party/mbedtls/library/md.cpp +6 -6
  1328. package/src/duckdb/third_party/mbedtls/library/sha1.cpp +2 -0
  1329. package/src/duckdb/third_party/mbedtls/library/sha256.cpp +3 -0
  1330. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +99 -47
  1331. package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
  1332. package/src/duckdb/third_party/re2/re2/prog.cc +2 -2
  1333. package/src/duckdb/third_party/snappy/snappy-internal.h +398 -0
  1334. package/src/duckdb/third_party/snappy/snappy-sinksource.cc +111 -9
  1335. package/src/duckdb/third_party/snappy/snappy-sinksource.h +158 -0
  1336. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +523 -3
  1337. package/src/duckdb/third_party/snappy/snappy-stubs-public.h +34 -1
  1338. package/src/duckdb/third_party/snappy/snappy.cc +2626 -0
  1339. package/src/duckdb/third_party/snappy/snappy.h +223 -0
  1340. package/src/duckdb/third_party/snappy/snappy_version.hpp +11 -0
  1341. package/src/duckdb/third_party/utf8proc/include/utf8proc.hpp +69 -101
  1342. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +53 -0
  1343. package/src/duckdb/third_party/utf8proc/utf8proc.cpp +627 -678
  1344. package/src/duckdb/third_party/utf8proc/utf8proc_data.cpp +15008 -12868
  1345. package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +185 -29
  1346. package/src/duckdb/ub_extension_json_json_functions.cpp +6 -0
  1347. package/src/duckdb/ub_src_catalog_default.cpp +4 -0
  1348. package/src/duckdb/ub_src_common.cpp +7 -1
  1349. package/src/duckdb/ub_src_common_arrow.cpp +10 -0
  1350. package/src/duckdb/ub_src_common_enums.cpp +2 -0
  1351. package/src/duckdb/ub_src_common_tree_renderer.cpp +10 -0
  1352. package/src/duckdb/ub_src_common_types.cpp +2 -0
  1353. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +4 -0
  1354. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +2 -0
  1355. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +2 -0
  1356. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +2 -4
  1357. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +2 -0
  1358. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
  1359. package/src/duckdb/ub_src_execution_index_art.cpp +5 -3
  1360. package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +2 -0
  1361. package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
  1362. package/src/duckdb/ub_src_function.cpp +4 -0
  1363. package/src/duckdb/ub_src_function_cast.cpp +2 -0
  1364. package/src/duckdb/ub_src_function_scalar_generic.cpp +4 -0
  1365. package/src/duckdb/ub_src_function_scalar_list.cpp +0 -2
  1366. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  1367. package/src/duckdb/ub_src_function_table.cpp +2 -0
  1368. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  1369. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  1370. package/src/duckdb/ub_src_main.cpp +4 -0
  1371. package/src/duckdb/ub_src_main_buffered_data.cpp +4 -0
  1372. package/src/duckdb/ub_src_main_capi.cpp +10 -0
  1373. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  1374. package/src/duckdb/ub_src_main_relation.cpp +2 -0
  1375. package/src/duckdb/ub_src_main_secret.cpp +2 -0
  1376. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1377. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +2 -0
  1378. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1379. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1380. package/src/duckdb/ub_src_parallel.cpp +4 -0
  1381. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  1382. package/src/duckdb/ub_src_planner.cpp +2 -0
  1383. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
  1384. package/src/duckdb/ub_src_planner_binder_tableref.cpp +4 -0
  1385. package/src/duckdb/ub_src_storage_statistics.cpp +0 -2
  1386. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1387. package/test/columns.test.ts +1 -1
  1388. package/test/prepare.test.ts +1 -1
  1389. package/test/test_all_types.test.ts +1 -1
@@ -1,6 +1,6 @@
1
1
  /* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
2
2
  /*
3
- * Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
3
+ * Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
4
4
  * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
5
5
  *
6
6
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -27,7 +27,7 @@
27
27
  * Unicode data files.
28
28
  *
29
29
  * The original data files are available at
30
- * http://www.unicode.org/Public/UNIDATA/
30
+ * https://www.unicode.org/Public/UNIDATA/
31
31
  *
32
32
  * Please notice the copyright statement in the file "utf8proc_data.c".
33
33
  */
@@ -44,7 +44,6 @@
44
44
  #include "utf8proc.hpp"
45
45
 
46
46
  namespace duckdb {
47
-
48
47
  #ifndef SSIZE_MAX
49
48
  #define SSIZE_MAX ((size_t)SIZE_MAX/2)
50
49
  #endif
@@ -55,23 +54,23 @@ namespace duckdb {
55
54
  #include "utf8proc_data.cpp"
56
55
 
57
56
 
58
- // UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
59
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
- // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
67
- // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
68
- // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69
- // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
- // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71
- // 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
72
- // 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
- // 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
74
- // 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };
57
+ UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
58
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
67
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
68
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
71
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
72
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
73
+ 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };
75
74
 
76
75
  #define UTF8PROC_HANGUL_SBASE 0xAC00
77
76
  #define UTF8PROC_HANGUL_LBASE 0x1100
@@ -94,755 +93,712 @@ namespace duckdb {
94
93
  #define UTF8PROC_HANGUL_S_END 0xD7A4
95
94
 
96
95
  /* Should follow semantic-versioning rules (semver.org) based on API
97
- compatibility. (Note that the shared-library version number will
98
- be different, being based on ABI compatibility.): */
96
+ compatibility. (Note that the shared-library version number will
97
+ be different, being based on ABI compatibility.): */
99
98
  #define STRINGIZEx(x) #x
100
99
  #define STRINGIZE(x) STRINGIZEx(x)
101
100
  UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
102
- return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
101
+ return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
103
102
  }
104
103
 
105
104
  UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
106
- return "12.1.0";
105
+ return "15.1.0";
107
106
  }
108
107
 
109
108
  UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
110
- switch (errcode) {
111
- case UTF8PROC_ERROR_NOMEM:
112
- return "Memory for processing UTF-8 data could not be allocated.";
113
- case UTF8PROC_ERROR_OVERFLOW:
114
- return "UTF-8 string is too long to be processed.";
115
- case UTF8PROC_ERROR_INVALIDUTF8:
116
- return "Invalid UTF-8 string";
117
- case UTF8PROC_ERROR_NOTASSIGNED:
118
- return "Unassigned Unicode code point found in UTF-8 string.";
119
- case UTF8PROC_ERROR_INVALIDOPTS:
120
- return "Invalid options for UTF-8 processing chosen.";
121
- default:
122
- return "An unknown error occurred while processing UTF-8 data.";
123
- }
109
+ switch (errcode) {
110
+ case UTF8PROC_ERROR_NOMEM:
111
+ return "Memory for processing UTF-8 data could not be allocated.";
112
+ case UTF8PROC_ERROR_OVERFLOW:
113
+ return "UTF-8 string is too long to be processed.";
114
+ case UTF8PROC_ERROR_INVALIDUTF8:
115
+ return "Invalid UTF-8 string";
116
+ case UTF8PROC_ERROR_NOTASSIGNED:
117
+ return "Unassigned Unicode code point found in UTF-8 string.";
118
+ case UTF8PROC_ERROR_INVALIDOPTS:
119
+ return "Invalid options for UTF-8 processing chosen.";
120
+ default:
121
+ return "An unknown error occurred while processing UTF-8 data.";
122
+ }
124
123
  }
125
124
 
126
125
  #define utf_cont(ch) (((ch) & 0xc0) == 0x80)
127
126
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
128
- const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
127
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
129
128
  ) {
130
- utf8proc_uint32_t uc;
131
- const utf8proc_uint8_t *end;
132
-
133
- *dst = -1;
134
- if (!strlen) return 0;
135
- end = str + ((strlen < 0) ? 4 : strlen);
136
- uc = *str++;
137
- if (uc < 0x80) {
138
- *dst = uc;
139
- return 1;
140
- }
141
- // Must be between 0xc2 and 0xf4 inclusive to be valid
142
- if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
143
- if (uc < 0xe0) { // 2-byte sequence
144
- // Must have valid continuation character
145
- if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
146
- *dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
147
- return 2;
148
- }
149
- if (uc < 0xf0) { // 3-byte sequence
150
- if ((str + 1 >= end) || !utf_cont(*str) || !utf_cont(str[1]))
151
- return UTF8PROC_ERROR_INVALIDUTF8;
152
- // Check for surrogate chars
153
- if (uc == 0xed && *str > 0x9f)
154
- return UTF8PROC_ERROR_INVALIDUTF8;
155
- uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
156
- if (uc < 0x800)
157
- return UTF8PROC_ERROR_INVALIDUTF8;
158
- *dst = uc;
159
- return 3;
160
- }
161
- // 4-byte sequence
162
- // Must have 3 valid continuation characters
163
- if ((str + 2 >= end) || !utf_cont(*str) || !utf_cont(str[1]) || !utf_cont(str[2]))
164
- return UTF8PROC_ERROR_INVALIDUTF8;
165
- // Make sure in correct range (0x10000 - 0x10ffff)
166
- if (uc == 0xf0) {
167
- if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
168
- } else if (uc == 0xf4) {
169
- if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
170
- }
171
- *dst = ((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f);
172
- return 4;
129
+ utf8proc_int32_t uc;
130
+ const utf8proc_uint8_t *end;
131
+
132
+ *dst = -1;
133
+ if (!strlen) return 0;
134
+ end = str + ((strlen < 0) ? 4 : strlen);
135
+ uc = *str++;
136
+ if (uc < 0x80) {
137
+ *dst = uc;
138
+ return 1;
139
+ }
140
+ // Must be between 0xc2 and 0xf4 inclusive to be valid
141
+ if ((utf8proc_uint32_t)(uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
142
+ if (uc < 0xe0) { // 2-byte sequence
143
+ // Must have valid continuation character
144
+ if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
145
+ *dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
146
+ return 2;
147
+ }
148
+ if (uc < 0xf0) { // 3-byte sequence
149
+ if ((str + 1 >= end) || !utf_cont(*str) || !utf_cont(str[1]))
150
+ return UTF8PROC_ERROR_INVALIDUTF8;
151
+ // Check for surrogate chars
152
+ if (uc == 0xed && *str > 0x9f)
153
+ return UTF8PROC_ERROR_INVALIDUTF8;
154
+ uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
155
+ if (uc < 0x800)
156
+ return UTF8PROC_ERROR_INVALIDUTF8;
157
+ *dst = uc;
158
+ return 3;
159
+ }
160
+ // 4-byte sequence
161
+ // Must have 3 valid continuation characters
162
+ if ((str + 2 >= end) || !utf_cont(*str) || !utf_cont(str[1]) || !utf_cont(str[2]))
163
+ return UTF8PROC_ERROR_INVALIDUTF8;
164
+ // Make sure in correct range (0x10000 - 0x10ffff)
165
+ if (uc == 0xf0) {
166
+ if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
167
+ } else if (uc == 0xf4) {
168
+ if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
169
+ }
170
+ *dst = ((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f);
171
+ return 4;
173
172
  }
174
173
 
175
174
  UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) {
176
- return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000);
175
+ return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000);
177
176
  }
178
177
 
179
178
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
180
- if (uc < 0x00) {
181
- return 0;
182
- } else if (uc < 0x80) {
183
- dst[0] = (utf8proc_uint8_t) uc;
184
- return 1;
185
- } else if (uc < 0x800) {
186
- dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
187
- dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
188
- return 2;
189
- // Note: we allow encoding 0xd800-0xdfff here, so as not to change
190
- // the API, however, these are actually invalid in UTF-8
191
- } else if (uc < 0x10000) {
192
- dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
193
- dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
194
- dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
195
- return 3;
196
- } else if (uc < 0x110000) {
197
- dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
198
- dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
199
- dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
200
- dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
201
- return 4;
202
- } else return 0;
179
+ if (uc < 0x00) {
180
+ return 0;
181
+ } else if (uc < 0x80) {
182
+ dst[0] = (utf8proc_uint8_t) uc;
183
+ return 1;
184
+ } else if (uc < 0x800) {
185
+ dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
186
+ dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
187
+ return 2;
188
+ // Note: we allow encoding 0xd800-0xdfff here, so as not to change
189
+ // the API, however, these are actually invalid in UTF-8
190
+ } else if (uc < 0x10000) {
191
+ dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
192
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
193
+ dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
194
+ return 3;
195
+ } else if (uc < 0x110000) {
196
+ dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
197
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
198
+ dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
199
+ dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
200
+ return 4;
201
+ } else return 0;
203
202
  }
204
203
 
205
204
  /* internal version used for inserting 0xff bytes between graphemes */
206
205
  static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
207
- if (uc < 0x00) {
208
- if (uc == -1) { /* internal value used for grapheme breaks */
209
- dst[0] = (utf8proc_uint8_t)0xFF;
210
- return 1;
211
- }
212
- return 0;
213
- } else if (uc < 0x80) {
214
- dst[0] = (utf8proc_uint8_t)uc;
215
- return 1;
216
- } else if (uc < 0x800) {
217
- dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
218
- dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
219
- return 2;
220
- } else if (uc < 0x10000) {
221
- dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
222
- dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
223
- dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
224
- return 3;
225
- } else if (uc < 0x110000) {
226
- dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
227
- dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
228
- dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
229
- dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
230
- return 4;
231
- } else return 0;
206
+ if (uc < 0x00) {
207
+ if (uc == -1) { /* internal value used for grapheme breaks */
208
+ dst[0] = (utf8proc_uint8_t)0xFF;
209
+ return 1;
210
+ }
211
+ return 0;
212
+ } else if (uc < 0x80) {
213
+ dst[0] = (utf8proc_uint8_t)uc;
214
+ return 1;
215
+ } else if (uc < 0x800) {
216
+ dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
217
+ dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
218
+ return 2;
219
+ } else if (uc < 0x10000) {
220
+ dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
221
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
222
+ dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
223
+ return 3;
224
+ } else if (uc < 0x110000) {
225
+ dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
226
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
227
+ dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
228
+ dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
229
+ return 4;
230
+ } else return 0;
232
231
  }
233
232
 
234
233
  /* internal "unsafe" version that does not check whether uc is in range */
235
234
  static const utf8proc_property_t *unsafe_get_property(utf8proc_int32_t uc) {
236
- /* ASSERT: uc >= 0 && uc < 0x110000 */
237
- return utf8proc_properties + (
238
- utf8proc_stage2table[
239
- utf8proc_stage1table[uc >> 8] + (uc & 0xFF)
240
- ]
241
- );
235
+ /* ASSERT: uc >= 0 && uc < 0x110000 */
236
+ return utf8proc_properties + (
237
+ utf8proc_stage2table[
238
+ utf8proc_stage1table[uc >> 8] + (uc & 0xFF)
239
+ ]
240
+ );
242
241
  }
243
242
 
244
243
  UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) {
245
- return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc);
244
+ return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc);
246
245
  }
247
246
 
248
247
  /* return whether there is a grapheme break between boundclasses lbc and tbc
249
- (according to the definition of extended grapheme clusters)
248
+ (according to the definition of extended grapheme clusters)
250
249
 
251
- Rule numbering refers to TR29 Version 29 (Unicode 9.0.0):
252
- http://www.unicode.org/reports/tr29/tr29-29.html
250
+ Rule numbering refers to TR29 Version 29 (Unicode 9.0.0):
251
+ http://www.unicode.org/reports/tr29/tr29-29.html
253
252
 
254
- CAVEATS:
255
- Please note that evaluation of GB10 (grapheme breaks between emoji zwj sequences)
256
- and GB 12/13 (regional indicator code points) require knowledge of previous characters
257
- and are thus not handled by this function. This may result in an incorrect break before
258
- an E_Modifier class codepoint and an incorrectly missing break between two
259
- REGIONAL_INDICATOR class code points if such support does not exist in the caller.
253
+ CAVEATS:
254
+ Please note that evaluation of GB10 (grapheme breaks between emoji zwj sequences)
255
+ and GB 12/13 (regional indicator code points) require knowledge of previous characters
256
+ and are thus not handled by this function. This may result in an incorrect break before
257
+ an E_Modifier class codepoint and an incorrectly missing break between two
258
+ REGIONAL_INDICATOR class code points if such support does not exist in the caller.
260
259
 
261
- See the special support in grapheme_break_extended, for required bookkeeping by the caller.
260
+ See the special support in grapheme_break_extended, for required bookkeeping by the caller.
262
261
  */
263
262
  static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
264
- return
265
- (lbc == UTF8PROC_BOUNDCLASS_START) ? true : // GB1
266
- (lbc == UTF8PROC_BOUNDCLASS_CR && // GB3
267
- tbc == UTF8PROC_BOUNDCLASS_LF) ? false : // ---
268
- (lbc >= UTF8PROC_BOUNDCLASS_CR && lbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : // GB4
269
- (tbc >= UTF8PROC_BOUNDCLASS_CR && tbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : // GB5
270
- (lbc == UTF8PROC_BOUNDCLASS_L && // GB6
271
- (tbc == UTF8PROC_BOUNDCLASS_L || // ---
272
- tbc == UTF8PROC_BOUNDCLASS_V || // ---
273
- tbc == UTF8PROC_BOUNDCLASS_LV || // ---
274
- tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false : // ---
275
- ((lbc == UTF8PROC_BOUNDCLASS_LV || // GB7
276
- lbc == UTF8PROC_BOUNDCLASS_V) && // ---
277
- (tbc == UTF8PROC_BOUNDCLASS_V || // ---
278
- tbc == UTF8PROC_BOUNDCLASS_T)) ? false : // ---
279
- ((lbc == UTF8PROC_BOUNDCLASS_LVT || // GB8
280
- lbc == UTF8PROC_BOUNDCLASS_T) && // ---
281
- tbc == UTF8PROC_BOUNDCLASS_T) ? false : // ---
282
- (tbc == UTF8PROC_BOUNDCLASS_EXTEND || // GB9
283
- tbc == UTF8PROC_BOUNDCLASS_ZWJ || // ---
284
- tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a
285
- lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b
286
- (lbc == UTF8PROC_BOUNDCLASS_E_ZWG && // GB11 (requires additional handling below)
287
- tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ----
288
- (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below)
289
- tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ----
290
- true; // GB999
291
- }
292
-
293
- utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
263
+ return
264
+ (lbc == UTF8PROC_BOUNDCLASS_START) ? true : // GB1
265
+ (lbc == UTF8PROC_BOUNDCLASS_CR && // GB3
266
+ tbc == UTF8PROC_BOUNDCLASS_LF) ? false : // ---
267
+ (lbc >= UTF8PROC_BOUNDCLASS_CR && lbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : // GB4
268
+ (tbc >= UTF8PROC_BOUNDCLASS_CR && tbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : // GB5
269
+ (lbc == UTF8PROC_BOUNDCLASS_L && // GB6
270
+ (tbc == UTF8PROC_BOUNDCLASS_L || // ---
271
+ tbc == UTF8PROC_BOUNDCLASS_V || // ---
272
+ tbc == UTF8PROC_BOUNDCLASS_LV || // ---
273
+ tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false : // ---
274
+ ((lbc == UTF8PROC_BOUNDCLASS_LV || // GB7
275
+ lbc == UTF8PROC_BOUNDCLASS_V) && // ---
276
+ (tbc == UTF8PROC_BOUNDCLASS_V || // ---
277
+ tbc == UTF8PROC_BOUNDCLASS_T)) ? false : // ---
278
+ ((lbc == UTF8PROC_BOUNDCLASS_LVT || // GB8
279
+ lbc == UTF8PROC_BOUNDCLASS_T) && // ---
280
+ tbc == UTF8PROC_BOUNDCLASS_T) ? false : // ---
281
+ (tbc == UTF8PROC_BOUNDCLASS_EXTEND || // GB9
282
+ tbc == UTF8PROC_BOUNDCLASS_ZWJ || // ---
283
+ tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a
284
+ lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b
285
+ (lbc == UTF8PROC_BOUNDCLASS_E_ZWG && // GB11 (requires additional handling below)
286
+ tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ----
287
+ (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below)
288
+ tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ----
289
+ true; // GB999
290
+ }
291
+
292
+ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, int licb, int ticb, utf8proc_int32_t *state)
294
293
  {
295
- int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
296
- ? *state : lbc);
297
- utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
298
- if (state) {
299
- // Special support for GB 12/13 made possible by GB999. After two RI
300
- // class codepoints we want to force a break. Do this by resetting the
301
- // second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
302
- // after that character according to GB999 (unless of course such a break is
303
- // forbidden by a different rule such as GB9).
304
- if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
305
- *state = UTF8PROC_BOUNDCLASS_OTHER;
306
- // Special support for GB11 (emoji extend* zwj / emoji)
307
- else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
308
- if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
309
- *state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
310
- else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
311
- *state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
312
- else
313
- *state = tbc;
314
- }
315
- else
316
- *state = tbc;
317
- }
318
- return break_permitted;
294
+ if (state) {
295
+ int state_bc, state_icb; /* boundclass and indic_conjunct_break state */
296
+ if (*state == 0) { /* state initialization */
297
+ state_bc = lbc;
298
+ state_icb = licb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT ? licb : UTF8PROC_INDIC_CONJUNCT_BREAK_NONE;
299
+ }
300
+ else { /* lbc and licb are already encoded in *state */
301
+ state_bc = *state & 0xff; // 1st byte of state is bound class
302
+ state_icb = *state >> 8; // 2nd byte of state is indic conjunct break
303
+ }
304
+
305
+ utf8proc_bool break_permitted = grapheme_break_simple(state_bc, tbc) &&
306
+ !(state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER
307
+ && ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT); // GB9c
308
+
309
+ // Special support for GB9c. Don't break between two consonants
310
+ // separated 1+ linker characters and 0+ extend characters in any order.
311
+ // After a consonant, we enter LINKER state after at least one linker.
312
+ if (ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
313
+ || state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
314
+ || state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND)
315
+ state_icb = ticb;
316
+ else if (state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER)
317
+ state_icb = ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND ?
318
+ UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER : ticb;
319
+
320
+ // Special support for GB 12/13 made possible by GB999. After two RI
321
+ // class codepoints we want to force a break. Do this by resetting the
322
+ // second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
323
+ // after that character according to GB999 (unless of course such a break is
324
+ // forbidden by a different rule such as GB9).
325
+ if (state_bc == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
326
+ state_bc = UTF8PROC_BOUNDCLASS_OTHER;
327
+ // Special support for GB11 (emoji extend* zwj / emoji)
328
+ else if (state_bc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
329
+ if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
330
+ state_bc = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
331
+ else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
332
+ state_bc = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
333
+ else
334
+ state_bc = tbc;
335
+ }
336
+ else
337
+ state_bc = tbc;
338
+
339
+ *state = state_bc + (state_icb << 8);
340
+ return break_permitted;
341
+ }
342
+ else
343
+ return grapheme_break_simple(lbc, tbc);
319
344
  }
320
345
 
321
346
  UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
322
- utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
347
+ utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
323
348
 
324
- return grapheme_break_extended(utf8proc_get_property(c1)->boundclass,
325
- utf8proc_get_property(c2)->boundclass,
326
- state);
349
+ const utf8proc_property_t *p1 = utf8proc_get_property(c1);
350
+ const utf8proc_property_t *p2 = utf8proc_get_property(c2);
351
+ return grapheme_break_extended(p1->boundclass,
352
+ p2->boundclass,
353
+ p1->indic_conjunct_break,
354
+ p2->indic_conjunct_break,
355
+ state);
327
356
  }
328
357
 
329
358
 
330
359
  UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(
331
- utf8proc_int32_t c1, utf8proc_int32_t c2) {
332
- return utf8proc_grapheme_break_stateful(c1, c2, NULL);
333
- }
334
-
335
- // from http://www.zedwood.com/article/cpp-utf8-char-to-codepoint
336
- UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_codepoint(const char *u_input, int &sz) {
337
- auto u = (const unsigned char *) u_input;
338
- unsigned char u0 = u[0];
339
- if (u0<=127) {
340
- sz = 1;
341
- return u0;
342
- }
343
- unsigned char u1 = u[1];
344
- if (u0>=192 && u0<=223) {
345
- sz = 2;
346
- return (u0-192)*64 + (u1-128);
347
- }
348
- if (u[0]==0xed && (u[1] & 0xa0) == 0xa0) {
349
- return -1; //code points, 0xd800 to 0xdfff
350
- }
351
- unsigned char u2 = u[2];
352
- if (u0>=224 && u0<=239) {
353
- sz = 3;
354
- return (u0-224)*4096 + (u1-128)*64 + (u2-128);
355
- }
356
- unsigned char u3 = u[3];
357
- if (u0>=240 && u0<=247) {
358
- sz = 4;
359
- return (u0-240)*262144 + (u1-128)*4096 + (u2-128)*64 + (u3-128);
360
- }
361
- return -1;
362
- }
363
-
364
- bool utf8proc_codepoint_to_utf8(int cp, int &sz, char *c) {
365
- if (cp<=0x7F) {
366
- sz = 1;
367
- c[0] = cp;
368
- } else if(cp<=0x7FF) {
369
- sz = 2;
370
- c[0] = (cp>>6)+192;
371
- c[1] = (cp&63)+128;
372
- } else if(0xd800<=cp && cp<=0xdfff) {
373
- sz = -1;
374
- // invalid block of utf
375
- return false;
376
- } else if(cp<=0xFFFF) {
377
- sz = 3;
378
- c[0] = (cp>>12)+224;
379
- c[1]= ((cp>>6)&63)+128;
380
- c[2]=(cp&63)+128;
381
- } else if(cp<=0x10FFFF) {
382
- sz = 4;
383
- c[0] = (cp>>18)+240;
384
- c[1] = ((cp>>12)&63)+128;
385
- c[2] = ((cp>>6)&63)+128;
386
- c[3]=(cp&63)+128;
387
- } else {
388
- sz = -1;
389
- return false;
390
- }
391
- return true;
392
- }
393
-
394
- int utf8proc_codepoint_length(int cp) {
395
- if (cp<=0x7F) {
396
- return 1;
397
- } else if(cp<=0x7FF) {
398
- return 2;
399
- } else if(0xd800<=cp && cp<=0xdfff) {
400
- return -1;
401
- } else if(cp<=0xFFFF) {
402
- return 3;
403
- } else if(cp<=0x10FFFF) {
404
- return 4;
405
- }
406
- return -1;
407
- }
408
-
409
- size_t utf8proc_next_grapheme(const char *s, size_t len, size_t cpos) {
410
- int sz;
411
- int boundclass = UTF8PROC_BOUNDCLASS_START;
412
- int initial = utf8proc_get_property(utf8proc_codepoint(s + cpos, sz))->boundclass;
413
- grapheme_break_extended(boundclass, initial, &boundclass);
414
- while(true) {
415
- cpos += sz;
416
- if (cpos >= len) {
417
- return cpos;
418
- }
419
- int next = utf8proc_get_property(utf8proc_codepoint(s + cpos, sz))->boundclass;
420
- if (grapheme_break_extended(boundclass, next, &boundclass)) {
421
- return cpos;
422
- }
423
- }
360
+ utf8proc_int32_t c1, utf8proc_int32_t c2) {
361
+ return utf8proc_grapheme_break_stateful(c1, c2, NULL);
424
362
  }
425
363
 
426
364
  static utf8proc_int32_t seqindex_decode_entry(const utf8proc_uint16_t **entry)
427
365
  {
428
- utf8proc_int32_t entry_cp = **entry;
429
- if ((entry_cp & 0xF800) == 0xD800) {
430
- *entry = *entry + 1;
431
- entry_cp = ((entry_cp & 0x03FF) << 10) | (**entry & 0x03FF);
432
- entry_cp += 0x10000;
433
- }
434
- return entry_cp;
366
+ utf8proc_int32_t entry_cp = **entry;
367
+ if ((entry_cp & 0xF800) == 0xD800) {
368
+ *entry = *entry + 1;
369
+ entry_cp = ((entry_cp & 0x03FF) << 10) | (**entry & 0x03FF);
370
+ entry_cp += 0x10000;
371
+ }
372
+ return entry_cp;
435
373
  }
436
374
 
437
375
  static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)
438
376
  {
439
- const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
440
- return seqindex_decode_entry(&entry);
377
+ const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
378
+ return seqindex_decode_entry(&entry);
441
379
  }
442
380
 
443
381
  static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
444
- utf8proc_ssize_t written = 0;
445
- const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
446
- int len = seqindex >> 13;
447
- if (len >= 7) {
448
- len = *entry;
449
- entry++;
450
- }
451
- for (; len >= 0; entry++, len--) {
452
- utf8proc_int32_t entry_cp = seqindex_decode_entry(&entry);
453
- utf8proc_int32_t *dst_ptr = dst ? dst + written : nullptr;
454
- written += utf8proc_decompose_char(entry_cp, dst_ptr,
455
- (bufsize > written) ? (bufsize - written) : 0, options,
456
- last_boundclass);
457
- if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
458
- }
459
- return written;
382
+ utf8proc_ssize_t written = 0;
383
+ const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x3FFF];
384
+ int len = seqindex >> 14;
385
+ if (len >= 3) {
386
+ len = *entry;
387
+ entry++;
388
+ }
389
+ for (; len >= 0; entry++, len--) {
390
+ utf8proc_int32_t entry_cp = seqindex_decode_entry(&entry);
391
+
392
+ written += utf8proc_decompose_char(entry_cp, dst ? dst+written : nullptr,
393
+ (bufsize > written) ? (bufsize - written) : 0, options,
394
+ last_boundclass);
395
+ if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
396
+ }
397
+ return written;
460
398
  }
461
399
 
462
400
  UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
463
401
  {
464
- utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
465
- return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
402
+ utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
403
+ return cl != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cl) : c;
466
404
  }
467
405
 
468
406
  UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
469
407
  {
470
- utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
471
- return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
408
+ utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
409
+ return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
472
410
  }
473
411
 
474
412
  UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
475
413
  {
476
- utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
477
- return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
414
+ utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
415
+ return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
416
+ }
417
+
418
+ UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
419
+ {
420
+ const utf8proc_property_t *p = utf8proc_get_property(c);
421
+ return p->lowercase_seqindex != p->uppercase_seqindex && p->lowercase_seqindex == UINT16_MAX;
422
+ }
423
+
424
+ UTF8PROC_DLLEXPORT int utf8proc_isupper(utf8proc_int32_t c)
425
+ {
426
+ const utf8proc_property_t *p = utf8proc_get_property(c);
427
+ return p->lowercase_seqindex != p->uppercase_seqindex && p->uppercase_seqindex == UINT16_MAX && p->category != UTF8PROC_CATEGORY_LT;
478
428
  }
479
429
 
480
430
  /* return a character width analogous to wcwidth (except portable and
481
- hopefully less buggy than most system wcwidth functions). */
431
+ hopefully less buggy than most system wcwidth functions). */
482
432
  UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
483
- return utf8proc_get_property(c)->charwidth;
433
+ return utf8proc_get_property(c)->charwidth;
484
434
  }
485
435
 
486
436
  UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
487
- return (utf8proc_category_t)utf8proc_get_property(c)->category;
437
+ return (utf8proc_category_t) utf8proc_get_property(c)->category;
488
438
  }
489
439
 
490
440
  UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
491
- static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
492
- return s[utf8proc_category(c)];
441
+ static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
442
+ return s[utf8proc_category(c)];
493
443
  }
494
444
 
495
445
  #define utf8proc_decompose_lump(replacement_uc) \
496
- return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
497
- (utf8proc_option_t) (options & ~UTF8PROC_LUMP), last_boundclass)
446
+ return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
447
+ (utf8proc_option_t)(options & ~(unsigned int)UTF8PROC_LUMP), last_boundclass)
498
448
 
499
449
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
500
- const utf8proc_property_t *property;
501
- utf8proc_propval_t category;
502
- utf8proc_int32_t hangul_sindex;
503
- if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
504
- property = unsafe_get_property(uc);
505
- category = property->category;
506
- hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
507
- if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
508
- if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
509
- utf8proc_int32_t hangul_tindex;
510
- if (bufsize >= 1) {
511
- dst[0] = UTF8PROC_HANGUL_LBASE +
512
- hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
513
- if (bufsize >= 2) dst[1] = UTF8PROC_HANGUL_VBASE +
514
- (hangul_sindex % UTF8PROC_HANGUL_NCOUNT) / UTF8PROC_HANGUL_TCOUNT;
515
- }
516
- hangul_tindex = hangul_sindex % UTF8PROC_HANGUL_TCOUNT;
517
- if (!hangul_tindex) return 2;
518
- if (bufsize >= 3) dst[2] = UTF8PROC_HANGUL_TBASE + hangul_tindex;
519
- return 3;
520
- }
521
- }
522
- if (options & UTF8PROC_REJECTNA) {
523
- if (!category) return UTF8PROC_ERROR_NOTASSIGNED;
524
- }
525
- if (options & UTF8PROC_IGNORE) {
526
- if (property->ignorable) return 0;
527
- }
528
- if (options & UTF8PROC_STRIPNA) {
529
- if (!category) return 0;
530
- }
531
- if (options & UTF8PROC_LUMP) {
532
- if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
533
- if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
534
- utf8proc_decompose_lump(0x0027);
535
- if (category == UTF8PROC_CATEGORY_PD || uc == 0x2212)
536
- utf8proc_decompose_lump(0x002D);
537
- if (uc == 0x2044 || uc == 0x2215) utf8proc_decompose_lump(0x002F);
538
- if (uc == 0x2236) utf8proc_decompose_lump(0x003A);
539
- if (uc == 0x2039 || uc == 0x2329 || uc == 0x3008)
540
- utf8proc_decompose_lump(0x003C);
541
- if (uc == 0x203A || uc == 0x232A || uc == 0x3009)
542
- utf8proc_decompose_lump(0x003E);
543
- if (uc == 0x2216) utf8proc_decompose_lump(0x005C);
544
- if (uc == 0x02C4 || uc == 0x02C6 || uc == 0x2038 || uc == 0x2303)
545
- utf8proc_decompose_lump(0x005E);
546
- if (category == UTF8PROC_CATEGORY_PC || uc == 0x02CD)
547
- utf8proc_decompose_lump(0x005F);
548
- if (uc == 0x02CB) utf8proc_decompose_lump(0x0060);
549
- if (uc == 0x2223) utf8proc_decompose_lump(0x007C);
550
- if (uc == 0x223C) utf8proc_decompose_lump(0x007E);
551
- if ((options & UTF8PROC_NLF2LS) && (options & UTF8PROC_NLF2PS)) {
552
- if (category == UTF8PROC_CATEGORY_ZL ||
553
- category == UTF8PROC_CATEGORY_ZP)
554
- utf8proc_decompose_lump(0x000A);
555
- }
556
- }
557
- if (options & UTF8PROC_STRIPMARK) {
558
- if (category == UTF8PROC_CATEGORY_MN ||
559
- category == UTF8PROC_CATEGORY_MC ||
560
- category == UTF8PROC_CATEGORY_ME) return 0;
561
- }
562
- if (options & UTF8PROC_CASEFOLD) {
563
- if (property->casefold_seqindex != UINT16_MAX) {
564
- return seqindex_write_char_decomposed(property->casefold_seqindex, dst, bufsize, options, last_boundclass);
565
- }
566
- }
567
- if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
568
- if (property->decomp_seqindex != UINT16_MAX &&
569
- (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
570
- return seqindex_write_char_decomposed(property->decomp_seqindex, dst, bufsize, options, last_boundclass);
571
- }
572
- }
573
- if (options & UTF8PROC_CHARBOUND) {
574
- utf8proc_bool boundary;
575
- int tbc = property->boundclass;
576
- boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
577
- if (boundary) {
578
- if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
579
- if (bufsize >= 2) dst[1] = uc;
580
- return 2;
581
- }
582
- }
583
- if (bufsize >= 1) *dst = uc;
584
- return 1;
450
+ const utf8proc_property_t *property;
451
+ utf8proc_propval_t category;
452
+ utf8proc_int32_t hangul_sindex;
453
+ if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
454
+ property = unsafe_get_property(uc);
455
+ category = property->category;
456
+ hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
457
+ if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
458
+ if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
459
+ utf8proc_int32_t hangul_tindex;
460
+ if (bufsize >= 1) {
461
+ dst[0] = UTF8PROC_HANGUL_LBASE +
462
+ hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
463
+ if (bufsize >= 2) dst[1] = UTF8PROC_HANGUL_VBASE +
464
+ (hangul_sindex % UTF8PROC_HANGUL_NCOUNT) / UTF8PROC_HANGUL_TCOUNT;
465
+ }
466
+ hangul_tindex = hangul_sindex % UTF8PROC_HANGUL_TCOUNT;
467
+ if (!hangul_tindex) return 2;
468
+ if (bufsize >= 3) dst[2] = UTF8PROC_HANGUL_TBASE + hangul_tindex;
469
+ return 3;
470
+ }
471
+ }
472
+ if (options & UTF8PROC_REJECTNA) {
473
+ if (!category) return UTF8PROC_ERROR_NOTASSIGNED;
474
+ }
475
+ if (options & UTF8PROC_IGNORE) {
476
+ if (property->ignorable) return 0;
477
+ }
478
+ if (options & UTF8PROC_STRIPNA) {
479
+ if (!category) return 0;
480
+ }
481
+ if (options & UTF8PROC_LUMP) {
482
+ if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
483
+ if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
484
+ utf8proc_decompose_lump(0x0027);
485
+ if (category == UTF8PROC_CATEGORY_PD || uc == 0x2212)
486
+ utf8proc_decompose_lump(0x002D);
487
+ if (uc == 0x2044 || uc == 0x2215) utf8proc_decompose_lump(0x002F);
488
+ if (uc == 0x2236) utf8proc_decompose_lump(0x003A);
489
+ if (uc == 0x2039 || uc == 0x2329 || uc == 0x3008)
490
+ utf8proc_decompose_lump(0x003C);
491
+ if (uc == 0x203A || uc == 0x232A || uc == 0x3009)
492
+ utf8proc_decompose_lump(0x003E);
493
+ if (uc == 0x2216) utf8proc_decompose_lump(0x005C);
494
+ if (uc == 0x02C4 || uc == 0x02C6 || uc == 0x2038 || uc == 0x2303)
495
+ utf8proc_decompose_lump(0x005E);
496
+ if (category == UTF8PROC_CATEGORY_PC || uc == 0x02CD)
497
+ utf8proc_decompose_lump(0x005F);
498
+ if (uc == 0x02CB) utf8proc_decompose_lump(0x0060);
499
+ if (uc == 0x2223) utf8proc_decompose_lump(0x007C);
500
+ if (uc == 0x223C) utf8proc_decompose_lump(0x007E);
501
+ if ((options & UTF8PROC_NLF2LS) && (options & UTF8PROC_NLF2PS)) {
502
+ if (category == UTF8PROC_CATEGORY_ZL ||
503
+ category == UTF8PROC_CATEGORY_ZP)
504
+ utf8proc_decompose_lump(0x000A);
505
+ }
506
+ }
507
+ if (options & UTF8PROC_STRIPMARK) {
508
+ if (category == UTF8PROC_CATEGORY_MN ||
509
+ category == UTF8PROC_CATEGORY_MC ||
510
+ category == UTF8PROC_CATEGORY_ME) return 0;
511
+ }
512
+ if (options & UTF8PROC_CASEFOLD) {
513
+ if (property->casefold_seqindex != UINT16_MAX) {
514
+ return seqindex_write_char_decomposed(property->casefold_seqindex, dst, bufsize, options, last_boundclass);
515
+ }
516
+ }
517
+ if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
518
+ if (property->decomp_seqindex != UINT16_MAX &&
519
+ (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
520
+ return seqindex_write_char_decomposed(property->decomp_seqindex, dst, bufsize, options, last_boundclass);
521
+ }
522
+ }
523
+ if (options & UTF8PROC_CHARBOUND) {
524
+ utf8proc_bool boundary;
525
+ boundary = grapheme_break_extended(0, property->boundclass, 0, property->indic_conjunct_break,
526
+ last_boundclass);
527
+ if (boundary) {
528
+ if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
529
+ if (bufsize >= 2) dst[1] = uc;
530
+ return 2;
531
+ }
532
+ }
533
+ if (bufsize >= 1) *dst = uc;
534
+ return 1;
585
535
  }
586
536
 
587
537
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
588
- const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
589
- utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
538
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
539
+ utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
590
540
  ) {
591
- return utf8proc_decompose_custom(str, strlen, buffer, bufsize, options, NULL, NULL);
541
+ return utf8proc_decompose_custom(str, strlen, buffer, bufsize, options, NULL, NULL);
592
542
  }
593
543
 
594
544
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
595
- const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
596
- utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
597
- utf8proc_custom_func custom_func, void *custom_data
545
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
546
+ utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
547
+ utf8proc_custom_func custom_func, void *custom_data
598
548
  ) {
599
- /* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
600
- utf8proc_ssize_t wpos = 0;
601
- if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
602
- return UTF8PROC_ERROR_INVALIDOPTS;
603
- if ((options & UTF8PROC_STRIPMARK) &&
604
- !(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
605
- return UTF8PROC_ERROR_INVALIDOPTS;
606
- {
607
- utf8proc_int32_t uc;
608
- utf8proc_ssize_t rpos = 0;
609
- utf8proc_ssize_t decomp_result;
610
- int boundclass = UTF8PROC_BOUNDCLASS_START;
611
- while (1) {
612
- if (options & UTF8PROC_NULLTERM) {
613
- rpos += utf8proc_iterate(str + rpos, -1, &uc);
614
- /* checking of return value is not necessary,
615
- as 'uc' is < 0 in case of error */
616
- if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
617
- if (rpos < 0) return UTF8PROC_ERROR_OVERFLOW;
618
- if (uc == 0) break;
619
- } else {
620
- if (rpos >= strlen) break;
621
- rpos += utf8proc_iterate(str + rpos, strlen - rpos, &uc);
622
- if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
623
- }
624
- if (custom_func != NULL) {
625
- uc = custom_func(uc, custom_data); /* user-specified custom mapping */
626
- }
627
- utf8proc_int32_t *target_buffer = buffer ? buffer + wpos : nullptr;
628
- decomp_result = utf8proc_decompose_char(
629
- uc, target_buffer, (bufsize > wpos) ? (bufsize - wpos) : 0, options,
630
- &boundclass
631
- );
632
- if (decomp_result < 0) return decomp_result;
633
- wpos += decomp_result;
634
- /* prohibiting integer overflows due to too long strings: */
635
- if (wpos < 0 ||
636
- wpos > (utf8proc_ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2))
637
- return UTF8PROC_ERROR_OVERFLOW;
638
- }
639
- }
640
- if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
641
- utf8proc_ssize_t pos = 0;
642
- while (pos < wpos-1) {
643
- utf8proc_int32_t uc1, uc2;
644
- const utf8proc_property_t *property1, *property2;
645
- uc1 = buffer[pos];
646
- uc2 = buffer[pos+1];
647
- property1 = unsafe_get_property(uc1);
648
- property2 = unsafe_get_property(uc2);
649
- if (property1->combining_class > property2->combining_class &&
650
- property2->combining_class > 0) {
651
- buffer[pos] = uc2;
652
- buffer[pos+1] = uc1;
653
- if (pos > 0) pos--; else pos++;
654
- } else {
655
- pos++;
656
- }
657
- }
658
- }
659
- return wpos;
549
+ /* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
550
+ utf8proc_ssize_t wpos = 0;
551
+ if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
552
+ return UTF8PROC_ERROR_INVALIDOPTS;
553
+ if ((options & UTF8PROC_STRIPMARK) &&
554
+ !(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
555
+ return UTF8PROC_ERROR_INVALIDOPTS;
556
+ {
557
+ utf8proc_int32_t uc;
558
+ utf8proc_ssize_t rpos = 0;
559
+ utf8proc_ssize_t decomp_result;
560
+ int boundclass = UTF8PROC_BOUNDCLASS_START;
561
+ while (1) {
562
+ if (options & UTF8PROC_NULLTERM) {
563
+ rpos += utf8proc_iterate(str + rpos, -1, &uc);
564
+ /* checking of return value is not necessary,
565
+ as 'uc' is < 0 in case of error */
566
+ if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
567
+ if (rpos < 0) return UTF8PROC_ERROR_OVERFLOW;
568
+ if (uc == 0) break;
569
+ } else {
570
+ if (rpos >= strlen) break;
571
+ rpos += utf8proc_iterate(str + rpos, strlen - rpos, &uc);
572
+ if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
573
+ }
574
+ if (custom_func != NULL) {
575
+ uc = custom_func(uc, custom_data); /* user-specified custom mapping */
576
+ }
577
+ decomp_result = utf8proc_decompose_char(
578
+ uc, buffer ? buffer + wpos : nullptr, (bufsize > wpos) ? (bufsize - wpos) : 0, options,
579
+ &boundclass
580
+ );
581
+ if (decomp_result < 0) return decomp_result;
582
+ wpos += decomp_result;
583
+ /* prohibiting integer overflows due to too long strings: */
584
+ if (wpos < 0 ||
585
+ wpos > (utf8proc_ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2))
586
+ return UTF8PROC_ERROR_OVERFLOW;
587
+ }
588
+ }
589
+ if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
590
+ utf8proc_ssize_t pos = 0;
591
+ while (pos < wpos-1) {
592
+ utf8proc_int32_t uc1, uc2;
593
+ const utf8proc_property_t *property1, *property2;
594
+ uc1 = buffer[pos];
595
+ uc2 = buffer[pos+1];
596
+ property1 = unsafe_get_property(uc1);
597
+ property2 = unsafe_get_property(uc2);
598
+ if (property1->combining_class > property2->combining_class &&
599
+ property2->combining_class > 0) {
600
+ buffer[pos] = uc2;
601
+ buffer[pos+1] = uc1;
602
+ if (pos > 0) pos--; else pos++;
603
+ } else {
604
+ pos++;
605
+ }
606
+ }
607
+ }
608
+ return wpos;
660
609
  }
661
610
 
662
611
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
663
- /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */
664
- if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
665
- utf8proc_ssize_t rpos;
666
- utf8proc_ssize_t wpos = 0;
667
- utf8proc_int32_t uc;
668
- for (rpos = 0; rpos < length; rpos++) {
669
- uc = buffer[rpos];
670
- if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
671
- if (uc == 0x000A || uc == 0x000D || uc == 0x0085 ||
672
- ((options & UTF8PROC_STRIPCC) && (uc == 0x000B || uc == 0x000C))) {
673
- if (options & UTF8PROC_NLF2LS) {
674
- if (options & UTF8PROC_NLF2PS) {
675
- buffer[wpos++] = 0x000A;
676
- } else {
677
- buffer[wpos++] = 0x2028;
678
- }
679
- } else {
680
- if (options & UTF8PROC_NLF2PS) {
681
- buffer[wpos++] = 0x2029;
682
- } else {
683
- buffer[wpos++] = 0x0020;
684
- }
685
- }
686
- } else if ((options & UTF8PROC_STRIPCC) &&
687
- (uc < 0x0020 || (uc >= 0x007F && uc < 0x00A0))) {
688
- if (uc == 0x0009) buffer[wpos++] = 0x0020;
689
- } else {
690
- buffer[wpos++] = uc;
691
- }
692
- }
693
- length = wpos;
694
- }
695
- if (options & UTF8PROC_COMPOSE) {
696
- utf8proc_int32_t *starter = NULL;
697
- utf8proc_int32_t current_char;
698
- const utf8proc_property_t *starter_property = NULL, *current_property;
699
- utf8proc_propval_t max_combining_class = -1;
700
- utf8proc_ssize_t rpos;
701
- utf8proc_ssize_t wpos = 0;
702
- utf8proc_int32_t composition;
703
- for (rpos = 0; rpos < length; rpos++) {
704
- current_char = buffer[rpos];
705
- current_property = unsafe_get_property(current_char);
706
- if (starter && current_property->combining_class > max_combining_class) {
707
- /* combination perhaps possible */
708
- utf8proc_int32_t hangul_lindex;
709
- utf8proc_int32_t hangul_sindex;
710
- hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
711
- if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
712
- utf8proc_int32_t hangul_vindex;
713
- hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
714
- if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
715
- *starter = UTF8PROC_HANGUL_SBASE +
716
- (hangul_lindex * UTF8PROC_HANGUL_VCOUNT + hangul_vindex) *
717
- UTF8PROC_HANGUL_TCOUNT;
718
- starter_property = NULL;
719
- continue;
720
- }
721
- }
722
- hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
723
- if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
724
- (hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
725
- utf8proc_int32_t hangul_tindex;
726
- hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
727
- if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
728
- *starter += hangul_tindex;
729
- starter_property = NULL;
730
- continue;
731
- }
732
- }
733
- if (!starter_property) {
734
- starter_property = unsafe_get_property(*starter);
735
- }
736
- if (starter_property->comb_index < 0x8000 &&
737
- current_property->comb_index != UINT16_MAX &&
738
- current_property->comb_index >= 0x8000) {
739
- int sidx = starter_property->comb_index;
740
- int idx = current_property->comb_index & 0x3FFF;
741
- if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) {
742
- idx += sidx + 2 - utf8proc_combinations[sidx];
743
- if (current_property->comb_index & 0x4000) {
744
- composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
745
- } else
746
- composition = utf8proc_combinations[idx];
747
-
748
- if (composition > 0 && (!(options & UTF8PROC_STABLE) ||
749
- !(unsafe_get_property(composition)->comp_exclusion))) {
750
- *starter = composition;
751
- starter_property = NULL;
752
- continue;
753
- }
754
- }
755
- }
756
- }
757
- buffer[wpos] = current_char;
758
- if (current_property->combining_class) {
759
- if (current_property->combining_class > max_combining_class) {
760
- max_combining_class = current_property->combining_class;
761
- }
762
- } else {
763
- starter = buffer + wpos;
764
- starter_property = NULL;
765
- max_combining_class = -1;
766
- }
767
- wpos++;
768
- }
769
- length = wpos;
770
- }
771
- return length;
612
+ /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */
613
+ if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
614
+ utf8proc_ssize_t rpos;
615
+ utf8proc_ssize_t wpos = 0;
616
+ utf8proc_int32_t uc;
617
+ for (rpos = 0; rpos < length; rpos++) {
618
+ uc = buffer[rpos];
619
+ if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
620
+ if (uc == 0x000A || uc == 0x000D || uc == 0x0085 ||
621
+ ((options & UTF8PROC_STRIPCC) && (uc == 0x000B || uc == 0x000C))) {
622
+ if (options & UTF8PROC_NLF2LS) {
623
+ if (options & UTF8PROC_NLF2PS) {
624
+ buffer[wpos++] = 0x000A;
625
+ } else {
626
+ buffer[wpos++] = 0x2028;
627
+ }
628
+ } else {
629
+ if (options & UTF8PROC_NLF2PS) {
630
+ buffer[wpos++] = 0x2029;
631
+ } else {
632
+ buffer[wpos++] = 0x0020;
633
+ }
634
+ }
635
+ } else if ((options & UTF8PROC_STRIPCC) &&
636
+ (uc < 0x0020 || (uc >= 0x007F && uc < 0x00A0))) {
637
+ if (uc == 0x0009) buffer[wpos++] = 0x0020;
638
+ } else {
639
+ buffer[wpos++] = uc;
640
+ }
641
+ }
642
+ length = wpos;
643
+ }
644
+ if (options & UTF8PROC_COMPOSE) {
645
+ utf8proc_int32_t *starter = NULL;
646
+ utf8proc_int32_t current_char;
647
+ const utf8proc_property_t *starter_property = NULL, *current_property;
648
+ utf8proc_propval_t max_combining_class = -1;
649
+ utf8proc_ssize_t rpos;
650
+ utf8proc_ssize_t wpos = 0;
651
+ utf8proc_int32_t composition;
652
+ for (rpos = 0; rpos < length; rpos++) {
653
+ current_char = buffer[rpos];
654
+ current_property = unsafe_get_property(current_char);
655
+ if (starter && current_property->combining_class > max_combining_class) {
656
+ /* combination perhaps possible */
657
+ utf8proc_int32_t hangul_lindex;
658
+ utf8proc_int32_t hangul_sindex;
659
+ hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
660
+ if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
661
+ utf8proc_int32_t hangul_vindex;
662
+ hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
663
+ if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
664
+ *starter = UTF8PROC_HANGUL_SBASE +
665
+ (hangul_lindex * UTF8PROC_HANGUL_VCOUNT + hangul_vindex) *
666
+ UTF8PROC_HANGUL_TCOUNT;
667
+ starter_property = NULL;
668
+ continue;
669
+ }
670
+ }
671
+ hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
672
+ if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
673
+ (hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
674
+ utf8proc_int32_t hangul_tindex;
675
+ hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
676
+ if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
677
+ *starter += hangul_tindex;
678
+ starter_property = NULL;
679
+ continue;
680
+ }
681
+ }
682
+ if (!starter_property) {
683
+ starter_property = unsafe_get_property(*starter);
684
+ }
685
+ if (starter_property->comb_index < 0x8000 &&
686
+ current_property->comb_index != UINT16_MAX &&
687
+ current_property->comb_index >= 0x8000) {
688
+ int sidx = starter_property->comb_index;
689
+ int idx = current_property->comb_index & 0x3FFF;
690
+ if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) {
691
+ idx += sidx + 2 - utf8proc_combinations[sidx];
692
+ if (current_property->comb_index & 0x4000) {
693
+ composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
694
+ } else
695
+ composition = utf8proc_combinations[idx];
696
+
697
+ if (composition > 0 && (!(options & UTF8PROC_STABLE) ||
698
+ !(unsafe_get_property(composition)->comp_exclusion))) {
699
+ *starter = composition;
700
+ starter_property = NULL;
701
+ continue;
702
+ }
703
+ }
704
+ }
705
+ }
706
+ buffer[wpos] = current_char;
707
+ if (current_property->combining_class) {
708
+ if (current_property->combining_class > max_combining_class) {
709
+ max_combining_class = current_property->combining_class;
710
+ }
711
+ } else {
712
+ starter = buffer + wpos;
713
+ starter_property = NULL;
714
+ max_combining_class = -1;
715
+ }
716
+ wpos++;
717
+ }
718
+ length = wpos;
719
+ }
720
+ return length;
772
721
  }
773
722
 
774
723
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
775
- /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
776
- ASSERT: 'buffer' has one spare byte of free space at the end! */
777
- length = utf8proc_normalize_utf32(buffer, length, options);
778
- if (length < 0) return length;
779
- {
780
- utf8proc_ssize_t rpos, wpos = 0;
781
- utf8proc_int32_t uc;
782
- if (options & UTF8PROC_CHARBOUND) {
783
- for (rpos = 0; rpos < length; rpos++) {
784
- uc = buffer[rpos];
785
- wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
786
- }
787
- } else {
788
- for (rpos = 0; rpos < length; rpos++) {
789
- uc = buffer[rpos];
790
- wpos += utf8proc_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
791
- }
792
- }
793
- ((utf8proc_uint8_t *)buffer)[wpos] = 0;
794
- return wpos;
795
- }
724
+ /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
725
+ ASSERT: 'buffer' has one spare byte of free space at the end! */
726
+ length = utf8proc_normalize_utf32(buffer, length, options);
727
+ if (length < 0) return length;
728
+ {
729
+ utf8proc_ssize_t rpos, wpos = 0;
730
+ utf8proc_int32_t uc;
731
+ if (options & UTF8PROC_CHARBOUND) {
732
+ for (rpos = 0; rpos < length; rpos++) {
733
+ uc = buffer[rpos];
734
+ wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
735
+ }
736
+ } else {
737
+ for (rpos = 0; rpos < length; rpos++) {
738
+ uc = buffer[rpos];
739
+ wpos += utf8proc_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
740
+ }
741
+ }
742
+ ((utf8proc_uint8_t *)buffer)[wpos] = 0;
743
+ return wpos;
744
+ }
796
745
  }
797
746
 
798
747
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
799
- const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
748
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
800
749
  ) {
801
- return utf8proc_map_custom(str, strlen, dstptr, options, NULL, NULL);
750
+ return utf8proc_map_custom(str, strlen, dstptr, options, NULL, NULL);
802
751
  }
803
752
 
804
753
  UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
805
- const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
806
- utf8proc_custom_func custom_func, void *custom_data
754
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
755
+ utf8proc_custom_func custom_func, void *custom_data
807
756
  ) {
808
- utf8proc_int32_t *buffer;
809
- utf8proc_ssize_t result;
810
- *dstptr = NULL;
811
- result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
812
- if (result < 0) return result;
813
- buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
814
- if (!buffer) return UTF8PROC_ERROR_NOMEM;
815
- result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
816
- if (result < 0) {
817
- free(buffer);
818
- return result;
819
- }
820
- result = utf8proc_reencode(buffer, result, options);
821
- if (result < 0) {
822
- free(buffer);
823
- return result;
824
- }
825
- {
826
- utf8proc_int32_t *newptr;
827
- newptr = (utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
828
- if (newptr) buffer = newptr;
829
- }
830
- *dstptr = (utf8proc_uint8_t *)buffer;
831
- return result;
832
- }
833
-
834
- UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str, utf8proc_ssize_t len) {
835
- utf8proc_uint8_t *retval;
836
- utf8proc_map(str, len, &retval, (utf8proc_option_t)(UTF8PROC_STABLE |
837
- UTF8PROC_DECOMPOSE));
838
- return retval;
839
- }
840
-
841
- UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str, utf8proc_ssize_t len) {
842
- utf8proc_uint8_t *retval;
843
- utf8proc_map(str, len, &retval, (utf8proc_option_t)(UTF8PROC_STABLE |
844
- UTF8PROC_COMPOSE));
845
- return retval;
757
+ utf8proc_int32_t *buffer;
758
+ utf8proc_ssize_t result;
759
+ *dstptr = NULL;
760
+ result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
761
+ if (result < 0) return result;
762
+ buffer = (utf8proc_int32_t *) malloc(((utf8proc_size_t)result) * sizeof(utf8proc_int32_t) + 1);
763
+ if (!buffer) return UTF8PROC_ERROR_NOMEM;
764
+ result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
765
+ if (result < 0) {
766
+ free(buffer);
767
+ return result;
768
+ }
769
+ result = utf8proc_reencode(buffer, result, options);
770
+ if (result < 0) {
771
+ free(buffer);
772
+ return result;
773
+ }
774
+ {
775
+ utf8proc_int32_t *newptr;
776
+ newptr = (utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
777
+ if (newptr) buffer = newptr;
778
+ }
779
+ *dstptr = (utf8proc_uint8_t *)buffer;
780
+ return result;
781
+ }
782
+
783
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen) {
784
+ utf8proc_uint8_t *retval;
785
+ utf8proc_map(str, strlen, &retval, utf8proc_option_t(UTF8PROC_STABLE |
786
+ UTF8PROC_DECOMPOSE));
787
+ return retval;
788
+ }
789
+
790
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen) {
791
+ utf8proc_uint8_t *retval;
792
+ utf8proc_map(str, strlen, &retval, utf8proc_option_t(UTF8PROC_STABLE |
793
+ UTF8PROC_COMPOSE));
794
+ return retval;
795
+ }
796
+
797
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen) {
798
+ utf8proc_uint8_t *retval;
799
+ utf8proc_map(str, strlen, &retval, utf8proc_option_t(UTF8PROC_STABLE |
800
+ UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT));
801
+ return retval;
846
802
  }
847
803
 
848
804
  UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_remove_accents(const utf8proc_uint8_t *str, utf8proc_ssize_t len) {
@@ -852,25 +808,18 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_remove_accents(const utf8proc_uint
852
808
  return retval;
853
809
  }
854
810
 
855
- UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str, utf8proc_ssize_t len) {
856
- utf8proc_uint8_t *retval;
857
- utf8proc_map(str, len, &retval, (utf8proc_option_t)(UTF8PROC_STABLE |
858
- UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT));
859
- return retval;
811
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen) {
812
+ utf8proc_uint8_t *retval;
813
+ utf8proc_map(str, strlen, &retval, utf8proc_option_t(UTF8PROC_STABLE |
814
+ UTF8PROC_COMPOSE | UTF8PROC_COMPAT));
815
+ return retval;
860
816
  }
861
817
 
862
- UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str, utf8proc_ssize_t len) {
863
- utf8proc_uint8_t *retval;
864
- utf8proc_map(str, len, &retval, (utf8proc_option_t)(UTF8PROC_STABLE |
865
- UTF8PROC_COMPOSE | UTF8PROC_COMPAT));
866
- return retval;
867
- }
868
-
869
- UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str, utf8proc_ssize_t len) {
870
- utf8proc_uint8_t *retval;
871
- utf8proc_map(str, len, &retval, (utf8proc_option_t)(UTF8PROC_STABLE |
872
- UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE));
873
- return retval;
818
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen) {
819
+ utf8proc_uint8_t *retval;
820
+ utf8proc_map(str, strlen, &retval, utf8proc_option_t(UTF8PROC_STABLE |
821
+ UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE));
822
+ return retval;
874
823
  }
875
824
 
876
825
  }