duckdb 0.10.2-dev0.0 → 0.10.2-dev3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (699)
  1. package/binding.gyp +22 -18
  2. package/binding.gyp.in +3 -0
  3. package/package.json +1 -1
  4. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -1
  5. package/src/duckdb/extension/icu/icu_extension.cpp +6 -2
  6. package/src/duckdb/extension/json/buffered_json_reader.cpp +10 -3
  7. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
  8. package/src/duckdb/extension/json/include/json_scan.hpp +13 -7
  9. package/src/duckdb/extension/json/include/json_serializer.hpp +5 -4
  10. package/src/duckdb/extension/json/include/json_structure.hpp +3 -3
  11. package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +15 -5
  12. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +15 -6
  13. package/src/duckdb/extension/json/json_functions/json_structure.cpp +21 -20
  14. package/src/duckdb/extension/json/json_functions/read_json.cpp +37 -3
  15. package/src/duckdb/extension/json/json_functions.cpp +7 -2
  16. package/src/duckdb/extension/json/json_scan.cpp +57 -33
  17. package/src/duckdb/extension/parquet/column_reader.cpp +12 -3
  18. package/src/duckdb/extension/parquet/column_writer.cpp +44 -7
  19. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +5 -1
  20. package/src/duckdb/extension/parquet/parquet_extension.cpp +30 -3
  21. package/src/duckdb/extension/parquet/parquet_metadata.cpp +1 -1
  22. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -2
  23. package/src/duckdb/extension/parquet/zstd_file_system.cpp +1 -1
  24. package/src/duckdb/src/catalog/catalog.cpp +5 -1
  25. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +21 -5
  26. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -9
  27. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +3 -7
  28. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +1 -1
  29. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +6 -7
  30. package/src/duckdb/src/catalog/catalog_entry.cpp +8 -0
  31. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -0
  32. package/src/duckdb/src/catalog/catalog_set.cpp +2 -2
  33. package/src/duckdb/src/catalog/default/default_functions.cpp +6 -6
  34. package/src/duckdb/src/catalog/default/default_schemas.cpp +1 -1
  35. package/src/duckdb/src/catalog/default/default_views.cpp +7 -7
  36. package/src/duckdb/src/catalog/dependency_catalog_set.cpp +2 -1
  37. package/src/duckdb/src/catalog/dependency_list.cpp +92 -8
  38. package/src/duckdb/src/catalog/dependency_manager.cpp +53 -68
  39. package/src/duckdb/src/catalog/duck_catalog.cpp +1 -1
  40. package/src/duckdb/src/common/adbc/adbc.cpp +287 -45
  41. package/src/duckdb/src/common/arrow/appender/union_data.cpp +2 -2
  42. package/src/duckdb/src/common/box_renderer.cpp +12 -12
  43. package/src/duckdb/src/common/crypto/md5.cpp +2 -1
  44. package/src/duckdb/src/common/enum_util.cpp +307 -1
  45. package/src/duckdb/src/common/enums/expression_type.cpp +4 -0
  46. package/src/duckdb/src/common/enums/optimizer_type.cpp +1 -1
  47. package/src/duckdb/src/common/file_system.cpp +60 -13
  48. package/src/duckdb/src/common/filename_pattern.cpp +13 -13
  49. package/src/duckdb/src/common/gzip_file_system.cpp +1 -1
  50. package/src/duckdb/src/common/http_state.cpp +1 -1
  51. package/src/duckdb/src/common/local_file_system.cpp +72 -71
  52. package/src/duckdb/src/common/multi_file_reader.cpp +48 -28
  53. package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
  54. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +13 -1
  55. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +32 -13
  56. package/src/duckdb/src/common/string_util.cpp +2 -3
  57. package/src/duckdb/src/common/tree_renderer.cpp +32 -67
  58. package/src/duckdb/src/common/types/bit.cpp +6 -6
  59. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  60. package/src/duckdb/src/common/types/hash.cpp +6 -6
  61. package/src/duckdb/src/common/types/hyperloglog.cpp +2 -0
  62. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +13 -0
  63. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +5 -7
  64. package/src/duckdb/src/common/types/uuid.cpp +1 -1
  65. package/src/duckdb/src/common/types/vector.cpp +22 -14
  66. package/src/duckdb/src/common/types.cpp +8 -1
  67. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +20 -18
  68. package/src/duckdb/src/common/vector_operations/generators.cpp +1 -1
  69. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +267 -110
  70. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +52 -23
  71. package/src/duckdb/src/common/virtual_file_system.cpp +33 -20
  72. package/src/duckdb/src/core_functions/aggregate/algebraic/avg.cpp +2 -2
  73. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +3 -3
  74. package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +31 -16
  75. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +3 -0
  76. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -0
  77. package/src/duckdb/src/core_functions/core_functions.cpp +1 -1
  78. package/src/duckdb/src/core_functions/function_list.cpp +2 -2
  79. package/src/duckdb/src/core_functions/scalar/date/time_bucket.cpp +1 -1
  80. package/src/duckdb/src/core_functions/scalar/generic/system_functions.cpp +46 -17
  81. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
  82. package/src/duckdb/src/core_functions/scalar/list/flatten.cpp +82 -45
  83. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -2
  84. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +3 -0
  85. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +3 -2
  86. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +2 -4
  87. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +12 -21
  88. package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -10
  89. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +133 -66
  90. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +0 -2
  91. package/src/duckdb/src/execution/expression_executor.cpp +0 -4
  92. package/src/duckdb/src/execution/expression_executor_state.cpp +1 -1
  93. package/src/duckdb/src/execution/index/art/art.cpp +2 -2
  94. package/src/duckdb/src/execution/index/unknown_index.cpp +13 -13
  95. package/src/duckdb/src/execution/join_hashtable.cpp +1 -1
  96. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +0 -1
  97. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +10 -7
  98. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +32 -1
  99. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +16 -2
  100. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +7 -7
  101. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +354 -159
  102. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -2
  103. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +22 -7
  104. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +1 -1
  105. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +10 -10
  106. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +9 -18
  107. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +180 -47
  108. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +100 -58
  109. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +88 -21
  110. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +12 -13
  111. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +2 -0
  112. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +118 -23
  113. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +2 -2
  114. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -5
  115. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -1
  116. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +5 -5
  117. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -1
  118. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +0 -9
  119. package/src/duckdb/src/execution/physical_plan/plan_vacuum.cpp +18 -0
  120. package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -3
  121. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +81 -106
  122. package/src/duckdb/src/execution/reservoir_sample.cpp +1 -1
  123. package/src/duckdb/src/execution/window_executor.cpp +48 -28
  124. package/src/duckdb/src/execution/window_segment_tree.cpp +20 -23
  125. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +1 -1
  126. package/src/duckdb/src/function/cast/enum_casts.cpp +20 -55
  127. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +10 -9
  128. package/src/duckdb/src/function/cast_rules.cpp +9 -1
  129. package/src/duckdb/src/function/compression_config.cpp +1 -1
  130. package/src/duckdb/src/function/function_binder.cpp +45 -44
  131. package/src/duckdb/src/function/function_set.cpp +9 -9
  132. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -2
  133. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +21 -5
  134. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +2 -2
  135. package/src/duckdb/src/function/scalar/list/list_select.cpp +5 -2
  136. package/src/duckdb/src/function/scalar/list/list_zip.cpp +5 -4
  137. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +60 -32
  138. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +1 -1
  139. package/src/duckdb/src/function/scalar/strftime_format.cpp +31 -25
  140. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +6 -6
  141. package/src/duckdb/src/function/scalar/string/length.cpp +23 -2
  142. package/src/duckdb/src/function/scalar/string/like.cpp +1 -1
  143. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +1 -1
  144. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +3 -3
  145. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +1 -1
  146. package/src/duckdb/src/function/table/arrow.cpp +7 -1
  147. package/src/duckdb/src/function/table/copy_csv.cpp +17 -13
  148. package/src/duckdb/src/function/table/read_csv.cpp +52 -39
  149. package/src/duckdb/src/function/table/sniff_csv.cpp +7 -13
  150. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
  151. package/src/duckdb/src/function/table/system/duckdb_databases.cpp +7 -1
  152. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +12 -2
  153. package/src/duckdb/src/function/table/system/duckdb_sequences.cpp +1 -2
  154. package/src/duckdb/src/function/table/system/duckdb_tables.cpp +2 -2
  155. package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +9 -2
  156. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +10 -6
  157. package/src/duckdb/src/function/table/table_scan.cpp +1 -4
  158. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  159. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +6 -2
  160. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +1 -1
  162. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -1
  163. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +6 -1
  164. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +3 -3
  165. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +3 -3
  166. package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +26 -4
  167. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +39 -6
  168. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +19 -14
  169. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -1
  170. package/src/duckdb/src/include/duckdb/catalog/standard_entry.hpp +4 -0
  171. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -3
  172. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +1 -1
  173. package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +1 -1
  174. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +16 -16
  175. package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +0 -1
  176. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +32 -0
  177. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +4 -2
  178. package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +14 -10
  179. package/src/duckdb/src/include/duckdb/common/exception/catalog_exception.hpp +4 -4
  180. package/src/duckdb/src/include/duckdb/common/exception/conversion_exception.hpp +6 -6
  181. package/src/duckdb/src/include/duckdb/common/exception/http_exception.hpp +3 -3
  182. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -4
  183. package/src/duckdb/src/include/duckdb/common/exception/transaction_exception.hpp +2 -2
  184. package/src/duckdb/src/include/duckdb/common/exception.hpp +57 -58
  185. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +2 -2
  186. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +2 -2
  187. package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +134 -0
  188. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -6
  189. package/src/duckdb/src/include/duckdb/common/file_system.hpp +35 -36
  190. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -6
  191. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +13 -13
  192. package/src/duckdb/src/include/duckdb/common/helper.hpp +42 -47
  193. package/src/duckdb/src/include/duckdb/common/http_state.hpp +1 -1
  194. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +2 -2
  195. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +10 -10
  196. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +13 -10
  197. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +3 -3
  198. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +1 -1
  199. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +50 -24
  200. package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +12 -4
  201. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +5 -5
  202. package/src/duckdb/src/include/duckdb/common/pipe_file_system.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/common/platform.h +1 -1
  204. package/src/duckdb/src/include/duckdb/common/printer.hpp +5 -5
  205. package/src/duckdb/src/include/duckdb/common/profiler.hpp +2 -2
  206. package/src/duckdb/src/include/duckdb/common/progress_bar/display/terminal_progress_bar_display.hpp +5 -5
  207. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -1
  208. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +7 -7
  209. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +3 -0
  210. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +2 -2
  211. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +17 -6
  212. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +12 -0
  213. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +14 -0
  214. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +16 -0
  215. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +3 -0
  216. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +2 -0
  217. package/src/duckdb/src/include/duckdb/common/string_util.hpp +4 -4
  218. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +24 -25
  219. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +5 -5
  220. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +1 -2
  221. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +10 -10
  222. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +5 -5
  223. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +1 -1
  224. package/src/duckdb/src/include/duckdb/common/types/constraint_conflict_info.hpp +1 -2
  225. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -2
  226. package/src/duckdb/src/include/duckdb/common/types/date.hpp +1 -1
  227. package/src/duckdb/src/include/duckdb/common/types/hash.hpp +5 -5
  228. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +1 -1
  229. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +4 -4
  230. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +1 -1
  231. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +10 -3
  232. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  233. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +12 -12
  234. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +1 -1
  235. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +21 -20
  236. package/src/duckdb/src/include/duckdb/common/types/time.hpp +2 -2
  237. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +3 -3
  238. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +2 -2
  239. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -2
  240. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +15 -7
  241. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
  242. package/src/duckdb/src/include/duckdb/common/types.hpp +14 -10
  243. package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +1 -1
  245. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +15 -14
  246. package/src/duckdb/src/include/duckdb/common/vector.hpp +21 -21
  247. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +2 -0
  248. package/src/duckdb/src/include/duckdb/common/vector_operations/general_cast.hpp +2 -1
  249. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +2 -3
  250. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +3 -2
  251. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +50 -32
  252. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +10 -11
  253. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +1 -1
  254. package/src/duckdb/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp +13 -1
  255. package/src/duckdb/src/include/duckdb/core_functions/function_list.hpp +1 -1
  256. package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +2 -1
  257. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  258. package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +1 -1
  259. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  260. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +2 -5
  261. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +3 -3
  262. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +2 -2
  263. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  265. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +0 -3
  266. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +9 -6
  267. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +3 -0
  268. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +40 -22
  269. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +5 -0
  270. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +1 -0
  271. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +6 -4
  272. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
  273. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +4 -0
  274. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +3 -1
  275. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +0 -3
  276. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +58 -17
  277. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -1
  278. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
  279. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +3 -3
  280. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -1
  281. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +4 -1
  282. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +16 -5
  283. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +6 -6
  284. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +2 -1
  285. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +1 -1
  286. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  287. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  288. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +5 -5
  289. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +4 -4
  290. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +6 -6
  291. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +4 -4
  292. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +10 -13
  293. package/src/duckdb/src/include/duckdb/function/function.hpp +3 -3
  294. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +24 -23
  295. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +1 -1
  296. package/src/duckdb/src/include/duckdb/function/function_set.hpp +1 -1
  297. package/src/duckdb/src/include/duckdb/function/pragma_function.hpp +1 -1
  298. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +3 -3
  299. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +7 -7
  300. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +3 -3
  301. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +6 -6
  302. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +4 -4
  303. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -2
  304. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +1 -3
  305. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +1 -1
  306. package/src/duckdb/src/include/duckdb/function/table_function.hpp +12 -11
  307. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +66 -60
  308. package/src/duckdb/src/include/duckdb/main/appender.hpp +6 -6
  309. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +3 -1
  310. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +2 -2
  311. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
  312. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -2
  313. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +2 -2
  314. package/src/duckdb/src/include/duckdb/main/client_context.hpp +1 -1
  315. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +3 -2
  316. package/src/duckdb/src/include/duckdb/main/client_data.hpp +1 -0
  317. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +2 -1
  318. package/src/duckdb/src/include/duckdb/main/config.hpp +14 -5
  319. package/src/duckdb/src/include/duckdb/main/connection.hpp +27 -26
  320. package/src/duckdb/src/include/duckdb/main/database.hpp +19 -3
  321. package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +58 -0
  322. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +2 -2
  323. package/src/duckdb/src/include/duckdb/main/error_manager.hpp +6 -6
  324. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +8 -0
  325. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
  326. package/src/duckdb/src/include/duckdb/main/external_dependencies.hpp +2 -1
  327. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +8 -8
  328. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +2 -59
  329. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
  330. package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +1 -1
  331. package/src/duckdb/src/include/duckdb/main/relation.hpp +1 -1
  332. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +11 -7
  333. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +2 -2
  334. package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +4 -4
  335. package/src/duckdb/src/include/duckdb/main/settings.hpp +78 -70
  336. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +45 -0
  337. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +11 -13
  338. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  339. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -1
  340. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +1 -1
  341. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
  342. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +1 -1
  343. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +1 -1
  344. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +7 -7
  345. package/src/duckdb/src/include/duckdb/optimizer/matcher/function_matcher.hpp +7 -7
  346. package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +1 -1
  347. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  348. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +32 -30
  349. package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +1 -1
  350. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  351. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +1 -1
  352. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +2 -2
  353. package/src/duckdb/src/include/duckdb/parallel/pipeline_event.hpp +1 -1
  354. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -4
  355. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +49 -8
  356. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +1 -1
  357. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +1 -1
  358. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
  359. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +1 -1
  360. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +1 -1
  361. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +1 -1
  362. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_database_info.hpp +40 -0
  363. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +2 -1
  364. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +1 -0
  365. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +2 -1
  366. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +1 -1
  367. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_pragma_function_info.hpp +1 -1
  368. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp +2 -2
  369. package/src/duckdb/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp +3 -3
  370. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +3 -2
  371. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +2 -5
  372. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
  373. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +3 -3
  374. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +1 -1
  375. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +3 -5
  376. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -2
  377. package/src/duckdb/src/include/duckdb/planner/binder.hpp +16 -9
  378. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
  379. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
  380. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
  381. package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +34 -0
  382. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  383. package/src/duckdb/src/include/duckdb/planner/expression.hpp +1 -1
  384. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +5 -8
  385. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +3 -4
  386. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +3 -2
  387. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +3 -5
  388. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -14
  389. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +3 -6
  390. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_bind_state.hpp +52 -0
  391. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +4 -10
  392. package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +5 -2
  393. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -0
  394. package/src/duckdb/src/include/duckdb/planner/expression_iterator.hpp +2 -0
  395. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +3 -6
  396. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +2 -0
  397. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  398. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  399. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_database.hpp +4 -12
  400. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_secret.hpp +1 -1
  401. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +2 -2
  402. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
  403. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -1
  404. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +1 -1
  405. package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +2 -2
  406. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +2 -2
  407. package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +2 -2
  408. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +2 -2
  409. package/src/duckdb/src/include/duckdb/planner/operator/logical_pragma.hpp +1 -1
  410. package/src/duckdb/src/include/duckdb/planner/operator/logical_prepare.hpp +3 -2
  411. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +3 -3
  412. package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +1 -1
  413. package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +2 -1
  414. package/src/duckdb/src/include/duckdb/planner/operator/logical_vacuum.hpp +52 -0
  415. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -1
  416. package/src/duckdb/src/include/duckdb/planner/parsed_data/bound_create_table_info.hpp +2 -2
  417. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +5 -4
  418. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  419. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +4 -1
  420. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +1 -1
  421. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -3
  422. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +2 -3
  423. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +1 -1
  424. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
  425. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +5 -5
  426. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +1 -1
  427. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +1 -1
  428. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_fetch.hpp +1 -1
  429. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +1 -1
  430. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -3
  431. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +2 -2
  432. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +1 -1
  433. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_fetch.hpp +1 -1
  434. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -3
  435. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/flag_buffer.hpp +1 -1
  436. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/leading_zero_buffer.hpp +3 -3
  437. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp.hpp +3 -3
  438. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp +0 -1
  439. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  440. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +4 -4
  441. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas.hpp +4 -4
  442. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_analyze.hpp +0 -1
  443. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  444. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +3 -3
  445. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  446. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  447. package/src/duckdb/src/include/duckdb/storage/magic_bytes.hpp +1 -1
  448. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +3 -3
  449. package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +1 -1
  450. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +1 -1
  451. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +4 -1
  452. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +24 -24
  453. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +3 -3
  454. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -2
  455. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
  456. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -4
  457. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +9 -6
  458. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  459. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +2 -2
  460. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +1 -1
  461. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +10 -1
  462. package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -2
  463. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +1 -1
  464. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +1 -1
  465. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  466. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +3 -3
  467. package/src/duckdb/src/include/duckdb/storage/table/segment_lock.hpp +1 -1
  468. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +4 -4
  469. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +2 -0
  470. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +3 -2
  471. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +1 -1
  472. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  473. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -1
  474. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  475. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +20 -1
  476. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -1
  477. package/src/duckdb/src/include/duckdb/transaction/transaction_data.hpp +1 -1
  478. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +1 -1
  479. package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +1 -1
  480. package/src/duckdb/src/main/appender.cpp +1 -1
  481. package/src/duckdb/src/main/attached_database.cpp +11 -3
  482. package/src/duckdb/src/main/capi/arrow-c.cpp +6 -2
  483. package/src/duckdb/src/main/capi/cast/utils-c.cpp +1 -1
  484. package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
  485. package/src/duckdb/src/main/capi/pending-c.cpp +1 -1
  486. package/src/duckdb/src/main/capi/prepared-c.cpp +2 -2
  487. package/src/duckdb/src/main/capi/result-c.cpp +4 -4
  488. package/src/duckdb/src/main/client_context.cpp +4 -12
  489. package/src/duckdb/src/main/client_context_file_opener.cpp +15 -3
  490. package/src/duckdb/src/main/client_data.cpp +5 -0
  491. package/src/duckdb/src/main/config.cpp +82 -82
  492. package/src/duckdb/src/main/database.cpp +31 -7
  493. package/src/duckdb/src/main/database_manager.cpp +3 -2
  494. package/src/duckdb/src/main/database_path_and_type.cpp +4 -4
  495. package/src/duckdb/src/main/error_manager.cpp +1 -1
  496. package/src/duckdb/src/main/extension/extension_alias.cpp +9 -9
  497. package/src/duckdb/src/main/extension/extension_helper.cpp +10 -5
  498. package/src/duckdb/src/main/extension/extension_install.cpp +1 -1
  499. package/src/duckdb/src/main/extension/extension_load.cpp +111 -37
  500. package/src/duckdb/src/main/query_profiler.cpp +1 -118
  501. package/src/duckdb/src/main/secret/secret_manager.cpp +1 -2
  502. package/src/duckdb/src/main/secret/secret_storage.cpp +1 -1
  503. package/src/duckdb/src/main/settings/settings.cpp +81 -65
  504. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +8 -1
  505. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -0
  506. package/src/duckdb/src/optimizer/compressed_materialization.cpp +26 -28
  507. package/src/duckdb/src/optimizer/cse_optimizer.cpp +5 -5
  508. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +10 -6
  509. package/src/duckdb/src/optimizer/optimizer.cpp +14 -17
  510. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +16 -5
  511. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +7 -4
  512. package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +2 -2
  513. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +3 -3
  514. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
  515. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +1 -1
  516. package/src/duckdb/src/optimizer/statistics/expression/propagate_between.cpp +8 -8
  517. package/src/duckdb/src/optimizer/statistics/expression/propagate_case.cpp +1 -1
  518. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +1 -1
  519. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  520. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +7 -7
  521. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +6 -6
  522. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +1 -1
  523. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +2 -2
  524. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -10
  525. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +1 -1
  526. package/src/duckdb/src/optimizer/statistics/operator/propagate_cross_product.cpp +1 -1
  527. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +14 -7
  528. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +2 -2
  529. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +9 -9
  530. package/src/duckdb/src/optimizer/statistics/operator/propagate_limit.cpp +1 -1
  531. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  532. package/src/duckdb/src/optimizer/statistics/operator/propagate_projection.cpp +2 -2
  533. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +1 -1
  534. package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +1 -1
  535. package/src/duckdb/src/optimizer/statistics_propagator.cpp +39 -18
  536. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  537. package/src/duckdb/src/parallel/task_scheduler.cpp +8 -1
  538. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +4 -2
  539. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +64 -0
  540. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -0
  541. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +13 -5
  542. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +24 -0
  543. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -8
  544. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +2 -1
  545. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +3 -0
  546. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +2 -6
  547. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +2 -6
  548. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -6
  549. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +1 -1
  550. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +5 -2
  551. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +2 -8
  552. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +1 -6
  553. package/src/duckdb/src/parser/transform/statement/transform_use.cpp +3 -2
  554. package/src/duckdb/src/parser/transformer.cpp +14 -2
  555. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +40 -17
  556. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +8 -3
  557. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +32 -30
  558. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +28 -17
  559. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +146 -101
  560. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +28 -26
  561. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +29 -0
  562. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +10 -10
  563. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -2
  564. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +23 -28
  565. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +11 -12
  566. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +9 -11
  567. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +10 -13
  568. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +3 -3
  569. package/src/duckdb/src/planner/binder/statement/bind_set.cpp +3 -0
  570. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +66 -65
  571. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +54 -46
  572. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +3 -0
  573. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +1 -1
  574. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +5 -5
  575. package/src/duckdb/src/planner/binder.cpp +78 -6
  576. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +11 -3
  577. package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +22 -0
  578. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +11 -58
  579. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +5 -8
  580. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +5 -4
  581. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +5 -19
  582. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +8 -8
  583. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +42 -26
  584. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -22
  585. package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +52 -0
  586. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +43 -5
  587. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +19 -7
  588. package/src/duckdb/src/planner/logical_operator.cpp +20 -3
  589. package/src/duckdb/src/planner/operator/logical_copy_database.cpp +4 -14
  590. package/src/duckdb/src/planner/operator/logical_delete.cpp +1 -1
  591. package/src/duckdb/src/planner/operator/logical_get.cpp +1 -1
  592. package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
  593. package/src/duckdb/src/planner/operator/logical_update.cpp +1 -1
  594. package/src/duckdb/src/planner/operator/logical_vacuum.cpp +65 -0
  595. package/src/duckdb/src/planner/planner.cpp +4 -4
  596. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +26 -27
  597. package/src/duckdb/src/storage/arena_allocator.cpp +9 -0
  598. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +1 -1
  599. package/src/duckdb/src/storage/buffer_manager.cpp +2 -10
  600. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  601. package/src/duckdb/src/storage/checkpoint_manager.cpp +15 -8
  602. package/src/duckdb/src/storage/compression/bitpacking.cpp +6 -1
  603. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
  604. package/src/duckdb/src/storage/data_pointer.cpp +1 -1
  605. package/src/duckdb/src/storage/data_table.cpp +18 -7
  606. package/src/duckdb/src/storage/local_storage.cpp +8 -5
  607. package/src/duckdb/src/storage/magic_bytes.cpp +6 -5
  608. package/src/duckdb/src/storage/partial_block_manager.cpp +1 -1
  609. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -9
  610. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +12 -10
  611. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +23 -0
  612. package/src/duckdb/src/storage/single_file_block_manager.cpp +46 -19
  613. package/src/duckdb/src/storage/standard_buffer_manager.cpp +21 -5
  614. package/src/duckdb/src/storage/statistics/array_stats.cpp +1 -1
  615. package/src/duckdb/src/storage/statistics/base_statistics.cpp +2 -3
  616. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  617. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  618. package/src/duckdb/src/storage/storage_info.cpp +19 -19
  619. package/src/duckdb/src/storage/storage_manager.cpp +18 -13
  620. package/src/duckdb/src/storage/table/chunk_info.cpp +11 -3
  621. package/src/duckdb/src/storage/table/column_data.cpp +88 -66
  622. package/src/duckdb/src/storage/table/row_group.cpp +7 -7
  623. package/src/duckdb/src/storage/table/row_version_manager.cpp +2 -2
  624. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -0
  625. package/src/duckdb/src/storage/table/update_segment.cpp +3 -1
  626. package/src/duckdb/src/storage/table_index_list.cpp +6 -1
  627. package/src/duckdb/src/storage/temporary_file_manager.cpp +1 -1
  628. package/src/duckdb/src/storage/wal_replay.cpp +8 -7
  629. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -4
  630. package/src/duckdb/src/transaction/cleanup_state.cpp +10 -3
  631. package/src/duckdb/src/transaction/commit_state.cpp +11 -4
  632. package/src/duckdb/src/transaction/duck_transaction.cpp +23 -3
  633. package/src/duckdb/src/transaction/rollback_state.cpp +1 -1
  634. package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
  635. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -1
  636. package/src/duckdb/third_party/fmt/include/fmt/core.h +0 -5
  637. package/src/duckdb/third_party/fsst/fsst.h +1 -1
  638. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -140
  639. package/src/duckdb/third_party/fsst/libfsst.hpp +0 -13
  640. package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +4 -0
  641. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +8 -1
  642. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  643. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15401 -15354
  644. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +299 -538
  645. package/src/duckdb/third_party/lz4/lz4.cpp +2605 -0
  646. package/src/duckdb/third_party/lz4/lz4.hpp +843 -0
  647. package/src/duckdb/third_party/parquet/parquet_types.cpp +3 -0
  648. package/src/duckdb/third_party/parquet/parquet_types.h +2 -1
  649. package/src/duckdb/third_party/re2/re2/bitmap256.cc +44 -0
  650. package/src/duckdb/third_party/re2/re2/bitmap256.h +3 -35
  651. package/src/duckdb/third_party/re2/re2/bitstate.cc +31 -24
  652. package/src/duckdb/third_party/re2/re2/compile.cc +146 -164
  653. package/src/duckdb/third_party/re2/re2/dfa.cc +174 -181
  654. package/src/duckdb/third_party/re2/re2/filtered_re2.cc +19 -3
  655. package/src/duckdb/third_party/re2/re2/filtered_re2.h +27 -23
  656. package/src/duckdb/third_party/re2/re2/mimics_pcre.cc +21 -11
  657. package/src/duckdb/third_party/re2/re2/nfa.cc +91 -131
  658. package/src/duckdb/third_party/re2/re2/onepass.cc +11 -10
  659. package/src/duckdb/third_party/re2/re2/parse.cc +171 -154
  660. package/src/duckdb/third_party/re2/re2/perl_groups.cc +35 -35
  661. package/src/duckdb/third_party/re2/re2/pod_array.h +55 -0
  662. package/src/duckdb/third_party/re2/re2/prefilter.cc +40 -40
  663. package/src/duckdb/third_party/re2/re2/prefilter.h +24 -2
  664. package/src/duckdb/third_party/re2/re2/prefilter_tree.cc +70 -84
  665. package/src/duckdb/third_party/re2/re2/prefilter_tree.h +5 -4
  666. package/src/duckdb/third_party/re2/re2/prog.cc +315 -58
  667. package/src/duckdb/third_party/re2/re2/prog.h +77 -44
  668. package/src/duckdb/third_party/re2/re2/re2.cc +333 -221
  669. package/src/duckdb/third_party/re2/re2/re2.h +277 -201
  670. package/src/duckdb/third_party/re2/re2/regexp.cc +137 -105
  671. package/src/duckdb/third_party/re2/re2/regexp.h +45 -40
  672. package/src/duckdb/third_party/re2/re2/set.cc +40 -17
  673. package/src/duckdb/third_party/re2/re2/set.h +11 -6
  674. package/src/duckdb/third_party/re2/re2/simplify.cc +50 -41
  675. package/src/duckdb/third_party/re2/re2/sparse_array.h +392 -0
  676. package/src/duckdb/third_party/re2/re2/sparse_set.h +264 -0
  677. package/src/duckdb/third_party/re2/re2/stringpiece.cc +1 -1
  678. package/src/duckdb/third_party/re2/re2/stringpiece.h +11 -8
  679. package/src/duckdb/third_party/re2/re2/tostring.cc +8 -6
  680. package/src/duckdb/third_party/re2/re2/unicode_casefold.cc +39 -10
  681. package/src/duckdb/third_party/re2/re2/unicode_casefold.h +1 -1
  682. package/src/duckdb/third_party/re2/re2/unicode_groups.cc +5019 -4566
  683. package/src/duckdb/third_party/re2/re2/unicode_groups.h +1 -1
  684. package/src/duckdb/third_party/re2/re2/walker-inl.h +21 -20
  685. package/src/duckdb/third_party/re2/util/logging.h +14 -18
  686. package/src/duckdb/third_party/re2/util/mix.h +4 -4
  687. package/src/duckdb/third_party/re2/util/mutex.h +48 -15
  688. package/src/duckdb/third_party/re2/util/rune.cc +5 -5
  689. package/src/duckdb/third_party/re2/util/strutil.cc +1 -16
  690. package/src/duckdb/third_party/re2/util/strutil.h +1 -3
  691. package/src/duckdb/third_party/re2/util/utf.h +1 -1
  692. package/src/duckdb/third_party/re2/util/util.h +9 -1
  693. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +2 -0
  694. package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +36 -1
  695. package/src/duckdb/ub_src_common.cpp +0 -2
  696. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  697. package/src/duckdb/ub_src_planner_expression.cpp +2 -0
  698. package/src/duckdb/ub_src_planner_expression_binder.cpp +2 -0
  699. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -39,33 +39,27 @@
39
39
  #include "util/logging.h"
40
40
  #include "util/mix.h"
41
41
  #include "util/mutex.h"
42
- #include "util/pod_array.h"
43
- #include "util/sparse_set.h"
44
42
  #include "util/strutil.h"
43
+ #include "re2/pod_array.h"
45
44
  #include "re2/prog.h"
45
+ #include "re2/re2.h"
46
+ #include "re2/sparse_set.h"
46
47
  #include "re2/stringpiece.h"
47
48
 
48
49
  // Silence "zero-sized array in struct/union" warning for DFA::State::next_.
49
50
  #ifdef _MSC_VER
50
- //#pragma warning(disable: 4200)
51
+ #pragma warning(disable: 4200)
51
52
  #endif
52
53
 
53
54
  namespace duckdb_re2 {
54
55
 
55
- #if !defined(__linux__) /* only Linux seems to have memrchr */
56
- static void* memrchr(const void* s, int c, size_t n) {
57
- const unsigned char* p = (const unsigned char*)s;
58
- for (p += n; n > 0; n--)
59
- if (*--p == c)
60
- return (void*)p;
61
-
62
- return NULL;
63
- }
64
- #endif
65
-
66
56
  // Controls whether the DFA should bail out early if the NFA would be faster.
67
57
  static bool dfa_should_bail_when_slow = true;
68
58
 
59
+ void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) {
60
+ dfa_should_bail_when_slow = b;
61
+ }
62
+
69
63
  // A DFA implementation of a regular expression program.
70
64
  // Since this is entirely a forward declaration mandated by C++,
71
65
  // some of the comments here are better understood after reading
@@ -115,7 +109,6 @@ class DFA {
115
109
  // byte c, the next state should be s->next_[c].
116
110
  struct State {
117
111
  inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
118
- void SaveMatch(std::vector<int>* v);
119
112
 
120
113
  int* inst_; // Instruction pointers in the state.
121
114
  int ninst_; // # of inst_ pointers.
@@ -125,17 +118,15 @@ class DFA {
125
118
 
126
119
  // Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
127
120
  // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
128
- std::atomic<State*> next_[1]; // Outgoing arrows from State,
121
+ #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
122
+ std::atomic<State*> next_[0]; // Outgoing arrows from State,
123
+ #else
124
+ std::atomic<State*> next_[]; // Outgoing arrows from State,
125
+ #endif
126
+
129
127
  // one per input byte class
130
128
  };
131
129
 
132
- // Marks separate thread groups of different priority
133
- // in the work queue when in leftmost-longest matching mode.
134
- constexpr static int32_t Mark = -1;
135
- // Separates the match IDs from the instructions in inst_.
136
- // Used only for "many match" DFA states.
137
- constexpr static int32_t MatchSep = -2;
138
-
139
130
  enum {
140
131
  kByteEndText = 256, // imaginary byte at end of text
141
132
 
@@ -176,11 +167,8 @@ class DFA {
176
167
  typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
177
168
 
178
169
  private:
179
- // Special "first_byte" values for a state. (Values >= 0 denote actual bytes.)
180
- enum {
181
- kFbUnknown = -1, // No analysis has been performed.
182
- kFbNone = -2, // The first-byte trick cannot be used.
183
- };
170
+ // Make it easier to swap in a scalable reader-writer mutex.
171
+ using CacheMutex = Mutex;
184
172
 
185
173
  enum {
186
174
  // Indices into start_ for unanchored searches.
@@ -248,25 +236,26 @@ class DFA {
248
236
  struct SearchParams {
249
237
  SearchParams(const StringPiece& text, const StringPiece& context,
250
238
  RWLocker* cache_lock)
251
- : text(text), context(context),
239
+ : text(text),
240
+ context(context),
252
241
  anchored(false),
242
+ can_prefix_accel(false),
253
243
  want_earliest_match(false),
254
244
  run_forward(false),
255
245
  start(NULL),
256
- first_byte(kFbUnknown),
257
246
  cache_lock(cache_lock),
258
247
  failed(false),
259
248
  ep(NULL),
260
- matches(NULL) { }
249
+ matches(NULL) {}
261
250
 
262
251
  StringPiece text;
263
252
  StringPiece context;
264
253
  bool anchored;
254
+ bool can_prefix_accel;
265
255
  bool want_earliest_match;
266
256
  bool run_forward;
267
257
  State* start;
268
- int first_byte;
269
- RWLocker *cache_lock;
258
+ RWLocker* cache_lock;
270
259
  bool failed; // "out" parameter: whether search gave up
271
260
  const char* ep; // "out" parameter: end pointer for match
272
261
  SparseSet* matches;
@@ -277,15 +266,13 @@ class DFA {
277
266
  };
278
267
 
279
268
  // Before each search, the parameters to Search are analyzed by
280
- // AnalyzeSearch to determine the state in which to start and the
281
- // "first_byte" for that state, if any.
269
+ // AnalyzeSearch to determine the state in which to start.
282
270
  struct StartInfo {
283
- StartInfo() : start(NULL), first_byte(kFbUnknown) {}
284
- State* start;
285
- std::atomic<int> first_byte;
271
+ StartInfo() : start(NULL) {}
272
+ std::atomic<State*> start;
286
273
  };
287
274
 
288
- // Fills in params->start and params->first_byte using
275
+ // Fills in params->start and params->can_prefix_accel using
289
276
  // the other search parameters. Returns true on success,
290
277
  // false on failure.
291
278
  // cache_mutex_.r <= L < mutex_
@@ -296,10 +283,10 @@ class DFA {
296
283
  // The generic search loop, inlined to create specialized versions.
297
284
  // cache_mutex_.r <= L < mutex_
298
285
  // Might unlock and relock cache_mutex_ via params->cache_lock.
299
- inline bool InlinedSearchLoop(SearchParams* params,
300
- bool have_first_byte,
301
- bool want_earliest_match,
302
- bool run_forward);
286
+ template <bool can_prefix_accel,
287
+ bool want_earliest_match,
288
+ bool run_forward>
289
+ inline bool InlinedSearchLoop(SearchParams* params);
303
290
 
304
291
  // The specialized versions of InlinedSearchLoop. The three letters
305
292
  // at the ends of the name denote the true/false values used as the
@@ -321,13 +308,6 @@ class DFA {
321
308
  // Might unlock and relock cache_mutex_ via params->cache_lock.
322
309
  bool FastSearchLoop(SearchParams* params);
323
310
 
324
- // For debugging, a slow search loop that calls InlinedSearchLoop
325
- // directly -- because the booleans passed are not constants, the
326
- // loop is not specialized like the SearchFFF etc. versions, so it
327
- // runs much more slowly. Useful only for debugging.
328
- // cache_mutex_.r <= L < mutex_
329
- // Might unlock and relock cache_mutex_ via params->cache_lock.
330
- bool SlowSearchLoop(SearchParams* params);
331
311
 
332
312
  // Looks up bytes in bytemap_ but handles case c == kByteEndText too.
333
313
  int ByteMap(int c) {
@@ -354,11 +334,14 @@ class DFA {
354
334
  // while holding cache_mutex_ for writing, to avoid interrupting other
355
335
  // readers. Any State* pointers are only valid while cache_mutex_
356
336
  // is held.
357
- Mutex cache_mutex_;
337
+ CacheMutex cache_mutex_;
358
338
  int64_t mem_budget_; // Total memory budget for all States.
359
339
  int64_t state_budget_; // Amount of memory remaining for new States.
360
340
  StateSet state_cache_; // All States computed so far.
361
341
  StartInfo start_[kMaxStart];
342
+
343
+ DFA(const DFA&) = delete;
344
+ DFA& operator=(const DFA&) = delete;
362
345
  };
363
346
 
364
347
  // Shorthand for casting to uint8_t*.
@@ -368,6 +351,17 @@ static inline const uint8_t* BytePtr(const void* v) {
368
351
 
369
352
  // Work queues
370
353
 
354
+ // Marks separate thread groups of different priority
355
+ // in the work queue when in leftmost-longest matching mode.
356
+ //#define Mark (-1)
357
+ constexpr auto Mark = -1;
358
+
359
+
360
+ // Separates the match IDs from the instructions in inst_.
361
+ // Used only for "many match" DFA states.
362
+ //#define MatchSep (-2)
363
+ constexpr auto MatchSep = -2;
364
+
371
365
  // Internally, the DFA uses a sparse array of
372
366
  // program instruction pointers as a work queue.
373
367
  // In leftmost longest mode, marks separate sections
@@ -498,10 +492,10 @@ std::string DFA::DumpWorkq(Workq* q) {
498
492
  const char* sep = "";
499
493
  for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
500
494
  if (q->is_mark(*it)) {
501
- StringAppendF(&s, "|");
495
+ s += "|";
502
496
  sep = "";
503
497
  } else {
504
- StringAppendF(&s, "%s%d", sep, *it);
498
+ s += StringPrintf("%s%d", sep, *it);
505
499
  sep = ",";
506
500
  }
507
501
  }
@@ -518,20 +512,20 @@ std::string DFA::DumpState(State* state) {
518
512
  return "*";
519
513
  std::string s;
520
514
  const char* sep = "";
521
- StringAppendF(&s, "(%p)", state);
515
+ s += StringPrintf("(%p)", state);
522
516
  for (int i = 0; i < state->ninst_; i++) {
523
517
  if (state->inst_[i] == Mark) {
524
- StringAppendF(&s, "|");
518
+ s += "|";
525
519
  sep = "";
526
520
  } else if (state->inst_[i] == MatchSep) {
527
- StringAppendF(&s, "||");
521
+ s += "||";
528
522
  sep = "";
529
523
  } else {
530
- StringAppendF(&s, "%s%d", sep, state->inst_[i]);
524
+ s += StringPrintf("%s%d", sep, state->inst_[i]);
531
525
  sep = ",";
532
526
  }
533
527
  }
534
- StringAppendF(&s, " flag=%#x", state->flag_);
528
+ s += StringPrintf(" flag=%#x", state->flag_);
535
529
  return s;
536
530
  }
537
531
 
@@ -602,11 +596,12 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
602
596
  // Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
603
597
  // those are the only operators with any effect in
604
598
  // RunWorkqOnEmptyString or RunWorkqOnByte.
605
- int* inst = new int[q->size()];
599
+ PODArray<int> inst(q->size());
606
600
  int n = 0;
607
601
  uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
608
602
  bool sawmatch = false; // whether queue contains guaranteed kInstMatch
609
603
  bool sawmark = false; // whether queue contains a Mark
604
+
610
605
  for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
611
606
  int id = *it;
612
607
  if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id)))
@@ -630,7 +625,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
630
625
  (it == q->begin() && ip->greedy(prog_))) &&
631
626
  (kind_ != Prog::kLongestMatch || !sawmark) &&
632
627
  (flag & kFlagMatch)) {
633
- delete[] inst;
634
628
  return FullMatchState;
635
629
  }
636
630
  FALLTHROUGH_INTENDED;
@@ -675,7 +669,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
675
669
  // the execution loop can stop early. This is only okay
676
670
  // if the state is *not* a matching state.
677
671
  if (n == 0 && flag == 0) {
678
- delete[] inst;
679
672
  return DeadState;
680
673
  }
681
674
 
@@ -683,7 +676,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
683
676
  // unordered state sets separated by Marks. Sort each set
684
677
  // to canonicalize, to reduce the number of distinct sets stored.
685
678
  if (kind_ == Prog::kLongestMatch) {
686
- int* ip = inst;
679
+ int* ip = inst.data();
687
680
  int* ep = ip + n;
688
681
  while (ip < ep) {
689
682
  int* markp = ip;
@@ -696,6 +689,15 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
696
689
  }
697
690
  }
698
691
 
692
+ // If we're in many match mode, canonicalize for similar reasons:
693
+ // we have an unordered set of states (i.e. we don't have Marks)
694
+ // and sorting will reduce the number of distinct sets stored.
695
+ if (kind_ == Prog::kManyMatch) {
696
+ int* ip = inst.data();
697
+ int* ep = ip + n;
698
+ std::sort(ip, ep);
699
+ }
700
+
699
701
  // Append MatchSep and the match IDs in mq if necessary.
700
702
  if (mq != NULL) {
701
703
  inst[n++] = MatchSep;
@@ -710,8 +712,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
710
712
  // Save the needed empty-width flags in the top bits for use later.
711
713
  flag |= needflags << kFlagNeedShift;
712
714
 
713
- State* state = CachedState(inst, n, flag);
714
- delete[] inst;
715
+ State* state = CachedState(inst.data(), n, flag);
715
716
  return state;
716
717
  }
717
718
 
@@ -940,8 +941,21 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
940
941
  break;
941
942
 
942
943
  case kInstByteRange: // can follow if c is in range
943
- if (ip->Matches(c))
944
- AddToQueue(newq, ip->out(), flag);
944
+ if (!ip->Matches(c))
945
+ break;
946
+ AddToQueue(newq, ip->out(), flag);
947
+ if (ip->hint() != 0) {
948
+ // We have a hint, but we must cancel out the
949
+ // increment that will occur after the break.
950
+ i += ip->hint() - 1;
951
+ } else {
952
+ // We have no hint, so we must find the end
953
+ // of the current list and then skip to it.
954
+ Prog::Inst* ip0 = ip;
955
+ while (!ip->last())
956
+ ++ip;
957
+ i += ip - ip0;
958
+ }
945
959
  break;
946
960
 
947
961
  case kInstMatch:
@@ -956,6 +970,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
956
970
  break;
957
971
  }
958
972
  }
973
+
959
974
  }
960
975
 
961
976
  // Processes input byte c in state, returning new state.
@@ -1082,7 +1097,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
1082
1097
 
1083
1098
  class DFA::RWLocker {
1084
1099
  public:
1085
- explicit RWLocker(Mutex* mu);
1100
+ explicit RWLocker(CacheMutex* mu);
1086
1101
  ~RWLocker();
1087
1102
 
1088
1103
  // If the lock is only held for reading right now,
@@ -1092,19 +1107,19 @@ class DFA::RWLocker {
1092
1107
  void LockForWriting();
1093
1108
 
1094
1109
  private:
1095
- Mutex* mu_;
1110
+ CacheMutex* mu_;
1096
1111
  bool writing_;
1097
1112
 
1098
1113
  RWLocker(const RWLocker&) = delete;
1099
1114
  RWLocker& operator=(const RWLocker&) = delete;
1100
1115
  };
1101
1116
 
1102
- DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) {
1117
+ DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
1103
1118
  mu_->ReaderLock();
1104
1119
  }
1105
1120
 
1106
- // This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations
1107
- // does not support lock upgrade.
1121
+ // This function is marked as NO_THREAD_SAFETY_ANALYSIS because
1122
+ // the annotations don't support lock upgrade.
1108
1123
  void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
1109
1124
  if (!writing_) {
1110
1125
  mu_->ReaderUnlock();
@@ -1136,11 +1151,14 @@ void DFA::ResetCache(RWLocker* cache_lock) {
1136
1151
  // Re-acquire the cache_mutex_ for writing (exclusive use).
1137
1152
  cache_lock->LockForWriting();
1138
1153
 
1154
+ hooks::GetDFAStateCacheResetHook()({
1155
+ state_budget_,
1156
+ state_cache_.size(),
1157
+ });
1158
+
1139
1159
  // Clear the cache, reset the memory budget.
1140
- for (int i = 0; i < kMaxStart; i++) {
1141
- start_[i].start = NULL;
1142
- start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
1143
- }
1160
+ for (int i = 0; i < kMaxStart; i++)
1161
+ start_[i].start.store(NULL, std::memory_order_relaxed);
1144
1162
  ClearCache();
1145
1163
  mem_budget_ = state_budget_;
1146
1164
  }
@@ -1255,8 +1273,7 @@ DFA::State* DFA::StateSaver::Restore() {
1255
1273
  // situation, the DFA can do better than executing the simple loop.
1256
1274
  // Instead, it can call memchr to search very quickly for the byte c.
1257
1275
  // Whether the start state has this property is determined during a
1258
- // pre-compilation pass, and if so, the byte b is passed to the search
1259
- // loop as the "first_byte" argument, along with a boolean "have_first_byte".
1276
+ // pre-compilation pass and the "can_prefix_accel" argument is set.
1260
1277
  //
1261
1278
  // Fourth, the desired behavior is to search for the leftmost-best match
1262
1279
  // (approximately, the same one that Perl would find), which is not
@@ -1279,7 +1296,7 @@ DFA::State* DFA::StateSaver::Restore() {
1279
1296
  // inline it to create the specialized ones.
1280
1297
  //
1281
1298
  // Note that matches are delayed by one byte, to make it easier to
1282
- // accommodate match conditions depending on the next input byte (like $ and \b).
1299
+ // accomodate match conditions depending on the next input byte (like $ and \b).
1283
1300
  // When s->next[c]->IsMatch(), it means that there is a match ending just
1284
1301
  // *before* byte c.
1285
1302
 
@@ -1288,15 +1305,16 @@ DFA::State* DFA::StateSaver::Restore() {
1288
1305
  // The bools are equal to the same-named variables in params, but
1289
1306
  // making them function arguments lets the inliner specialize
1290
1307
  // this function to each combination (see two paragraphs above).
1291
- inline bool DFA::InlinedSearchLoop(SearchParams* params,
1292
- bool have_first_byte,
1293
- bool want_earliest_match,
1294
- bool run_forward) {
1308
+ template <bool can_prefix_accel,
1309
+ bool want_earliest_match,
1310
+ bool run_forward>
1311
+ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
1295
1312
  State* start = params->start;
1296
- const uint8_t* bp = BytePtr(params->text.begin()); // start of text
1297
- const uint8_t* p = bp; // text scanning point
1298
- const uint8_t* ep = BytePtr(params->text.end()); // end of text
1299
- const uint8_t* resetp = NULL; // p at last cache reset
1313
+ const uint8_t* bp = BytePtr(params->text.data()); // start of text
1314
+ const uint8_t* p = bp; // text scanning point
1315
+ const uint8_t* ep = BytePtr(params->text.data() +
1316
+ params->text.size()); // end of text
1317
+ const uint8_t* resetp = NULL; // p at last cache reset
1300
1318
  if (!run_forward) {
1301
1319
  using std::swap;
1302
1320
  swap(p, ep);
@@ -1326,22 +1344,15 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
1326
1344
  }
1327
1345
 
1328
1346
  while (p != ep) {
1329
- if (have_first_byte && s == start) {
1330
- // In start state, only way out is to find first_byte,
1331
- // so use optimized assembly in memchr to skip ahead.
1332
- // If first_byte isn't found, we can skip to the end
1333
- // of the string.
1334
- if (run_forward) {
1335
- if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
1336
- p = ep;
1337
- break;
1338
- }
1339
- } else {
1340
- if ((p = BytePtr(memrchr(ep, params->first_byte, p - ep))) == NULL) {
1341
- p = ep;
1342
- break;
1343
- }
1344
- p++;
1347
+
1348
+ if (can_prefix_accel && s == start) {
1349
+ // In start state, only way out is to find the prefix,
1350
+ // so we use prefix accel (e.g. memchr) to skip ahead.
1351
+ // If not found, we can skip to the end of the string.
1352
+ p = BytePtr(prog_->PrefixAccel(p, ep - p));
1353
+ if (p == NULL) {
1354
+ p = ep;
1355
+ break;
1345
1356
  }
1346
1357
  }
1347
1358
 
@@ -1380,9 +1391,11 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
1380
1391
  // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the
1381
1392
  // same at about 2 MB/s. Unless we're processing an average
1382
1393
  // of 10 bytes per state computation, fail so that RE2 can
1383
- // fall back to the NFA.
1394
+ // fall back to the NFA. However, RE2::Set cannot fall back,
1395
+ // so we just have to keep on keeping on in that case.
1384
1396
  if (dfa_should_bail_when_slow && resetp != NULL &&
1385
- static_cast<size_t>(p - resetp) < 10*state_cache_.size()) {
1397
+ static_cast<size_t>(p - resetp) < 10*state_cache_.size() &&
1398
+ kind_ != Prog::kManyMatch) {
1386
1399
  params->failed = true;
1387
1400
  return false;
1388
1401
  }
@@ -1446,17 +1459,18 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
1446
1459
 
1447
1460
  // Process one more byte to see if it triggers a match.
1448
1461
  // (Remember, matches are delayed one byte.)
1462
+
1449
1463
  int lastbyte;
1450
1464
  if (run_forward) {
1451
- if (params->text.end() == params->context.end())
1465
+ if (EndPtr(params->text) == EndPtr(params->context))
1452
1466
  lastbyte = kByteEndText;
1453
1467
  else
1454
- lastbyte = params->text.end()[0] & 0xFF;
1468
+ lastbyte = EndPtr(params->text)[0] & 0xFF;
1455
1469
  } else {
1456
- if (params->text.begin() == params->context.begin())
1470
+ if (BeginPtr(params->text) == BeginPtr(params->context))
1457
1471
  lastbyte = kByteEndText;
1458
1472
  else
1459
- lastbyte = params->text.begin()[-1] & 0xFF;
1473
+ lastbyte = BeginPtr(params->text)[-1] & 0xFF;
1460
1474
  }
1461
1475
 
1462
1476
  State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
@@ -1507,36 +1521,28 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
1507
1521
 
1508
1522
  // Inline specializations of the general loop.
1509
1523
  bool DFA::SearchFFF(SearchParams* params) {
1510
- return InlinedSearchLoop(params, 0, 0, 0);
1524
+ return InlinedSearchLoop<false, false, false>(params);
1511
1525
  }
1512
1526
  bool DFA::SearchFFT(SearchParams* params) {
1513
- return InlinedSearchLoop(params, 0, 0, 1);
1527
+ return InlinedSearchLoop<false, false, true>(params);
1514
1528
  }
1515
1529
  bool DFA::SearchFTF(SearchParams* params) {
1516
- return InlinedSearchLoop(params, 0, 1, 0);
1530
+ return InlinedSearchLoop<false, true, false>(params);
1517
1531
  }
1518
1532
  bool DFA::SearchFTT(SearchParams* params) {
1519
- return InlinedSearchLoop(params, 0, 1, 1);
1533
+ return InlinedSearchLoop<false, true, true>(params);
1520
1534
  }
1521
1535
  bool DFA::SearchTFF(SearchParams* params) {
1522
- return InlinedSearchLoop(params, 1, 0, 0);
1536
+ return InlinedSearchLoop<true, false, false>(params);
1523
1537
  }
1524
1538
  bool DFA::SearchTFT(SearchParams* params) {
1525
- return InlinedSearchLoop(params, 1, 0, 1);
1539
+ return InlinedSearchLoop<true, false, true>(params);
1526
1540
  }
1527
1541
  bool DFA::SearchTTF(SearchParams* params) {
1528
- return InlinedSearchLoop(params, 1, 1, 0);
1542
+ return InlinedSearchLoop<true, true, false>(params);
1529
1543
  }
1530
1544
  bool DFA::SearchTTT(SearchParams* params) {
1531
- return InlinedSearchLoop(params, 1, 1, 1);
1532
- }
1533
-
1534
- // For debugging, calls the general code directly.
1535
- bool DFA::SlowSearchLoop(SearchParams* params) {
1536
- return InlinedSearchLoop(params,
1537
- params->first_byte >= 0,
1538
- params->want_earliest_match,
1539
- params->run_forward);
1545
+ return InlinedSearchLoop<true, true, true>(params);
1540
1546
  }
1541
1547
 
1542
1548
  // For performance, calls the appropriate specialized version
@@ -1555,8 +1561,7 @@ bool DFA::FastSearchLoop(SearchParams* params) {
1555
1561
  &DFA::SearchTTT,
1556
1562
  };
1557
1563
 
1558
- bool have_first_byte = params->first_byte >= 0;
1559
- int index = 4 * have_first_byte +
1564
+ int index = 4 * params->can_prefix_accel +
1560
1565
  2 * params->want_earliest_match +
1561
1566
  1 * params->run_forward;
1562
1567
  return (this->*Searches[index])(params);
@@ -1594,7 +1599,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
1594
1599
  const StringPiece& context = params->context;
1595
1600
 
1596
1601
  // Sanity check: make sure that text lies within context.
1597
- if (text.begin() < context.begin() || text.end() > context.end()) {
1602
+ if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
1598
1603
  LOG(DFATAL) << "context does not contain text";
1599
1604
  params->start = DeadState;
1600
1605
  return true;
@@ -1604,13 +1609,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
1604
1609
  int start;
1605
1610
  uint32_t flags;
1606
1611
  if (params->run_forward) {
1607
- if (text.begin() == context.begin()) {
1612
+ if (BeginPtr(text) == BeginPtr(context)) {
1608
1613
  start = kStartBeginText;
1609
1614
  flags = kEmptyBeginText|kEmptyBeginLine;
1610
- } else if (text.begin()[-1] == '\n') {
1615
+ } else if (BeginPtr(text)[-1] == '\n') {
1611
1616
  start = kStartBeginLine;
1612
1617
  flags = kEmptyBeginLine;
1613
- } else if (Prog::IsWordChar(text.begin()[-1] & 0xFF)) {
1618
+ } else if (Prog::IsWordChar(BeginPtr(text)[-1] & 0xFF)) {
1614
1619
  start = kStartAfterWordChar;
1615
1620
  flags = kFlagLastWord;
1616
1621
  } else {
@@ -1618,13 +1623,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
1618
1623
  flags = 0;
1619
1624
  }
1620
1625
  } else {
1621
- if (text.end() == context.end()) {
1626
+ if (EndPtr(text) == EndPtr(context)) {
1622
1627
  start = kStartBeginText;
1623
1628
  flags = kEmptyBeginText|kEmptyBeginLine;
1624
- } else if (text.end()[0] == '\n') {
1629
+ } else if (EndPtr(text)[0] == '\n') {
1625
1630
  start = kStartBeginLine;
1626
1631
  flags = kEmptyBeginLine;
1627
- } else if (Prog::IsWordChar(text.end()[0] & 0xFF)) {
1632
+ } else if (Prog::IsWordChar(EndPtr(text)[0] & 0xFF)) {
1628
1633
  start = kStartAfterWordChar;
1629
1634
  flags = kFlagLastWord;
1630
1635
  } else {
@@ -1642,14 +1647,23 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
1642
1647
  if (!AnalyzeSearchHelper(params, info, flags)) {
1643
1648
  ResetCache(params->cache_lock);
1644
1649
  if (!AnalyzeSearchHelper(params, info, flags)) {
1645
- LOG(DFATAL) << "Failed to analyze start state.";
1646
1650
  params->failed = true;
1651
+ LOG(DFATAL) << "Failed to analyze start state.";
1647
1652
  return false;
1648
1653
  }
1649
1654
  }
1650
1655
 
1651
- params->start = info->start;
1652
- params->first_byte = info->first_byte.load(std::memory_order_acquire);
1656
+ params->start = info->start.load(std::memory_order_acquire);
1657
+
1658
+ // Even if we could prefix accel, we cannot do so when anchored and,
1659
+ // less obviously, we cannot do so when we are going to need flags.
1660
+ // This trick works only when there is a single byte that leads to a
1661
+ // different state!
1662
+ if (prog_->can_prefix_accel() &&
1663
+ !params->anchored &&
1664
+ params->start > SpecialStateMax &&
1665
+ params->start->flag_ >> kFlagNeedShift == 0)
1666
+ params->can_prefix_accel = true;
1653
1667
 
1654
1668
  return true;
1655
1669
  }
@@ -1658,47 +1672,25 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
1658
1672
  bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
1659
1673
  uint32_t flags) {
1660
1674
  // Quick check.
1661
- int fb = info->first_byte.load(std::memory_order_acquire);
1662
- if (fb != kFbUnknown)
1675
+ State* start = info->start.load(std::memory_order_acquire);
1676
+ if (start != NULL)
1663
1677
  return true;
1664
1678
 
1665
1679
  MutexLock l(&mutex_);
1666
- fb = info->first_byte.load(std::memory_order_relaxed);
1667
- if (fb != kFbUnknown)
1680
+ start = info->start.load(std::memory_order_relaxed);
1681
+ if (start != NULL)
1668
1682
  return true;
1669
1683
 
1670
1684
  q0_->clear();
1671
1685
  AddToQueue(q0_,
1672
1686
  params->anchored ? prog_->start() : prog_->start_unanchored(),
1673
1687
  flags);
1674
- info->start = WorkqToCachedState(q0_, NULL, flags);
1675
- if (info->start == NULL)
1688
+ start = WorkqToCachedState(q0_, NULL, flags);
1689
+ if (start == NULL)
1676
1690
  return false;
1677
1691
 
1678
- if (info->start == DeadState) {
1679
- // Synchronize with "quick check" above.
1680
- info->first_byte.store(kFbNone, std::memory_order_release);
1681
- return true;
1682
- }
1683
-
1684
- if (info->start == FullMatchState) {
1685
- // Synchronize with "quick check" above.
1686
- info->first_byte.store(kFbNone, std::memory_order_release); // will be ignored
1687
- return true;
1688
- }
1689
-
1690
- // Even if we have a first_byte, we cannot use it when anchored and,
1691
- // less obviously, we cannot use it when we are going to need flags.
1692
- // This trick works only when there is a single byte that leads to a
1693
- // different state!
1694
- int first_byte = prog_->first_byte();
1695
- if (first_byte == -1 ||
1696
- params->anchored ||
1697
- info->start->flag_ >> kFlagNeedShift != 0)
1698
- first_byte = kFbNone;
1699
-
1700
1692
  // Synchronize with "quick check" above.
1701
- info->first_byte.store(first_byte, std::memory_order_release);
1693
+ info->start.store(start, std::memory_order_release);
1702
1694
  return true;
1703
1695
  }
1704
1696
 
@@ -1733,9 +1725,9 @@ bool DFA::Search(const StringPiece& text,
1733
1725
  return false;
1734
1726
  if (params.start == FullMatchState) {
1735
1727
  if (run_forward == want_earliest_match)
1736
- *epp = text.begin();
1728
+ *epp = text.data();
1737
1729
  else
1738
- *epp = text.end();
1730
+ *epp = text.data() + text.size();
1739
1731
  return true;
1740
1732
  }
1741
1733
  bool ret = FastSearchLoop(&params);
@@ -1796,17 +1788,17 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
1796
1788
  *failed = false;
1797
1789
 
1798
1790
  StringPiece context = const_context;
1799
- if (context.begin() == NULL)
1791
+ if (context.data() == NULL)
1800
1792
  context = text;
1801
- bool carat = anchor_start();
1793
+ bool caret = anchor_start();
1802
1794
  bool dollar = anchor_end();
1803
1795
  if (reversed_) {
1804
1796
  using std::swap;
1805
- swap(carat, dollar);
1797
+ swap(caret, dollar);
1806
1798
  }
1807
- if (carat && context.begin() != text.begin())
1799
+ if (caret && BeginPtr(context) != BeginPtr(text))
1808
1800
  return false;
1809
- if (dollar && context.end() != text.end())
1801
+ if (dollar && EndPtr(context) != EndPtr(text))
1810
1802
  return false;
1811
1803
 
1812
1804
  // Handle full match by running an anchored longest match
@@ -1839,11 +1831,15 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
1839
1831
  bool matched = dfa->Search(text, context, anchored,
1840
1832
  want_earliest_match, !reversed_,
1841
1833
  failed, &ep, matches);
1842
- if (*failed)
1834
+ if (*failed) {
1835
+ hooks::GetDFASearchFailureHook()({
1836
+ // Nothing yet...
1837
+ });
1843
1838
  return false;
1839
+ }
1844
1840
  if (!matched)
1845
1841
  return false;
1846
- if (endmatch && ep != (reversed_ ? text.begin() : text.end()))
1842
+ if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
1847
1843
  return false;
1848
1844
 
1849
1845
  // If caller cares, record the boundary of the match.
@@ -1851,10 +1847,11 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
1851
1847
  // as the beginning.
1852
1848
  if (match0) {
1853
1849
  if (reversed_)
1854
- *match0 = StringPiece(ep, static_cast<size_t>(text.end() - ep));
1850
+ *match0 =
1851
+ StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
1855
1852
  else
1856
1853
  *match0 =
1857
- StringPiece(text.begin(), static_cast<size_t>(ep - text.begin()));
1854
+ StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
1858
1855
  }
1859
1856
  return true;
1860
1857
  }
@@ -1932,10 +1929,6 @@ int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) {
1932
1929
  return GetDFA(kind)->BuildAllStates(cb);
1933
1930
  }
1934
1931
 
1935
- void Prog::TEST_dfa_should_bail_when_slow(bool b) {
1936
- dfa_should_bail_when_slow = b;
1937
- }
1938
-
1939
1932
  // Computes min and max for matching string.
1940
1933
  // Won't return strings bigger than maxlen.
1941
1934
  bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
@@ -2081,4 +2074,4 @@ bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
2081
2074
  return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
2082
2075
  }
2083
2076
 
2084
- } // namespace duckdb_re2
2077
+ } // namespace re2