duckdb 0.10.2-dev0.0 → 0.10.2-dev3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (699)
  1. package/binding.gyp +22 -18
  2. package/binding.gyp.in +3 -0
  3. package/package.json +1 -1
  4. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -1
  5. package/src/duckdb/extension/icu/icu_extension.cpp +6 -2
  6. package/src/duckdb/extension/json/buffered_json_reader.cpp +10 -3
  7. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
  8. package/src/duckdb/extension/json/include/json_scan.hpp +13 -7
  9. package/src/duckdb/extension/json/include/json_serializer.hpp +5 -4
  10. package/src/duckdb/extension/json/include/json_structure.hpp +3 -3
  11. package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +15 -5
  12. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +15 -6
  13. package/src/duckdb/extension/json/json_functions/json_structure.cpp +21 -20
  14. package/src/duckdb/extension/json/json_functions/read_json.cpp +37 -3
  15. package/src/duckdb/extension/json/json_functions.cpp +7 -2
  16. package/src/duckdb/extension/json/json_scan.cpp +57 -33
  17. package/src/duckdb/extension/parquet/column_reader.cpp +12 -3
  18. package/src/duckdb/extension/parquet/column_writer.cpp +44 -7
  19. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +5 -1
  20. package/src/duckdb/extension/parquet/parquet_extension.cpp +30 -3
  21. package/src/duckdb/extension/parquet/parquet_metadata.cpp +1 -1
  22. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -2
  23. package/src/duckdb/extension/parquet/zstd_file_system.cpp +1 -1
  24. package/src/duckdb/src/catalog/catalog.cpp +5 -1
  25. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +21 -5
  26. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -9
  27. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +3 -7
  28. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +1 -1
  29. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +6 -7
  30. package/src/duckdb/src/catalog/catalog_entry.cpp +8 -0
  31. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -0
  32. package/src/duckdb/src/catalog/catalog_set.cpp +2 -2
  33. package/src/duckdb/src/catalog/default/default_functions.cpp +6 -6
  34. package/src/duckdb/src/catalog/default/default_schemas.cpp +1 -1
  35. package/src/duckdb/src/catalog/default/default_views.cpp +7 -7
  36. package/src/duckdb/src/catalog/dependency_catalog_set.cpp +2 -1
  37. package/src/duckdb/src/catalog/dependency_list.cpp +92 -8
  38. package/src/duckdb/src/catalog/dependency_manager.cpp +53 -68
  39. package/src/duckdb/src/catalog/duck_catalog.cpp +1 -1
  40. package/src/duckdb/src/common/adbc/adbc.cpp +287 -45
  41. package/src/duckdb/src/common/arrow/appender/union_data.cpp +2 -2
  42. package/src/duckdb/src/common/box_renderer.cpp +12 -12
  43. package/src/duckdb/src/common/crypto/md5.cpp +2 -1
  44. package/src/duckdb/src/common/enum_util.cpp +307 -1
  45. package/src/duckdb/src/common/enums/expression_type.cpp +4 -0
  46. package/src/duckdb/src/common/enums/optimizer_type.cpp +1 -1
  47. package/src/duckdb/src/common/file_system.cpp +60 -13
  48. package/src/duckdb/src/common/filename_pattern.cpp +13 -13
  49. package/src/duckdb/src/common/gzip_file_system.cpp +1 -1
  50. package/src/duckdb/src/common/http_state.cpp +1 -1
  51. package/src/duckdb/src/common/local_file_system.cpp +72 -71
  52. package/src/duckdb/src/common/multi_file_reader.cpp +48 -28
  53. package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
  54. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +13 -1
  55. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +32 -13
  56. package/src/duckdb/src/common/string_util.cpp +2 -3
  57. package/src/duckdb/src/common/tree_renderer.cpp +32 -67
  58. package/src/duckdb/src/common/types/bit.cpp +6 -6
  59. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  60. package/src/duckdb/src/common/types/hash.cpp +6 -6
  61. package/src/duckdb/src/common/types/hyperloglog.cpp +2 -0
  62. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +13 -0
  63. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +5 -7
  64. package/src/duckdb/src/common/types/uuid.cpp +1 -1
  65. package/src/duckdb/src/common/types/vector.cpp +22 -14
  66. package/src/duckdb/src/common/types.cpp +8 -1
  67. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +20 -18
  68. package/src/duckdb/src/common/vector_operations/generators.cpp +1 -1
  69. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +267 -110
  70. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +52 -23
  71. package/src/duckdb/src/common/virtual_file_system.cpp +33 -20
  72. package/src/duckdb/src/core_functions/aggregate/algebraic/avg.cpp +2 -2
  73. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +3 -3
  74. package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +31 -16
  75. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +3 -0
  76. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -0
  77. package/src/duckdb/src/core_functions/core_functions.cpp +1 -1
  78. package/src/duckdb/src/core_functions/function_list.cpp +2 -2
  79. package/src/duckdb/src/core_functions/scalar/date/time_bucket.cpp +1 -1
  80. package/src/duckdb/src/core_functions/scalar/generic/system_functions.cpp +46 -17
  81. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
  82. package/src/duckdb/src/core_functions/scalar/list/flatten.cpp +82 -45
  83. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -2
  84. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +3 -0
  85. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +3 -2
  86. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +2 -4
  87. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +12 -21
  88. package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -10
  89. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +133 -66
  90. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +0 -2
  91. package/src/duckdb/src/execution/expression_executor.cpp +0 -4
  92. package/src/duckdb/src/execution/expression_executor_state.cpp +1 -1
  93. package/src/duckdb/src/execution/index/art/art.cpp +2 -2
  94. package/src/duckdb/src/execution/index/unknown_index.cpp +13 -13
  95. package/src/duckdb/src/execution/join_hashtable.cpp +1 -1
  96. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +0 -1
  97. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +10 -7
  98. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +32 -1
  99. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +16 -2
  100. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +7 -7
  101. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +354 -159
  102. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -2
  103. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +22 -7
  104. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +1 -1
  105. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +10 -10
  106. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +9 -18
  107. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +180 -47
  108. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +100 -58
  109. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +88 -21
  110. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +12 -13
  111. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +2 -0
  112. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +118 -23
  113. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +2 -2
  114. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -5
  115. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -1
  116. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +5 -5
  117. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -1
  118. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +0 -9
  119. package/src/duckdb/src/execution/physical_plan/plan_vacuum.cpp +18 -0
  120. package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -3
  121. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +81 -106
  122. package/src/duckdb/src/execution/reservoir_sample.cpp +1 -1
  123. package/src/duckdb/src/execution/window_executor.cpp +48 -28
  124. package/src/duckdb/src/execution/window_segment_tree.cpp +20 -23
  125. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +1 -1
  126. package/src/duckdb/src/function/cast/enum_casts.cpp +20 -55
  127. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +10 -9
  128. package/src/duckdb/src/function/cast_rules.cpp +9 -1
  129. package/src/duckdb/src/function/compression_config.cpp +1 -1
  130. package/src/duckdb/src/function/function_binder.cpp +45 -44
  131. package/src/duckdb/src/function/function_set.cpp +9 -9
  132. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -2
  133. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +21 -5
  134. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +2 -2
  135. package/src/duckdb/src/function/scalar/list/list_select.cpp +5 -2
  136. package/src/duckdb/src/function/scalar/list/list_zip.cpp +5 -4
  137. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +60 -32
  138. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +1 -1
  139. package/src/duckdb/src/function/scalar/strftime_format.cpp +31 -25
  140. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +6 -6
  141. package/src/duckdb/src/function/scalar/string/length.cpp +23 -2
  142. package/src/duckdb/src/function/scalar/string/like.cpp +1 -1
  143. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +1 -1
  144. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +3 -3
  145. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +1 -1
  146. package/src/duckdb/src/function/table/arrow.cpp +7 -1
  147. package/src/duckdb/src/function/table/copy_csv.cpp +17 -13
  148. package/src/duckdb/src/function/table/read_csv.cpp +52 -39
  149. package/src/duckdb/src/function/table/sniff_csv.cpp +7 -13
  150. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
  151. package/src/duckdb/src/function/table/system/duckdb_databases.cpp +7 -1
  152. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +12 -2
  153. package/src/duckdb/src/function/table/system/duckdb_sequences.cpp +1 -2
  154. package/src/duckdb/src/function/table/system/duckdb_tables.cpp +2 -2
  155. package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +9 -2
  156. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +10 -6
  157. package/src/duckdb/src/function/table/table_scan.cpp +1 -4
  158. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  159. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +6 -2
  160. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +1 -1
  162. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -1
  163. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +6 -1
  164. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +3 -3
  165. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +3 -3
  166. package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +26 -4
  167. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +39 -6
  168. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +19 -14
  169. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -1
  170. package/src/duckdb/src/include/duckdb/catalog/standard_entry.hpp +4 -0
  171. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -3
  172. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +1 -1
  173. package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +1 -1
  174. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +16 -16
  175. package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +0 -1
  176. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +32 -0
  177. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +4 -2
  178. package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +14 -10
  179. package/src/duckdb/src/include/duckdb/common/exception/catalog_exception.hpp +4 -4
  180. package/src/duckdb/src/include/duckdb/common/exception/conversion_exception.hpp +6 -6
  181. package/src/duckdb/src/include/duckdb/common/exception/http_exception.hpp +3 -3
  182. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -4
  183. package/src/duckdb/src/include/duckdb/common/exception/transaction_exception.hpp +2 -2
  184. package/src/duckdb/src/include/duckdb/common/exception.hpp +57 -58
  185. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +2 -2
  186. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +2 -2
  187. package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +134 -0
  188. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -6
  189. package/src/duckdb/src/include/duckdb/common/file_system.hpp +35 -36
  190. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -6
  191. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +13 -13
  192. package/src/duckdb/src/include/duckdb/common/helper.hpp +42 -47
  193. package/src/duckdb/src/include/duckdb/common/http_state.hpp +1 -1
  194. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +2 -2
  195. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +10 -10
  196. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +13 -10
  197. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +3 -3
  198. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +1 -1
  199. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +50 -24
  200. package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +12 -4
  201. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +5 -5
  202. package/src/duckdb/src/include/duckdb/common/pipe_file_system.hpp +1 -1
  203. package/src/duckdb/src/include/duckdb/common/platform.h +1 -1
  204. package/src/duckdb/src/include/duckdb/common/printer.hpp +5 -5
  205. package/src/duckdb/src/include/duckdb/common/profiler.hpp +2 -2
  206. package/src/duckdb/src/include/duckdb/common/progress_bar/display/terminal_progress_bar_display.hpp +5 -5
  207. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -1
  208. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +7 -7
  209. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +3 -0
  210. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +2 -2
  211. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +17 -6
  212. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +12 -0
  213. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +14 -0
  214. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +16 -0
  215. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +3 -0
  216. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +2 -0
  217. package/src/duckdb/src/include/duckdb/common/string_util.hpp +4 -4
  218. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +24 -25
  219. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +5 -5
  220. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +1 -2
  221. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +10 -10
  222. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +5 -5
  223. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +1 -1
  224. package/src/duckdb/src/include/duckdb/common/types/constraint_conflict_info.hpp +1 -2
  225. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -2
  226. package/src/duckdb/src/include/duckdb/common/types/date.hpp +1 -1
  227. package/src/duckdb/src/include/duckdb/common/types/hash.hpp +5 -5
  228. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +1 -1
  229. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +4 -4
  230. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +1 -1
  231. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +10 -3
  232. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  233. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +12 -12
  234. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +1 -1
  235. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +21 -20
  236. package/src/duckdb/src/include/duckdb/common/types/time.hpp +2 -2
  237. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +3 -3
  238. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +2 -2
  239. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -2
  240. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +15 -7
  241. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
  242. package/src/duckdb/src/include/duckdb/common/types.hpp +14 -10
  243. package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +1 -1
  245. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +15 -14
  246. package/src/duckdb/src/include/duckdb/common/vector.hpp +21 -21
  247. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +2 -0
  248. package/src/duckdb/src/include/duckdb/common/vector_operations/general_cast.hpp +2 -1
  249. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +2 -3
  250. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +3 -2
  251. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +50 -32
  252. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +10 -11
  253. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +1 -1
  254. package/src/duckdb/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp +13 -1
  255. package/src/duckdb/src/include/duckdb/core_functions/function_list.hpp +1 -1
  256. package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +2 -1
  257. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  258. package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +1 -1
  259. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  260. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +2 -5
  261. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +3 -3
  262. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +2 -2
  263. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  265. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +0 -3
  266. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +9 -6
  267. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +3 -0
  268. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +40 -22
  269. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +5 -0
  270. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +1 -0
  271. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +6 -4
  272. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
  273. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +4 -0
  274. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +3 -1
  275. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +0 -3
  276. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +58 -17
  277. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -1
  278. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
  279. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +3 -3
  280. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -1
  281. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +4 -1
  282. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +16 -5
  283. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +6 -6
  284. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +2 -1
  285. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +1 -1
  286. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  287. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  288. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +5 -5
  289. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +4 -4
  290. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +6 -6
  291. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +4 -4
  292. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +10 -13
  293. package/src/duckdb/src/include/duckdb/function/function.hpp +3 -3
  294. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +24 -23
  295. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +1 -1
  296. package/src/duckdb/src/include/duckdb/function/function_set.hpp +1 -1
  297. package/src/duckdb/src/include/duckdb/function/pragma_function.hpp +1 -1
  298. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +3 -3
  299. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +7 -7
  300. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +3 -3
  301. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +6 -6
  302. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +4 -4
  303. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -2
  304. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +1 -3
  305. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +1 -1
  306. package/src/duckdb/src/include/duckdb/function/table_function.hpp +12 -11
  307. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +66 -60
  308. package/src/duckdb/src/include/duckdb/main/appender.hpp +6 -6
  309. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +3 -1
  310. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +2 -2
  311. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
  312. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -2
  313. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +2 -2
  314. package/src/duckdb/src/include/duckdb/main/client_context.hpp +1 -1
  315. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +3 -2
  316. package/src/duckdb/src/include/duckdb/main/client_data.hpp +1 -0
  317. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +2 -1
  318. package/src/duckdb/src/include/duckdb/main/config.hpp +14 -5
  319. package/src/duckdb/src/include/duckdb/main/connection.hpp +27 -26
  320. package/src/duckdb/src/include/duckdb/main/database.hpp +19 -3
  321. package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +58 -0
  322. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +2 -2
  323. package/src/duckdb/src/include/duckdb/main/error_manager.hpp +6 -6
  324. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +8 -0
  325. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
  326. package/src/duckdb/src/include/duckdb/main/external_dependencies.hpp +2 -1
  327. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +8 -8
  328. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +2 -59
  329. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
  330. package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +1 -1
  331. package/src/duckdb/src/include/duckdb/main/relation.hpp +1 -1
  332. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +11 -7
  333. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +2 -2
  334. package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +4 -4
  335. package/src/duckdb/src/include/duckdb/main/settings.hpp +78 -70
  336. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +45 -0
  337. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +11 -13
  338. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  339. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -1
  340. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +1 -1
  341. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
  342. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +1 -1
  343. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +1 -1
  344. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +7 -7
  345. package/src/duckdb/src/include/duckdb/optimizer/matcher/function_matcher.hpp +7 -7
  346. package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +1 -1
  347. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  348. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +32 -30
  349. package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +1 -1
  350. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  351. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +1 -1
  352. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +2 -2
  353. package/src/duckdb/src/include/duckdb/parallel/pipeline_event.hpp +1 -1
  354. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -4
  355. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +49 -8
  356. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +1 -1
  357. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +1 -1
  358. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
  359. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +1 -1
  360. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +1 -1
  361. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +1 -1
  362. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_database_info.hpp +40 -0
  363. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +2 -1
  364. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +1 -0
  365. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +2 -1
  366. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +1 -1
  367. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_pragma_function_info.hpp +1 -1
  368. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp +2 -2
  369. package/src/duckdb/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp +3 -3
  370. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +3 -2
  371. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +2 -5
  372. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
  373. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +3 -3
  374. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +1 -1
  375. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +3 -5
  376. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -2
  377. package/src/duckdb/src/include/duckdb/planner/binder.hpp +16 -9
  378. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
  379. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
  380. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
  381. package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +34 -0
  382. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  383. package/src/duckdb/src/include/duckdb/planner/expression.hpp +1 -1
  384. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +5 -8
  385. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +3 -4
  386. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +3 -2
  387. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +3 -5
  388. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -14
  389. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +3 -6
  390. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_bind_state.hpp +52 -0
  391. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +4 -10
  392. package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +5 -2
  393. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -0
  394. package/src/duckdb/src/include/duckdb/planner/expression_iterator.hpp +2 -0
  395. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +3 -6
  396. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +2 -0
  397. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  398. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  399. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_database.hpp +4 -12
  400. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_secret.hpp +1 -1
  401. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +2 -2
  402. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
  403. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -1
  404. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +1 -1
  405. package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +2 -2
  406. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +2 -2
  407. package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +2 -2
  408. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +2 -2
  409. package/src/duckdb/src/include/duckdb/planner/operator/logical_pragma.hpp +1 -1
  410. package/src/duckdb/src/include/duckdb/planner/operator/logical_prepare.hpp +3 -2
  411. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +3 -3
  412. package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +1 -1
  413. package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +2 -1
  414. package/src/duckdb/src/include/duckdb/planner/operator/logical_vacuum.hpp +52 -0
  415. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -1
  416. package/src/duckdb/src/include/duckdb/planner/parsed_data/bound_create_table_info.hpp +2 -2
  417. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +5 -4
  418. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  419. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +4 -1
  420. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +1 -1
  421. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -3
  422. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +2 -3
  423. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +1 -1
  424. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
  425. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +5 -5
  426. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +1 -1
  427. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +1 -1
  428. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_fetch.hpp +1 -1
  429. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +1 -1
  430. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -3
  431. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +2 -2
  432. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +1 -1
  433. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_fetch.hpp +1 -1
  434. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -3
  435. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/flag_buffer.hpp +1 -1
  436. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/leading_zero_buffer.hpp +3 -3
  437. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp.hpp +3 -3
  438. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp +0 -1
  439. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  440. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +4 -4
  441. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas.hpp +4 -4
  442. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_analyze.hpp +0 -1
  443. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  444. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +3 -3
  445. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  446. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  447. package/src/duckdb/src/include/duckdb/storage/magic_bytes.hpp +1 -1
  448. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +3 -3
  449. package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +1 -1
  450. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +1 -1
  451. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +4 -1
  452. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +24 -24
  453. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +3 -3
  454. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -2
  455. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
  456. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -4
  457. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +9 -6
  458. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  459. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +2 -2
  460. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +1 -1
  461. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +10 -1
  462. package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -2
  463. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +1 -1
  464. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +1 -1
  465. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  466. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +3 -3
  467. package/src/duckdb/src/include/duckdb/storage/table/segment_lock.hpp +1 -1
  468. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +4 -4
  469. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +2 -0
  470. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +3 -2
  471. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +1 -1
  472. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  473. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -1
  474. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  475. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +20 -1
  476. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -1
  477. package/src/duckdb/src/include/duckdb/transaction/transaction_data.hpp +1 -1
  478. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +1 -1
  479. package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +1 -1
  480. package/src/duckdb/src/main/appender.cpp +1 -1
  481. package/src/duckdb/src/main/attached_database.cpp +11 -3
  482. package/src/duckdb/src/main/capi/arrow-c.cpp +6 -2
  483. package/src/duckdb/src/main/capi/cast/utils-c.cpp +1 -1
  484. package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
  485. package/src/duckdb/src/main/capi/pending-c.cpp +1 -1
  486. package/src/duckdb/src/main/capi/prepared-c.cpp +2 -2
  487. package/src/duckdb/src/main/capi/result-c.cpp +4 -4
  488. package/src/duckdb/src/main/client_context.cpp +4 -12
  489. package/src/duckdb/src/main/client_context_file_opener.cpp +15 -3
  490. package/src/duckdb/src/main/client_data.cpp +5 -0
  491. package/src/duckdb/src/main/config.cpp +82 -82
  492. package/src/duckdb/src/main/database.cpp +31 -7
  493. package/src/duckdb/src/main/database_manager.cpp +3 -2
  494. package/src/duckdb/src/main/database_path_and_type.cpp +4 -4
  495. package/src/duckdb/src/main/error_manager.cpp +1 -1
  496. package/src/duckdb/src/main/extension/extension_alias.cpp +9 -9
  497. package/src/duckdb/src/main/extension/extension_helper.cpp +10 -5
  498. package/src/duckdb/src/main/extension/extension_install.cpp +1 -1
  499. package/src/duckdb/src/main/extension/extension_load.cpp +111 -37
  500. package/src/duckdb/src/main/query_profiler.cpp +1 -118
  501. package/src/duckdb/src/main/secret/secret_manager.cpp +1 -2
  502. package/src/duckdb/src/main/secret/secret_storage.cpp +1 -1
  503. package/src/duckdb/src/main/settings/settings.cpp +81 -65
  504. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +8 -1
  505. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -0
  506. package/src/duckdb/src/optimizer/compressed_materialization.cpp +26 -28
  507. package/src/duckdb/src/optimizer/cse_optimizer.cpp +5 -5
  508. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +10 -6
  509. package/src/duckdb/src/optimizer/optimizer.cpp +14 -17
  510. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +16 -5
  511. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +7 -4
  512. package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +2 -2
  513. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +3 -3
  514. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
  515. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +1 -1
  516. package/src/duckdb/src/optimizer/statistics/expression/propagate_between.cpp +8 -8
  517. package/src/duckdb/src/optimizer/statistics/expression/propagate_case.cpp +1 -1
  518. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +1 -1
  519. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  520. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +7 -7
  521. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +6 -6
  522. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +1 -1
  523. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +2 -2
  524. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -10
  525. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +1 -1
  526. package/src/duckdb/src/optimizer/statistics/operator/propagate_cross_product.cpp +1 -1
  527. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +14 -7
  528. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +2 -2
  529. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +9 -9
  530. package/src/duckdb/src/optimizer/statistics/operator/propagate_limit.cpp +1 -1
  531. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  532. package/src/duckdb/src/optimizer/statistics/operator/propagate_projection.cpp +2 -2
  533. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +1 -1
  534. package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +1 -1
  535. package/src/duckdb/src/optimizer/statistics_propagator.cpp +39 -18
  536. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  537. package/src/duckdb/src/parallel/task_scheduler.cpp +8 -1
  538. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +4 -2
  539. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +64 -0
  540. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -0
  541. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +13 -5
  542. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +24 -0
  543. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -8
  544. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +2 -1
  545. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +3 -0
  546. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +2 -6
  547. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +2 -6
  548. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -6
  549. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +1 -1
  550. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +5 -2
  551. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +2 -8
  552. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +1 -6
  553. package/src/duckdb/src/parser/transform/statement/transform_use.cpp +3 -2
  554. package/src/duckdb/src/parser/transformer.cpp +14 -2
  555. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +40 -17
  556. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +8 -3
  557. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +32 -30
  558. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +28 -17
  559. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +146 -101
  560. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +28 -26
  561. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +29 -0
  562. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +10 -10
  563. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -2
  564. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +23 -28
  565. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +11 -12
  566. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +9 -11
  567. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +10 -13
  568. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +3 -3
  569. package/src/duckdb/src/planner/binder/statement/bind_set.cpp +3 -0
  570. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +66 -65
  571. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +54 -46
  572. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +3 -0
  573. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +1 -1
  574. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +5 -5
  575. package/src/duckdb/src/planner/binder.cpp +78 -6
  576. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +11 -3
  577. package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +22 -0
  578. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +11 -58
  579. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +5 -8
  580. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +5 -4
  581. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +5 -19
  582. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +8 -8
  583. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +42 -26
  584. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -22
  585. package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +52 -0
  586. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +43 -5
  587. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +19 -7
  588. package/src/duckdb/src/planner/logical_operator.cpp +20 -3
  589. package/src/duckdb/src/planner/operator/logical_copy_database.cpp +4 -14
  590. package/src/duckdb/src/planner/operator/logical_delete.cpp +1 -1
  591. package/src/duckdb/src/planner/operator/logical_get.cpp +1 -1
  592. package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
  593. package/src/duckdb/src/planner/operator/logical_update.cpp +1 -1
  594. package/src/duckdb/src/planner/operator/logical_vacuum.cpp +65 -0
  595. package/src/duckdb/src/planner/planner.cpp +4 -4
  596. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +26 -27
  597. package/src/duckdb/src/storage/arena_allocator.cpp +9 -0
  598. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +1 -1
  599. package/src/duckdb/src/storage/buffer_manager.cpp +2 -10
  600. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  601. package/src/duckdb/src/storage/checkpoint_manager.cpp +15 -8
  602. package/src/duckdb/src/storage/compression/bitpacking.cpp +6 -1
  603. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
  604. package/src/duckdb/src/storage/data_pointer.cpp +1 -1
  605. package/src/duckdb/src/storage/data_table.cpp +18 -7
  606. package/src/duckdb/src/storage/local_storage.cpp +8 -5
  607. package/src/duckdb/src/storage/magic_bytes.cpp +6 -5
  608. package/src/duckdb/src/storage/partial_block_manager.cpp +1 -1
  609. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -9
  610. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +12 -10
  611. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +23 -0
  612. package/src/duckdb/src/storage/single_file_block_manager.cpp +46 -19
  613. package/src/duckdb/src/storage/standard_buffer_manager.cpp +21 -5
  614. package/src/duckdb/src/storage/statistics/array_stats.cpp +1 -1
  615. package/src/duckdb/src/storage/statistics/base_statistics.cpp +2 -3
  616. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  617. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  618. package/src/duckdb/src/storage/storage_info.cpp +19 -19
  619. package/src/duckdb/src/storage/storage_manager.cpp +18 -13
  620. package/src/duckdb/src/storage/table/chunk_info.cpp +11 -3
  621. package/src/duckdb/src/storage/table/column_data.cpp +88 -66
  622. package/src/duckdb/src/storage/table/row_group.cpp +7 -7
  623. package/src/duckdb/src/storage/table/row_version_manager.cpp +2 -2
  624. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -0
  625. package/src/duckdb/src/storage/table/update_segment.cpp +3 -1
  626. package/src/duckdb/src/storage/table_index_list.cpp +6 -1
  627. package/src/duckdb/src/storage/temporary_file_manager.cpp +1 -1
  628. package/src/duckdb/src/storage/wal_replay.cpp +8 -7
  629. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -4
  630. package/src/duckdb/src/transaction/cleanup_state.cpp +10 -3
  631. package/src/duckdb/src/transaction/commit_state.cpp +11 -4
  632. package/src/duckdb/src/transaction/duck_transaction.cpp +23 -3
  633. package/src/duckdb/src/transaction/rollback_state.cpp +1 -1
  634. package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
  635. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -1
  636. package/src/duckdb/third_party/fmt/include/fmt/core.h +0 -5
  637. package/src/duckdb/third_party/fsst/fsst.h +1 -1
  638. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -140
  639. package/src/duckdb/third_party/fsst/libfsst.hpp +0 -13
  640. package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +4 -0
  641. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +8 -1
  642. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  643. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15401 -15354
  644. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +299 -538
  645. package/src/duckdb/third_party/lz4/lz4.cpp +2605 -0
  646. package/src/duckdb/third_party/lz4/lz4.hpp +843 -0
  647. package/src/duckdb/third_party/parquet/parquet_types.cpp +3 -0
  648. package/src/duckdb/third_party/parquet/parquet_types.h +2 -1
  649. package/src/duckdb/third_party/re2/re2/bitmap256.cc +44 -0
  650. package/src/duckdb/third_party/re2/re2/bitmap256.h +3 -35
  651. package/src/duckdb/third_party/re2/re2/bitstate.cc +31 -24
  652. package/src/duckdb/third_party/re2/re2/compile.cc +146 -164
  653. package/src/duckdb/third_party/re2/re2/dfa.cc +174 -181
  654. package/src/duckdb/third_party/re2/re2/filtered_re2.cc +19 -3
  655. package/src/duckdb/third_party/re2/re2/filtered_re2.h +27 -23
  656. package/src/duckdb/third_party/re2/re2/mimics_pcre.cc +21 -11
  657. package/src/duckdb/third_party/re2/re2/nfa.cc +91 -131
  658. package/src/duckdb/third_party/re2/re2/onepass.cc +11 -10
  659. package/src/duckdb/third_party/re2/re2/parse.cc +171 -154
  660. package/src/duckdb/third_party/re2/re2/perl_groups.cc +35 -35
  661. package/src/duckdb/third_party/re2/re2/pod_array.h +55 -0
  662. package/src/duckdb/third_party/re2/re2/prefilter.cc +40 -40
  663. package/src/duckdb/third_party/re2/re2/prefilter.h +24 -2
  664. package/src/duckdb/third_party/re2/re2/prefilter_tree.cc +70 -84
  665. package/src/duckdb/third_party/re2/re2/prefilter_tree.h +5 -4
  666. package/src/duckdb/third_party/re2/re2/prog.cc +315 -58
  667. package/src/duckdb/third_party/re2/re2/prog.h +77 -44
  668. package/src/duckdb/third_party/re2/re2/re2.cc +333 -221
  669. package/src/duckdb/third_party/re2/re2/re2.h +277 -201
  670. package/src/duckdb/third_party/re2/re2/regexp.cc +137 -105
  671. package/src/duckdb/third_party/re2/re2/regexp.h +45 -40
  672. package/src/duckdb/third_party/re2/re2/set.cc +40 -17
  673. package/src/duckdb/third_party/re2/re2/set.h +11 -6
  674. package/src/duckdb/third_party/re2/re2/simplify.cc +50 -41
  675. package/src/duckdb/third_party/re2/re2/sparse_array.h +392 -0
  676. package/src/duckdb/third_party/re2/re2/sparse_set.h +264 -0
  677. package/src/duckdb/third_party/re2/re2/stringpiece.cc +1 -1
  678. package/src/duckdb/third_party/re2/re2/stringpiece.h +11 -8
  679. package/src/duckdb/third_party/re2/re2/tostring.cc +8 -6
  680. package/src/duckdb/third_party/re2/re2/unicode_casefold.cc +39 -10
  681. package/src/duckdb/third_party/re2/re2/unicode_casefold.h +1 -1
  682. package/src/duckdb/third_party/re2/re2/unicode_groups.cc +5019 -4566
  683. package/src/duckdb/third_party/re2/re2/unicode_groups.h +1 -1
  684. package/src/duckdb/third_party/re2/re2/walker-inl.h +21 -20
  685. package/src/duckdb/third_party/re2/util/logging.h +14 -18
  686. package/src/duckdb/third_party/re2/util/mix.h +4 -4
  687. package/src/duckdb/third_party/re2/util/mutex.h +48 -15
  688. package/src/duckdb/third_party/re2/util/rune.cc +5 -5
  689. package/src/duckdb/third_party/re2/util/strutil.cc +1 -16
  690. package/src/duckdb/third_party/re2/util/strutil.h +1 -3
  691. package/src/duckdb/third_party/re2/util/utf.h +1 -1
  692. package/src/duckdb/third_party/re2/util/util.h +9 -1
  693. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +2 -0
  694. package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +36 -1
  695. package/src/duckdb/ub_src_common.cpp +0 -2
  696. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  697. package/src/duckdb/ub_src_planner_expression.cpp +2 -0
  698. package/src/duckdb/ub_src_planner_expression_binder.cpp +2 -0
  699. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -27,29 +27,29 @@
27
27
 
28
28
  #include "util/util.h"
29
29
  #include "util/logging.h"
30
- #include "util/pod_array.h"
31
30
  #include "util/strutil.h"
32
31
  #include "util/utf.h"
32
+ #include "re2/pod_array.h"
33
33
  #include "re2/regexp.h"
34
34
  #include "re2/stringpiece.h"
35
35
  #include "re2/unicode_casefold.h"
36
36
  #include "re2/unicode_groups.h"
37
37
  #include "re2/walker-inl.h"
38
38
 
39
- // #if defined(RE2_USE_ICU)
40
- // #include "unicode/uniset.h"
41
- // #include "unicode/unistr.h"
42
- // #include "unicode/utypes.h"
43
- // #endif
39
+ #if defined(RE2_USE_ICU)
40
+ //#include "unicode/uniset.h"
41
+ //#include "unicode/unistr.h"
42
+ //#include "unicode/utypes.h"
43
+ #endif
44
44
 
45
45
  namespace duckdb_re2 {
46
46
 
47
- // Reduce the maximum repeat count by an order of magnitude when fuzzing.
48
- #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
49
- static const int kMaxRepeat = 100;
50
- #else
51
- static const int kMaxRepeat = 1000;
52
- #endif
47
+ // Controls the maximum repeat count permitted by the parser.
48
+ static int maximum_repeat_count = 1000;
49
+
50
+ void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) {
51
+ maximum_repeat_count = i;
52
+ }
53
53
 
54
54
  // Regular expression parse state.
55
55
  // The list of parsed regexps so far is maintained as a vector of
@@ -93,7 +93,7 @@ class Regexp::ParseState {
93
93
  bool PushSimpleOp(RegexpOp op);
94
94
 
95
95
  // Pushes a ^ onto the stack.
96
- bool PushCarat();
96
+ bool PushCaret();
97
97
 
98
98
  // Pushes a \b (word == true) or \B (word == false) onto the stack.
99
99
  bool PushWordBoundary(bool word);
@@ -209,7 +209,7 @@ Regexp::ParseState::~ParseState() {
209
209
  next = re->down_;
210
210
  re->down_ = NULL;
211
211
  if (re->op() == kLeftParen)
212
- delete re->capture_.name_;
212
+ delete re->arguments.capture.name_;
213
213
  re->Decref();
214
214
  }
215
215
  }
@@ -222,10 +222,10 @@ Regexp* Regexp::ParseState::FinishRegexp(Regexp* re) {
222
222
  return NULL;
223
223
  re->down_ = NULL;
224
224
 
225
- if (re->op_ == kRegexpCharClass && re->char_class_.ccb_ != NULL) {
226
- CharClassBuilder* ccb = re->char_class_.ccb_;
227
- re->char_class_.ccb_ = NULL;
228
- re->char_class_.cc_ = ccb->GetCharClass();
225
+ if (re->op_ == kRegexpCharClass && re->arguments.char_class.ccb_ != NULL) {
226
+ CharClassBuilder* ccb = re->arguments.char_class.ccb_;
227
+ re->arguments.char_class.ccb_ = NULL;
228
+ re->arguments.char_class.cc_ = ccb->GetCharClass();
229
229
  delete ccb;
230
230
  }
231
231
 
@@ -242,20 +242,19 @@ bool Regexp::ParseState::PushRegexp(Regexp* re) {
242
242
  // single characters (e.g., [.] instead of \.), and some
243
243
  // analysis does better with fewer character classes.
244
244
  // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding.
245
- auto ccb = re->char_class_.ccb_;
246
- if (re->op_ == kRegexpCharClass && ccb != NULL) {
247
- ccb->RemoveAbove(rune_max_);
248
- if (ccb->size() == 1) {
249
- Rune r = ccb->begin()->lo;
245
+ if (re->op_ == kRegexpCharClass && re->arguments.char_class.ccb_ != NULL) {
246
+ re->arguments.char_class.ccb_->RemoveAbove(rune_max_);
247
+ if (re->arguments.char_class.ccb_->size() == 1) {
248
+ Rune r = re->arguments.char_class.ccb_->begin()->lo;
250
249
  re->Decref();
251
250
  re = new Regexp(kRegexpLiteral, flags_);
252
- re->rune_ = r;
253
- } else if (ccb->size() == 2) {
254
- Rune r = ccb->begin()->lo;
255
- if ('A' <= r && r <= 'Z' && ccb->Contains(r + 'a' - 'A')) {
251
+ re->arguments.rune_ = r;
252
+ } else if (re->arguments.char_class.ccb_->size() == 2) {
253
+ Rune r = re->arguments.char_class.ccb_->begin()->lo;
254
+ if ('A' <= r && r <= 'Z' && re->arguments.char_class.ccb_->Contains(r + 'a' - 'A')) {
256
255
  re->Decref();
257
256
  re = new Regexp(kRegexpLiteral, flags_ | FoldCase);
258
- re->rune_ = r + 'a' - 'A';
257
+ re->arguments.rune_ = r + 'a' - 'A';
259
258
  }
260
259
  }
261
260
  }
@@ -399,11 +398,11 @@ bool Regexp::ParseState::PushLiteral(Rune r) {
399
398
  // Do case folding if needed.
400
399
  if ((flags_ & FoldCase) && CycleFoldRune(r) != r) {
401
400
  Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
402
- re->char_class_.ccb_ = new CharClassBuilder;
401
+ re->arguments.char_class.ccb_ = new CharClassBuilder;
403
402
  Rune r1 = r;
404
403
  do {
405
404
  if (!(flags_ & NeverNL) || r != '\n') {
406
- re->char_class_.ccb_->AddRange(r, r);
405
+ re->arguments.char_class.ccb_->AddRange(r, r);
407
406
  }
408
407
  r = CycleFoldRune(r);
409
408
  } while (r != r1);
@@ -419,12 +418,12 @@ bool Regexp::ParseState::PushLiteral(Rune r) {
419
418
  return true;
420
419
 
421
420
  Regexp* re = new Regexp(kRegexpLiteral, flags_);
422
- re->rune_ = r;
421
+ re->arguments.rune_ = r;
423
422
  return PushRegexp(re);
424
423
  }
425
424
 
426
425
  // Pushes a ^ onto the stack.
427
- bool Regexp::ParseState::PushCarat() {
426
+ bool Regexp::ParseState::PushCaret() {
428
427
  if (flags_ & OneLine) {
429
428
  return PushSimpleOp(kRegexpBeginText);
430
429
  }
@@ -458,9 +457,9 @@ bool Regexp::ParseState::PushDot() {
458
457
  return PushSimpleOp(kRegexpAnyChar);
459
458
  // Rewrite . into [^\n]
460
459
  Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
461
- re->char_class_.ccb_ = new CharClassBuilder;
462
- re->char_class_.ccb_->AddRange(0, '\n' - 1);
463
- re->char_class_.ccb_->AddRange('\n' + 1, rune_max_);
460
+ re->arguments.char_class.ccb_ = new CharClassBuilder;
461
+ re->arguments.char_class.ccb_->AddRange(0, '\n' - 1);
462
+ re->arguments.char_class.ccb_->AddRange('\n' + 1, rune_max_);
464
463
  return PushRegexp(re);
465
464
  }
466
465
 
@@ -557,9 +556,10 @@ int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
557
556
  }
558
557
 
559
558
  int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
560
- // This should never be called, since we use Walk and not
561
- // WalkExponential.
559
+ // Should never be called: we use Walk(), not WalkExponential().
560
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
562
561
  LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
562
+ #endif
563
563
  return 0;
564
564
  }
565
565
 
@@ -568,7 +568,9 @@ int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
568
568
  bool Regexp::ParseState::PushRepetition(int min, int max,
569
569
  const StringPiece& s,
570
570
  bool nongreedy) {
571
- if ((max != -1 && max < min) || min > kMaxRepeat || max > kMaxRepeat) {
571
+ if ((max != -1 && max < min) ||
572
+ min > maximum_repeat_count ||
573
+ max > maximum_repeat_count) {
572
574
  status_->set_code(kRegexpRepeatSize);
573
575
  status_->set_error_arg(s);
574
576
  return false;
@@ -582,8 +584,8 @@ bool Regexp::ParseState::PushRepetition(int min, int max,
582
584
  if (nongreedy)
583
585
  fl = fl ^ NonGreedy;
584
586
  Regexp* re = new Regexp(kRegexpRepeat, fl);
585
- re->repeat_.min_ = min;
586
- re->repeat_.max_ = max;
587
+ re->arguments.repeat.min_ = min;
588
+ re->arguments.repeat.max_ = max;
587
589
  re->AllocSub(1);
588
590
  re->down_ = stacktop_->down_;
589
591
  re->sub()[0] = FinishRegexp(stacktop_);
@@ -591,7 +593,7 @@ bool Regexp::ParseState::PushRepetition(int min, int max,
591
593
  stacktop_ = re;
592
594
  if (min >= 2 || max >= 2) {
593
595
  RepetitionWalker w;
594
- if (w.Walk(stacktop_, kMaxRepeat) == 0) {
596
+ if (w.Walk(stacktop_, maximum_repeat_count) == 0) {
595
597
  status_->set_code(kRegexpRepeatSize);
596
598
  status_->set_error_arg(s);
597
599
  return false;
@@ -609,16 +611,16 @@ bool Regexp::ParseState::IsMarker(RegexpOp op) {
609
611
  // Pushes a marker onto the stack.
610
612
  bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {
611
613
  Regexp* re = new Regexp(kLeftParen, flags_);
612
- re->capture_.cap_ = ++ncap_;
614
+ re->arguments.capture.cap_ = ++ncap_;
613
615
  if (name.data() != NULL)
614
- re->capture_.name_ = new std::string(name);
616
+ re->arguments.capture.name_ = new std::string(name);
615
617
  return PushRegexp(re);
616
618
  }
617
619
 
618
620
  // Pushes a non-capturing marker onto the stack.
619
621
  bool Regexp::ParseState::DoLeftParenNoCapture() {
620
622
  Regexp* re = new Regexp(kLeftParen, flags_);
621
- re->capture_.cap_ = -1;
623
+ re->arguments.capture.cap_ = -1;
622
624
  return PushRegexp(re);
623
625
  }
624
626
 
@@ -685,7 +687,7 @@ bool Regexp::ParseState::DoRightParen() {
685
687
  if ((r1 = stacktop_) == NULL ||
686
688
  (r2 = r1->down_) == NULL ||
687
689
  r2->op() != kLeftParen) {
688
- status_->set_code(kRegexpMissingParen);
690
+ status_->set_code(kRegexpUnexpectedParen);
689
691
  status_->set_error_arg(whole_regexp_);
690
692
  return false;
691
693
  }
@@ -698,7 +700,7 @@ bool Regexp::ParseState::DoRightParen() {
698
700
  flags_ = re->parse_flags();
699
701
 
700
702
  // Rewrite LeftParen as capture if needed.
701
- if (re->capture_.cap_ > 0) {
703
+ if (re->arguments.capture.cap_ > 0) {
702
704
  re->op_ = kRegexpCapture;
703
705
  // re->cap_ is already set
704
706
  re->AllocSub(1);
@@ -781,12 +783,12 @@ Rune* Regexp::LeadingString(Regexp* re, int *nrune,
781
783
 
782
784
  if (re->op() == kRegexpLiteral) {
783
785
  *nrune = 1;
784
- return &re->rune_;
786
+ return &re->arguments.rune_;
785
787
  }
786
788
 
787
789
  if (re->op() == kRegexpLiteralString) {
788
- *nrune = re->literal_string_.nrunes_;
789
- return re->literal_string_.runes_;
790
+ *nrune = re->arguments.literal_string.nrunes_;
791
+ return re->arguments.literal_string.runes_;
790
792
  }
791
793
 
792
794
  *nrune = 0;
@@ -802,7 +804,7 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
802
804
  // limit on the size of a concatenation, so we should never
803
805
  // see more than two here.
804
806
  Regexp* stk[4];
805
- int d = 0;
807
+ size_t d = 0;
806
808
  while (re->op() == kRegexpConcat) {
807
809
  if (d < arraysize(stk))
808
810
  stk[d++] = re;
@@ -811,30 +813,30 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
811
813
 
812
814
  // Remove leading string from re.
813
815
  if (re->op() == kRegexpLiteral) {
814
- re->rune_ = 0;
816
+ re->arguments.rune_ = 0;
815
817
  re->op_ = kRegexpEmptyMatch;
816
818
  } else if (re->op() == kRegexpLiteralString) {
817
- if (n >= re->literal_string_.nrunes_) {
818
- delete[] re->literal_string_.runes_;
819
- re->literal_string_.runes_ = NULL;
820
- re->literal_string_.nrunes_ = 0;
819
+ if (n >= re->arguments.literal_string.nrunes_) {
820
+ delete[] re->arguments.literal_string.runes_;
821
+ re->arguments.literal_string.runes_ = NULL;
822
+ re->arguments.literal_string.nrunes_ = 0;
821
823
  re->op_ = kRegexpEmptyMatch;
822
- } else if (n == re->literal_string_.nrunes_ - 1) {
823
- Rune rune = re->literal_string_.runes_[re->literal_string_.nrunes_ - 1];
824
- delete[] re->literal_string_.runes_;
825
- re->literal_string_.runes_ = NULL;
826
- re->literal_string_.nrunes_ = 0;
827
- re->rune_ = rune;
824
+ } else if (n == re->arguments.literal_string.nrunes_ - 1) {
825
+ Rune rune = re->arguments.literal_string.runes_[re->arguments.literal_string.nrunes_ - 1];
826
+ delete[] re->arguments.literal_string.runes_;
827
+ re->arguments.literal_string.runes_ = NULL;
828
+ re->arguments.literal_string.nrunes_ = 0;
829
+ re->arguments.rune_ = rune;
828
830
  re->op_ = kRegexpLiteral;
829
831
  } else {
830
- re->literal_string_.nrunes_ -= n;
831
- memmove(re->literal_string_.runes_, re->literal_string_.runes_ + n, re->literal_string_.nrunes_ * sizeof re->literal_string_.runes_[0]);
832
+ re->arguments.literal_string.nrunes_ -= n;
833
+ memmove(re->arguments.literal_string.runes_, re->arguments.literal_string.runes_ + n, re->arguments.literal_string.nrunes_ * sizeof re->arguments.literal_string.runes_[0]);
832
834
  }
833
835
  }
834
836
 
835
837
  // If re is now empty, concatenations might simplify too.
836
- while (d-- > 0) {
837
- re = stk[d];
838
+ while (d > 0) {
839
+ re = stk[--d];
838
840
  Regexp** sub = re->sub();
839
841
  if (sub[0]->op() == kRegexpEmptyMatch) {
840
842
  sub[0]->Decref();
@@ -1288,28 +1290,28 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
1288
1290
 
1289
1291
  if (re2->op_ == kRegexpLiteral) {
1290
1292
  // convert into string
1291
- Rune rune = re2->rune_;
1293
+ Rune rune = re2->arguments.rune_;
1292
1294
  re2->op_ = kRegexpLiteralString;
1293
- re2->literal_string_.nrunes_ = 0;
1294
- re2->literal_string_.runes_ = NULL;
1295
+ re2->arguments.literal_string.nrunes_ = 0;
1296
+ re2->arguments.literal_string.runes_ = NULL;
1295
1297
  re2->AddRuneToString(rune);
1296
1298
  }
1297
1299
 
1298
1300
  // push re1 into re2.
1299
1301
  if (re1->op_ == kRegexpLiteral) {
1300
- re2->AddRuneToString(re1->rune_);
1302
+ re2->AddRuneToString(re1->arguments.rune_);
1301
1303
  } else {
1302
- for (int i = 0; i < re1->literal_string_.nrunes_; i++)
1303
- re2->AddRuneToString(re1->literal_string_.runes_[i]);
1304
- re1->literal_string_.nrunes_ = 0;
1305
- delete[] re1->literal_string_.runes_;
1306
- re1->literal_string_.runes_ = NULL;
1304
+ for (int i = 0; i < re1->arguments.literal_string.nrunes_; i++)
1305
+ re2->AddRuneToString(re1->arguments.literal_string.runes_[i]);
1306
+ re1->arguments.literal_string.nrunes_ = 0;
1307
+ delete[] re1->arguments.literal_string.runes_;
1308
+ re1->arguments.literal_string.runes_ = NULL;
1307
1309
  }
1308
1310
 
1309
1311
  // reuse re1 if possible
1310
1312
  if (r >= 0) {
1311
1313
  re1->op_ = kRegexpLiteral;
1312
- re1->rune_ = r;
1314
+ re1->arguments.rune_ = r;
1313
1315
  re1->parse_flags_ = static_cast<uint16_t>(flags);
1314
1316
  return true;
1315
1317
  }
@@ -1324,14 +1326,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
1324
1326
  // Parses a decimal integer, storing it in *np.
1325
1327
  // Sets *s to span the remainder of the string.
1326
1328
  static bool ParseInteger(StringPiece* s, int* np) {
1327
- if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
1329
+ if (s->empty() || !isdigit((*s)[0] & 0xFF))
1328
1330
  return false;
1329
1331
  // Disallow leading zeros.
1330
1332
  if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
1331
1333
  return false;
1332
1334
  int n = 0;
1333
1335
  int c;
1334
- while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
1336
+ while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
1335
1337
  // Avoid overflow.
1336
1338
  if (n >= 100000000)
1337
1339
  return false;
@@ -1353,16 +1355,16 @@ static bool ParseInteger(StringPiece* s, int* np) {
1353
1355
  // s must NOT be edited unless MaybeParseRepetition returns true.
1354
1356
  static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
1355
1357
  StringPiece s = *sp;
1356
- if (s.size() == 0 || s[0] != '{')
1358
+ if (s.empty() || s[0] != '{')
1357
1359
  return false;
1358
1360
  s.remove_prefix(1); // '{'
1359
1361
  if (!ParseInteger(&s, lo))
1360
1362
  return false;
1361
- if (s.size() == 0)
1363
+ if (s.empty())
1362
1364
  return false;
1363
1365
  if (s[0] == ',') {
1364
1366
  s.remove_prefix(1); // ','
1365
- if (s.size() == 0)
1367
+ if (s.empty())
1366
1368
  return false;
1367
1369
  if (s[0] == '}') {
1368
1370
  // {2,} means at least 2
@@ -1376,7 +1378,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
1376
1378
  // {2} means exactly two
1377
1379
  *hi = *lo;
1378
1380
  }
1379
- if (s.size() == 0 || s[0] != '}')
1381
+ if (s.empty() || s[0] != '}')
1380
1382
  return false;
1381
1383
  s.remove_prefix(1); // '}'
1382
1384
  *sp = s;
@@ -1407,17 +1409,19 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
1407
1409
  }
1408
1410
  }
1409
1411
 
1410
- status->set_code(kRegexpBadUTF8);
1411
- status->set_error_arg(StringPiece());
1412
+ if (status != NULL) {
1413
+ status->set_code(kRegexpBadUTF8);
1414
+ status->set_error_arg(StringPiece());
1415
+ }
1412
1416
  return -1;
1413
1417
  }
1414
1418
 
1415
- // Return whether name is valid UTF-8.
1416
- // If not, set status to kRegexpBadUTF8.
1419
+ // Returns whether name is valid UTF-8.
1420
+ // If not, sets status to kRegexpBadUTF8.
1417
1421
  static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
1418
1422
  StringPiece t = s;
1419
1423
  Rune r;
1420
- while (t.size() > 0) {
1424
+ while (!t.empty()) {
1421
1425
  if (StringPieceToRune(&r, &t, status) < 0)
1422
1426
  return false;
1423
1427
  }
@@ -1448,14 +1452,14 @@ static int UnHex(int c) {
1448
1452
  // Sets *rp to the named character.
1449
1453
  static bool ParseEscape(StringPiece* s, Rune* rp,
1450
1454
  RegexpStatus* status, int rune_max) {
1451
- const char* begin = s->begin();
1452
- if (s->size() < 1 || (*s)[0] != '\\') {
1455
+ const char* begin = s->data();
1456
+ if (s->empty() || (*s)[0] != '\\') {
1453
1457
  // Should not happen - caller always checks.
1454
1458
  status->set_code(kRegexpInternalError);
1455
1459
  status->set_error_arg(StringPiece());
1456
1460
  return false;
1457
1461
  }
1458
- if (s->size() < 2) {
1462
+ if (s->size() == 1) {
1459
1463
  status->set_code(kRegexpTrailingBackslash);
1460
1464
  status->set_error_arg(StringPiece());
1461
1465
  return false;
@@ -1486,16 +1490,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
1486
1490
  case '6':
1487
1491
  case '7':
1488
1492
  // Single non-zero octal digit is a backreference; not supported.
1489
- if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7')
1493
+ if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
1490
1494
  goto BadEscape;
1491
1495
  FALLTHROUGH_INTENDED;
1492
1496
  case '0':
1493
1497
  // consume up to three octal digits; already have one.
1494
1498
  code = c - '0';
1495
- if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') {
1499
+ if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
1496
1500
  code = code * 8 + c - '0';
1497
1501
  s->remove_prefix(1); // digit
1498
- if (s->size() > 0) {
1502
+ if (!s->empty()) {
1499
1503
  c = (*s)[0];
1500
1504
  if ('0' <= c && c <= '7') {
1501
1505
  code = code * 8 + c - '0';
@@ -1510,7 +1514,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
1510
1514
 
1511
1515
  // Hexadecimal escapes
1512
1516
  case 'x':
1513
- if (s->size() == 0)
1517
+ if (s->empty())
1514
1518
  goto BadEscape;
1515
1519
  if (StringPieceToRune(&c, s, status) < 0)
1516
1520
  return false;
@@ -1530,7 +1534,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
1530
1534
  code = code * 16 + UnHex(c);
1531
1535
  if (code > rune_max)
1532
1536
  goto BadEscape;
1533
- if (s->size() == 0)
1537
+ if (s->empty())
1534
1538
  goto BadEscape;
1535
1539
  if (StringPieceToRune(&c, s, status) < 0)
1536
1540
  return false;
@@ -1541,7 +1545,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
1541
1545
  return true;
1542
1546
  }
1543
1547
  // Easy case: two hex digits.
1544
- if (s->size() == 0)
1548
+ if (s->empty())
1545
1549
  goto BadEscape;
1546
1550
  if (StringPieceToRune(&c1, s, status) < 0)
1547
1551
  return false;
@@ -1585,13 +1589,11 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
1585
1589
  // return true;
1586
1590
  }
1587
1591
 
1588
- LOG(DFATAL) << "Not reached in ParseEscape.";
1589
-
1590
1592
  BadEscape:
1591
1593
  // Unrecognized escape sequence.
1592
1594
  status->set_code(kRegexpBadEscape);
1593
1595
  status->set_error_arg(
1594
- StringPiece(begin, static_cast<size_t>(s->begin() - begin)));
1596
+ StringPiece(begin, static_cast<size_t>(s->data() - begin)));
1595
1597
  return false;
1596
1598
  }
1597
1599
 
@@ -1711,7 +1713,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl
1711
1713
  return NULL;
1712
1714
  // Could use StringPieceToRune, but there aren't
1713
1715
  // any non-ASCII Perl group names.
1714
- StringPiece name(s->begin(), 2);
1716
+ StringPiece name(s->data(), 2);
1715
1717
  const UGroup *g = LookupPerlGroup(name);
1716
1718
  if (g == NULL)
1717
1719
  return NULL;
@@ -1751,8 +1753,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
1751
1753
  return kParseError;
1752
1754
  if (c != '{') {
1753
1755
  // Name is the bit of string we just skipped over for c.
1754
- const char* p = seq.begin() + 2;
1755
- name = StringPiece(p, static_cast<size_t>(s->begin() - p));
1756
+ const char* p = seq.data() + 2;
1757
+ name = StringPiece(p, static_cast<size_t>(s->data() - p));
1756
1758
  } else {
1757
1759
  // Name is in braces. Look for closing }
1758
1760
  size_t end = s->find('}', 0);
@@ -1763,16 +1765,16 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
1763
1765
  status->set_error_arg(seq);
1764
1766
  return kParseError;
1765
1767
  }
1766
- name = StringPiece(s->begin(), end); // without '}'
1768
+ name = StringPiece(s->data(), end); // without '}'
1767
1769
  s->remove_prefix(end + 1); // with '}'
1768
1770
  if (!IsValidUTF8(name, status))
1769
1771
  return kParseError;
1770
1772
  }
1771
1773
 
1772
1774
  // Chop seq where s now begins.
1773
- seq = StringPiece(seq.begin(), static_cast<size_t>(s->begin() - seq.begin()));
1775
+ seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
1774
1776
 
1775
- if (name.size() > 0 && name[0] == '^') {
1777
+ if (!name.empty() && name[0] == '^') {
1776
1778
  sign = -sign;
1777
1779
  name.remove_prefix(1); // '^'
1778
1780
  }
@@ -1802,14 +1804,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
1802
1804
 
1803
1805
  // Convert the UnicodeSet to a URange32 and UGroup that we can add.
1804
1806
  int nr = uset.getRangeCount();
1805
- URange32* r = new URange32[nr];
1807
+ PODArray<URange32> r(nr);
1806
1808
  for (int i = 0; i < nr; i++) {
1807
1809
  r[i].lo = uset.getRangeStart(i);
1808
1810
  r[i].hi = uset.getRangeEnd(i);
1809
1811
  }
1810
- UGroup g = {"", +1, 0, 0, r, nr};
1812
+ UGroup g = {"", +1, 0, 0, r.data(), nr};
1811
1813
  AddUGroup(cc, &g, sign, parse_flags);
1812
- delete[] r;
1813
1814
  #endif
1814
1815
 
1815
1816
  return kParseOk;
@@ -1859,7 +1860,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
1859
1860
  bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
1860
1861
  const StringPiece& whole_class,
1861
1862
  RegexpStatus* status) {
1862
- if (s->size() == 0) {
1863
+ if (s->empty()) {
1863
1864
  status->set_code(kRegexpMissingBracket);
1864
1865
  status->set_error_arg(whole_class);
1865
1866
  return false;
@@ -1867,7 +1868,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
1867
1868
 
1868
1869
  // Allow regular escape sequences even though
1869
1870
  // many need not be escaped in this context.
1870
- if (s->size() >= 1 && (*s)[0] == '\\')
1871
+ if ((*s)[0] == '\\')
1871
1872
  return ParseEscape(s, rp, status, rune_max_);
1872
1873
 
1873
1874
  // Otherwise take the next rune.
@@ -1909,7 +1910,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
1909
1910
  Regexp** out_re,
1910
1911
  RegexpStatus* status) {
1911
1912
  StringPiece whole_class = *s;
1912
- if (s->size() == 0 || (*s)[0] != '[') {
1913
+ if (s->empty() || (*s)[0] != '[') {
1913
1914
  // Caller checked this.
1914
1915
  status->set_code(kRegexpInternalError);
1915
1916
  status->set_error_arg(StringPiece());
@@ -1917,19 +1918,19 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
1917
1918
  }
1918
1919
  bool negated = false;
1919
1920
  Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
1920
- re->char_class_.ccb_ = new CharClassBuilder;
1921
+ re->arguments.char_class.ccb_ = new CharClassBuilder;
1921
1922
  s->remove_prefix(1); // '['
1922
- if (s->size() > 0 && (*s)[0] == '^') {
1923
+ if (!s->empty() && (*s)[0] == '^') {
1923
1924
  s->remove_prefix(1); // '^'
1924
1925
  negated = true;
1925
1926
  if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
1926
1927
  // If NL can't match implicitly, then pretend
1927
1928
  // negated classes include a leading \n.
1928
- re->char_class_.ccb_->AddRange('\n', '\n');
1929
+ re->arguments.char_class.ccb_->AddRange('\n', '\n');
1929
1930
  }
1930
1931
  }
1931
1932
  bool first = true; // ] is okay as first char in class
1932
- while (s->size() > 0 && ((*s)[0] != ']' || first)) {
1933
+ while (!s->empty() && ((*s)[0] != ']' || first)) {
1933
1934
  // - is only okay unescaped as first or last in class.
1934
1935
  // Except that Perl allows - anywhere.
1935
1936
  if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
@@ -1951,7 +1952,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
1951
1952
 
1952
1953
  // Look for [:alnum:] etc.
1953
1954
  if (s->size() > 2 && (*s)[0] == '[' && (*s)[1] == ':') {
1954
- switch (ParseCCName(s, flags_, re->char_class_.ccb_, status)) {
1955
+ switch (ParseCCName(s, flags_, re->arguments.char_class.ccb_, status)) {
1955
1956
  case kParseOk:
1956
1957
  continue;
1957
1958
  case kParseError:
@@ -1966,7 +1967,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
1966
1967
  if (s->size() > 2 &&
1967
1968
  (*s)[0] == '\\' &&
1968
1969
  ((*s)[1] == 'p' || (*s)[1] == 'P')) {
1969
- switch (ParseUnicodeGroup(s, flags_, re->char_class_.ccb_, status)) {
1970
+ switch (ParseUnicodeGroup(s, flags_, re->arguments.char_class.ccb_, status)) {
1970
1971
  case kParseOk:
1971
1972
  continue;
1972
1973
  case kParseError:
@@ -1980,7 +1981,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
1980
1981
  // Look for Perl character class symbols (extension).
1981
1982
  const UGroup *g = MaybeParsePerlCCEscape(s, flags_);
1982
1983
  if (g != NULL) {
1983
- AddUGroup(re->char_class_.ccb_, g, g->sign, flags_);
1984
+ AddUGroup(re->arguments.char_class.ccb_, g, g->sign, flags_);
1984
1985
  continue;
1985
1986
  }
1986
1987
 
@@ -1995,9 +1996,9 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
1995
1996
  // Regexp::ClassNL is set. In an explicit range or singleton
1996
1997
  // like we just parsed, we do not filter \n out, so set ClassNL
1997
1998
  // in the flags.
1998
- re->char_class_.ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
1999
+ re->arguments.char_class.ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
1999
2000
  }
2000
- if (s->size() == 0) {
2001
+ if (s->empty()) {
2001
2002
  status->set_code(kRegexpMissingBracket);
2002
2003
  status->set_error_arg(whole_class);
2003
2004
  re->Decref();
@@ -2006,25 +2007,40 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
2006
2007
  s->remove_prefix(1); // ']'
2007
2008
 
2008
2009
  if (negated)
2009
- re->char_class_.ccb_->Negate();
2010
+ re->arguments.char_class.ccb_->Negate();
2010
2011
 
2011
2012
  *out_re = re;
2012
2013
  return true;
2013
2014
  }
2014
2015
 
2015
- // Is this a valid capture name? [A-Za-z0-9_]+
2016
- // PCRE limits names to 32 bytes.
2017
- // Python rejects names starting with digits.
2018
- // We don't enforce either of those.
2016
+ // Returns whether name is a valid capture name.
2019
2017
  static bool IsValidCaptureName(const StringPiece& name) {
2020
- if (name.size() == 0)
2018
+ if (name.empty())
2021
2019
  return false;
2022
- for (size_t i = 0; i < name.size(); i++) {
2023
- int c = name[i];
2024
- if (('0' <= c && c <= '9') ||
2025
- ('a' <= c && c <= 'z') ||
2026
- ('A' <= c && c <= 'Z') ||
2027
- c == '_')
2020
+
2021
+ // Historically, we effectively used [0-9A-Za-z_]+ to validate; that
2022
+ // followed Python 2 except for not restricting the first character.
2023
+ // As of Python 3, Unicode characters beyond ASCII are also allowed;
2024
+ // accordingly, we permit the Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd and
2025
+ // Pc categories, but again without restricting the first character.
2026
+ // Also, Unicode normalization (e.g. NFKC) isn't performed: Python 3
2027
+ // performs it for identifiers, but seemingly not for capture names;
2028
+ // if they start doing that for capture names, we won't follow suit.
2029
+ static const CharClass* const cc = []() {
2030
+ CharClassBuilder ccb;
2031
+ for (StringPiece group :
2032
+ {"Lu", "Ll", "Lt", "Lm", "Lo", "Nl", "Mn", "Mc", "Nd", "Pc"})
2033
+ AddUGroup(&ccb, LookupGroup(group, unicode_groups, num_unicode_groups),
2034
+ +1, Regexp::NoParseFlags);
2035
+ return ccb.GetCharClass();
2036
+ }();
2037
+
2038
+ StringPiece t = name;
2039
+ Rune r;
2040
+ while (!t.empty()) {
2041
+ if (StringPieceToRune(&r, &t, NULL) < 0)
2042
+ return false;
2043
+ if (cc->Contains(r))
2028
2044
  continue;
2029
2045
  return false;
2030
2046
  }
@@ -2041,8 +2057,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
2041
2057
 
2042
2058
  // Caller is supposed to check this.
2043
2059
  if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
2044
- LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
2045
2060
  status_->set_code(kRegexpInternalError);
2061
+ LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
2046
2062
  return false;
2047
2063
  }
2048
2064
 
@@ -2075,8 +2091,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
2075
2091
  }
2076
2092
 
2077
2093
  // t is "P<name>...", t[end] == '>'
2078
- StringPiece capture(t.begin()-2, end+3); // "(?P<name>"
2079
- StringPiece name(t.begin()+2, end-2); // "name"
2094
+ StringPiece capture(t.data()-2, end+3); // "(?P<name>"
2095
+ StringPiece name(t.data()+2, end-2); // "name"
2080
2096
  if (!IsValidUTF8(name, status_))
2081
2097
  return false;
2082
2098
  if (!IsValidCaptureName(name)) {
@@ -2090,7 +2106,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
2090
2106
  return false;
2091
2107
  }
2092
2108
 
2093
- s->remove_prefix(static_cast<size_t>(capture.end() - s->begin()));
2109
+ s->remove_prefix(
2110
+ static_cast<size_t>(capture.data() + capture.size() - s->data()));
2094
2111
  return true;
2095
2112
  }
2096
2113
 
@@ -2099,7 +2116,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
2099
2116
  int nflags = flags_;
2100
2117
  Rune c;
2101
2118
  for (bool done = false; !done; ) {
2102
- if (t.size() == 0)
2119
+ if (t.empty())
2103
2120
  goto BadPerlOp;
2104
2121
  if (StringPieceToRune(&c, &t, status_) < 0)
2105
2122
  return false;
@@ -2174,7 +2191,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
2174
2191
  BadPerlOp:
2175
2192
  status_->set_code(kRegexpBadPerlOp);
2176
2193
  status_->set_error_arg(
2177
- StringPiece(s->begin(), static_cast<size_t>(t.begin() - s->begin())));
2194
+ StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
2178
2195
  return false;
2179
2196
  }
2180
2197
 
@@ -2217,7 +2234,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2217
2234
 
2218
2235
  if (global_flags & Literal) {
2219
2236
  // Special parse loop for literal string.
2220
- while (t.size() > 0) {
2237
+ while (!t.empty()) {
2221
2238
  Rune r;
2222
2239
  if (StringPieceToRune(&r, &t, status) < 0)
2223
2240
  return NULL;
@@ -2228,7 +2245,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2228
2245
  }
2229
2246
 
2230
2247
  StringPiece lastunary = StringPiece();
2231
- while (t.size() > 0) {
2248
+ while (!t.empty()) {
2232
2249
  StringPiece isunary = StringPiece();
2233
2250
  switch (t[0]) {
2234
2251
  default: {
@@ -2271,7 +2288,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2271
2288
  break;
2272
2289
 
2273
2290
  case '^': // Beginning of line.
2274
- if (!ps.PushCarat())
2291
+ if (!ps.PushCaret())
2275
2292
  return NULL;
2276
2293
  t.remove_prefix(1); // '^'
2277
2294
  break;
@@ -2312,18 +2329,18 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2312
2329
  bool nongreedy = false;
2313
2330
  t.remove_prefix(1); // '*' or '+' or '?'
2314
2331
  if (ps.flags() & PerlX) {
2315
- if (t.size() > 0 && t[0] == '?') {
2332
+ if (!t.empty() && t[0] == '?') {
2316
2333
  nongreedy = true;
2317
2334
  t.remove_prefix(1); // '?'
2318
2335
  }
2319
- if (lastunary.size() > 0) {
2336
+ if (!lastunary.empty()) {
2320
2337
  // In Perl it is not allowed to stack repetition operators:
2321
2338
  // a** is a syntax error, not a double-star.
2322
2339
  // (and a++ means something else entirely, which we don't support!)
2323
2340
  status->set_code(kRegexpRepeatOp);
2324
2341
  status->set_error_arg(StringPiece(
2325
- lastunary.begin(),
2326
- static_cast<size_t>(t.begin() - lastunary.begin())));
2342
+ lastunary.data(),
2343
+ static_cast<size_t>(t.data() - lastunary.data())));
2327
2344
  return NULL;
2328
2345
  }
2329
2346
  }
@@ -2347,16 +2364,16 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2347
2364
  }
2348
2365
  bool nongreedy = false;
2349
2366
  if (ps.flags() & PerlX) {
2350
- if (t.size() > 0 && t[0] == '?') {
2367
+ if (!t.empty() && t[0] == '?') {
2351
2368
  nongreedy = true;
2352
2369
  t.remove_prefix(1); // '?'
2353
2370
  }
2354
- if (lastunary.size() > 0) {
2371
+ if (!lastunary.empty()) {
2355
2372
  // Not allowed to stack repetition operators.
2356
2373
  status->set_code(kRegexpRepeatOp);
2357
2374
  status->set_error_arg(StringPiece(
2358
- lastunary.begin(),
2359
- static_cast<size_t>(t.begin() - lastunary.begin())));
2375
+ lastunary.data(),
2376
+ static_cast<size_t>(t.data() - lastunary.data())));
2360
2377
  return NULL;
2361
2378
  }
2362
2379
  }
@@ -2405,7 +2422,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2405
2422
 
2406
2423
  if (t[1] == 'Q') { // \Q ... \E: the ... is always literals
2407
2424
  t.remove_prefix(2); // '\\', 'Q'
2408
- while (t.size() > 0) {
2425
+ while (!t.empty()) {
2409
2426
  if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
2410
2427
  t.remove_prefix(2); // '\\', 'E'
2411
2428
  break;
@@ -2422,8 +2439,8 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2422
2439
 
2423
2440
  if (t.size() >= 2 && (t[1] == 'p' || t[1] == 'P')) {
2424
2441
  Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);
2425
- re->char_class_.ccb_ = new CharClassBuilder;
2426
- switch (ParseUnicodeGroup(&t, ps.flags(), re->char_class_.ccb_, status)) {
2442
+ re->arguments.char_class.ccb_ = new CharClassBuilder;
2443
+ switch (ParseUnicodeGroup(&t, ps.flags(), re->arguments.char_class.ccb_, status)) {
2427
2444
  case kParseOk:
2428
2445
  if (!ps.PushRegexp(re))
2429
2446
  return NULL;
@@ -2440,8 +2457,8 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2440
2457
  const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags());
2441
2458
  if (g != NULL) {
2442
2459
  Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);
2443
- re->char_class_.ccb_ = new CharClassBuilder;
2444
- AddUGroup(re->char_class_.ccb_, g, g->sign, ps.flags());
2460
+ re->arguments.char_class.ccb_ = new CharClassBuilder;
2461
+ AddUGroup(re->arguments.char_class.ccb_, g, g->sign, ps.flags());
2445
2462
  if (!ps.PushRegexp(re))
2446
2463
  return NULL;
2447
2464
  break;
@@ -2461,4 +2478,4 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
2461
2478
  return ps.DoFinish();
2462
2479
  }
2463
2480
 
2464
- } // namespace duckdb_re2
2481
+ } // namespace re2