duckdb 1.0.1-dev22.0 → 1.0.1-dev27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1389) hide show
  1. package/.github/workflows/NodeJS.yml +1 -1
  2. package/binding.gyp +41 -0
  3. package/package.json +1 -1
  4. package/src/duckdb/extension/icu/icu-dateadd.cpp +4 -2
  5. package/src/duckdb/extension/icu/icu-datefunc.cpp +6 -2
  6. package/src/duckdb/extension/icu/icu-datesub.cpp +13 -2
  7. package/src/duckdb/extension/icu/icu-strptime.cpp +6 -6
  8. package/src/duckdb/extension/icu/icu-table-range.cpp +92 -73
  9. package/src/duckdb/extension/icu/icu-timebucket.cpp +12 -2
  10. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -3
  11. package/src/duckdb/extension/icu/icu_extension.cpp +61 -9
  12. package/src/duckdb/extension/json/include/json_executors.hpp +20 -23
  13. package/src/duckdb/extension/json/include/json_functions.hpp +4 -0
  14. package/src/duckdb/extension/json/include/json_scan.hpp +6 -2
  15. package/src/duckdb/extension/json/include/json_structure.hpp +12 -9
  16. package/src/duckdb/extension/json/json_common.cpp +66 -10
  17. package/src/duckdb/extension/json/json_extension.cpp +13 -5
  18. package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
  19. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -4
  20. package/src/duckdb/extension/json/json_functions/json_exists.cpp +32 -0
  21. package/src/duckdb/extension/json/json_functions/json_extract.cpp +2 -2
  22. package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_pretty.cpp +32 -0
  24. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +5 -1
  25. package/src/duckdb/extension/json/json_functions/json_structure.cpp +305 -94
  26. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  27. package/src/duckdb/extension/json/json_functions/json_type.cpp +3 -3
  28. package/src/duckdb/extension/json/json_functions/json_value.cpp +42 -0
  29. package/src/duckdb/extension/json/json_functions/read_json.cpp +16 -2
  30. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -2
  31. package/src/duckdb/extension/json/json_functions.cpp +5 -1
  32. package/src/duckdb/extension/json/json_scan.cpp +13 -12
  33. package/src/duckdb/extension/json/serialize_json.cpp +5 -3
  34. package/src/duckdb/extension/parquet/column_reader.cpp +206 -43
  35. package/src/duckdb/extension/parquet/column_writer.cpp +133 -62
  36. package/src/duckdb/extension/parquet/geo_parquet.cpp +391 -0
  37. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +16 -5
  38. package/src/duckdb/extension/parquet/include/column_reader.hpp +37 -12
  39. package/src/duckdb/extension/parquet/include/column_writer.hpp +10 -11
  40. package/src/duckdb/extension/parquet/include/expression_column_reader.hpp +52 -0
  41. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +139 -0
  42. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +13 -8
  43. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -0
  44. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +7 -3
  45. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +55 -8
  46. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +3 -3
  47. package/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp +1 -1
  48. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +8 -0
  49. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -7
  50. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +33 -11
  51. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +5 -2
  52. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +48 -14
  53. package/src/duckdb/extension/parquet/parquet_crypto.cpp +109 -61
  54. package/src/duckdb/extension/parquet/parquet_extension.cpp +305 -72
  55. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -4
  56. package/src/duckdb/extension/parquet/parquet_reader.cpp +151 -40
  57. package/src/duckdb/extension/parquet/parquet_statistics.cpp +50 -16
  58. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +42 -1
  59. package/src/duckdb/extension/parquet/parquet_writer.cpp +67 -75
  60. package/src/duckdb/extension/parquet/serialize_parquet.cpp +3 -1
  61. package/src/duckdb/extension/parquet/zstd_file_system.cpp +5 -1
  62. package/src/duckdb/src/catalog/catalog.cpp +14 -16
  63. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +14 -11
  64. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +39 -19
  65. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +92 -78
  66. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +10 -2
  67. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +10 -3
  68. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +3 -3
  69. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +7 -7
  70. package/src/duckdb/src/catalog/catalog_entry.cpp +6 -3
  71. package/src/duckdb/src/catalog/catalog_set.cpp +14 -19
  72. package/src/duckdb/src/catalog/default/default_functions.cpp +179 -166
  73. package/src/duckdb/src/catalog/default/default_generator.cpp +24 -0
  74. package/src/duckdb/src/catalog/default/default_schemas.cpp +4 -3
  75. package/src/duckdb/src/catalog/default/default_table_functions.cpp +148 -0
  76. package/src/duckdb/src/catalog/default/default_views.cpp +7 -3
  77. package/src/duckdb/src/catalog/duck_catalog.cpp +7 -1
  78. package/src/duckdb/src/common/adbc/adbc.cpp +120 -58
  79. package/src/duckdb/src/common/allocator.cpp +71 -6
  80. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +8 -7
  81. package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +1 -1
  82. package/src/duckdb/src/common/arrow/appender/union_data.cpp +4 -5
  83. package/src/duckdb/src/common/arrow/arrow_appender.cpp +55 -21
  84. package/src/duckdb/src/common/arrow/arrow_converter.cpp +85 -10
  85. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +142 -0
  86. package/src/duckdb/src/common/arrow/arrow_query_result.cpp +56 -0
  87. package/src/duckdb/src/common/arrow/physical_arrow_batch_collector.cpp +37 -0
  88. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +128 -0
  89. package/src/duckdb/src/common/arrow/schema_metadata.cpp +101 -0
  90. package/src/duckdb/src/common/cgroups.cpp +189 -0
  91. package/src/duckdb/src/common/compressed_file_system.cpp +6 -3
  92. package/src/duckdb/src/common/encryption_state.cpp +38 -0
  93. package/src/duckdb/src/common/enum_util.cpp +682 -14
  94. package/src/duckdb/src/common/enums/file_compression_type.cpp +24 -0
  95. package/src/duckdb/src/common/enums/metric_type.cpp +208 -0
  96. package/src/duckdb/src/common/enums/optimizer_type.cpp +8 -2
  97. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  98. package/src/duckdb/src/common/enums/relation_type.cpp +4 -0
  99. package/src/duckdb/src/common/enums/statement_type.cpp +15 -0
  100. package/src/duckdb/src/common/error_data.cpp +22 -20
  101. package/src/duckdb/src/common/exception/binder_exception.cpp +5 -0
  102. package/src/duckdb/src/common/exception.cpp +11 -1
  103. package/src/duckdb/src/common/extra_type_info.cpp +3 -0
  104. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  105. package/src/duckdb/src/common/file_system.cpp +25 -3
  106. package/src/duckdb/src/common/filename_pattern.cpp +1 -0
  107. package/src/duckdb/src/common/fsst.cpp +15 -14
  108. package/src/duckdb/src/common/gzip_file_system.cpp +3 -1
  109. package/src/duckdb/src/common/hive_partitioning.cpp +103 -43
  110. package/src/duckdb/src/common/http_util.cpp +25 -0
  111. package/src/duckdb/src/common/local_file_system.cpp +48 -27
  112. package/src/duckdb/src/common/multi_file_list.cpp +113 -22
  113. package/src/duckdb/src/common/multi_file_reader.cpp +59 -58
  114. package/src/duckdb/src/common/operator/cast_operators.cpp +133 -34
  115. package/src/duckdb/src/common/operator/string_cast.cpp +42 -11
  116. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +2 -2
  117. package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +1 -1
  118. package/src/duckdb/src/common/radix_partitioning.cpp +31 -21
  119. package/src/duckdb/src/common/random_engine.cpp +4 -0
  120. package/src/duckdb/src/common/re2_regex.cpp +47 -12
  121. package/src/duckdb/src/common/render_tree.cpp +243 -0
  122. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +1 -1
  123. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  124. package/src/duckdb/src/common/row_operations/row_matcher.cpp +58 -5
  125. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +79 -43
  126. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  127. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +6 -4
  128. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +18 -9
  129. package/src/duckdb/src/common/serializer/memory_stream.cpp +1 -0
  130. package/src/duckdb/src/common/sort/partition_state.cpp +33 -18
  131. package/src/duckdb/src/common/sort/radix_sort.cpp +22 -15
  132. package/src/duckdb/src/common/sort/sort_state.cpp +19 -16
  133. package/src/duckdb/src/common/sort/sorted_block.cpp +11 -10
  134. package/src/duckdb/src/common/string_util.cpp +167 -10
  135. package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +108 -0
  136. package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +267 -0
  137. package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +116 -0
  138. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +482 -0
  139. package/src/duckdb/src/common/tree_renderer/tree_renderer.cpp +12 -0
  140. package/src/duckdb/src/common/tree_renderer.cpp +16 -508
  141. package/src/duckdb/src/common/types/batched_data_collection.cpp +78 -9
  142. package/src/duckdb/src/common/types/bit.cpp +24 -22
  143. package/src/duckdb/src/common/types/blob.cpp +15 -11
  144. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -9
  145. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  146. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +3 -4
  147. package/src/duckdb/src/common/types/column/column_data_consumer.cpp +2 -2
  148. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +70 -21
  149. package/src/duckdb/src/common/types/data_chunk.cpp +10 -1
  150. package/src/duckdb/src/common/types/date.cpp +8 -19
  151. package/src/duckdb/src/common/types/decimal.cpp +3 -2
  152. package/src/duckdb/src/common/types/hugeint.cpp +11 -3
  153. package/src/duckdb/src/common/types/hyperloglog.cpp +212 -227
  154. package/src/duckdb/src/common/types/interval.cpp +1 -1
  155. package/src/duckdb/src/common/types/list_segment.cpp +83 -49
  156. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +22 -83
  157. package/src/duckdb/src/common/types/row/row_data_collection.cpp +2 -2
  158. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +20 -4
  159. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +28 -7
  160. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +29 -14
  161. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +152 -102
  162. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +4 -1
  163. package/src/duckdb/src/common/types/selection_vector.cpp +17 -1
  164. package/src/duckdb/src/common/types/time.cpp +62 -31
  165. package/src/duckdb/src/common/types/timestamp.cpp +70 -12
  166. package/src/duckdb/src/common/types/uuid.cpp +1 -1
  167. package/src/duckdb/src/common/types/validity_mask.cpp +40 -5
  168. package/src/duckdb/src/common/types/value.cpp +50 -8
  169. package/src/duckdb/src/common/types/varint.cpp +295 -0
  170. package/src/duckdb/src/common/types/vector.cpp +165 -54
  171. package/src/duckdb/src/common/types/vector_buffer.cpp +5 -4
  172. package/src/duckdb/src/common/types.cpp +106 -26
  173. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +13 -25
  174. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +6 -0
  175. package/src/duckdb/src/common/virtual_file_system.cpp +3 -3
  176. package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +35 -82
  177. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +283 -46
  178. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +4 -4
  179. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +3 -2
  180. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +226 -338
  181. package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +2 -0
  182. package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +388 -0
  183. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +63 -21
  184. package/src/duckdb/src/core_functions/aggregate/holistic/mad.cpp +330 -0
  185. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +136 -97
  186. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +601 -1485
  187. package/src/duckdb/src/core_functions/aggregate/nested/binned_histogram.cpp +405 -0
  188. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +136 -165
  189. package/src/duckdb/src/core_functions/function_list.cpp +35 -8
  190. package/src/duckdb/src/core_functions/lambda_functions.cpp +5 -7
  191. package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +172 -198
  192. package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +341 -54
  193. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -2
  194. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +89 -29
  195. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +1 -1
  196. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +2 -2
  197. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +133 -71
  198. package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +1 -1
  199. package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +1 -1
  200. package/src/duckdb/src/core_functions/scalar/generic/can_implicitly_cast.cpp +40 -0
  201. package/src/duckdb/src/core_functions/scalar/generic/error.cpp +1 -1
  202. package/src/duckdb/src/core_functions/scalar/generic/least.cpp +161 -58
  203. package/src/duckdb/src/core_functions/scalar/generic/typeof.cpp +13 -0
  204. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
  205. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +59 -75
  206. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +93 -40
  207. package/src/duckdb/src/core_functions/scalar/list/list_has_any_or_all.cpp +227 -0
  208. package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +20 -19
  209. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +0 -2
  210. package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +106 -8
  211. package/src/duckdb/src/core_functions/scalar/map/map_contains.cpp +56 -0
  212. package/src/duckdb/src/core_functions/scalar/map/map_extract.cpp +73 -118
  213. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +98 -2
  214. package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +1 -2
  215. package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +1 -1
  216. package/src/duckdb/src/core_functions/scalar/string/bar.cpp +1 -1
  217. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +5 -1
  218. package/src/duckdb/src/core_functions/scalar/string/md5.cpp +10 -37
  219. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +18 -2
  220. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +45 -0
  221. package/src/duckdb/src/core_functions/scalar/string/reverse.cpp +4 -5
  222. package/src/duckdb/src/core_functions/scalar/string/sha1.cpp +35 -0
  223. package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +5 -2
  224. package/src/duckdb/src/core_functions/scalar/string/url_encode.cpp +49 -0
  225. package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -2
  226. package/src/duckdb/src/core_functions/scalar/union/union_extract.cpp +4 -2
  227. package/src/duckdb/src/execution/adaptive_filter.cpp +30 -11
  228. package/src/duckdb/src/execution/aggregate_hashtable.cpp +13 -18
  229. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +4 -9
  230. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  231. package/src/duckdb/src/execution/index/art/art.cpp +683 -670
  232. package/src/duckdb/src/execution/index/art/art_key.cpp +121 -38
  233. package/src/duckdb/src/execution/index/art/base_leaf.cpp +168 -0
  234. package/src/duckdb/src/execution/index/art/base_node.cpp +163 -0
  235. package/src/duckdb/src/execution/index/art/iterator.cpp +148 -77
  236. package/src/duckdb/src/execution/index/art/leaf.cpp +159 -263
  237. package/src/duckdb/src/execution/index/art/node.cpp +493 -247
  238. package/src/duckdb/src/execution/index/art/node256.cpp +31 -91
  239. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +71 -0
  240. package/src/duckdb/src/execution/index/art/node48.cpp +75 -143
  241. package/src/duckdb/src/execution/index/art/prefix.cpp +424 -244
  242. package/src/duckdb/src/execution/index/bound_index.cpp +7 -1
  243. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +22 -18
  244. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +22 -73
  245. package/src/duckdb/src/execution/join_hashtable.cpp +637 -179
  246. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +4 -4
  247. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +15 -10
  248. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +13 -8
  249. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +525 -132
  250. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +147 -138
  251. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +531 -312
  252. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
  253. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -3
  254. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +9 -2
  255. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +13 -17
  256. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +60 -16
  257. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +105 -0
  258. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +24 -24
  259. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +25 -2
  260. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +275 -112
  261. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +106 -11
  262. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +253 -115
  263. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +93 -52
  264. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +116 -76
  265. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +29 -14
  266. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
  267. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +70 -26
  268. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +81 -60
  269. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +88 -50
  270. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +161 -51
  271. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +59 -17
  272. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +5 -5
  273. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +0 -21
  274. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +109 -0
  275. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +5 -13
  276. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +1 -1
  277. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +12 -4
  278. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +0 -16
  279. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +4 -2
  280. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +5 -0
  281. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +3 -1
  282. package/src/duckdb/src/execution/operator/helper/physical_set_variable.cpp +39 -0
  283. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +4 -2
  284. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +16 -5
  285. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  286. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  287. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  288. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -4
  289. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +59 -21
  290. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +7 -4
  291. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +333 -176
  292. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +57 -34
  293. package/src/duckdb/src/execution/operator/join/physical_join.cpp +16 -8
  294. package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +10 -4
  295. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +2 -5
  296. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +3 -3
  297. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +5 -5
  298. package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +7 -2
  299. package/src/duckdb/src/execution/operator/order/physical_order.cpp +17 -12
  300. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +12 -9
  301. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +35 -17
  302. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +17 -11
  303. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +5 -1
  304. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +156 -47
  305. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -2
  306. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +1 -3
  307. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -2
  308. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +13 -6
  309. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +22 -3
  310. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +19 -3
  311. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +37 -22
  312. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +77 -21
  313. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +27 -55
  314. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +41 -44
  315. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +4 -6
  316. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +4 -6
  317. package/src/duckdb/src/execution/operator/set/physical_union.cpp +18 -4
  318. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +3 -2
  319. package/src/duckdb/src/execution/physical_operator.cpp +45 -4
  320. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -7
  321. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +8 -3
  322. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +13 -6
  323. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +3 -3
  324. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +111 -19
  325. package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +19 -2
  326. package/src/duckdb/src/execution/physical_plan/plan_set.cpp +9 -0
  327. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +3 -1
  328. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -3
  329. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -49
  330. package/src/duckdb/src/execution/reservoir_sample.cpp +2 -2
  331. package/src/duckdb/src/execution/window_executor.cpp +556 -318
  332. package/src/duckdb/src/execution/window_segment_tree.cpp +1058 -485
  333. package/src/duckdb/src/function/aggregate/distributive/count.cpp +5 -5
  334. package/src/duckdb/src/function/aggregate/distributive/first.cpp +92 -95
  335. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +10 -9
  336. package/src/duckdb/src/function/aggregate_function.cpp +8 -0
  337. package/src/duckdb/src/function/cast/cast_function_set.cpp +10 -1
  338. package/src/duckdb/src/function/cast/decimal_cast.cpp +10 -1
  339. package/src/duckdb/src/function/cast/default_casts.cpp +2 -0
  340. package/src/duckdb/src/function/cast/numeric_casts.cpp +3 -0
  341. package/src/duckdb/src/function/cast/string_cast.cpp +8 -5
  342. package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
  343. package/src/duckdb/src/function/cast/union_casts.cpp +1 -1
  344. package/src/duckdb/src/function/cast/varint_casts.cpp +283 -0
  345. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -1
  346. package/src/duckdb/src/function/cast_rules.cpp +104 -15
  347. package/src/duckdb/src/function/compression_config.cpp +35 -33
  348. package/src/duckdb/src/function/copy_function.cpp +27 -0
  349. package/src/duckdb/src/function/function_binder.cpp +39 -11
  350. package/src/duckdb/src/function/macro_function.cpp +75 -32
  351. package/src/duckdb/src/function/pragma/pragma_queries.cpp +10 -0
  352. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -0
  353. package/src/duckdb/src/function/scalar/generic/binning.cpp +507 -0
  354. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +58 -0
  355. package/src/duckdb/src/function/scalar/generic_functions.cpp +1 -0
  356. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +33 -47
  357. package/src/duckdb/src/function/scalar/list/list_extract.cpp +70 -143
  358. package/src/duckdb/src/function/scalar/list/list_resize.cpp +93 -84
  359. package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -0
  360. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +24 -11
  361. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +4 -4
  362. package/src/duckdb/src/function/scalar/strftime_format.cpp +196 -57
  363. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +9 -7
  364. package/src/duckdb/src/function/scalar/string/concat.cpp +239 -123
  365. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +149 -0
  366. package/src/duckdb/src/function/scalar/string/contains.cpp +18 -7
  367. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  368. package/src/duckdb/src/function/scalar/string/substring.cpp +6 -11
  369. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  370. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +7 -3
  371. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  372. package/src/duckdb/src/function/scalar_function.cpp +5 -2
  373. package/src/duckdb/src/function/scalar_macro_function.cpp +2 -2
  374. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +20 -39
  375. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +135 -0
  376. package/src/duckdb/src/function/table/arrow.cpp +194 -52
  377. package/src/duckdb/src/function/table/arrow_conversion.cpp +212 -69
  378. package/src/duckdb/src/function/table/copy_csv.cpp +43 -14
  379. package/src/duckdb/src/function/table/query_function.cpp +80 -0
  380. package/src/duckdb/src/function/table/range.cpp +222 -142
  381. package/src/duckdb/src/function/table/read_csv.cpp +25 -13
  382. package/src/duckdb/src/function/table/sniff_csv.cpp +55 -35
  383. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +141 -129
  384. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +25 -14
  385. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +20 -14
  386. package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +15 -1
  387. package/src/duckdb/src/function/table/system/duckdb_variables.cpp +84 -0
  388. package/src/duckdb/src/function/table/system/test_all_types.cpp +1 -0
  389. package/src/duckdb/src/function/table/system/test_vector_types.cpp +33 -3
  390. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  391. package/src/duckdb/src/function/table/table_scan.cpp +45 -22
  392. package/src/duckdb/src/function/table/unnest.cpp +2 -2
  393. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
  394. package/src/duckdb/src/function/table_function.cpp +5 -4
  395. package/src/duckdb/src/function/table_macro_function.cpp +2 -2
  396. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +8 -4
  397. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +5 -2
  398. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -0
  399. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +2 -2
  400. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -4
  401. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +5 -5
  402. package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +2 -1
  403. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +4 -5
  404. package/src/duckdb/src/include/duckdb/catalog/default/default_generator.hpp +4 -5
  405. package/src/duckdb/src/include/duckdb/catalog/default/default_schemas.hpp +2 -1
  406. package/src/duckdb/src/include/duckdb/catalog/default/default_table_functions.hpp +47 -0
  407. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -0
  408. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +2 -2
  409. package/src/duckdb/src/include/duckdb/common/allocator.hpp +9 -1
  410. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +120 -0
  411. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +37 -11
  412. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +9 -8
  413. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
  414. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +6 -4
  415. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +92 -0
  416. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +2 -2
  417. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +26 -4
  418. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +90 -11
  419. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +6 -6
  420. package/src/duckdb/src/include/duckdb/common/arrow/arrow_buffer.hpp +8 -1
  421. package/src/duckdb/src/include/duckdb/common/arrow/arrow_merge_event.hpp +62 -0
  422. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +52 -0
  423. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +42 -0
  424. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_batch_collector.hpp +30 -0
  425. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_collector.hpp +65 -0
  426. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +43 -0
  427. package/src/duckdb/src/include/duckdb/common/bswap.hpp +18 -16
  428. package/src/duckdb/src/include/duckdb/common/cgroups.hpp +30 -0
  429. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -0
  430. package/src/duckdb/src/include/duckdb/common/dl.hpp +8 -1
  431. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +48 -0
  432. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +88 -0
  433. package/src/duckdb/src/include/duckdb/common/enums/checkpoint_type.hpp +2 -2
  434. package/src/duckdb/src/include/duckdb/common/enums/copy_overwrite_mode.hpp +6 -1
  435. package/src/duckdb/src/include/duckdb/common/enums/destroy_buffer_upon.hpp +21 -0
  436. package/src/duckdb/src/include/duckdb/common/enums/explain_format.hpp +17 -0
  437. package/src/duckdb/src/include/duckdb/common/enums/file_compression_type.hpp +4 -0
  438. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +2 -2
  439. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +88 -0
  440. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +6 -1
  441. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +2 -1
  442. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  443. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  444. package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +3 -1
  445. package/src/duckdb/src/include/duckdb/common/enums/set_scope.hpp +2 -1
  446. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +23 -2
  447. package/src/duckdb/src/include/duckdb/common/enums/stream_execution_result.hpp +25 -0
  448. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  449. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
  450. package/src/duckdb/src/include/duckdb/common/error_data.hpp +5 -2
  451. package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +1 -0
  452. package/src/duckdb/src/include/duckdb/common/exception.hpp +20 -2
  453. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +12 -0
  454. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -0
  455. package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +16 -0
  456. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +18 -0
  457. package/src/duckdb/src/include/duckdb/common/file_system.hpp +3 -0
  458. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -0
  459. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +160 -96
  460. package/src/duckdb/src/include/duckdb/common/fsst.hpp +9 -2
  461. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -8
  462. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +16 -7
  463. package/src/duckdb/src/include/duckdb/common/http_util.hpp +19 -0
  464. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +19 -6
  465. package/src/duckdb/src/include/duckdb/common/limits.hpp +9 -2
  466. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +38 -6
  467. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +9 -2
  468. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +5 -1
  469. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +82 -50
  470. package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +11 -0
  471. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +7 -3
  472. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +23 -1
  473. package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +2 -1
  474. package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +1 -1
  475. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +4 -0
  476. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
  477. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +10 -5
  478. package/src/duckdb/src/include/duckdb/common/optionally_owned_ptr.hpp +1 -0
  479. package/src/duckdb/src/include/duckdb/common/owning_string_map.hpp +155 -0
  480. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -3
  481. package/src/duckdb/src/include/duckdb/common/platform.hpp +58 -0
  482. package/src/duckdb/src/include/duckdb/common/radix.hpp +172 -27
  483. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +5 -1
  484. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -0
  485. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/common/render_tree.hpp +77 -0
  487. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +12 -0
  488. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +6 -2
  489. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +5 -3
  490. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +15 -7
  491. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +3 -1
  492. package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +245 -0
  493. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +10 -0
  494. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +10 -11
  495. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -6
  496. package/src/duckdb/src/include/duckdb/common/string_util.hpp +37 -7
  497. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +44 -0
  498. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +44 -0
  499. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +44 -0
  500. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +119 -0
  501. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +9 -123
  502. package/src/duckdb/src/include/duckdb/common/type_visitor.hpp +96 -0
  503. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +11 -1
  504. package/src/duckdb/src/include/duckdb/common/types/arrow_string_view_type.hpp +84 -0
  505. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +36 -1
  506. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
  507. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +2 -2
  508. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +4 -2
  509. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +52 -0
  510. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -0
  511. package/src/duckdb/src/include/duckdb/common/types/date.hpp +0 -3
  512. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +65 -0
  513. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -2
  514. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +49 -40
  515. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +5 -1
  516. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +2 -1
  517. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  518. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +4 -3
  519. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +3 -1
  520. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  521. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  522. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +1 -1
  523. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +4 -0
  524. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +4 -1
  525. package/src/duckdb/src/include/duckdb/common/types/time.hpp +11 -6
  526. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +13 -3
  527. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +103 -12
  528. package/src/duckdb/src/include/duckdb/common/types/value.hpp +12 -3
  529. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +107 -0
  530. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +5 -1
  531. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +7 -2
  532. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -39
  533. package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +42 -10
  534. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +29 -0
  535. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +0 -7
  536. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +2 -0
  537. package/src/duckdb/src/include/duckdb/common/winapi.hpp +8 -0
  538. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +8 -4
  539. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/stddev.hpp +8 -4
  540. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  541. package/src/duckdb/src/include/duckdb/core_functions/aggregate/histogram_helpers.hpp +99 -0
  542. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +16 -7
  543. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +396 -0
  544. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +10 -0
  545. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_helpers.hpp +65 -0
  546. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp +349 -0
  547. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_state.hpp +300 -0
  548. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +1 -1
  549. package/src/duckdb/src/include/duckdb/core_functions/aggregate/sort_key_helpers.hpp +55 -0
  550. package/src/duckdb/src/include/duckdb/core_functions/array_kernels.hpp +107 -0
  551. package/src/duckdb/src/include/duckdb/core_functions/create_sort_key.hpp +55 -0
  552. package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +1 -2
  553. package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +24 -0
  554. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +9 -0
  555. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +27 -0
  556. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +80 -8
  557. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +9 -0
  558. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +54 -0
  559. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +30 -21
  560. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +25 -14
  561. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -48
  562. package/src/duckdb/src/include/duckdb/execution/executor.hpp +25 -2
  563. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +102 -0
  564. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +94 -101
  565. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +43 -25
  566. package/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp +109 -0
  567. package/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp +140 -0
  568. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +43 -24
  569. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +41 -52
  570. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +133 -74
  571. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +46 -29
  572. package/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp +53 -0
  573. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +52 -35
  574. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +96 -57
  575. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +9 -4
  576. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +48 -10
  577. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +0 -2
  578. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +4 -2
  579. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +114 -36
  580. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +158 -67
  581. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +1 -1
  582. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  583. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  584. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +19 -2
  585. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -1
  586. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +1 -1
  587. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +75 -0
  588. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +81 -23
  589. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +27 -8
  590. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +2 -1
  591. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +31 -22
  592. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +4 -2
  593. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +48 -5
  594. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +7 -3
  595. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +22 -12
  596. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +35 -0
  597. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +81 -39
  598. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +2 -1
  599. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +18 -1
  600. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +9 -7
  601. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +5 -4
  602. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/header_value.hpp +26 -0
  603. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +6 -9
  604. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +3 -0
  605. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -3
  606. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +36 -19
  607. package/src/duckdb/src/include/duckdb/execution/operator/filter/physical_filter.hpp +1 -1
  608. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +21 -0
  609. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +53 -0
  610. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +3 -0
  611. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +6 -2
  612. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +18 -0
  613. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  614. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +6 -0
  615. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +2 -2
  616. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set_variable.hpp +43 -0
  617. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +1 -1
  618. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +59 -0
  619. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  620. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_comparison_join.hpp +8 -1
  621. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -2
  622. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +4 -2
  623. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -0
  624. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +1 -1
  625. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +3 -1
  626. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +4 -1
  627. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +3 -1
  628. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +1 -1
  629. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +1 -1
  630. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +5 -37
  631. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +5 -4
  632. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -2
  633. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +1 -1
  634. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -0
  635. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +9 -3
  636. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +8 -6
  637. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +2 -2
  638. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +1 -1
  639. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  640. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +21 -6
  641. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +3 -2
  642. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  643. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +137 -110
  644. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +57 -126
  645. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +21 -4
  646. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -1
  647. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -10
  648. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +37 -7
  649. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +24 -11
  650. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +4 -4
  651. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +41 -1
  652. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +15 -5
  653. package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +1 -0
  654. package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -4
  655. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +6 -0
  656. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +77 -109
  657. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
  658. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +6 -3
  659. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +25 -12
  660. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +9 -8
  661. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +38 -4
  662. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +1 -1
  663. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -57
  664. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +142 -0
  665. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_datetime_type.hpp +18 -0
  666. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +7 -0
  667. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp +10 -0
  668. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  669. package/src/duckdb/src/include/duckdb/function/table/range.hpp +4 -0
  670. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -1
  671. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  672. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +5 -5
  673. package/src/duckdb/src/include/duckdb/function/table_function.hpp +14 -2
  674. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +1 -1
  675. package/src/duckdb/src/include/duckdb/main/appender.hpp +14 -4
  676. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +25 -7
  677. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +79 -0
  678. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +10 -20
  679. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +11 -12
  680. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  681. package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +1 -1
  682. package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +2 -2
  683. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +809 -0
  684. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/batched_data_collection.hpp +35 -0
  685. package/src/duckdb/src/include/duckdb/main/client_config.hpp +68 -2
  686. package/src/duckdb/src/include/duckdb/main/client_context.hpp +30 -22
  687. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +79 -1
  688. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +9 -3
  689. package/src/duckdb/src/include/duckdb/main/config.hpp +55 -7
  690. package/src/duckdb/src/include/duckdb/main/connection.hpp +5 -1
  691. package/src/duckdb/src/include/duckdb/main/database.hpp +16 -5
  692. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +9 -8
  693. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +21 -6
  694. package/src/duckdb/src/include/duckdb/main/extension.hpp +20 -0
  695. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -0
  696. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +29 -23
  697. package/src/duckdb/src/include/duckdb/main/extension_install_info.hpp +6 -0
  698. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +3 -0
  699. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +4 -2
  700. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +5 -6
  701. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -5
  702. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +87 -0
  703. package/src/duckdb/src/include/duckdb/main/profiling_node.hpp +60 -0
  704. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +72 -34
  705. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  706. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +2 -1
  707. package/src/duckdb/src/include/duckdb/main/relation/delim_get_relation.hpp +30 -0
  708. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +3 -1
  709. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +3 -0
  710. package/src/duckdb/src/include/duckdb/main/relation/materialized_relation.hpp +1 -4
  711. package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +4 -1
  712. package/src/duckdb/src/include/duckdb/main/relation/read_json_relation.hpp +6 -0
  713. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  714. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +2 -0
  715. package/src/duckdb/src/include/duckdb/main/relation.hpp +7 -4
  716. package/src/duckdb/src/include/duckdb/main/secret/default_secrets.hpp +36 -0
  717. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +108 -0
  718. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +14 -4
  719. package/src/duckdb/src/include/duckdb/main/settings.hpp +227 -3
  720. package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +8 -0
  721. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +51 -0
  722. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +7 -0
  723. package/src/duckdb/src/include/duckdb/optimizer/cte_filter_pusher.hpp +46 -0
  724. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
  725. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  726. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +31 -0
  727. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +51 -10
  728. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -0
  729. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +17 -5
  730. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
  731. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +15 -13
  732. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +9 -4
  733. package/src/duckdb/src/include/duckdb/optimizer/limit_pushdown.hpp +25 -0
  734. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +1 -0
  735. package/src/duckdb/src/include/duckdb/optimizer/rule/join_dependent_filter.hpp +37 -0
  736. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +6 -1
  737. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +54 -2
  738. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +27 -8
  739. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -0
  740. package/src/duckdb/src/include/duckdb/parallel/pipeline_prepare_finish_event.hpp +25 -0
  741. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +63 -0
  742. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -1
  743. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  744. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +5 -0
  745. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +5 -0
  746. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +5 -0
  747. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +2 -0
  748. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +11 -1
  749. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +9 -0
  750. package/src/duckdb/src/include/duckdb/parser/parsed_expression_iterator.hpp +13 -6
  751. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +1 -1
  752. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -3
  753. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +2 -0
  754. package/src/duckdb/src/include/duckdb/parser/statement/explain_statement.hpp +5 -1
  755. package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +2 -2
  756. package/src/duckdb/src/include/duckdb/parser/statement/transaction_statement.hpp +1 -1
  757. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +0 -2
  758. package/src/duckdb/src/include/duckdb/parser/tableref/column_data_ref.hpp +9 -7
  759. package/src/duckdb/src/include/duckdb/parser/tableref/delimgetref.hpp +37 -0
  760. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +4 -0
  761. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +0 -2
  762. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +0 -2
  763. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +0 -1
  764. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  765. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +17 -9
  766. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -14
  767. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +44 -0
  768. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +1 -1
  769. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +1 -1
  770. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
  772. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -1
  773. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +1 -1
  774. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +1 -1
  775. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +1 -1
  776. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
  777. package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +1 -1
  778. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -1
  779. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +1 -1
  780. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +1 -1
  781. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +1 -1
  782. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +2 -0
  783. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
  784. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +1 -1
  785. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +2 -2
  786. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +1 -1
  787. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +1 -1
  788. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -2
  789. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -0
  790. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -0
  791. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -5
  792. package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +1 -0
  793. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +19 -11
  794. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  795. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  796. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  797. package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +2 -0
  798. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +7 -2
  799. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +2 -1
  800. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
  801. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +6 -1
  803. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +10 -2
  804. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  805. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
  806. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +1 -1
  807. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -1
  808. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -2
  809. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +15 -5
  810. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +1 -0
  811. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +1 -1
  812. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -1
  813. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +24 -2
  814. package/src/duckdb/src/include/duckdb/planner/tableref/bound_delimgetref.hpp +26 -0
  815. package/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +6 -0
  816. package/src/duckdb/src/include/duckdb/planner/tableref/bound_subqueryref.hpp +1 -1
  817. package/src/duckdb/src/include/duckdb/planner/tableref/bound_table_function.hpp +2 -0
  818. package/src/duckdb/src/include/duckdb/planner/tableref/list.hpp +2 -0
  819. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  820. package/src/duckdb/src/include/duckdb/storage/block.hpp +4 -2
  821. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +48 -3
  822. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +21 -7
  823. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +65 -51
  824. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +14 -5
  825. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -4
  826. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +3 -2
  827. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +1 -0
  828. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +3 -4
  829. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +2 -0
  830. package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +4 -4
  831. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +6 -4
  832. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +19 -17
  833. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +2 -2
  834. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +3 -4
  835. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +3 -2
  836. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -2
  837. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +13 -11
  838. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +19 -19
  839. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -4
  840. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  841. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  842. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +10 -2
  843. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -2
  844. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +15 -0
  845. package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +14 -10
  846. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -8
  847. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +7 -4
  848. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -7
  849. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +29 -4
  850. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +22 -7
  851. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +15 -2
  852. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -2
  853. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +5 -16
  854. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +51 -13
  855. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +6 -3
  856. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +29 -19
  857. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +23 -7
  858. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +27 -18
  859. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +6 -3
  860. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +5 -2
  861. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  862. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +5 -1
  863. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +77 -6
  864. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +23 -11
  865. package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -0
  866. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +5 -2
  867. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +18 -4
  868. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -1
  869. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  870. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +89 -14
  871. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +4 -2
  872. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +4 -2
  873. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +2 -2
  874. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -1
  875. package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +33 -15
  876. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +9 -9
  877. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +3 -1
  878. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +4 -16
  879. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +27 -4
  880. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -0
  881. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -2
  882. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +5 -5
  883. package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +6 -2
  884. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +5 -3
  885. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +48 -0
  886. package/src/duckdb/src/include/duckdb.h +1779 -739
  887. package/src/duckdb/src/include/duckdb_extension.h +921 -0
  888. package/src/duckdb/src/main/appender.cpp +53 -7
  889. package/src/duckdb/src/main/attached_database.cpp +87 -17
  890. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +226 -0
  891. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +35 -0
  892. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +48 -23
  893. package/src/duckdb/src/main/capi/aggregate_function-c.cpp +327 -0
  894. package/src/duckdb/src/main/capi/appender-c.cpp +18 -0
  895. package/src/duckdb/src/main/capi/cast/utils-c.cpp +2 -2
  896. package/src/duckdb/src/main/capi/cast_function-c.cpp +210 -0
  897. package/src/duckdb/src/main/capi/config-c.cpp +3 -3
  898. package/src/duckdb/src/main/capi/data_chunk-c.cpp +18 -7
  899. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +223 -24
  900. package/src/duckdb/src/main/capi/helper-c.cpp +51 -11
  901. package/src/duckdb/src/main/capi/logical_types-c.cpp +105 -46
  902. package/src/duckdb/src/main/capi/pending-c.cpp +7 -6
  903. package/src/duckdb/src/main/capi/prepared-c.cpp +18 -7
  904. package/src/duckdb/src/main/capi/profiling_info-c.cpp +84 -0
  905. package/src/duckdb/src/main/capi/result-c.cpp +139 -37
  906. package/src/duckdb/src/main/capi/scalar_function-c.cpp +269 -0
  907. package/src/duckdb/src/main/capi/table_description-c.cpp +82 -0
  908. package/src/duckdb/src/main/capi/table_function-c.cpp +161 -95
  909. package/src/duckdb/src/main/capi/value-c.cpp +2 -2
  910. package/src/duckdb/src/main/chunk_scan_state/batched_data_collection.cpp +57 -0
  911. package/src/duckdb/src/main/client_config.cpp +17 -0
  912. package/src/duckdb/src/main/client_context.cpp +67 -52
  913. package/src/duckdb/src/main/client_data.cpp +3 -3
  914. package/src/duckdb/src/main/config.cpp +120 -62
  915. package/src/duckdb/src/main/connection.cpp +14 -2
  916. package/src/duckdb/src/main/database.cpp +96 -35
  917. package/src/duckdb/src/main/database_manager.cpp +25 -23
  918. package/src/duckdb/src/main/database_path_and_type.cpp +2 -2
  919. package/src/duckdb/src/main/db_instance_cache.cpp +54 -19
  920. package/src/duckdb/src/main/extension/extension_helper.cpp +47 -42
  921. package/src/duckdb/src/main/extension/extension_install.cpp +155 -87
  922. package/src/duckdb/src/main/extension/extension_load.cpp +180 -26
  923. package/src/duckdb/src/main/extension/extension_util.cpp +8 -0
  924. package/src/duckdb/src/main/extension.cpp +72 -5
  925. package/src/duckdb/src/main/pending_query_result.cpp +20 -12
  926. package/src/duckdb/src/main/prepared_statement.cpp +6 -6
  927. package/src/duckdb/src/main/prepared_statement_data.cpp +28 -17
  928. package/src/duckdb/src/main/profiling_info.cpp +196 -0
  929. package/src/duckdb/src/main/query_profiler.cpp +413 -224
  930. package/src/duckdb/src/main/query_result.cpp +1 -1
  931. package/src/duckdb/src/main/relation/create_table_relation.cpp +4 -2
  932. package/src/duckdb/src/main/relation/create_view_relation.cpp +0 -6
  933. package/src/duckdb/src/main/relation/delim_get_relation.cpp +44 -0
  934. package/src/duckdb/src/main/relation/explain_relation.cpp +4 -3
  935. package/src/duckdb/src/main/relation/join_relation.cpp +5 -0
  936. package/src/duckdb/src/main/relation/limit_relation.cpp +1 -1
  937. package/src/duckdb/src/main/relation/materialized_relation.cpp +3 -3
  938. package/src/duckdb/src/main/relation/query_relation.cpp +42 -15
  939. package/src/duckdb/src/main/relation/read_csv_relation.cpp +7 -14
  940. package/src/duckdb/src/main/relation/read_json_relation.cpp +20 -0
  941. package/src/duckdb/src/main/relation/setop_relation.cpp +1 -1
  942. package/src/duckdb/src/main/relation/table_function_relation.cpp +6 -0
  943. package/src/duckdb/src/main/relation/view_relation.cpp +10 -0
  944. package/src/duckdb/src/main/relation.cpp +12 -8
  945. package/src/duckdb/src/main/secret/default_secrets.cpp +108 -0
  946. package/src/duckdb/src/main/secret/secret.cpp +145 -2
  947. package/src/duckdb/src/main/secret/secret_manager.cpp +85 -35
  948. package/src/duckdb/src/main/secret/secret_storage.cpp +29 -17
  949. package/src/duckdb/src/main/settings/settings.cpp +503 -11
  950. package/src/duckdb/src/main/stream_query_result.cpp +75 -2
  951. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +248 -0
  952. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +28 -6
  953. package/src/duckdb/src/optimizer/compressed_materialization/compress_comparison_join.cpp +152 -0
  954. package/src/duckdb/src/optimizer/compressed_materialization.cpp +11 -1
  955. package/src/duckdb/src/optimizer/cse_optimizer.cpp +3 -0
  956. package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +117 -0
  957. package/src/duckdb/src/optimizer/filter_combiner.cpp +30 -9
  958. package/src/duckdb/src/optimizer/filter_pullup.cpp +54 -2
  959. package/src/duckdb/src/optimizer/filter_pushdown.cpp +71 -3
  960. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +154 -0
  961. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +245 -114
  962. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +42 -20
  963. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +6 -2
  964. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +32 -10
  965. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +97 -131
  966. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +265 -51
  967. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +21 -17
  968. package/src/duckdb/src/optimizer/limit_pushdown.cpp +42 -0
  969. package/src/duckdb/src/optimizer/optimizer.cpp +51 -8
  970. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +17 -17
  971. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +22 -4
  972. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +1 -18
  973. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +6 -0
  974. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +4 -2
  975. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +91 -0
  976. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -25
  977. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +1 -0
  978. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +3 -0
  979. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +2 -2
  980. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +8 -2
  981. package/src/duckdb/src/optimizer/rule/join_dependent_filter.cpp +135 -0
  982. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +1 -1
  983. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
  984. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +6 -1
  985. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +7 -6
  986. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
  987. package/src/duckdb/src/optimizer/topn_optimizer.cpp +46 -7
  988. package/src/duckdb/src/parallel/executor.cpp +129 -51
  989. package/src/duckdb/src/parallel/executor_task.cpp +16 -3
  990. package/src/duckdb/src/parallel/meta_pipeline.cpp +98 -29
  991. package/src/duckdb/src/parallel/pipeline.cpp +17 -3
  992. package/src/duckdb/src/parallel/pipeline_executor.cpp +14 -2
  993. package/src/duckdb/src/parallel/pipeline_prepare_finish_event.cpp +34 -0
  994. package/src/duckdb/src/parallel/task_executor.cpp +84 -0
  995. package/src/duckdb/src/parallel/task_scheduler.cpp +94 -16
  996. package/src/duckdb/src/parallel/thread_context.cpp +1 -1
  997. package/src/duckdb/src/parser/expression/function_expression.cpp +14 -0
  998. package/src/duckdb/src/parser/expression/star_expression.cpp +35 -2
  999. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +5 -1
  1000. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +17 -0
  1001. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +37 -28
  1002. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +44 -2
  1003. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +21 -1
  1004. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +29 -25
  1005. package/src/duckdb/src/parser/parser.cpp +41 -1
  1006. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +1 -0
  1007. package/src/duckdb/src/parser/statement/explain_statement.cpp +28 -13
  1008. package/src/duckdb/src/parser/statement/relation_statement.cpp +5 -0
  1009. package/src/duckdb/src/parser/statement/set_statement.cpp +4 -2
  1010. package/src/duckdb/src/parser/statement/transaction_statement.cpp +3 -3
  1011. package/src/duckdb/src/parser/tableref/column_data_ref.cpp +1 -27
  1012. package/src/duckdb/src/parser/tableref/delimgetref.cpp +30 -0
  1013. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  1014. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +35 -29
  1015. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +32 -32
  1016. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +2 -1
  1017. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +17 -0
  1018. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +5 -0
  1019. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +36 -34
  1020. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +30 -14
  1021. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  1022. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +2 -1
  1023. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +27 -19
  1024. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +31 -28
  1025. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +25 -27
  1026. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +1 -1
  1027. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +53 -42
  1028. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +6 -6
  1029. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +1 -1
  1030. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  1031. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1032. package/src/duckdb/src/parser/transform/statement/transform_explain.cpp +38 -3
  1033. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +1 -2
  1034. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +1 -1
  1035. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +1 -1
  1036. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +26 -21
  1037. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +8 -8
  1038. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +5 -2
  1039. package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +6 -4
  1040. package/src/duckdb/src/parser/transform/statement/transform_transaction.cpp +27 -6
  1041. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +8 -9
  1042. package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +11 -12
  1043. package/src/duckdb/src/parser/transform/statement/transform_vacuum.cpp +3 -3
  1044. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -10
  1045. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +1 -1
  1046. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  1047. package/src/duckdb/src/parser/transformer.cpp +11 -7
  1048. package/src/duckdb/src/planner/bind_context.cpp +3 -3
  1049. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +22 -7
  1050. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +3 -3
  1051. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -2
  1052. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +11 -4
  1053. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +9 -54
  1054. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -5
  1055. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -27
  1056. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +7 -7
  1057. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -2
  1058. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +26 -7
  1059. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +5 -0
  1060. package/src/duckdb/src/planner/binder/expression/bind_unpacked_star_expression.cpp +91 -0
  1061. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
  1062. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -8
  1063. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +1 -1
  1064. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +6 -10
  1065. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +14 -10
  1066. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +3 -3
  1067. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +46 -7
  1068. package/src/duckdb/src/planner/binder/statement/bind_call.cpp +13 -20
  1069. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +105 -13
  1070. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -3
  1071. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +75 -55
  1072. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
  1073. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +5 -4
  1074. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
  1075. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +24 -8
  1076. package/src/duckdb/src/planner/binder/statement/bind_explain.cpp +2 -2
  1077. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +5 -105
  1078. package/src/duckdb/src/planner/binder/statement/bind_extension.cpp +2 -2
  1079. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +109 -41
  1080. package/src/duckdb/src/planner/binder/statement/bind_set.cpp +23 -7
  1081. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +4 -1
  1082. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +17 -3
  1083. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -4
  1084. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +8 -6
  1085. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +55 -42
  1086. package/src/duckdb/src/planner/binder/tableref/bind_column_data_ref.cpp +3 -2
  1087. package/src/duckdb/src/planner/binder/tableref/bind_delimgetref.cpp +16 -0
  1088. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +31 -1
  1089. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +6 -0
  1090. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +2 -0
  1091. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +106 -46
  1092. package/src/duckdb/src/planner/binder/tableref/plan_delimgetref.cpp +11 -0
  1093. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +15 -2
  1094. package/src/duckdb/src/planner/binder/tableref/plan_table_function.cpp +4 -0
  1095. package/src/duckdb/src/planner/binder.cpp +172 -15
  1096. package/src/duckdb/src/planner/collation_binding.cpp +99 -0
  1097. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -4
  1098. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +1 -1
  1099. package/src/duckdb/src/planner/expression/bound_case_expression.cpp +1 -1
  1100. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +14 -12
  1101. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
  1102. package/src/duckdb/src/planner/expression/bound_comparison_expression.cpp +1 -1
  1103. package/src/duckdb/src/planner/expression/bound_conjunction_expression.cpp +1 -1
  1104. package/src/duckdb/src/planner/expression/bound_constant_expression.cpp +1 -1
  1105. package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +1 -1
  1106. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -2
  1107. package/src/duckdb/src/planner/expression/bound_lambda_expression.cpp +1 -1
  1108. package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +1 -1
  1109. package/src/duckdb/src/planner/expression/bound_operator_expression.cpp +1 -1
  1110. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +1 -1
  1111. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
  1112. package/src/duckdb/src/planner/expression/bound_subquery_expression.cpp +1 -1
  1113. package/src/duckdb/src/planner/expression/bound_unnest_expression.cpp +1 -1
  1114. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +6 -6
  1115. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1116. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
  1117. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +1 -1
  1118. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +7 -0
  1119. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
  1120. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +26 -22
  1121. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +7 -1
  1122. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
  1123. package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
  1124. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +2 -2
  1125. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +61 -43
  1126. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +2 -2
  1127. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +4 -4
  1128. package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +3 -2
  1129. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +10 -3
  1130. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1131. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +9 -2
  1132. package/src/duckdb/src/planner/expression_binder.cpp +121 -21
  1133. package/src/duckdb/src/planner/expression_iterator.cpp +26 -1
  1134. package/src/duckdb/src/planner/filter/conjunction_filter.cpp +33 -0
  1135. package/src/duckdb/src/planner/filter/constant_filter.cpp +15 -0
  1136. package/src/duckdb/src/planner/filter/null_filter.cpp +22 -0
  1137. package/src/duckdb/src/planner/filter/struct_filter.cpp +16 -0
  1138. package/src/duckdb/src/planner/logical_operator.cpp +24 -7
  1139. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -7
  1140. package/src/duckdb/src/planner/operator/logical_any_join.cpp +5 -2
  1141. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +13 -5
  1142. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +64 -8
  1143. package/src/duckdb/src/planner/operator/logical_cteref.cpp +7 -0
  1144. package/src/duckdb/src/planner/operator/logical_distinct.cpp +6 -5
  1145. package/src/duckdb/src/planner/operator/logical_get.cpp +60 -18
  1146. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +7 -0
  1147. package/src/duckdb/src/planner/operator/logical_order.cpp +7 -4
  1148. package/src/duckdb/src/planner/operator/logical_top_n.cpp +2 -2
  1149. package/src/duckdb/src/planner/operator/logical_vacuum.cpp +1 -1
  1150. package/src/duckdb/src/planner/planner.cpp +2 -3
  1151. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +27 -10
  1152. package/src/duckdb/src/planner/table_filter.cpp +51 -0
  1153. package/src/duckdb/src/storage/arena_allocator.cpp +28 -10
  1154. package/src/duckdb/src/storage/block.cpp +3 -2
  1155. package/src/duckdb/src/storage/buffer/block_handle.cpp +29 -14
  1156. package/src/duckdb/src/storage/buffer/block_manager.cpp +6 -5
  1157. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -1
  1158. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +264 -125
  1159. package/src/duckdb/src/storage/buffer_manager.cpp +5 -1
  1160. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -6
  1161. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +26 -3
  1162. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +21 -9
  1163. package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -24
  1164. package/src/duckdb/src/storage/compression/alp/alp.cpp +6 -11
  1165. package/src/duckdb/src/storage/compression/alprd.cpp +5 -9
  1166. package/src/duckdb/src/storage/compression/bitpacking.cpp +35 -31
  1167. package/src/duckdb/src/storage/compression/chimp/chimp.cpp +6 -8
  1168. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +71 -58
  1169. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +15 -13
  1170. package/src/duckdb/src/storage/compression/fsst.cpp +66 -53
  1171. package/src/duckdb/src/storage/compression/numeric_constant.cpp +4 -5
  1172. package/src/duckdb/src/storage/compression/patas.cpp +6 -17
  1173. package/src/duckdb/src/storage/compression/rle.cpp +20 -18
  1174. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +71 -52
  1175. package/src/duckdb/src/storage/compression/uncompressed.cpp +2 -2
  1176. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +8 -7
  1177. package/src/duckdb/src/storage/data_pointer.cpp +22 -0
  1178. package/src/duckdb/src/storage/data_table.cpp +41 -12
  1179. package/src/duckdb/src/storage/local_storage.cpp +22 -8
  1180. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +33 -17
  1181. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +4 -4
  1182. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +3 -3
  1183. package/src/duckdb/src/storage/partial_block_manager.cpp +19 -8
  1184. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +11 -8
  1185. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +1 -1
  1186. package/src/duckdb/src/storage/serialization/serialize_extension_install_info.cpp +2 -0
  1187. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +3 -3
  1188. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +19 -5
  1189. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +21 -1
  1190. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +4 -2
  1191. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -2
  1192. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +2 -0
  1193. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +8 -4
  1194. package/src/duckdb/src/storage/serialization/serialize_types.cpp +4 -4
  1195. package/src/duckdb/src/storage/single_file_block_manager.cpp +170 -34
  1196. package/src/duckdb/src/storage/standard_buffer_manager.cpp +221 -64
  1197. package/src/duckdb/src/storage/statistics/column_statistics.cpp +4 -3
  1198. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +36 -26
  1199. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +4 -15
  1200. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -8
  1201. package/src/duckdb/src/storage/statistics/struct_stats.cpp +2 -1
  1202. package/src/duckdb/src/storage/storage_info.cpp +34 -9
  1203. package/src/duckdb/src/storage/storage_manager.cpp +147 -74
  1204. package/src/duckdb/src/storage/table/array_column_data.cpp +37 -17
  1205. package/src/duckdb/src/storage/table/chunk_info.cpp +38 -0
  1206. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -6
  1207. package/src/duckdb/src/storage/table/column_data.cpp +252 -31
  1208. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -12
  1209. package/src/duckdb/src/storage/table/column_segment.cpp +63 -34
  1210. package/src/duckdb/src/storage/table/list_column_data.cpp +34 -15
  1211. package/src/duckdb/src/storage/table/row_group.cpp +228 -120
  1212. package/src/duckdb/src/storage/table/row_group_collection.cpp +122 -120
  1213. package/src/duckdb/src/storage/table/row_version_manager.cpp +27 -1
  1214. package/src/duckdb/src/storage/table/scan_state.cpp +101 -18
  1215. package/src/duckdb/src/storage/table/standard_column_data.cpp +20 -34
  1216. package/src/duckdb/src/storage/table/struct_column_data.cpp +39 -42
  1217. package/src/duckdb/src/storage/table/table_statistics.cpp +2 -1
  1218. package/src/duckdb/src/storage/table/update_segment.cpp +9 -8
  1219. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -2
  1220. package/src/duckdb/src/storage/table_index_list.cpp +8 -7
  1221. package/src/duckdb/src/storage/temporary_file_manager.cpp +11 -9
  1222. package/src/duckdb/src/storage/temporary_memory_manager.cpp +227 -39
  1223. package/src/duckdb/src/storage/wal_replay.cpp +68 -28
  1224. package/src/duckdb/src/storage/write_ahead_log.cpp +56 -47
  1225. package/src/duckdb/src/transaction/cleanup_state.cpp +9 -1
  1226. package/src/duckdb/src/transaction/commit_state.cpp +7 -170
  1227. package/src/duckdb/src/transaction/duck_transaction.cpp +87 -19
  1228. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +65 -10
  1229. package/src/duckdb/src/transaction/meta_transaction.cpp +18 -3
  1230. package/src/duckdb/src/transaction/transaction_context.cpp +21 -17
  1231. package/src/duckdb/src/transaction/undo_buffer.cpp +20 -14
  1232. package/src/duckdb/src/transaction/wal_write_state.cpp +292 -0
  1233. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +0 -1
  1234. package/src/duckdb/third_party/brotli/common/brotli_constants.h +204 -0
  1235. package/src/duckdb/third_party/brotli/common/brotli_platform.h +543 -0
  1236. package/src/duckdb/third_party/brotli/common/constants.cpp +17 -0
  1237. package/src/duckdb/third_party/brotli/common/context.cpp +156 -0
  1238. package/src/duckdb/third_party/brotli/common/context.h +110 -0
  1239. package/src/duckdb/third_party/brotli/common/dictionary.cpp +5912 -0
  1240. package/src/duckdb/third_party/brotli/common/dictionary.h +60 -0
  1241. package/src/duckdb/third_party/brotli/common/platform.cpp +24 -0
  1242. package/src/duckdb/third_party/brotli/common/shared_dictionary.cpp +517 -0
  1243. package/src/duckdb/third_party/brotli/common/shared_dictionary_internal.h +71 -0
  1244. package/src/duckdb/third_party/brotli/common/transform.cpp +287 -0
  1245. package/src/duckdb/third_party/brotli/common/transform.h +77 -0
  1246. package/src/duckdb/third_party/brotli/common/version.h +51 -0
  1247. package/src/duckdb/third_party/brotli/dec/bit_reader.cpp +74 -0
  1248. package/src/duckdb/third_party/brotli/dec/bit_reader.h +419 -0
  1249. package/src/duckdb/third_party/brotli/dec/decode.cpp +2758 -0
  1250. package/src/duckdb/third_party/brotli/dec/huffman.cpp +338 -0
  1251. package/src/duckdb/third_party/brotli/dec/huffman.h +118 -0
  1252. package/src/duckdb/third_party/brotli/dec/prefix.h +733 -0
  1253. package/src/duckdb/third_party/brotli/dec/state.cpp +178 -0
  1254. package/src/duckdb/third_party/brotli/dec/state.h +386 -0
  1255. package/src/duckdb/third_party/brotli/enc/backward_references.cpp +3775 -0
  1256. package/src/duckdb/third_party/brotli/enc/backward_references.h +36 -0
  1257. package/src/duckdb/third_party/brotli/enc/backward_references_hq.cpp +935 -0
  1258. package/src/duckdb/third_party/brotli/enc/backward_references_hq.h +92 -0
  1259. package/src/duckdb/third_party/brotli/enc/bit_cost.cpp +410 -0
  1260. package/src/duckdb/third_party/brotli/enc/bit_cost.h +60 -0
  1261. package/src/duckdb/third_party/brotli/enc/block_splitter.cpp +1653 -0
  1262. package/src/duckdb/third_party/brotli/enc/block_splitter.h +48 -0
  1263. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp +1431 -0
  1264. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.h +85 -0
  1265. package/src/duckdb/third_party/brotli/enc/brotli_hash.h +4352 -0
  1266. package/src/duckdb/third_party/brotli/enc/brotli_params.h +47 -0
  1267. package/src/duckdb/third_party/brotli/enc/cluster.cpp +1025 -0
  1268. package/src/duckdb/third_party/brotli/enc/cluster.h +1017 -0
  1269. package/src/duckdb/third_party/brotli/enc/command.cpp +24 -0
  1270. package/src/duckdb/third_party/brotli/enc/command.h +187 -0
  1271. package/src/duckdb/third_party/brotli/enc/compound_dictionary.cpp +209 -0
  1272. package/src/duckdb/third_party/brotli/enc/compound_dictionary.h +75 -0
  1273. package/src/duckdb/third_party/brotli/enc/compress_fragment.cpp +796 -0
  1274. package/src/duckdb/third_party/brotli/enc/compress_fragment.h +82 -0
  1275. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp +653 -0
  1276. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h +68 -0
  1277. package/src/duckdb/third_party/brotli/enc/dictionary_hash.cpp +1844 -0
  1278. package/src/duckdb/third_party/brotli/enc/dictionary_hash.h +21 -0
  1279. package/src/duckdb/third_party/brotli/enc/encode.cpp +1990 -0
  1280. package/src/duckdb/third_party/brotli/enc/encoder_dict.cpp +636 -0
  1281. package/src/duckdb/third_party/brotli/enc/encoder_dict.h +153 -0
  1282. package/src/duckdb/third_party/brotli/enc/entropy_encode.cpp +500 -0
  1283. package/src/duckdb/third_party/brotli/enc/entropy_encode.h +119 -0
  1284. package/src/duckdb/third_party/brotli/enc/entropy_encode_static.h +538 -0
  1285. package/src/duckdb/third_party/brotli/enc/fast_log.cpp +101 -0
  1286. package/src/duckdb/third_party/brotli/enc/fast_log.h +63 -0
  1287. package/src/duckdb/third_party/brotli/enc/find_match_length.h +68 -0
  1288. package/src/duckdb/third_party/brotli/enc/histogram.cpp +96 -0
  1289. package/src/duckdb/third_party/brotli/enc/histogram.h +210 -0
  1290. package/src/duckdb/third_party/brotli/enc/literal_cost.cpp +176 -0
  1291. package/src/duckdb/third_party/brotli/enc/literal_cost.h +28 -0
  1292. package/src/duckdb/third_party/brotli/enc/memory.cpp +190 -0
  1293. package/src/duckdb/third_party/brotli/enc/memory.h +127 -0
  1294. package/src/duckdb/third_party/brotli/enc/metablock.cpp +1225 -0
  1295. package/src/duckdb/third_party/brotli/enc/metablock.h +102 -0
  1296. package/src/duckdb/third_party/brotli/enc/prefix.h +50 -0
  1297. package/src/duckdb/third_party/brotli/enc/quality.h +202 -0
  1298. package/src/duckdb/third_party/brotli/enc/ringbuffer.h +164 -0
  1299. package/src/duckdb/third_party/brotli/enc/state.h +106 -0
  1300. package/src/duckdb/third_party/brotli/enc/static_dict.cpp +538 -0
  1301. package/src/duckdb/third_party/brotli/enc/static_dict.h +37 -0
  1302. package/src/duckdb/third_party/brotli/enc/static_dict_lut.h +5862 -0
  1303. package/src/duckdb/third_party/brotli/enc/utf8_util.cpp +81 -0
  1304. package/src/duckdb/third_party/brotli/enc/utf8_util.h +29 -0
  1305. package/src/duckdb/third_party/brotli/enc/write_bits.h +84 -0
  1306. package/src/duckdb/third_party/brotli/include/brotli/decode.h +405 -0
  1307. package/src/duckdb/third_party/brotli/include/brotli/encode.h +489 -0
  1308. package/src/duckdb/third_party/brotli/include/brotli/port.h +238 -0
  1309. package/src/duckdb/third_party/brotli/include/brotli/shared_dictionary.h +96 -0
  1310. package/src/duckdb/third_party/brotli/include/brotli/types.h +83 -0
  1311. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +20 -4
  1312. package/src/duckdb/third_party/fmt/include/fmt/format.h +54 -10
  1313. package/src/duckdb/third_party/fsst/fsst.h +2 -2
  1314. package/src/duckdb/third_party/fsst/libfsst.hpp +2 -2
  1315. package/src/duckdb/third_party/httplib/httplib.hpp +6763 -5580
  1316. package/src/duckdb/third_party/hyperloglog/hyperloglog.cpp +13 -30
  1317. package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +8 -2
  1318. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
  1319. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +22 -9
  1320. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1041 -554
  1321. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  1322. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +2 -1
  1323. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +21605 -21752
  1324. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +538 -299
  1325. package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +1 -0
  1326. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +36 -12
  1327. package/src/duckdb/third_party/mbedtls/library/md.cpp +6 -6
  1328. package/src/duckdb/third_party/mbedtls/library/sha1.cpp +2 -0
  1329. package/src/duckdb/third_party/mbedtls/library/sha256.cpp +3 -0
  1330. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +99 -47
  1331. package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
  1332. package/src/duckdb/third_party/re2/re2/prog.cc +2 -2
  1333. package/src/duckdb/third_party/snappy/snappy-internal.h +398 -0
  1334. package/src/duckdb/third_party/snappy/snappy-sinksource.cc +111 -9
  1335. package/src/duckdb/third_party/snappy/snappy-sinksource.h +158 -0
  1336. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +523 -3
  1337. package/src/duckdb/third_party/snappy/snappy-stubs-public.h +34 -1
  1338. package/src/duckdb/third_party/snappy/snappy.cc +2626 -0
  1339. package/src/duckdb/third_party/snappy/snappy.h +223 -0
  1340. package/src/duckdb/third_party/snappy/snappy_version.hpp +11 -0
  1341. package/src/duckdb/third_party/utf8proc/include/utf8proc.hpp +69 -101
  1342. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +53 -0
  1343. package/src/duckdb/third_party/utf8proc/utf8proc.cpp +627 -678
  1344. package/src/duckdb/third_party/utf8proc/utf8proc_data.cpp +15008 -12868
  1345. package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +185 -29
  1346. package/src/duckdb/ub_extension_json_json_functions.cpp +6 -0
  1347. package/src/duckdb/ub_src_catalog_default.cpp +4 -0
  1348. package/src/duckdb/ub_src_common.cpp +7 -1
  1349. package/src/duckdb/ub_src_common_arrow.cpp +10 -0
  1350. package/src/duckdb/ub_src_common_enums.cpp +2 -0
  1351. package/src/duckdb/ub_src_common_tree_renderer.cpp +10 -0
  1352. package/src/duckdb/ub_src_common_types.cpp +2 -0
  1353. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +4 -0
  1354. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +2 -0
  1355. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +2 -0
  1356. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +2 -4
  1357. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +2 -0
  1358. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
  1359. package/src/duckdb/ub_src_execution_index_art.cpp +5 -3
  1360. package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +2 -0
  1361. package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
  1362. package/src/duckdb/ub_src_function.cpp +4 -0
  1363. package/src/duckdb/ub_src_function_cast.cpp +2 -0
  1364. package/src/duckdb/ub_src_function_scalar_generic.cpp +4 -0
  1365. package/src/duckdb/ub_src_function_scalar_list.cpp +0 -2
  1366. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  1367. package/src/duckdb/ub_src_function_table.cpp +2 -0
  1368. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  1369. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  1370. package/src/duckdb/ub_src_main.cpp +4 -0
  1371. package/src/duckdb/ub_src_main_buffered_data.cpp +4 -0
  1372. package/src/duckdb/ub_src_main_capi.cpp +10 -0
  1373. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  1374. package/src/duckdb/ub_src_main_relation.cpp +2 -0
  1375. package/src/duckdb/ub_src_main_secret.cpp +2 -0
  1376. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1377. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +2 -0
  1378. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1379. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1380. package/src/duckdb/ub_src_parallel.cpp +4 -0
  1381. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  1382. package/src/duckdb/ub_src_planner.cpp +2 -0
  1383. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
  1384. package/src/duckdb/ub_src_planner_binder_tableref.cpp +4 -0
  1385. package/src/duckdb/ub_src_storage_statistics.cpp +0 -2
  1386. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1387. package/test/columns.test.ts +1 -1
  1388. package/test/prepare.test.ts +1 -1
  1389. package/test/test_all_types.test.ts +1 -1
@@ -0,0 +1,3775 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Function to find backward reference copies. */
8
+
9
+ #include "backward_references.h"
10
+
11
+ #include <brotli/types.h>
12
+
13
+ #include "../common/brotli_constants.h"
14
+ #include "../common/dictionary.h"
15
+ #include "../common/brotli_platform.h"
16
+ #include "command.h"
17
+ #include "compound_dictionary.h"
18
+ #include "dictionary_hash.h"
19
+ #include "encoder_dict.h"
20
+ #include "memory.h"
21
+ #include "quality.h"
22
+
23
+ using namespace duckdb_brotli;
24
+
25
+ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
26
+ size_t max_distance,
27
+ const int* dist_cache) {
28
+ if (distance <= max_distance) {
29
+ size_t distance_plus_3 = distance + 3;
30
+ size_t offset0 = distance_plus_3 - (size_t)dist_cache[0];
31
+ size_t offset1 = distance_plus_3 - (size_t)dist_cache[1];
32
+ if (distance == (size_t)dist_cache[0]) {
33
+ return 0;
34
+ } else if (distance == (size_t)dist_cache[1]) {
35
+ return 1;
36
+ } else if (offset0 < 7) {
37
+ return (0x9750468 >> (4 * offset0)) & 0xF;
38
+ } else if (offset1 < 7) {
39
+ return (0xFDB1ACE >> (4 * offset1)) & 0xF;
40
+ } else if (distance == (size_t)dist_cache[2]) {
41
+ return 2;
42
+ } else if (distance == (size_t)dist_cache[3]) {
43
+ return 3;
44
+ }
45
+ }
46
+ return distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
47
+ }
48
+
49
+ #define EXPAND_CAT(a, b) CAT(a, b) /* extra level so that a and b are macro-expanded before CAT pastes them */
50
+ #define CAT(a, b) a ## b /* raw token paste */
51
+ #define FN(X) EXPAND_CAT(X, HASHER()) /* X followed by the current hasher name, e.g. FN(Store) -> StoreH2 while HASHER() is H2 */
52
+ #define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER())) /* X followed by prefix and hasher name, e.g. XNH2 */
53
+
54
+ #define PREFIX() N /* pasted between the function name and the hasher name by EXPORT_FN */
55
+ #define ENABLE_COMPOUND_DICTIONARY 0 /* tested with a plain if () in the functions below; 0 skips their LookupCompoundDictionaryMatch calls */
56
+
57
+ #define HASHER() H2 /* hasher used by the instantiation that follows */
58
+ /* NOLINTNEXTLINE(build/include) */
59
+ /* NOLINT(build/header_guard) */
60
+ /* Copyright 2013 Google Inc. All Rights Reserved.
61
+
62
+ Distributed under MIT license.
63
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
64
+ */
65
+
66
+ /* template parameters: EXPORT_FN, FN
+
+    Instantiation for HASHER() == H2 and PREFIX() == N: the function name
+    expands to CreateBackwardReferencesNH2 and every FN(X) to XH2.
+
+    Walks the ring buffer from position up to position + num_bytes and
+    converts it into insert-and-copy commands. At each step the hasher is
+    asked for the longest match; an accepted match may be deferred by one
+    byte at a time while the match starting at the next byte scores
+    sufficiently better.
+
+    commands:        output cursor, one Command is written per accepted match.
+    num_commands:    incremented by the number of commands written.
+    num_literals:    incremented by the insert length of every written command.
+    last_insert_len: read as the starting insert length; on return holds the
+                     literals that are not yet covered by a command.
+    dist_cache:      the four last distances; rotated in place whenever a
+                     command uses a non-zero distance code and its distance
+                     does not exceed dictionary_start + gap.
+ */
67
+
68
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
69
+ size_t num_bytes, size_t position,
70
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
71
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
72
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
73
+ Command* commands, size_t* num_commands, size_t* num_literals) {
74
+ HASHER()* privat = &hasher->privat.FN(_); /* expands to: H2* privat = &hasher->privat._H2 */
75
+ /* Set maximum distance, see section 9.1. of the spec. */
76
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
77
+ const size_t position_offset = params->stream_offset;
78
+
79
+ const Command* const orig_commands = commands;
80
+ size_t insert_length = *last_insert_len;
81
+ const size_t pos_end = position + num_bytes;
82
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
83
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* upper bound for the StoreRange calls below */
84
+
85
+ /* For speed up heuristics for random data. */
86
+ const size_t random_heuristics_window_size =
87
+ LiteralSpreeLengthForSparseSearch(params);
88
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
89
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start wherever a distance limit is passed on */
90
+
91
+ /* Minimum score to accept a backward reference. */
92
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
93
+
94
+ FN(PrepareDistanceCache)(privat, dist_cache);
95
+
96
+ while (position + FN(HashTypeLength)() < pos_end) { /* each pass either emits one command or consumes literal bytes */
97
+ size_t max_length = pos_end - position;
98
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
99
+ size_t dictionary_start = BROTLI_MIN(size_t,
100
+ position + position_offset, max_backward_limit);
101
+ HasherSearchResult sr;
102
+ int dict_id = 0;
103
+ uint8_t p1 = 0;
104
+ uint8_t p2 = 0;
105
+ if (params->dictionary.contextual.context_based) { /* choose the dictionary from the two preceding bytes */
106
+ p1 = position >= 1 ?
107
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
108
+ p2 = position >= 2 ?
109
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
110
+ dict_id = params->dictionary.contextual.context_map[
111
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
112
+ }
113
+ sr.len = 0;
114
+ sr.len_code_delta = 0;
115
+ sr.distance = 0;
116
+ sr.score = kMinScore; /* the result only counts if the search raises the score above this */
117
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
118
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
119
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
120
+ if (ENABLE_COMPOUND_DICTIONARY) { /* 0 for this instantiation, so never taken */
121
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
122
+ ringbuffer_mask, dist_cache, position, max_length,
123
+ dictionary_start, params->dist.max_distance, &sr);
124
+ }
125
+ if (sr.score > kMinScore) {
126
+ /* Found a match. Let's look for something even better ahead. */
127
+ int delayed_backward_references_in_row = 0;
128
+ --max_length; /* the candidate examined next starts one byte later */
129
+ for (;; --max_length) {
130
+ const score_t cost_diff_lazy = 175;
131
+ HasherSearchResult sr2;
132
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
133
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* NOTE(review): presumably the seeded len is a lower bound the hasher has to beat - confirm in FindLongestMatch */
134
+ sr2.len_code_delta = 0;
135
+ sr2.distance = 0;
136
+ sr2.score = kMinScore;
137
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
138
+ dictionary_start = BROTLI_MIN(size_t,
139
+ position + 1 + position_offset, max_backward_limit);
140
+ if (params->dictionary.contextual.context_based) { /* shift the two-byte context forward by one byte */
141
+ p2 = p1;
142
+ p1 = ringbuffer[position & ringbuffer_mask];
143
+ dict_id = params->dictionary.contextual.context_map[
144
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
145
+ }
146
+ FN(FindLongestMatch)(privat,
147
+ params->dictionary.contextual.dict[dict_id],
148
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
149
+ max_distance, dictionary_start + gap, params->dist.max_distance,
150
+ &sr2);
151
+ if (ENABLE_COMPOUND_DICTIONARY) { /* 0 for this instantiation, so never taken */
152
+ LookupCompoundDictionaryMatch(
153
+ &params->dictionary.compound, ringbuffer,
154
+ ringbuffer_mask, dist_cache, position + 1, max_length,
155
+ dictionary_start, params->dist.max_distance, &sr2);
156
+ }
157
+ if (sr2.score >= sr.score + cost_diff_lazy) { /* the later match has to win by at least cost_diff_lazy */
158
+ /* Ok, let's just write one byte for now and start a match from the
159
+ next byte. */
160
+ ++position;
161
+ ++insert_length;
162
+ sr = sr2;
163
+ if (++delayed_backward_references_in_row < 4 &&
164
+ position + FN(HashTypeLength)() < pos_end) { /* give up after 4 deferrals in a row or when too few bytes remain */
165
+ continue;
166
+ }
167
+ }
168
+ break;
169
+ }
170
+ apply_random_heuristics =
171
+ position + 2 * sr.len + random_heuristics_window_size;
172
+ dictionary_start = BROTLI_MIN(size_t,
173
+ position + position_offset, max_backward_limit);
174
+ {
175
+ /* The first 16 codes are special short-codes,
176
+ and the minimum offset is 1. */
177
+ size_t distance_code = ComputeDistanceCode(
178
+ sr.distance, dictionary_start + gap, dist_cache);
179
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* code 0 means sr.distance is already dist_cache[0] */
180
+ dist_cache[3] = dist_cache[2];
181
+ dist_cache[2] = dist_cache[1];
182
+ dist_cache[1] = dist_cache[0];
183
+ dist_cache[0] = (int)sr.distance;
184
+ FN(PrepareDistanceCache)(privat, dist_cache);
185
+ }
186
+ InitCommand(commands++, &params->dist, insert_length,
187
+ sr.len, sr.len_code_delta, distance_code);
188
+ }
189
+ *num_literals += insert_length; /* the pending literals now belong to the command just written */
190
+ insert_length = 0;
191
+ /* Put the hash keys into the table, if there are enough bytes left.
192
+ Depending on the hasher implementation, it can push all positions
193
+ in the given range or only a subset of them.
194
+ Avoid hash poisoning with RLE data. */
195
+ {
196
+ size_t range_start = position + 2;
197
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
198
+ if (sr.distance < (sr.len >> 2)) { /* short-period repeat: hash at most the last 4 * distance bytes of the match */
199
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
200
+ range_start, position + sr.len - (sr.distance << 2)));
201
+ }
202
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
203
+ range_end);
204
+ }
205
+ position += sr.len;
206
+ } else {
207
+ ++insert_length;
208
+ ++position;
209
+ /* If we have not seen matches for a long time, we can skip some
210
+ match lookups. Unsuccessful match lookups are very very expensive
211
+ and this kind of a heuristic speeds up compression quite
212
+ a lot. */
213
+ if (position > apply_random_heuristics) {
214
+ /* Going through uncompressible data, jump. */
215
+ if (position >
216
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
217
+ /* It is quite a long time since we saw a copy, so we assume
218
+ that this data is not compressible, and store hashes less
219
+ often. Hashes of non compressible data are less likely to
220
+ turn out to be useful in the future, too, so we store less of
221
+ them to not to flood out the hash table of good compressible
222
+ data. */
223
+ const size_t kMargin =
224
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
225
+ size_t pos_jump =
226
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
227
+ for (; position < pos_jump; position += 4) { /* hash every 4th position; all 4 bytes are counted as literals */
228
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
229
+ insert_length += 4;
230
+ }
231
+ } else {
232
+ const size_t kMargin =
233
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
234
+ size_t pos_jump =
235
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
236
+ for (; position < pos_jump; position += 2) { /* hash every 2nd position; both bytes are counted as literals */
237
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
238
+ insert_length += 2;
239
+ }
240
+ }
241
+ }
242
+ }
243
+ }
244
+ insert_length += pos_end - position; /* bytes after the last command stay pending as literals */
245
+ *last_insert_len = insert_length;
246
+ *num_commands += (size_t)(commands - orig_commands);
247
+ }
248
+ #undef HASHER
249
+
250
+ #define HASHER() H3
251
+ /* NOLINTNEXTLINE(build/include) */
252
+ /* NOLINT(build/header_guard) */
253
+ /* Copyright 2013 Google Inc. All Rights Reserved.
254
+
255
+ Distributed under MIT license.
256
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
257
+ */
258
+
259
+ /* template parameters: EXPORT_FN, FN
+
+    Instantiation for HASHER() == H3 and PREFIX() == N: the function name
+    expands to CreateBackwardReferencesNH3 and every FN(X) to XH3.
+
+    Walks the ring buffer from position up to position + num_bytes and
+    converts it into insert-and-copy commands. At each step the hasher is
+    asked for the longest match; an accepted match may be deferred by one
+    byte at a time while the match starting at the next byte scores
+    sufficiently better.
+
+    commands:        output cursor, one Command is written per accepted match.
+    num_commands:    incremented by the number of commands written.
+    num_literals:    incremented by the insert length of every written command.
+    last_insert_len: read as the starting insert length; on return holds the
+                     literals that are not yet covered by a command.
+    dist_cache:      the four last distances; rotated in place whenever a
+                     command uses a non-zero distance code and its distance
+                     does not exceed dictionary_start + gap.
+ */
260
+
261
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
262
+ size_t num_bytes, size_t position,
263
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
264
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
265
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
266
+ Command* commands, size_t* num_commands, size_t* num_literals) {
267
+ HASHER()* privat = &hasher->privat.FN(_); /* expands to: H3* privat = &hasher->privat._H3 */
268
+ /* Set maximum distance, see section 9.1. of the spec. */
269
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
270
+ const size_t position_offset = params->stream_offset;
271
+
272
+ const Command* const orig_commands = commands;
273
+ size_t insert_length = *last_insert_len;
274
+ const size_t pos_end = position + num_bytes;
275
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
276
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* upper bound for the StoreRange calls below */
277
+
278
+ /* For speed up heuristics for random data. */
279
+ const size_t random_heuristics_window_size =
280
+ LiteralSpreeLengthForSparseSearch(params);
281
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
282
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start wherever a distance limit is passed on */
283
+
284
+ /* Minimum score to accept a backward reference. */
285
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
286
+
287
+ FN(PrepareDistanceCache)(privat, dist_cache);
288
+
289
+ while (position + FN(HashTypeLength)() < pos_end) { /* each pass either emits one command or consumes literal bytes */
290
+ size_t max_length = pos_end - position;
291
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
292
+ size_t dictionary_start = BROTLI_MIN(size_t,
293
+ position + position_offset, max_backward_limit);
294
+ HasherSearchResult sr;
295
+ int dict_id = 0;
296
+ uint8_t p1 = 0;
297
+ uint8_t p2 = 0;
298
+ if (params->dictionary.contextual.context_based) { /* choose the dictionary from the two preceding bytes */
299
+ p1 = position >= 1 ?
300
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
301
+ p2 = position >= 2 ?
302
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
303
+ dict_id = params->dictionary.contextual.context_map[
304
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
305
+ }
306
+ sr.len = 0;
307
+ sr.len_code_delta = 0;
308
+ sr.distance = 0;
309
+ sr.score = kMinScore; /* the result only counts if the search raises the score above this */
310
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
311
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
312
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
313
+ if (ENABLE_COMPOUND_DICTIONARY) { /* 0 for this instantiation, so never taken */
314
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
315
+ ringbuffer_mask, dist_cache, position, max_length,
316
+ dictionary_start, params->dist.max_distance, &sr);
317
+ }
318
+ if (sr.score > kMinScore) {
319
+ /* Found a match. Let's look for something even better ahead. */
320
+ int delayed_backward_references_in_row = 0;
321
+ --max_length; /* the candidate examined next starts one byte later */
322
+ for (;; --max_length) {
323
+ const score_t cost_diff_lazy = 175;
324
+ HasherSearchResult sr2;
325
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
326
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* NOTE(review): presumably the seeded len is a lower bound the hasher has to beat - confirm in FindLongestMatch */
327
+ sr2.len_code_delta = 0;
328
+ sr2.distance = 0;
329
+ sr2.score = kMinScore;
330
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
331
+ dictionary_start = BROTLI_MIN(size_t,
332
+ position + 1 + position_offset, max_backward_limit);
333
+ if (params->dictionary.contextual.context_based) { /* shift the two-byte context forward by one byte */
334
+ p2 = p1;
335
+ p1 = ringbuffer[position & ringbuffer_mask];
336
+ dict_id = params->dictionary.contextual.context_map[
337
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
338
+ }
339
+ FN(FindLongestMatch)(privat,
340
+ params->dictionary.contextual.dict[dict_id],
341
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
342
+ max_distance, dictionary_start + gap, params->dist.max_distance,
343
+ &sr2);
344
+ if (ENABLE_COMPOUND_DICTIONARY) { /* 0 for this instantiation, so never taken */
345
+ LookupCompoundDictionaryMatch(
346
+ &params->dictionary.compound, ringbuffer,
347
+ ringbuffer_mask, dist_cache, position + 1, max_length,
348
+ dictionary_start, params->dist.max_distance, &sr2);
349
+ }
350
+ if (sr2.score >= sr.score + cost_diff_lazy) { /* the later match has to win by at least cost_diff_lazy */
351
+ /* Ok, let's just write one byte for now and start a match from the
352
+ next byte. */
353
+ ++position;
354
+ ++insert_length;
355
+ sr = sr2;
356
+ if (++delayed_backward_references_in_row < 4 &&
357
+ position + FN(HashTypeLength)() < pos_end) { /* give up after 4 deferrals in a row or when too few bytes remain */
358
+ continue;
359
+ }
360
+ }
361
+ break;
362
+ }
363
+ apply_random_heuristics =
364
+ position + 2 * sr.len + random_heuristics_window_size;
365
+ dictionary_start = BROTLI_MIN(size_t,
366
+ position + position_offset, max_backward_limit);
367
+ {
368
+ /* The first 16 codes are special short-codes,
369
+ and the minimum offset is 1. */
370
+ size_t distance_code = ComputeDistanceCode(
371
+ sr.distance, dictionary_start + gap, dist_cache);
372
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* code 0 means sr.distance is already dist_cache[0] */
373
+ dist_cache[3] = dist_cache[2];
374
+ dist_cache[2] = dist_cache[1];
375
+ dist_cache[1] = dist_cache[0];
376
+ dist_cache[0] = (int)sr.distance;
377
+ FN(PrepareDistanceCache)(privat, dist_cache);
378
+ }
379
+ InitCommand(commands++, &params->dist, insert_length,
380
+ sr.len, sr.len_code_delta, distance_code);
381
+ }
382
+ *num_literals += insert_length; /* the pending literals now belong to the command just written */
383
+ insert_length = 0;
384
+ /* Put the hash keys into the table, if there are enough bytes left.
385
+ Depending on the hasher implementation, it can push all positions
386
+ in the given range or only a subset of them.
387
+ Avoid hash poisoning with RLE data. */
388
+ {
389
+ size_t range_start = position + 2;
390
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
391
+ if (sr.distance < (sr.len >> 2)) { /* short-period repeat: hash at most the last 4 * distance bytes of the match */
392
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
393
+ range_start, position + sr.len - (sr.distance << 2)));
394
+ }
395
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
396
+ range_end);
397
+ }
398
+ position += sr.len;
399
+ } else {
400
+ ++insert_length;
401
+ ++position;
402
+ /* If we have not seen matches for a long time, we can skip some
403
+ match lookups. Unsuccessful match lookups are very very expensive
404
+ and this kind of a heuristic speeds up compression quite
405
+ a lot. */
406
+ if (position > apply_random_heuristics) {
407
+ /* Going through uncompressible data, jump. */
408
+ if (position >
409
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
410
+ /* It is quite a long time since we saw a copy, so we assume
411
+ that this data is not compressible, and store hashes less
412
+ often. Hashes of non compressible data are less likely to
413
+ turn out to be useful in the future, too, so we store less of
414
+ them to not to flood out the hash table of good compressible
415
+ data. */
416
+ const size_t kMargin =
417
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
418
+ size_t pos_jump =
419
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
420
+ for (; position < pos_jump; position += 4) { /* hash every 4th position; all 4 bytes are counted as literals */
421
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
422
+ insert_length += 4;
423
+ }
424
+ } else {
425
+ const size_t kMargin =
426
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
427
+ size_t pos_jump =
428
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
429
+ for (; position < pos_jump; position += 2) { /* hash every 2nd position; both bytes are counted as literals */
430
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
431
+ insert_length += 2;
432
+ }
433
+ }
434
+ }
435
+ }
436
+ }
437
+ insert_length += pos_end - position; /* bytes after the last command stay pending as literals */
438
+ *last_insert_len = insert_length;
439
+ *num_commands += (size_t)(commands - orig_commands);
440
+ }
441
+ #undef HASHER /* ends the previous HASHER() binding */
442
+
443
+ #define HASHER() H4 /* HASHER() expands to H4 from here on; the CreateBackwardReferences definition below uses it as the type of `privat` */
444
+ /* NOLINTNEXTLINE(build/include) */
445
+ /* NOLINT(build/header_guard) */
446
+ /* Copyright 2013 Google Inc. All Rights Reserved.
447
+
448
+ Distributed under MIT license.
449
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
450
+ */
451
+
452
+ /* template parameters: EXPORT_FN, FN */
453
+
454
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( /* Scans [position, position + num_bytes) of ringbuffer and fills `commands` with one entry per accepted backward reference; instantiated per hasher through EXPORT_FN, FN and HASHER. */
455
+ size_t num_bytes, size_t position, /* length and start offset of the range to scan */
456
+ const uint8_t* ringbuffer, size_t ringbuffer_mask, /* input bytes; indices are ANDed with ringbuffer_mask before use */
457
+ ContextLut literal_context_lut, const BrotliEncoderParams* params, /* table handed to BROTLI_CONTEXT; encoder settings read here: lgwin, stream_offset, quality, dist, dictionary */
458
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len, /* hasher state; recent distances (slots 0..3 are rewritten here); in/out count of pending literals */
459
+ Command* commands, size_t* num_commands, size_t* num_literals) { /* output slots written via commands++; both counters are added to, never reset */
460
+ HASHER()* privat = &hasher->privat.FN(_); /* hasher state member selected by FN(_) */
461
+ /* Set maximum distance, see section 9.1. of the spec. */
462
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
463
+ const size_t position_offset = params->stream_offset; /* added to position when computing dictionary_start */
464
+
465
+ const Command* const orig_commands = commands; /* kept to count the commands emitted by this call */
466
+ size_t insert_length = *last_insert_len; /* literals pending since the previous command */
467
+ const size_t pos_end = position + num_bytes;
468
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ? /* upper bound for StoreRange below; equals position when fewer than StoreLookahead() bytes are given */
469
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
470
+
471
+ /* For speed up heuristics for random data. */
472
+ const size_t random_heuristics_window_size =
473
+ LiteralSpreeLengthForSparseSearch(params);
474
+ size_t apply_random_heuristics = position + random_heuristics_window_size; /* once position passes this, the no-match branch starts skipping ahead */
475
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start wherever a distance limit is needed */
476
+
477
+ /* Minimum score to accept a backward reference. */
478
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
479
+
480
+ FN(PrepareDistanceCache)(privat, dist_cache);
481
+
482
+ while (position + FN(HashTypeLength)() < pos_end) { /* runs while more than HashTypeLength() bytes remain */
483
+ size_t max_length = pos_end - position; /* bytes left in the range */
484
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
485
+ size_t dictionary_start = BROTLI_MIN(size_t,
486
+ position + position_offset, max_backward_limit);
487
+ HasherSearchResult sr; /* best match found at the current position */
488
+ int dict_id = 0; /* defaults apply when the dictionary is not context based */
489
+ uint8_t p1 = 0;
490
+ uint8_t p2 = 0;
491
+ if (params->dictionary.contextual.context_based) { /* choose the dictionary from the two preceding bytes (0 when not available) */
492
+ p1 = position >= 1 ?
493
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
494
+ p2 = position >= 2 ?
495
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
496
+ dict_id = params->dictionary.contextual.context_map[
497
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
498
+ }
499
+ sr.len = 0; /* start from an empty result; only a score above kMinScore is accepted below */
500
+ sr.len_code_delta = 0;
501
+ sr.distance = 0;
502
+ sr.score = kMinScore;
503
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
504
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
505
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
506
+ if (ENABLE_COMPOUND_DICTIONARY) {
507
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
508
+ ringbuffer_mask, dist_cache, position, max_length,
509
+ dictionary_start, params->dist.max_distance, &sr);
510
+ }
511
+ if (sr.score > kMinScore) {
512
+ /* Found a match. Let's look for something even better ahead. */
513
+ int delayed_backward_references_in_row = 0; /* consecutive one-byte deferrals; deferring stops once this reaches 4 */
514
+ --max_length; /* the next-byte candidate has one byte less available */
515
+ for (;; --max_length) {
516
+ const score_t cost_diff_lazy = 175; /* margin by which the next-byte match must outscore the current one */
517
+ HasherSearchResult sr2;
518
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ? /* NOTE(review): a nonzero seed presumably makes the hasher ignore matches that are not longer - confirm in FindLongestMatch */
519
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
520
+ sr2.len_code_delta = 0;
521
+ sr2.distance = 0;
522
+ sr2.score = kMinScore;
523
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit); /* same limits as before, recomputed for position + 1 */
524
+ dictionary_start = BROTLI_MIN(size_t,
525
+ position + 1 + position_offset, max_backward_limit);
526
+ if (params->dictionary.contextual.context_based) {
527
+ p2 = p1; /* slide the two-byte context forward by one byte */
528
+ p1 = ringbuffer[position & ringbuffer_mask];
529
+ dict_id = params->dictionary.contextual.context_map[
530
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
531
+ }
532
+ FN(FindLongestMatch)(privat,
533
+ params->dictionary.contextual.dict[dict_id],
534
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
535
+ max_distance, dictionary_start + gap, params->dist.max_distance,
536
+ &sr2);
537
+ if (ENABLE_COMPOUND_DICTIONARY) {
538
+ LookupCompoundDictionaryMatch(
539
+ &params->dictionary.compound, ringbuffer,
540
+ ringbuffer_mask, dist_cache, position + 1, max_length,
541
+ dictionary_start, params->dist.max_distance, &sr2);
542
+ }
543
+ if (sr2.score >= sr.score + cost_diff_lazy) {
544
+ /* Ok, let's just write one byte for now and start a match from the
545
+ next byte. */
546
+ ++position;
547
+ ++insert_length;
548
+ sr = sr2; /* adopt the later match */
549
+ if (++delayed_backward_references_in_row < 4 &&
550
+ position + FN(HashTypeLength)() < pos_end) {
551
+ continue;
552
+ }
553
+ }
554
+ break; /* sr now holds the match to emit */
555
+ }
556
+ apply_random_heuristics = /* push the sparse-search threshold past this match */
557
+ position + 2 * sr.len + random_heuristics_window_size;
558
+ dictionary_start = BROTLI_MIN(size_t, /* recomputed because the lazy loop may have advanced position */
559
+ position + position_offset, max_backward_limit);
560
+ {
561
+ /* The first 16 codes are special short-codes,
562
+ and the minimum offset is 1. */
563
+ size_t distance_code = ComputeDistanceCode(
564
+ sr.distance, dictionary_start + gap, dist_cache);
565
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* NOTE(review): presumably skips dictionary references and repeats of the last distance - confirm against ComputeDistanceCode */
566
+ dist_cache[3] = dist_cache[2]; /* shift the cache and place the new distance first */
567
+ dist_cache[2] = dist_cache[1];
568
+ dist_cache[1] = dist_cache[0];
569
+ dist_cache[0] = (int)sr.distance;
570
+ FN(PrepareDistanceCache)(privat, dist_cache);
571
+ }
572
+ InitCommand(commands++, &params->dist, insert_length, /* fills the next Command slot with the pending literal count and this match */
573
+ sr.len, sr.len_code_delta, distance_code);
574
+ }
575
+ *num_literals += insert_length; /* pending literals are accounted for; start a new run */
576
+ insert_length = 0;
577
+ /* Put the hash keys into the table, if there are enough bytes left.
578
+ Depending on the hasher implementation, it can push all positions
579
+ in the given range or only a subset of them.
580
+ Avoid hash poisoning with RLE data. */
581
+ {
582
+ size_t range_start = position + 2;
583
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
584
+ if (sr.distance < (sr.len >> 2)) { /* distance under a quarter of the length: store only the last 4 * distance positions, clamped to [range_start, range_end] */
585
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
586
+ range_start, position + sr.len - (sr.distance << 2)));
587
+ }
588
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
589
+ range_end);
590
+ }
591
+ position += sr.len; /* continue after the matched bytes */
592
+ } else {
593
+ ++insert_length; /* no acceptable match: this byte becomes a literal */
594
+ ++position;
595
+ /* If we have not seen matches for a long time, we can skip some
596
+ match lookups. Unsuccessful match lookups are very very expensive
597
+ and this kind of a heuristic speeds up compression quite
598
+ a lot. */
599
+ if (position > apply_random_heuristics) {
600
+ /* Going through uncompressible data, jump. */
601
+ if (position >
602
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
603
+ /* It is quite a long time since we saw a copy, so we assume
604
+ that this data is not compressible, and store hashes less
605
+ often. Hashes of non compressible data are less likely to
606
+ turn out to be useful in the future, too, so we store less of
607
+ them so as not to flood out the hash table of good compressible
608
+ data. */
609
+ const size_t kMargin = /* the jump never enters the last kMargin bytes before pos_end */
610
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
611
+ size_t pos_jump =
612
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
613
+ for (; position < pos_jump; position += 4) { /* store every 4th position; the skipped bytes are counted as literals */
614
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
615
+ insert_length += 4;
616
+ }
617
+ } else {
618
+ const size_t kMargin =
619
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
620
+ size_t pos_jump =
621
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
622
+ for (; position < pos_jump; position += 2) { /* milder variant: every 2nd position, at most 8 bytes ahead */
623
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
624
+ insert_length += 2;
625
+ }
626
+ }
627
+ }
628
+ }
629
+ }
630
+ insert_length += pos_end - position; /* bytes left after the loop are pending literals too */
631
+ *last_insert_len = insert_length; /* written back through the pointer it was read from on entry */
632
+ *num_commands += (size_t)(commands - orig_commands); /* number of Command slots filled by this call */
633
+ }
634
+ #undef HASHER /* ends the previous HASHER() binding */
635
+
636
+ #define HASHER() H5 /* HASHER() expands to H5 from here on; the CreateBackwardReferences definition below uses it as the type of `privat` */
637
+ /* NOLINTNEXTLINE(build/include) */
638
+ /* NOLINT(build/header_guard) */
639
+ /* Copyright 2013 Google Inc. All Rights Reserved.
640
+
641
+ Distributed under MIT license.
642
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
643
+ */
644
+
645
+ /* template parameters: EXPORT_FN, FN */
646
+
647
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( /* Scans [position, position + num_bytes) of ringbuffer and fills `commands` with one entry per accepted backward reference; instantiated per hasher through EXPORT_FN, FN and HASHER. */
648
+ size_t num_bytes, size_t position, /* length and start offset of the range to scan */
649
+ const uint8_t* ringbuffer, size_t ringbuffer_mask, /* input bytes; indices are ANDed with ringbuffer_mask before use */
650
+ ContextLut literal_context_lut, const BrotliEncoderParams* params, /* table handed to BROTLI_CONTEXT; encoder settings read here: lgwin, stream_offset, quality, dist, dictionary */
651
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len, /* hasher state; recent distances (slots 0..3 are rewritten here); in/out count of pending literals */
652
+ Command* commands, size_t* num_commands, size_t* num_literals) { /* output slots written via commands++; both counters are added to, never reset */
653
+ HASHER()* privat = &hasher->privat.FN(_); /* hasher state member selected by FN(_) */
654
+ /* Set maximum distance, see section 9.1. of the spec. */
655
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
656
+ const size_t position_offset = params->stream_offset; /* added to position when computing dictionary_start */
657
+
658
+ const Command* const orig_commands = commands; /* kept to count the commands emitted by this call */
659
+ size_t insert_length = *last_insert_len; /* literals pending since the previous command */
660
+ const size_t pos_end = position + num_bytes;
661
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ? /* upper bound for StoreRange below; equals position when fewer than StoreLookahead() bytes are given */
662
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
663
+
664
+ /* For speed up heuristics for random data. */
665
+ const size_t random_heuristics_window_size =
666
+ LiteralSpreeLengthForSparseSearch(params);
667
+ size_t apply_random_heuristics = position + random_heuristics_window_size; /* once position passes this, the no-match branch starts skipping ahead */
668
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start wherever a distance limit is needed */
669
+
670
+ /* Minimum score to accept a backward reference. */
671
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
672
+
673
+ FN(PrepareDistanceCache)(privat, dist_cache);
674
+
675
+ while (position + FN(HashTypeLength)() < pos_end) { /* runs while more than HashTypeLength() bytes remain */
676
+ size_t max_length = pos_end - position; /* bytes left in the range */
677
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
678
+ size_t dictionary_start = BROTLI_MIN(size_t,
679
+ position + position_offset, max_backward_limit);
680
+ HasherSearchResult sr; /* best match found at the current position */
681
+ int dict_id = 0; /* defaults apply when the dictionary is not context based */
682
+ uint8_t p1 = 0;
683
+ uint8_t p2 = 0;
684
+ if (params->dictionary.contextual.context_based) { /* choose the dictionary from the two preceding bytes (0 when not available) */
685
+ p1 = position >= 1 ?
686
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
687
+ p2 = position >= 2 ?
688
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
689
+ dict_id = params->dictionary.contextual.context_map[
690
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
691
+ }
692
+ sr.len = 0; /* start from an empty result; only a score above kMinScore is accepted below */
693
+ sr.len_code_delta = 0;
694
+ sr.distance = 0;
695
+ sr.score = kMinScore;
696
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
697
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
698
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
699
+ if (ENABLE_COMPOUND_DICTIONARY) {
700
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
701
+ ringbuffer_mask, dist_cache, position, max_length,
702
+ dictionary_start, params->dist.max_distance, &sr);
703
+ }
704
+ if (sr.score > kMinScore) {
705
+ /* Found a match. Let's look for something even better ahead. */
706
+ int delayed_backward_references_in_row = 0; /* consecutive one-byte deferrals; deferring stops once this reaches 4 */
707
+ --max_length; /* the next-byte candidate has one byte less available */
708
+ for (;; --max_length) {
709
+ const score_t cost_diff_lazy = 175; /* margin by which the next-byte match must outscore the current one */
710
+ HasherSearchResult sr2;
711
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ? /* NOTE(review): a nonzero seed presumably makes the hasher ignore matches that are not longer - confirm in FindLongestMatch */
712
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
713
+ sr2.len_code_delta = 0;
714
+ sr2.distance = 0;
715
+ sr2.score = kMinScore;
716
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit); /* same limits as before, recomputed for position + 1 */
717
+ dictionary_start = BROTLI_MIN(size_t,
718
+ position + 1 + position_offset, max_backward_limit);
719
+ if (params->dictionary.contextual.context_based) {
720
+ p2 = p1; /* slide the two-byte context forward by one byte */
721
+ p1 = ringbuffer[position & ringbuffer_mask];
722
+ dict_id = params->dictionary.contextual.context_map[
723
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
724
+ }
725
+ FN(FindLongestMatch)(privat,
726
+ params->dictionary.contextual.dict[dict_id],
727
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
728
+ max_distance, dictionary_start + gap, params->dist.max_distance,
729
+ &sr2);
730
+ if (ENABLE_COMPOUND_DICTIONARY) {
731
+ LookupCompoundDictionaryMatch(
732
+ &params->dictionary.compound, ringbuffer,
733
+ ringbuffer_mask, dist_cache, position + 1, max_length,
734
+ dictionary_start, params->dist.max_distance, &sr2);
735
+ }
736
+ if (sr2.score >= sr.score + cost_diff_lazy) {
737
+ /* Ok, let's just write one byte for now and start a match from the
738
+ next byte. */
739
+ ++position;
740
+ ++insert_length;
741
+ sr = sr2; /* adopt the later match */
742
+ if (++delayed_backward_references_in_row < 4 &&
743
+ position + FN(HashTypeLength)() < pos_end) {
744
+ continue;
745
+ }
746
+ }
747
+ break; /* sr now holds the match to emit */
748
+ }
749
+ apply_random_heuristics = /* push the sparse-search threshold past this match */
750
+ position + 2 * sr.len + random_heuristics_window_size;
751
+ dictionary_start = BROTLI_MIN(size_t, /* recomputed because the lazy loop may have advanced position */
752
+ position + position_offset, max_backward_limit);
753
+ {
754
+ /* The first 16 codes are special short-codes,
755
+ and the minimum offset is 1. */
756
+ size_t distance_code = ComputeDistanceCode(
757
+ sr.distance, dictionary_start + gap, dist_cache);
758
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* NOTE(review): presumably skips dictionary references and repeats of the last distance - confirm against ComputeDistanceCode */
759
+ dist_cache[3] = dist_cache[2]; /* shift the cache and place the new distance first */
760
+ dist_cache[2] = dist_cache[1];
761
+ dist_cache[1] = dist_cache[0];
762
+ dist_cache[0] = (int)sr.distance;
763
+ FN(PrepareDistanceCache)(privat, dist_cache);
764
+ }
765
+ InitCommand(commands++, &params->dist, insert_length, /* fills the next Command slot with the pending literal count and this match */
766
+ sr.len, sr.len_code_delta, distance_code);
767
+ }
768
+ *num_literals += insert_length; /* pending literals are accounted for; start a new run */
769
+ insert_length = 0;
770
+ /* Put the hash keys into the table, if there are enough bytes left.
771
+ Depending on the hasher implementation, it can push all positions
772
+ in the given range or only a subset of them.
773
+ Avoid hash poisoning with RLE data. */
774
+ {
775
+ size_t range_start = position + 2;
776
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
777
+ if (sr.distance < (sr.len >> 2)) { /* distance under a quarter of the length: store only the last 4 * distance positions, clamped to [range_start, range_end] */
778
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
779
+ range_start, position + sr.len - (sr.distance << 2)));
780
+ }
781
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
782
+ range_end);
783
+ }
784
+ position += sr.len; /* continue after the matched bytes */
785
+ } else {
786
+ ++insert_length; /* no acceptable match: this byte becomes a literal */
787
+ ++position;
788
+ /* If we have not seen matches for a long time, we can skip some
789
+ match lookups. Unsuccessful match lookups are very very expensive
790
+ and this kind of a heuristic speeds up compression quite
791
+ a lot. */
792
+ if (position > apply_random_heuristics) {
793
+ /* Going through uncompressible data, jump. */
794
+ if (position >
795
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
796
+ /* It is quite a long time since we saw a copy, so we assume
797
+ that this data is not compressible, and store hashes less
798
+ often. Hashes of non compressible data are less likely to
799
+ turn out to be useful in the future, too, so we store less of
800
+ them so as not to flood out the hash table of good compressible
801
+ data. */
802
+ const size_t kMargin = /* the jump never enters the last kMargin bytes before pos_end */
803
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
804
+ size_t pos_jump =
805
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
806
+ for (; position < pos_jump; position += 4) { /* store every 4th position; the skipped bytes are counted as literals */
807
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
808
+ insert_length += 4;
809
+ }
810
+ } else {
811
+ const size_t kMargin =
812
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
813
+ size_t pos_jump =
814
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
815
+ for (; position < pos_jump; position += 2) { /* milder variant: every 2nd position, at most 8 bytes ahead */
816
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
817
+ insert_length += 2;
818
+ }
819
+ }
820
+ }
821
+ }
822
+ }
823
+ insert_length += pos_end - position; /* bytes left after the loop are pending literals too */
824
+ *last_insert_len = insert_length; /* written back through the pointer it was read from on entry */
825
+ *num_commands += (size_t)(commands - orig_commands); /* number of Command slots filled by this call */
826
+ }
827
+ #undef HASHER /* ends the previous HASHER() binding */
828
+
829
+ #define HASHER() H6 /* HASHER() expands to H6 from here on; the CreateBackwardReferences definition below uses it as the type of `privat` */
830
+ /* NOLINTNEXTLINE(build/include) */
831
+ /* NOLINT(build/header_guard) */
832
+ /* Copyright 2013 Google Inc. All Rights Reserved.
833
+
834
+ Distributed under MIT license.
835
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
836
+ */
837
+
838
+ /* template parameters: EXPORT_FN, FN */
839
+
840
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( /* Scans [position, position + num_bytes) of ringbuffer and fills `commands` with one entry per accepted backward reference; instantiated per hasher through EXPORT_FN, FN and HASHER. */
841
+ size_t num_bytes, size_t position, /* length and start offset of the range to scan */
842
+ const uint8_t* ringbuffer, size_t ringbuffer_mask, /* input bytes; indices are ANDed with ringbuffer_mask before use */
843
+ ContextLut literal_context_lut, const BrotliEncoderParams* params, /* table handed to BROTLI_CONTEXT; encoder settings read here: lgwin, stream_offset, quality, dist, dictionary */
844
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len, /* hasher state; recent distances (slots 0..3 are rewritten here); in/out count of pending literals */
845
+ Command* commands, size_t* num_commands, size_t* num_literals) { /* output slots written via commands++; both counters are added to, never reset */
846
+ HASHER()* privat = &hasher->privat.FN(_); /* hasher state member selected by FN(_) */
847
+ /* Set maximum distance, see section 9.1. of the spec. */
848
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
849
+ const size_t position_offset = params->stream_offset; /* added to position when computing dictionary_start */
850
+
851
+ const Command* const orig_commands = commands; /* kept to count the commands emitted by this call */
852
+ size_t insert_length = *last_insert_len; /* literals pending since the previous command */
853
+ const size_t pos_end = position + num_bytes;
854
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ? /* upper bound for StoreRange below; equals position when fewer than StoreLookahead() bytes are given */
855
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
856
+
857
+ /* For speed up heuristics for random data. */
858
+ const size_t random_heuristics_window_size =
859
+ LiteralSpreeLengthForSparseSearch(params);
860
+ size_t apply_random_heuristics = position + random_heuristics_window_size; /* once position passes this, the no-match branch starts skipping ahead */
861
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start wherever a distance limit is needed */
862
+
863
+ /* Minimum score to accept a backward reference. */
864
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
865
+
866
+ FN(PrepareDistanceCache)(privat, dist_cache);
867
+
868
+ while (position + FN(HashTypeLength)() < pos_end) { /* runs while more than HashTypeLength() bytes remain */
869
+ size_t max_length = pos_end - position; /* bytes left in the range */
870
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
871
+ size_t dictionary_start = BROTLI_MIN(size_t,
872
+ position + position_offset, max_backward_limit);
873
+ HasherSearchResult sr; /* best match found at the current position */
874
+ int dict_id = 0; /* defaults apply when the dictionary is not context based */
875
+ uint8_t p1 = 0;
876
+ uint8_t p2 = 0;
877
+ if (params->dictionary.contextual.context_based) { /* choose the dictionary from the two preceding bytes (0 when not available) */
878
+ p1 = position >= 1 ?
879
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
880
+ p2 = position >= 2 ?
881
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
882
+ dict_id = params->dictionary.contextual.context_map[
883
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
884
+ }
885
+ sr.len = 0; /* start from an empty result; only a score above kMinScore is accepted below */
886
+ sr.len_code_delta = 0;
887
+ sr.distance = 0;
888
+ sr.score = kMinScore;
889
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
890
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
891
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
892
+ if (ENABLE_COMPOUND_DICTIONARY) {
893
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
894
+ ringbuffer_mask, dist_cache, position, max_length,
895
+ dictionary_start, params->dist.max_distance, &sr);
896
+ }
897
+ if (sr.score > kMinScore) {
898
+ /* Found a match. Let's look for something even better ahead. */
899
+ int delayed_backward_references_in_row = 0; /* consecutive one-byte deferrals; deferring stops once this reaches 4 */
900
+ --max_length; /* the next-byte candidate has one byte less available */
901
+ for (;; --max_length) {
902
+ const score_t cost_diff_lazy = 175; /* margin by which the next-byte match must outscore the current one */
903
+ HasherSearchResult sr2;
904
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ? /* NOTE(review): a nonzero seed presumably makes the hasher ignore matches that are not longer - confirm in FindLongestMatch */
905
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
906
+ sr2.len_code_delta = 0;
907
+ sr2.distance = 0;
908
+ sr2.score = kMinScore;
909
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit); /* same limits as before, recomputed for position + 1 */
910
+ dictionary_start = BROTLI_MIN(size_t,
911
+ position + 1 + position_offset, max_backward_limit);
912
+ if (params->dictionary.contextual.context_based) {
913
+ p2 = p1; /* slide the two-byte context forward by one byte */
914
+ p1 = ringbuffer[position & ringbuffer_mask];
915
+ dict_id = params->dictionary.contextual.context_map[
916
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
917
+ }
918
+ FN(FindLongestMatch)(privat,
919
+ params->dictionary.contextual.dict[dict_id],
920
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
921
+ max_distance, dictionary_start + gap, params->dist.max_distance,
922
+ &sr2);
923
+ if (ENABLE_COMPOUND_DICTIONARY) {
924
+ LookupCompoundDictionaryMatch(
925
+ &params->dictionary.compound, ringbuffer,
926
+ ringbuffer_mask, dist_cache, position + 1, max_length,
927
+ dictionary_start, params->dist.max_distance, &sr2);
928
+ }
929
+ if (sr2.score >= sr.score + cost_diff_lazy) {
930
+ /* Ok, let's just write one byte for now and start a match from the
931
+ next byte. */
932
+ ++position;
933
+ ++insert_length;
934
+ sr = sr2; /* adopt the later match */
935
+ if (++delayed_backward_references_in_row < 4 &&
936
+ position + FN(HashTypeLength)() < pos_end) {
937
+ continue;
938
+ }
939
+ }
940
+ break; /* sr now holds the match to emit */
941
+ }
942
+ apply_random_heuristics = /* push the sparse-search threshold past this match */
943
+ position + 2 * sr.len + random_heuristics_window_size;
944
+ dictionary_start = BROTLI_MIN(size_t, /* recomputed because the lazy loop may have advanced position */
945
+ position + position_offset, max_backward_limit);
946
+ {
947
+ /* The first 16 codes are special short-codes,
948
+ and the minimum offset is 1. */
949
+ size_t distance_code = ComputeDistanceCode(
950
+ sr.distance, dictionary_start + gap, dist_cache);
951
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* NOTE(review): presumably skips dictionary references and repeats of the last distance - confirm against ComputeDistanceCode */
952
+ dist_cache[3] = dist_cache[2]; /* shift the cache and place the new distance first */
953
+ dist_cache[2] = dist_cache[1];
954
+ dist_cache[1] = dist_cache[0];
955
+ dist_cache[0] = (int)sr.distance;
956
+ FN(PrepareDistanceCache)(privat, dist_cache);
957
+ }
958
+ InitCommand(commands++, &params->dist, insert_length, /* fills the next Command slot with the pending literal count and this match */
959
+ sr.len, sr.len_code_delta, distance_code);
960
+ }
961
+ *num_literals += insert_length; /* pending literals are accounted for; start a new run */
962
+ insert_length = 0;
963
+ /* Put the hash keys into the table, if there are enough bytes left.
964
+ Depending on the hasher implementation, it can push all positions
965
+ in the given range or only a subset of them.
966
+ Avoid hash poisoning with RLE data. */
967
+ {
968
+ size_t range_start = position + 2;
969
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
970
+ if (sr.distance < (sr.len >> 2)) { /* distance under a quarter of the length: store only the last 4 * distance positions, clamped to [range_start, range_end] */
971
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
972
+ range_start, position + sr.len - (sr.distance << 2)));
973
+ }
974
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
975
+ range_end);
976
+ }
977
+ position += sr.len; /* continue after the matched bytes */
978
+ } else {
979
+ ++insert_length; /* no acceptable match: this byte becomes a literal */
980
+ ++position;
981
+ /* If we have not seen matches for a long time, we can skip some
982
+ match lookups. Unsuccessful match lookups are very very expensive
983
+ and this kind of a heuristic speeds up compression quite
984
+ a lot. */
985
+ if (position > apply_random_heuristics) {
986
+ /* Going through uncompressible data, jump. */
987
+ if (position >
988
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
989
+ /* It is quite a long time since we saw a copy, so we assume
990
+ that this data is not compressible, and store hashes less
991
+ often. Hashes of non compressible data are less likely to
992
+ turn out to be useful in the future, too, so we store less of
993
+ them so as not to flood out the hash table of good compressible
994
+ data. */
995
+ const size_t kMargin = /* the jump never enters the last kMargin bytes before pos_end */
996
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
997
+ size_t pos_jump =
998
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
999
+ for (; position < pos_jump; position += 4) { /* store every 4th position; the skipped bytes are counted as literals */
1000
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1001
+ insert_length += 4;
1002
+ }
1003
+ } else {
1004
+ const size_t kMargin =
1005
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
1006
+ size_t pos_jump =
1007
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
1008
+ for (; position < pos_jump; position += 2) { /* milder variant: every 2nd position, at most 8 bytes ahead */
1009
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1010
+ insert_length += 2;
1011
+ }
1012
+ }
1013
+ }
1014
+ }
1015
+ }
1016
+ insert_length += pos_end - position; /* bytes left after the loop are pending literals too */
1017
+ *last_insert_len = insert_length; /* written back through the pointer it was read from on entry */
1018
+ *num_commands += (size_t)(commands - orig_commands); /* number of Command slots filled by this call */
1019
+ }
1020
+ #undef HASHER /* ends the previous HASHER() binding */
1021
+
1022
+ #define HASHER() H40 /* HASHER() expands to H40 from here on; the CreateBackwardReferences definition below uses it as the type of `privat` */
1023
+ /* NOLINTNEXTLINE(build/include) */
1024
+ /* NOLINT(build/header_guard) */
1025
+ /* Copyright 2013 Google Inc. All Rights Reserved.
1026
+
1027
+ Distributed under MIT license.
1028
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
1029
+ */
1030
+
1031
+ /* template parameters: EXPORT_FN, FN */
1032
+
1033
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( /* Scans [position, position + num_bytes) of ringbuffer and fills `commands` with one entry per accepted backward reference; instantiated per hasher through EXPORT_FN, FN and HASHER. */
1034
+ size_t num_bytes, size_t position, /* length and start offset of the range to scan */
1035
+ const uint8_t* ringbuffer, size_t ringbuffer_mask, /* input bytes; indices are ANDed with ringbuffer_mask before use */
1036
+ ContextLut literal_context_lut, const BrotliEncoderParams* params, /* table handed to BROTLI_CONTEXT; encoder settings read here: lgwin, stream_offset, quality, dist, dictionary */
1037
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len, /* hasher state; recent distances (slots 0..3 are rewritten here); in/out count of pending literals */
1038
+ Command* commands, size_t* num_commands, size_t* num_literals) { /* output slots written via commands++; both counters are added to, never reset */
1039
+ HASHER()* privat = &hasher->privat.FN(_); /* hasher state member selected by FN(_) */
1040
+ /* Set maximum distance, see section 9.1. of the spec. */
1041
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
1042
+ const size_t position_offset = params->stream_offset; /* added to position when computing dictionary_start */
1043
+
1044
+ const Command* const orig_commands = commands; /* kept to count the commands emitted by this call */
1045
+ size_t insert_length = *last_insert_len; /* literals pending since the previous command */
1046
+ const size_t pos_end = position + num_bytes;
1047
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ? /* upper bound for StoreRange below; equals position when fewer than StoreLookahead() bytes are given */
1048
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
1049
+
1050
+ /* For speed up heuristics for random data. */
1051
+ const size_t random_heuristics_window_size =
1052
+ LiteralSpreeLengthForSparseSearch(params);
1053
+ size_t apply_random_heuristics = position + random_heuristics_window_size; /* once position passes this, the no-match branch starts skipping ahead */
1054
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start wherever a distance limit is needed */
1055
+
1056
+ /* Minimum score to accept a backward reference. */
1057
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
1058
+
1059
+ FN(PrepareDistanceCache)(privat, dist_cache);
1060
+
1061
+ while (position + FN(HashTypeLength)() < pos_end) { /* runs while more than HashTypeLength() bytes remain */
1062
+ size_t max_length = pos_end - position; /* bytes left in the range */
1063
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
1064
+ size_t dictionary_start = BROTLI_MIN(size_t,
1065
+ position + position_offset, max_backward_limit);
1066
+ HasherSearchResult sr; /* best match found at the current position */
1067
+ int dict_id = 0; /* defaults apply when the dictionary is not context based */
1068
+ uint8_t p1 = 0;
1069
+ uint8_t p2 = 0;
1070
+ if (params->dictionary.contextual.context_based) { /* choose the dictionary from the two preceding bytes (0 when not available) */
1071
+ p1 = position >= 1 ?
1072
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
1073
+ p2 = position >= 2 ?
1074
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
1075
+ dict_id = params->dictionary.contextual.context_map[
1076
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1077
+ }
1078
+ sr.len = 0; /* start from an empty result; only a score above kMinScore is accepted below */
1079
+ sr.len_code_delta = 0;
1080
+ sr.distance = 0;
1081
+ sr.score = kMinScore;
1082
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
1083
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
1084
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
1085
+ if (ENABLE_COMPOUND_DICTIONARY) {
1086
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
1087
+ ringbuffer_mask, dist_cache, position, max_length,
1088
+ dictionary_start, params->dist.max_distance, &sr);
1089
+ }
1090
+ if (sr.score > kMinScore) {
1091
+ /* Found a match. Let's look for something even better ahead. */
1092
+ int delayed_backward_references_in_row = 0; /* consecutive one-byte deferrals; deferring stops once this reaches 4 */
1093
+ --max_length; /* the next-byte candidate has one byte less available */
1094
+ for (;; --max_length) {
1095
+ const score_t cost_diff_lazy = 175; /* margin by which the next-byte match must outscore the current one */
1096
+ HasherSearchResult sr2;
1097
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ? /* NOTE(review): a nonzero seed presumably makes the hasher ignore matches that are not longer - confirm in FindLongestMatch */
1098
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
1099
+ sr2.len_code_delta = 0;
1100
+ sr2.distance = 0;
1101
+ sr2.score = kMinScore;
1102
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit); /* same limits as before, recomputed for position + 1 */
1103
+ dictionary_start = BROTLI_MIN(size_t,
1104
+ position + 1 + position_offset, max_backward_limit);
1105
+ if (params->dictionary.contextual.context_based) {
1106
+ p2 = p1; /* slide the two-byte context forward by one byte */
1107
+ p1 = ringbuffer[position & ringbuffer_mask];
1108
+ dict_id = params->dictionary.contextual.context_map[
1109
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1110
+ }
1111
+ FN(FindLongestMatch)(privat,
1112
+ params->dictionary.contextual.dict[dict_id],
1113
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
1114
+ max_distance, dictionary_start + gap, params->dist.max_distance,
1115
+ &sr2);
1116
+ if (ENABLE_COMPOUND_DICTIONARY) {
1117
+ LookupCompoundDictionaryMatch(
1118
+ &params->dictionary.compound, ringbuffer,
1119
+ ringbuffer_mask, dist_cache, position + 1, max_length,
1120
+ dictionary_start, params->dist.max_distance, &sr2);
1121
+ }
1122
+ if (sr2.score >= sr.score + cost_diff_lazy) {
1123
+ /* Ok, let's just write one byte for now and start a match from the
1124
+ next byte. */
1125
+ ++position;
1126
+ ++insert_length;
1127
+ sr = sr2; /* adopt the later match */
1128
+ if (++delayed_backward_references_in_row < 4 &&
1129
+ position + FN(HashTypeLength)() < pos_end) {
1130
+ continue;
1131
+ }
1132
+ }
1133
+ break; /* sr now holds the match to emit */
1134
+ }
1135
+ apply_random_heuristics = /* push the sparse-search threshold past this match */
1136
+ position + 2 * sr.len + random_heuristics_window_size;
1137
+ dictionary_start = BROTLI_MIN(size_t, /* recomputed because the lazy loop may have advanced position */
1138
+ position + position_offset, max_backward_limit);
1139
+ {
1140
+ /* The first 16 codes are special short-codes,
1141
+ and the minimum offset is 1. */
1142
+ size_t distance_code = ComputeDistanceCode(
1143
+ sr.distance, dictionary_start + gap, dist_cache);
1144
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* NOTE(review): presumably skips dictionary references and repeats of the last distance - confirm against ComputeDistanceCode */
1145
+ dist_cache[3] = dist_cache[2]; /* shift the cache and place the new distance first */
1146
+ dist_cache[2] = dist_cache[1];
1147
+ dist_cache[1] = dist_cache[0];
1148
+ dist_cache[0] = (int)sr.distance;
1149
+ FN(PrepareDistanceCache)(privat, dist_cache);
1150
+ }
1151
+ InitCommand(commands++, &params->dist, insert_length, /* fills the next Command slot with the pending literal count and this match */
1152
+ sr.len, sr.len_code_delta, distance_code);
1153
+ }
1154
+ *num_literals += insert_length; /* pending literals are accounted for; start a new run */
1155
+ insert_length = 0;
1156
+ /* Put the hash keys into the table, if there are enough bytes left.
1157
+ Depending on the hasher implementation, it can push all positions
1158
+ in the given range or only a subset of them.
1159
+ Avoid hash poisoning with RLE data. */
1160
+ {
1161
+ size_t range_start = position + 2;
1162
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
1163
+ if (sr.distance < (sr.len >> 2)) { /* distance under a quarter of the length: store only the last 4 * distance positions, clamped to [range_start, range_end] */
1164
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
1165
+ range_start, position + sr.len - (sr.distance << 2)));
1166
+ }
1167
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
1168
+ range_end);
1169
+ }
1170
+ position += sr.len; /* continue after the matched bytes */
1171
+ } else {
1172
+ ++insert_length; /* no acceptable match: this byte becomes a literal */
1173
+ ++position;
1174
+ /* If we have not seen matches for a long time, we can skip some
1175
+ match lookups. Unsuccessful match lookups are very very expensive
1176
+ and this kind of a heuristic speeds up compression quite
1177
+ a lot. */
1178
+ if (position > apply_random_heuristics) {
1179
+ /* Going through uncompressible data, jump. */
1180
+ if (position >
1181
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
1182
+ /* It is quite a long time since we saw a copy, so we assume
1183
+ that this data is not compressible, and store hashes less
1184
+ often. Hashes of non compressible data are less likely to
1185
+ turn out to be useful in the future, too, so we store less of
1186
+ them so as not to flood out the hash table of good compressible
1187
+ data. */
1188
+ const size_t kMargin = /* the jump never enters the last kMargin bytes before pos_end */
1189
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
1190
+ size_t pos_jump =
1191
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
1192
+ for (; position < pos_jump; position += 4) { /* store every 4th position; the skipped bytes are counted as literals */
1193
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1194
+ insert_length += 4;
1195
+ }
1196
+ } else {
1197
+ const size_t kMargin =
1198
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
1199
+ size_t pos_jump =
1200
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
1201
+ for (; position < pos_jump; position += 2) { /* milder variant: every 2nd position, at most 8 bytes ahead */
1202
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1203
+ insert_length += 2;
1204
+ }
1205
+ }
1206
+ }
1207
+ }
1208
+ }
1209
+ insert_length += pos_end - position; /* bytes left after the loop are pending literals too */
1210
+ *last_insert_len = insert_length; /* written back through the pointer it was read from on entry */
1211
+ *num_commands += (size_t)(commands - orig_commands); /* number of Command slots filled by this call */
1212
+ }
1213
+ #undef HASHER
1214
+
1215
+ #define HASHER() H41
1216
+ /* NOLINTNEXTLINE(build/include) */
1217
+ /* NOLINT(build/header_guard) */
1218
+ /* Copyright 2013 Google Inc. All Rights Reserved.
1219
+
1220
+ Distributed under MIT license.
1221
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
1222
+ */
1223
+
1224
+ /* template parameters: EXPORT_FN, FN
+
+ Scans [position, position + num_bytes) and appends one Command to |commands|
+ for every accepted backward reference. At each position FN(FindLongestMatch)
+ (and, when ENABLE_COMPOUND_DICTIONARY is set, LookupCompoundDictionaryMatch)
+ is asked for a match. A result scoring above kMinScore is accepted, after
+ being deferred by up to four bytes while the match starting one byte later
+ scores at least cost_diff_lazy higher. Positions without an accepted match
+ are counted in insert_length as literals.
+
+ In/out: |dist_cache| (entries 0..3 are shifted when a new distance is
+ recorded), |*last_insert_len| (pending literal count on entry and on exit),
+ |*num_commands| and |*num_literals| (both incremented). */
1225
+
1226
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
1227
+ size_t num_bytes, size_t position,
1228
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
1229
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
1230
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
1231
+ Command* commands, size_t* num_commands, size_t* num_literals) {
1232
+ HASHER()* privat = &hasher->privat.FN(_); /* hasher state member named by FN(_) */
1233
+ /* Set maximum distance, see section 9.1. of the spec. */
1234
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
1235
+ const size_t position_offset = params->stream_offset;
1236
+
1237
+ const Command* const orig_commands = commands;
1238
+ size_t insert_length = *last_insert_len;
1239
+ const size_t pos_end = position + num_bytes; /* one past the last input position */
1240
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
1241
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* upper clamp for the FN(StoreRange) call below */
1242
+
1243
+ /* Speed-up heuristics for random (incompressible) data. */
1244
+ const size_t random_heuristics_window_size =
1245
+ LiteralSpreeLengthForSparseSearch(params);
1246
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
1247
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start in the distance limits below */
1248
+
1249
+ /* Minimum score to accept a backward reference. */
1250
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
1251
+
1252
+ FN(PrepareDistanceCache)(privat, dist_cache);
1253
+
1254
+ while (position + FN(HashTypeLength)() < pos_end) {
1255
+ size_t max_length = pos_end - position;
1256
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
1257
+ size_t dictionary_start = BROTLI_MIN(size_t,
1258
+ position + position_offset, max_backward_limit);
1259
+ HasherSearchResult sr;
1260
+ int dict_id = 0;
1261
+ uint8_t p1 = 0;
1262
+ uint8_t p2 = 0;
1263
+ if (params->dictionary.contextual.context_based) { /* choose dict_id from the two preceding bytes */
1264
+ p1 = position >= 1 ?
1265
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
1266
+ p2 = position >= 2 ?
1267
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
1268
+ dict_id = params->dictionary.contextual.context_map[
1269
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1270
+ }
1271
+ sr.len = 0;
1272
+ sr.len_code_delta = 0;
1273
+ sr.distance = 0;
1274
+ sr.score = kMinScore; /* a match is accepted only if the score ends up above this */
1275
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
1276
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
1277
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
1278
+ if (ENABLE_COMPOUND_DICTIONARY) {
1279
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
1280
+ ringbuffer_mask, dist_cache, position, max_length,
1281
+ dictionary_start, params->dist.max_distance, &sr);
1282
+ }
1283
+ if (sr.score > kMinScore) {
1284
+ /* Found a match. Let's look for something even better ahead. */
1285
+ int delayed_backward_references_in_row = 0;
1286
+ --max_length;
1287
+ for (;; --max_length) {
1288
+ const score_t cost_diff_lazy = 175; /* score margin the next-byte match must win by */
1289
+ HasherSearchResult sr2;
1290
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
1291
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* NOTE(review): presumably a length the search has to beat -- confirm against FindLongestMatch */
1292
+ sr2.len_code_delta = 0;
1293
+ sr2.distance = 0;
1294
+ sr2.score = kMinScore;
1295
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
1296
+ dictionary_start = BROTLI_MIN(size_t,
1297
+ position + 1 + position_offset, max_backward_limit);
1298
+ if (params->dictionary.contextual.context_based) {
1299
+ p2 = p1;
1300
+ p1 = ringbuffer[position & ringbuffer_mask]; /* byte at |position| is the previous byte for position + 1 */
1301
+ dict_id = params->dictionary.contextual.context_map[
1302
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1303
+ }
1304
+ FN(FindLongestMatch)(privat,
1305
+ params->dictionary.contextual.dict[dict_id],
1306
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
1307
+ max_distance, dictionary_start + gap, params->dist.max_distance,
1308
+ &sr2);
1309
+ if (ENABLE_COMPOUND_DICTIONARY) {
1310
+ LookupCompoundDictionaryMatch(
1311
+ &params->dictionary.compound, ringbuffer,
1312
+ ringbuffer_mask, dist_cache, position + 1, max_length,
1313
+ dictionary_start, params->dist.max_distance, &sr2);
1314
+ }
1315
+ if (sr2.score >= sr.score + cost_diff_lazy) {
1316
+ /* Ok, let's just write one byte for now and start a match from the
1317
+ next byte. */
1318
+ ++position;
1319
+ ++insert_length;
1320
+ sr = sr2;
1321
+ if (++delayed_backward_references_in_row < 4 && /* at most four deferrals in a row */
1322
+ position + FN(HashTypeLength)() < pos_end) {
1323
+ continue;
1324
+ }
1325
+ }
1326
+ break;
1327
+ }
1328
+ apply_random_heuristics =
1329
+ position + 2 * sr.len + random_heuristics_window_size; /* move the skip-heuristic threshold beyond this match */
1330
+ dictionary_start = BROTLI_MIN(size_t,
1331
+ position + position_offset, max_backward_limit);
1332
+ {
1333
+ /* The first 16 codes are special short-codes,
1334
+ and the minimum offset is 1. */
1335
+ size_t distance_code = ComputeDistanceCode(
1336
+ sr.distance, dictionary_start + gap, dist_cache);
1337
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
1338
+ dist_cache[3] = dist_cache[2];
1339
+ dist_cache[2] = dist_cache[1];
1340
+ dist_cache[1] = dist_cache[0];
1341
+ dist_cache[0] = (int)sr.distance; /* newest distance goes first; older entries were shifted down */
1342
+ FN(PrepareDistanceCache)(privat, dist_cache);
1343
+ }
1344
+ InitCommand(commands++, &params->dist, insert_length,
1345
+ sr.len, sr.len_code_delta, distance_code);
1346
+ }
1347
+ *num_literals += insert_length;
1348
+ insert_length = 0;
1349
+ /* Put the hash keys into the table, if there are enough bytes left.
1350
+ Depending on the hasher implementation, it can push all positions
1351
+ in the given range or only a subset of them.
1352
+ Avoid hash poisoning with RLE data. */
1353
+ {
1354
+ size_t range_start = position + 2;
1355
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
1356
+ if (sr.distance < (sr.len >> 2)) { /* implies (sr.distance << 2) < sr.len, so the subtraction below cannot wrap */
1357
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
1358
+ range_start, position + sr.len - (sr.distance << 2)));
1359
+ }
1360
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
1361
+ range_end);
1362
+ }
1363
+ position += sr.len;
1364
+ } else {
1365
+ ++insert_length;
1366
+ ++position;
1367
+ /* If we have not seen matches for a long time, we can skip some
1368
+ match lookups. Unsuccessful match lookups are very very expensive
1369
+ and this kind of a heuristic speeds up compression quite
1370
+ a lot. */
1371
+ if (position > apply_random_heuristics) {
1372
+ /* Going through uncompressible data, jump. */
1373
+ if (position >
1374
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
1375
+ /* It is quite a long time since we saw a copy, so we assume
1376
+ that this data is not compressible, and store hashes less
1377
+ often. Hashes of non compressible data are less likely to
1378
+ turn out to be useful in the future, too, so we store less of
1379
+ them so as not to flood out the hash table of good compressible
1380
+ data. */
1381
+ const size_t kMargin =
1382
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
1383
+ size_t pos_jump =
1384
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin); /* NOTE(review): unsigned subtraction; assumes pos_end >= kMargin -- confirm */
1385
+ for (; position < pos_jump; position += 4) {
1386
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1387
+ insert_length += 4; /* every position stepped over is counted as a literal */
1388
+ }
1389
+ } else {
1390
+ const size_t kMargin =
1391
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
1392
+ size_t pos_jump =
1393
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin); /* NOTE(review): unsigned subtraction; assumes pos_end >= kMargin -- confirm */
1394
+ for (; position < pos_jump; position += 2) {
1395
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1396
+ insert_length += 2; /* every position stepped over is counted as a literal */
1397
+ }
1398
+ }
1399
+ }
1400
+ }
1401
+ }
1402
+ insert_length += pos_end - position; /* bytes left after the loop stay pending as literals */
1403
+ *last_insert_len = insert_length;
1404
+ *num_commands += (size_t)(commands - orig_commands); /* number of commands appended by this call */
1405
+ }
1406
+ #undef HASHER
1407
+
1408
+ #define HASHER() H42
1409
+ /* NOLINTNEXTLINE(build/include) */
1410
+ /* NOLINT(build/header_guard) */
1411
+ /* Copyright 2013 Google Inc. All Rights Reserved.
1412
+
1413
+ Distributed under MIT license.
1414
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
1415
+ */
1416
+
1417
+ /* template parameters: EXPORT_FN, FN
+
+ Scans [position, position + num_bytes) and appends one Command to |commands|
+ for every accepted backward reference. At each position FN(FindLongestMatch)
+ (and, when ENABLE_COMPOUND_DICTIONARY is set, LookupCompoundDictionaryMatch)
+ is asked for a match. A result scoring above kMinScore is accepted, after
+ being deferred by up to four bytes while the match starting one byte later
+ scores at least cost_diff_lazy higher. Positions without an accepted match
+ are counted in insert_length as literals.
+
+ In/out: |dist_cache| (entries 0..3 are shifted when a new distance is
+ recorded), |*last_insert_len| (pending literal count on entry and on exit),
+ |*num_commands| and |*num_literals| (both incremented). */
1418
+
1419
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
1420
+ size_t num_bytes, size_t position,
1421
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
1422
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
1423
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
1424
+ Command* commands, size_t* num_commands, size_t* num_literals) {
1425
+ HASHER()* privat = &hasher->privat.FN(_); /* hasher state member named by FN(_) */
1426
+ /* Set maximum distance, see section 9.1. of the spec. */
1427
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
1428
+ const size_t position_offset = params->stream_offset;
1429
+
1430
+ const Command* const orig_commands = commands;
1431
+ size_t insert_length = *last_insert_len;
1432
+ const size_t pos_end = position + num_bytes; /* one past the last input position */
1433
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
1434
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* upper clamp for the FN(StoreRange) call below */
1435
+
1436
+ /* Speed-up heuristics for random (incompressible) data. */
1437
+ const size_t random_heuristics_window_size =
1438
+ LiteralSpreeLengthForSparseSearch(params);
1439
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
1440
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start in the distance limits below */
1441
+
1442
+ /* Minimum score to accept a backward reference. */
1443
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
1444
+
1445
+ FN(PrepareDistanceCache)(privat, dist_cache);
1446
+
1447
+ while (position + FN(HashTypeLength)() < pos_end) {
1448
+ size_t max_length = pos_end - position;
1449
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
1450
+ size_t dictionary_start = BROTLI_MIN(size_t,
1451
+ position + position_offset, max_backward_limit);
1452
+ HasherSearchResult sr;
1453
+ int dict_id = 0;
1454
+ uint8_t p1 = 0;
1455
+ uint8_t p2 = 0;
1456
+ if (params->dictionary.contextual.context_based) { /* choose dict_id from the two preceding bytes */
1457
+ p1 = position >= 1 ?
1458
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
1459
+ p2 = position >= 2 ?
1460
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
1461
+ dict_id = params->dictionary.contextual.context_map[
1462
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1463
+ }
1464
+ sr.len = 0;
1465
+ sr.len_code_delta = 0;
1466
+ sr.distance = 0;
1467
+ sr.score = kMinScore; /* a match is accepted only if the score ends up above this */
1468
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
1469
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
1470
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
1471
+ if (ENABLE_COMPOUND_DICTIONARY) {
1472
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
1473
+ ringbuffer_mask, dist_cache, position, max_length,
1474
+ dictionary_start, params->dist.max_distance, &sr);
1475
+ }
1476
+ if (sr.score > kMinScore) {
1477
+ /* Found a match. Let's look for something even better ahead. */
1478
+ int delayed_backward_references_in_row = 0;
1479
+ --max_length;
1480
+ for (;; --max_length) {
1481
+ const score_t cost_diff_lazy = 175; /* score margin the next-byte match must win by */
1482
+ HasherSearchResult sr2;
1483
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
1484
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* NOTE(review): presumably a length the search has to beat -- confirm against FindLongestMatch */
1485
+ sr2.len_code_delta = 0;
1486
+ sr2.distance = 0;
1487
+ sr2.score = kMinScore;
1488
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
1489
+ dictionary_start = BROTLI_MIN(size_t,
1490
+ position + 1 + position_offset, max_backward_limit);
1491
+ if (params->dictionary.contextual.context_based) {
1492
+ p2 = p1;
1493
+ p1 = ringbuffer[position & ringbuffer_mask]; /* byte at |position| is the previous byte for position + 1 */
1494
+ dict_id = params->dictionary.contextual.context_map[
1495
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1496
+ }
1497
+ FN(FindLongestMatch)(privat,
1498
+ params->dictionary.contextual.dict[dict_id],
1499
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
1500
+ max_distance, dictionary_start + gap, params->dist.max_distance,
1501
+ &sr2);
1502
+ if (ENABLE_COMPOUND_DICTIONARY) {
1503
+ LookupCompoundDictionaryMatch(
1504
+ &params->dictionary.compound, ringbuffer,
1505
+ ringbuffer_mask, dist_cache, position + 1, max_length,
1506
+ dictionary_start, params->dist.max_distance, &sr2);
1507
+ }
1508
+ if (sr2.score >= sr.score + cost_diff_lazy) {
1509
+ /* Ok, let's just write one byte for now and start a match from the
1510
+ next byte. */
1511
+ ++position;
1512
+ ++insert_length;
1513
+ sr = sr2;
1514
+ if (++delayed_backward_references_in_row < 4 && /* at most four deferrals in a row */
1515
+ position + FN(HashTypeLength)() < pos_end) {
1516
+ continue;
1517
+ }
1518
+ }
1519
+ break;
1520
+ }
1521
+ apply_random_heuristics =
1522
+ position + 2 * sr.len + random_heuristics_window_size; /* move the skip-heuristic threshold beyond this match */
1523
+ dictionary_start = BROTLI_MIN(size_t,
1524
+ position + position_offset, max_backward_limit);
1525
+ {
1526
+ /* The first 16 codes are special short-codes,
1527
+ and the minimum offset is 1. */
1528
+ size_t distance_code = ComputeDistanceCode(
1529
+ sr.distance, dictionary_start + gap, dist_cache);
1530
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
1531
+ dist_cache[3] = dist_cache[2];
1532
+ dist_cache[2] = dist_cache[1];
1533
+ dist_cache[1] = dist_cache[0];
1534
+ dist_cache[0] = (int)sr.distance; /* newest distance goes first; older entries were shifted down */
1535
+ FN(PrepareDistanceCache)(privat, dist_cache);
1536
+ }
1537
+ InitCommand(commands++, &params->dist, insert_length,
1538
+ sr.len, sr.len_code_delta, distance_code);
1539
+ }
1540
+ *num_literals += insert_length;
1541
+ insert_length = 0;
1542
+ /* Put the hash keys into the table, if there are enough bytes left.
1543
+ Depending on the hasher implementation, it can push all positions
1544
+ in the given range or only a subset of them.
1545
+ Avoid hash poisoning with RLE data. */
1546
+ {
1547
+ size_t range_start = position + 2;
1548
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
1549
+ if (sr.distance < (sr.len >> 2)) { /* implies (sr.distance << 2) < sr.len, so the subtraction below cannot wrap */
1550
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
1551
+ range_start, position + sr.len - (sr.distance << 2)));
1552
+ }
1553
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
1554
+ range_end);
1555
+ }
1556
+ position += sr.len;
1557
+ } else {
1558
+ ++insert_length;
1559
+ ++position;
1560
+ /* If we have not seen matches for a long time, we can skip some
1561
+ match lookups. Unsuccessful match lookups are very very expensive
1562
+ and this kind of a heuristic speeds up compression quite
1563
+ a lot. */
1564
+ if (position > apply_random_heuristics) {
1565
+ /* Going through uncompressible data, jump. */
1566
+ if (position >
1567
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
1568
+ /* It is quite a long time since we saw a copy, so we assume
1569
+ that this data is not compressible, and store hashes less
1570
+ often. Hashes of non compressible data are less likely to
1571
+ turn out to be useful in the future, too, so we store less of
1572
+ them so as not to flood out the hash table of good compressible
1573
+ data. */
1574
+ const size_t kMargin =
1575
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
1576
+ size_t pos_jump =
1577
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin); /* NOTE(review): unsigned subtraction; assumes pos_end >= kMargin -- confirm */
1578
+ for (; position < pos_jump; position += 4) {
1579
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1580
+ insert_length += 4; /* every position stepped over is counted as a literal */
1581
+ }
1582
+ } else {
1583
+ const size_t kMargin =
1584
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
1585
+ size_t pos_jump =
1586
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin); /* NOTE(review): unsigned subtraction; assumes pos_end >= kMargin -- confirm */
1587
+ for (; position < pos_jump; position += 2) {
1588
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1589
+ insert_length += 2; /* every position stepped over is counted as a literal */
1590
+ }
1591
+ }
1592
+ }
1593
+ }
1594
+ }
1595
+ insert_length += pos_end - position; /* bytes left after the loop stay pending as literals */
1596
+ *last_insert_len = insert_length;
1597
+ *num_commands += (size_t)(commands - orig_commands); /* number of commands appended by this call */
1598
+ }
1599
+ #undef HASHER
1600
+
1601
+ #define HASHER() H54
1602
+ /* NOLINTNEXTLINE(build/include) */
1603
+ /* NOLINT(build/header_guard) */
1604
+ /* Copyright 2013 Google Inc. All Rights Reserved.
1605
+
1606
+ Distributed under MIT license.
1607
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
1608
+ */
1609
+
1610
+ /* template parameters: EXPORT_FN, FN
+
+ Scans [position, position + num_bytes) and appends one Command to |commands|
+ for every accepted backward reference. At each position FN(FindLongestMatch)
+ (and, when ENABLE_COMPOUND_DICTIONARY is set, LookupCompoundDictionaryMatch)
+ is asked for a match. A result scoring above kMinScore is accepted, after
+ being deferred by up to four bytes while the match starting one byte later
+ scores at least cost_diff_lazy higher. Positions without an accepted match
+ are counted in insert_length as literals.
+
+ In/out: |dist_cache| (entries 0..3 are shifted when a new distance is
+ recorded), |*last_insert_len| (pending literal count on entry and on exit),
+ |*num_commands| and |*num_literals| (both incremented). */
1611
+
1612
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
1613
+ size_t num_bytes, size_t position,
1614
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
1615
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
1616
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
1617
+ Command* commands, size_t* num_commands, size_t* num_literals) {
1618
+ HASHER()* privat = &hasher->privat.FN(_); /* hasher state member named by FN(_) */
1619
+ /* Set maximum distance, see section 9.1. of the spec. */
1620
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
1621
+ const size_t position_offset = params->stream_offset;
1622
+
1623
+ const Command* const orig_commands = commands;
1624
+ size_t insert_length = *last_insert_len;
1625
+ const size_t pos_end = position + num_bytes; /* one past the last input position */
1626
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
1627
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* upper clamp for the FN(StoreRange) call below */
1628
+
1629
+ /* Speed-up heuristics for random (incompressible) data. */
1630
+ const size_t random_heuristics_window_size =
1631
+ LiteralSpreeLengthForSparseSearch(params);
1632
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
1633
+ const size_t gap = params->dictionary.compound.total_size; /* added to dictionary_start in the distance limits below */
1634
+
1635
+ /* Minimum score to accept a backward reference. */
1636
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
1637
+
1638
+ FN(PrepareDistanceCache)(privat, dist_cache);
1639
+
1640
+ while (position + FN(HashTypeLength)() < pos_end) {
1641
+ size_t max_length = pos_end - position;
1642
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
1643
+ size_t dictionary_start = BROTLI_MIN(size_t,
1644
+ position + position_offset, max_backward_limit);
1645
+ HasherSearchResult sr;
1646
+ int dict_id = 0;
1647
+ uint8_t p1 = 0;
1648
+ uint8_t p2 = 0;
1649
+ if (params->dictionary.contextual.context_based) { /* choose dict_id from the two preceding bytes */
1650
+ p1 = position >= 1 ?
1651
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
1652
+ p2 = position >= 2 ?
1653
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
1654
+ dict_id = params->dictionary.contextual.context_map[
1655
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1656
+ }
1657
+ sr.len = 0;
1658
+ sr.len_code_delta = 0;
1659
+ sr.distance = 0;
1660
+ sr.score = kMinScore; /* a match is accepted only if the score ends up above this */
1661
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
1662
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
1663
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
1664
+ if (ENABLE_COMPOUND_DICTIONARY) {
1665
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
1666
+ ringbuffer_mask, dist_cache, position, max_length,
1667
+ dictionary_start, params->dist.max_distance, &sr);
1668
+ }
1669
+ if (sr.score > kMinScore) {
1670
+ /* Found a match. Let's look for something even better ahead. */
1671
+ int delayed_backward_references_in_row = 0;
1672
+ --max_length;
1673
+ for (;; --max_length) {
1674
+ const score_t cost_diff_lazy = 175; /* score margin the next-byte match must win by */
1675
+ HasherSearchResult sr2;
1676
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
1677
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* NOTE(review): presumably a length the search has to beat -- confirm against FindLongestMatch */
1678
+ sr2.len_code_delta = 0;
1679
+ sr2.distance = 0;
1680
+ sr2.score = kMinScore;
1681
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
1682
+ dictionary_start = BROTLI_MIN(size_t,
1683
+ position + 1 + position_offset, max_backward_limit);
1684
+ if (params->dictionary.contextual.context_based) {
1685
+ p2 = p1;
1686
+ p1 = ringbuffer[position & ringbuffer_mask]; /* byte at |position| is the previous byte for position + 1 */
1687
+ dict_id = params->dictionary.contextual.context_map[
1688
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1689
+ }
1690
+ FN(FindLongestMatch)(privat,
1691
+ params->dictionary.contextual.dict[dict_id],
1692
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
1693
+ max_distance, dictionary_start + gap, params->dist.max_distance,
1694
+ &sr2);
1695
+ if (ENABLE_COMPOUND_DICTIONARY) {
1696
+ LookupCompoundDictionaryMatch(
1697
+ &params->dictionary.compound, ringbuffer,
1698
+ ringbuffer_mask, dist_cache, position + 1, max_length,
1699
+ dictionary_start, params->dist.max_distance, &sr2);
1700
+ }
1701
+ if (sr2.score >= sr.score + cost_diff_lazy) {
1702
+ /* Ok, let's just write one byte for now and start a match from the
1703
+ next byte. */
1704
+ ++position;
1705
+ ++insert_length;
1706
+ sr = sr2;
1707
+ if (++delayed_backward_references_in_row < 4 && /* at most four deferrals in a row */
1708
+ position + FN(HashTypeLength)() < pos_end) {
1709
+ continue;
1710
+ }
1711
+ }
1712
+ break;
1713
+ }
1714
+ apply_random_heuristics =
1715
+ position + 2 * sr.len + random_heuristics_window_size; /* move the skip-heuristic threshold beyond this match */
1716
+ dictionary_start = BROTLI_MIN(size_t,
1717
+ position + position_offset, max_backward_limit);
1718
+ {
1719
+ /* The first 16 codes are special short-codes,
1720
+ and the minimum offset is 1. */
1721
+ size_t distance_code = ComputeDistanceCode(
1722
+ sr.distance, dictionary_start + gap, dist_cache);
1723
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
1724
+ dist_cache[3] = dist_cache[2];
1725
+ dist_cache[2] = dist_cache[1];
1726
+ dist_cache[1] = dist_cache[0];
1727
+ dist_cache[0] = (int)sr.distance; /* newest distance goes first; older entries were shifted down */
1728
+ FN(PrepareDistanceCache)(privat, dist_cache);
1729
+ }
1730
+ InitCommand(commands++, &params->dist, insert_length,
1731
+ sr.len, sr.len_code_delta, distance_code);
1732
+ }
1733
+ *num_literals += insert_length;
1734
+ insert_length = 0;
1735
+ /* Put the hash keys into the table, if there are enough bytes left.
1736
+ Depending on the hasher implementation, it can push all positions
1737
+ in the given range or only a subset of them.
1738
+ Avoid hash poisoning with RLE data. */
1739
+ {
1740
+ size_t range_start = position + 2;
1741
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
1742
+ if (sr.distance < (sr.len >> 2)) { /* implies (sr.distance << 2) < sr.len, so the subtraction below cannot wrap */
1743
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
1744
+ range_start, position + sr.len - (sr.distance << 2)));
1745
+ }
1746
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
1747
+ range_end);
1748
+ }
1749
+ position += sr.len;
1750
+ } else {
1751
+ ++insert_length;
1752
+ ++position;
1753
+ /* If we have not seen matches for a long time, we can skip some
1754
+ match lookups. Unsuccessful match lookups are very very expensive
1755
+ and this kind of a heuristic speeds up compression quite
1756
+ a lot. */
1757
+ if (position > apply_random_heuristics) {
1758
+ /* Going through uncompressible data, jump. */
1759
+ if (position >
1760
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
1761
+ /* It is quite a long time since we saw a copy, so we assume
1762
+ that this data is not compressible, and store hashes less
1763
+ often. Hashes of non compressible data are less likely to
1764
+ turn out to be useful in the future, too, so we store less of
1765
+ them so as not to flood out the hash table of good compressible
1766
+ data. */
1767
+ const size_t kMargin =
1768
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
1769
+ size_t pos_jump =
1770
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin); /* NOTE(review): unsigned subtraction; assumes pos_end >= kMargin -- confirm */
1771
+ for (; position < pos_jump; position += 4) {
1772
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1773
+ insert_length += 4; /* every position stepped over is counted as a literal */
1774
+ }
1775
+ } else {
1776
+ const size_t kMargin =
1777
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
1778
+ size_t pos_jump =
1779
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin); /* NOTE(review): unsigned subtraction; assumes pos_end >= kMargin -- confirm */
1780
+ for (; position < pos_jump; position += 2) {
1781
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1782
+ insert_length += 2; /* every position stepped over is counted as a literal */
1783
+ }
1784
+ }
1785
+ }
1786
+ }
1787
+ }
1788
+ insert_length += pos_end - position; /* bytes left after the loop stay pending as literals */
1789
+ *last_insert_len = insert_length;
1790
+ *num_commands += (size_t)(commands - orig_commands); /* number of commands appended by this call */
1791
+ }
1792
+ #undef HASHER
1793
+
1794
+ #define HASHER() H35
1795
+ /* NOLINTNEXTLINE(build/include) */
1796
+ /* NOLINT(build/header_guard) */
1797
+ /* Copyright 2013 Google Inc. All Rights Reserved.
1798
+
1799
+ Distributed under MIT license.
1800
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
1801
+ */
1802
+
1803
+ /* template parameters: EXPORT_FN, FN */
1804
+
1805
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
1806
+ size_t num_bytes, size_t position,
1807
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
1808
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
1809
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
1810
+ Command* commands, size_t* num_commands, size_t* num_literals) {
1811
+ HASHER()* privat = &hasher->privat.FN(_);
1812
+ /* Set maximum distance, see section 9.1. of the spec. */
1813
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
1814
+ const size_t position_offset = params->stream_offset;
1815
+
1816
+ const Command* const orig_commands = commands;
1817
+ size_t insert_length = *last_insert_len;
1818
+ const size_t pos_end = position + num_bytes;
1819
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
1820
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
1821
+
1822
+ /* For speed up heuristics for random data. */
1823
+ const size_t random_heuristics_window_size =
1824
+ LiteralSpreeLengthForSparseSearch(params);
1825
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
1826
+ const size_t gap = params->dictionary.compound.total_size;
1827
+
1828
+ /* Minimum score to accept a backward reference. */
1829
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
1830
+
1831
+ FN(PrepareDistanceCache)(privat, dist_cache);
1832
+
1833
+ while (position + FN(HashTypeLength)() < pos_end) {
1834
+ size_t max_length = pos_end - position;
1835
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
1836
+ size_t dictionary_start = BROTLI_MIN(size_t,
1837
+ position + position_offset, max_backward_limit);
1838
+ HasherSearchResult sr;
1839
+ int dict_id = 0;
1840
+ uint8_t p1 = 0;
1841
+ uint8_t p2 = 0;
1842
+ if (params->dictionary.contextual.context_based) {
1843
+ p1 = position >= 1 ?
1844
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
1845
+ p2 = position >= 2 ?
1846
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
1847
+ dict_id = params->dictionary.contextual.context_map[
1848
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1849
+ }
1850
+ sr.len = 0;
1851
+ sr.len_code_delta = 0;
1852
+ sr.distance = 0;
1853
+ sr.score = kMinScore;
1854
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
1855
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
1856
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
1857
+ if (ENABLE_COMPOUND_DICTIONARY) {
1858
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
1859
+ ringbuffer_mask, dist_cache, position, max_length,
1860
+ dictionary_start, params->dist.max_distance, &sr);
1861
+ }
1862
+ if (sr.score > kMinScore) {
1863
+ /* Found a match. Let's look for something even better ahead. */
1864
+ int delayed_backward_references_in_row = 0;
1865
+ --max_length;
1866
+ for (;; --max_length) {
1867
+ const score_t cost_diff_lazy = 175;
1868
+ HasherSearchResult sr2;
1869
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
1870
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
1871
+ sr2.len_code_delta = 0;
1872
+ sr2.distance = 0;
1873
+ sr2.score = kMinScore;
1874
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
1875
+ dictionary_start = BROTLI_MIN(size_t,
1876
+ position + 1 + position_offset, max_backward_limit);
1877
+ if (params->dictionary.contextual.context_based) {
1878
+ p2 = p1;
1879
+ p1 = ringbuffer[position & ringbuffer_mask];
1880
+ dict_id = params->dictionary.contextual.context_map[
1881
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
1882
+ }
1883
+ FN(FindLongestMatch)(privat,
1884
+ params->dictionary.contextual.dict[dict_id],
1885
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
1886
+ max_distance, dictionary_start + gap, params->dist.max_distance,
1887
+ &sr2);
1888
+ if (ENABLE_COMPOUND_DICTIONARY) {
1889
+ LookupCompoundDictionaryMatch(
1890
+ &params->dictionary.compound, ringbuffer,
1891
+ ringbuffer_mask, dist_cache, position + 1, max_length,
1892
+ dictionary_start, params->dist.max_distance, &sr2);
1893
+ }
1894
+ if (sr2.score >= sr.score + cost_diff_lazy) {
1895
+ /* Ok, let's just write one byte for now and start a match from the
1896
+ next byte. */
1897
+ ++position;
1898
+ ++insert_length;
1899
+ sr = sr2;
1900
+ if (++delayed_backward_references_in_row < 4 &&
1901
+ position + FN(HashTypeLength)() < pos_end) {
1902
+ continue;
1903
+ }
1904
+ }
1905
+ break;
1906
+ }
1907
+ apply_random_heuristics =
1908
+ position + 2 * sr.len + random_heuristics_window_size;
1909
+ dictionary_start = BROTLI_MIN(size_t,
1910
+ position + position_offset, max_backward_limit);
1911
+ {
1912
+ /* The first 16 codes are special short-codes,
1913
+ and the minimum offset is 1. */
1914
+ size_t distance_code = ComputeDistanceCode(
1915
+ sr.distance, dictionary_start + gap, dist_cache);
1916
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
1917
+ dist_cache[3] = dist_cache[2];
1918
+ dist_cache[2] = dist_cache[1];
1919
+ dist_cache[1] = dist_cache[0];
1920
+ dist_cache[0] = (int)sr.distance;
1921
+ FN(PrepareDistanceCache)(privat, dist_cache);
1922
+ }
1923
+ InitCommand(commands++, &params->dist, insert_length,
1924
+ sr.len, sr.len_code_delta, distance_code);
1925
+ }
1926
+ *num_literals += insert_length;
1927
+ insert_length = 0;
1928
+ /* Put the hash keys into the table, if there are enough bytes left.
1929
+ Depending on the hasher implementation, it can push all positions
1930
+ in the given range or only a subset of them.
1931
+ Avoid hash poisoning with RLE data. */
1932
+ {
1933
+ size_t range_start = position + 2;
1934
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
1935
+ if (sr.distance < (sr.len >> 2)) {
1936
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
1937
+ range_start, position + sr.len - (sr.distance << 2)));
1938
+ }
1939
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
1940
+ range_end);
1941
+ }
1942
+ position += sr.len;
1943
+ } else {
1944
+ ++insert_length;
1945
+ ++position;
1946
+ /* If we have not seen matches for a long time, we can skip some
1947
+ match lookups. Unsuccessful match lookups are very very expensive
1948
+ and this kind of a heuristic speeds up compression quite
1949
+ a lot. */
1950
+ if (position > apply_random_heuristics) {
1951
+ /* Going through uncompressible data, jump. */
1952
+ if (position >
1953
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
1954
+ /* It is quite a long time since we saw a copy, so we assume
1955
+ that this data is not compressible, and store hashes less
1956
+ often. Hashes of non compressible data are less likely to
1957
+ turn out to be useful in the future, too, so we store less of
1958
+ them to not to flood out the hash table of good compressible
1959
+ data. */
1960
+ const size_t kMargin =
1961
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
1962
+ size_t pos_jump =
1963
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
1964
+ for (; position < pos_jump; position += 4) {
1965
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1966
+ insert_length += 4;
1967
+ }
1968
+ } else {
1969
+ const size_t kMargin =
1970
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
1971
+ size_t pos_jump =
1972
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
1973
+ for (; position < pos_jump; position += 2) {
1974
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
1975
+ insert_length += 2;
1976
+ }
1977
+ }
1978
+ }
1979
+ }
1980
+ }
1981
+ insert_length += pos_end - position;
1982
+ *last_insert_len = insert_length;
1983
+ *num_commands += (size_t)(commands - orig_commands);
1984
+ }
1985
+ #undef HASHER
1986
+
1987
+ #define HASHER() H55
1988
+ /* NOLINTNEXTLINE(build/include) */
1989
+ /* NOLINT(build/header_guard) */
1990
+ /* Copyright 2013 Google Inc. All Rights Reserved.
1991
+
1992
+ Distributed under MIT license.
1993
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
1994
+ */
1995
+
1996
+ /* template parameters: EXPORT_FN, FN
+
+ This copy is compiled with HASHER() defined as H55 (see the #define above).
+
+ EXPORT_FN(CreateBackwardReferences) scans the input from `position` up to
+ `position + num_bytes`, reading bytes as ringbuffer[pos & ringbuffer_mask].
+ At each position FN(FindLongestMatch) is asked for a match and, when
+ ENABLE_COMPOUND_DICTIONARY (defined outside this block) is non-zero,
+ LookupCompoundDictionaryMatch is given the same result to update. A result
+ scoring above kMinScore is written out through InitCommand together with
+ the literals counted so far; otherwise the byte is counted as one more
+ pending literal. Bytes still pending when the loop ends are reported back
+ through *last_insert_len.
+
+ Parameters:
+ num_bytes, position: length and start of the range to process.
+ ringbuffer, ringbuffer_mask: input bytes and the mask applied to a
+ position before indexing.
+ literal_context_lut: used only when
+ params->dictionary.contextual.context_based is set, to select dict_id
+ from the two bytes preceding the search position.
+ params: read only; lgwin, stream_offset, quality, dist and dictionary
+ are consulted.
+ hasher: supplies `privat`, the state handed to the FN(...) lookup and
+ store helpers.
+ dist_cache: passed to the lookups; entries 0..3 are shifted and entry 0
+ is overwritten with the new distance when a command qualifies (see the
+ comment at the ComputeDistanceCode call).
+ last_insert_len: in - literals already pending on entry; out - literals
+ pending on return.
+ commands: output; one Command is written per accepted match.
+ NOTE(review): no capacity check is made here - presumably the caller
+ sizes the array; confirm.
+ num_commands, num_literals: increased by the number of commands written
+ and by the literals attached to those commands. */
1997
+
1998
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
1999
+ size_t num_bytes, size_t position,
2000
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
2001
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
2002
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
2003
+ Command* commands, size_t* num_commands, size_t* num_literals) {
2004
+ HASHER()* privat = &hasher->privat.FN(_);
2005
+ /* Set maximum distance, see section 9.1. of the spec.
+ max_backward_limit caps max_distance and dictionary_start below;
+ position_offset (params->stream_offset) is added to the position when
+ dictionary_start is computed. */
2006
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
2007
+ const size_t position_offset = params->stream_offset;
2008
+
2009
+ const Command* const orig_commands = commands;
2010
+ size_t insert_length = *last_insert_len;
2011
+ const size_t pos_end = position + num_bytes;
2012
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
2013
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
2014
+
2015
+ /* For speed up heuristics for random data.
+ apply_random_heuristics is the position past which the no-match branch
+ starts skipping ahead; it is moved forward every time a command is
+ emitted. `gap` is the compound dictionary's total_size and is added to
+ dictionary_start wherever that value is passed to FN(FindLongestMatch)
+ or ComputeDistanceCode. */
2016
+ const size_t random_heuristics_window_size =
2017
+ LiteralSpreeLengthForSparseSearch(params);
2018
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
2019
+ const size_t gap = params->dictionary.compound.total_size;
2020
+
2021
+ /* Minimum score to accept a backward reference. Search results are seeded
+ with this score, so only a strictly higher score counts as a match. */
2022
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
2023
+
2024
+ FN(PrepareDistanceCache)(privat, dist_cache);
2025
+
2026
+ while (position + FN(HashTypeLength)() < pos_end) {
2027
+ size_t max_length = pos_end - position;
2028
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
2029
+ size_t dictionary_start = BROTLI_MIN(size_t,
2030
+ position + position_offset, max_backward_limit);
2031
+ HasherSearchResult sr;
2032
+ int dict_id = 0;
2033
+ uint8_t p1 = 0;
2034
+ uint8_t p2 = 0;
2035
+ if (params->dictionary.contextual.context_based) {
2036
+ p1 = position >= 1 ?
2037
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
2038
+ p2 = position >= 2 ?
2039
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
2040
+ dict_id = params->dictionary.contextual.context_map[
2041
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2042
+ }
2043
+ sr.len = 0;
2044
+ sr.len_code_delta = 0;
2045
+ sr.distance = 0;
2046
+ sr.score = kMinScore;
2047
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
2048
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
2049
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
2050
+ if (ENABLE_COMPOUND_DICTIONARY) {
2051
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
2052
+ ringbuffer_mask, dist_cache, position, max_length,
2053
+ dictionary_start, params->dist.max_distance, &sr);
2054
+ }
2055
+ if (sr.score > kMinScore) {
2056
+ /* Found a match. Let's look for something even better ahead.
+ The loop below searches again one byte further on (sr2). sr2 replaces
+ sr only if its score is at least cost_diff_lazy higher, and at most 4
+ such one-byte delays are taken in a row. max_length is decremented
+ before each of these searches. For quality below
+ MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH sr2.len starts at
+ min(sr.len - 1, max_length) instead of 0.
+ NOTE(review): presumably a length the hasher must beat - confirm in
+ FN(FindLongestMatch). */
2057
+ int delayed_backward_references_in_row = 0;
2058
+ --max_length;
2059
+ for (;; --max_length) {
2060
+ const score_t cost_diff_lazy = 175;
2061
+ HasherSearchResult sr2;
2062
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
2063
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
2064
+ sr2.len_code_delta = 0;
2065
+ sr2.distance = 0;
2066
+ sr2.score = kMinScore;
2067
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
2068
+ dictionary_start = BROTLI_MIN(size_t,
2069
+ position + 1 + position_offset, max_backward_limit);
2070
+ if (params->dictionary.contextual.context_based) {
2071
+ p2 = p1;
2072
+ p1 = ringbuffer[position & ringbuffer_mask];
2073
+ dict_id = params->dictionary.contextual.context_map[
2074
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2075
+ }
2076
+ FN(FindLongestMatch)(privat,
2077
+ params->dictionary.contextual.dict[dict_id],
2078
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
2079
+ max_distance, dictionary_start + gap, params->dist.max_distance,
2080
+ &sr2);
2081
+ if (ENABLE_COMPOUND_DICTIONARY) {
2082
+ LookupCompoundDictionaryMatch(
2083
+ &params->dictionary.compound, ringbuffer,
2084
+ ringbuffer_mask, dist_cache, position + 1, max_length,
2085
+ dictionary_start, params->dist.max_distance, &sr2);
2086
+ }
2087
+ if (sr2.score >= sr.score + cost_diff_lazy) {
2088
+ /* Ok, let's just write one byte for now and start a match from the
2089
+ next byte. The byte stepped over is added to insert_length. */
2090
+ ++position;
2091
+ ++insert_length;
2092
+ sr = sr2;
2093
+ if (++delayed_backward_references_in_row < 4 &&
2094
+ position + FN(HashTypeLength)() < pos_end) {
2095
+ continue;
2096
+ }
2097
+ }
2098
+ break;
2099
+ }
2100
+ apply_random_heuristics =
2101
+ position + 2 * sr.len + random_heuristics_window_size;
2102
+ dictionary_start = BROTLI_MIN(size_t,
2103
+ position + position_offset, max_backward_limit);
2104
+ {
2105
+ /* The first 16 codes are special short-codes,
2106
+ and the minimum offset is 1.
+ dist_cache is rotated only when sr.distance does not exceed
+ dictionary_start + gap and distance_code is non-zero.
+ NOTE(review): presumably code 0 means "same as dist_cache[0]" -
+ confirm in ComputeDistanceCode. */
2107
+ size_t distance_code = ComputeDistanceCode(
2108
+ sr.distance, dictionary_start + gap, dist_cache);
2109
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
2110
+ dist_cache[3] = dist_cache[2];
2111
+ dist_cache[2] = dist_cache[1];
2112
+ dist_cache[1] = dist_cache[0];
2113
+ dist_cache[0] = (int)sr.distance;
2114
+ FN(PrepareDistanceCache)(privat, dist_cache);
2115
+ }
2116
+ InitCommand(commands++, &params->dist, insert_length,
2117
+ sr.len, sr.len_code_delta, distance_code);
2118
+ }
2119
+ *num_literals += insert_length;
2120
+ insert_length = 0;
2121
+ /* Put the hash keys into the table, if there are enough bytes left.
2122
+ Depending on the hasher implementation, it can push all positions
2123
+ in the given range or only a subset of them.
2124
+ Avoid hash poisoning with RLE data.
+ range_end never exceeds store_end (computed on entry from
+ FN(StoreLookahead)()). When sr.distance < (sr.len >> 2) the start of
+ the stored range moves up to position + sr.len - 4 * sr.distance,
+ kept within [position + 2, range_end]. */
2125
+ {
2126
+ size_t range_start = position + 2;
2127
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
2128
+ if (sr.distance < (sr.len >> 2)) {
2129
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
2130
+ range_start, position + sr.len - (sr.distance << 2)));
2131
+ }
2132
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
2133
+ range_end);
2134
+ }
2135
+ position += sr.len;
2136
+ } else {
2137
+ ++insert_length;
2138
+ ++position;
2139
+ /* If we have not seen matches for a long time, we can skip some
2140
+ match lookups. Unsuccessful match lookups are very very expensive
2141
+ and this kind of a heuristic speeds up compression quite
2142
+ a lot. */
2143
+ if (position > apply_random_heuristics) {
2144
+ /* Going through uncompressible data, jump. */
2145
+ if (position >
2146
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
2147
+ /* It is quite a long time since we saw a copy, so we assume
2148
+ that this data is not compressible, and store hashes less
2149
+ often. Hashes of non compressible data are less likely to
2150
+ turn out to be useful in the future, too, so we store less of
2151
+ them to not to flood out the hash table of good compressible
2152
+ data.
+ This branch steps 4 positions at a time towards
+ pos_jump = min(position + 16, pos_end - kMargin), storing one
+ hash per step; the else branch steps by 2 towards position + 8.
+ Every position stepped over is counted in insert_length. */
2153
+ const size_t kMargin =
2154
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
2155
+ size_t pos_jump =
2156
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
2157
+ for (; position < pos_jump; position += 4) {
2158
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2159
+ insert_length += 4;
2160
+ }
2161
+ } else {
2162
+ const size_t kMargin =
2163
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
2164
+ size_t pos_jump =
2165
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
2166
+ for (; position < pos_jump; position += 2) {
2167
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2168
+ insert_length += 2;
2169
+ }
2170
+ }
2171
+ }
2172
+ }
2173
+ }
2174
+ insert_length += pos_end - position;
2175
+ *last_insert_len = insert_length;
2176
+ *num_commands += (size_t)(commands - orig_commands);
2177
+ }
2178
+ #undef HASHER
2179
+
2180
+ #define HASHER() H65
2181
+ /* NOLINTNEXTLINE(build/include) */
2182
+ /* NOLINT(build/header_guard) */
2183
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2184
+
2185
+ Distributed under MIT license.
2186
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
2187
+ */
2188
+
2189
+ /* template parameters: EXPORT_FN, FN
+
+ This copy is compiled with HASHER() defined as H65 (see the #define above).
+
+ EXPORT_FN(CreateBackwardReferences) scans the input from `position` up to
+ `position + num_bytes`, reading bytes as ringbuffer[pos & ringbuffer_mask].
+ At each position FN(FindLongestMatch) is asked for a match and, when
+ ENABLE_COMPOUND_DICTIONARY (defined outside this block) is non-zero,
+ LookupCompoundDictionaryMatch is given the same result to update. A result
+ scoring above kMinScore is written out through InitCommand together with
+ the literals counted so far; otherwise the byte is counted as one more
+ pending literal. Bytes still pending when the loop ends are reported back
+ through *last_insert_len.
+
+ Parameters:
+ num_bytes, position: length and start of the range to process.
+ ringbuffer, ringbuffer_mask: input bytes and the mask applied to a
+ position before indexing.
+ literal_context_lut: used only when
+ params->dictionary.contextual.context_based is set, to select dict_id
+ from the two bytes preceding the search position.
+ params: read only; lgwin, stream_offset, quality, dist and dictionary
+ are consulted.
+ hasher: supplies `privat`, the state handed to the FN(...) lookup and
+ store helpers.
+ dist_cache: passed to the lookups; entries 0..3 are shifted and entry 0
+ is overwritten with the new distance when a command qualifies (see the
+ comment at the ComputeDistanceCode call).
+ last_insert_len: in - literals already pending on entry; out - literals
+ pending on return.
+ commands: output; one Command is written per accepted match.
+ NOTE(review): no capacity check is made here - presumably the caller
+ sizes the array; confirm.
+ num_commands, num_literals: increased by the number of commands written
+ and by the literals attached to those commands. */
2190
+
2191
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
2192
+ size_t num_bytes, size_t position,
2193
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
2194
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
2195
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
2196
+ Command* commands, size_t* num_commands, size_t* num_literals) {
2197
+ HASHER()* privat = &hasher->privat.FN(_);
2198
+ /* Set maximum distance, see section 9.1. of the spec.
+ max_backward_limit caps max_distance and dictionary_start below;
+ position_offset (params->stream_offset) is added to the position when
+ dictionary_start is computed. */
2199
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
2200
+ const size_t position_offset = params->stream_offset;
2201
+
2202
+ const Command* const orig_commands = commands;
2203
+ size_t insert_length = *last_insert_len;
2204
+ const size_t pos_end = position + num_bytes;
2205
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
2206
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
2207
+
2208
+ /* For speed up heuristics for random data.
+ apply_random_heuristics is the position past which the no-match branch
+ starts skipping ahead; it is moved forward every time a command is
+ emitted. `gap` is the compound dictionary's total_size and is added to
+ dictionary_start wherever that value is passed to FN(FindLongestMatch)
+ or ComputeDistanceCode. */
2209
+ const size_t random_heuristics_window_size =
2210
+ LiteralSpreeLengthForSparseSearch(params);
2211
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
2212
+ const size_t gap = params->dictionary.compound.total_size;
2213
+
2214
+ /* Minimum score to accept a backward reference. Search results are seeded
+ with this score, so only a strictly higher score counts as a match. */
2215
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
2216
+
2217
+ FN(PrepareDistanceCache)(privat, dist_cache);
2218
+
2219
+ while (position + FN(HashTypeLength)() < pos_end) {
2220
+ size_t max_length = pos_end - position;
2221
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
2222
+ size_t dictionary_start = BROTLI_MIN(size_t,
2223
+ position + position_offset, max_backward_limit);
2224
+ HasherSearchResult sr;
2225
+ int dict_id = 0;
2226
+ uint8_t p1 = 0;
2227
+ uint8_t p2 = 0;
2228
+ if (params->dictionary.contextual.context_based) {
2229
+ p1 = position >= 1 ?
2230
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
2231
+ p2 = position >= 2 ?
2232
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
2233
+ dict_id = params->dictionary.contextual.context_map[
2234
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2235
+ }
2236
+ sr.len = 0;
2237
+ sr.len_code_delta = 0;
2238
+ sr.distance = 0;
2239
+ sr.score = kMinScore;
2240
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
2241
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
2242
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
2243
+ if (ENABLE_COMPOUND_DICTIONARY) {
2244
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
2245
+ ringbuffer_mask, dist_cache, position, max_length,
2246
+ dictionary_start, params->dist.max_distance, &sr);
2247
+ }
2248
+ if (sr.score > kMinScore) {
2249
+ /* Found a match. Let's look for something even better ahead.
+ The loop below searches again one byte further on (sr2). sr2 replaces
+ sr only if its score is at least cost_diff_lazy higher, and at most 4
+ such one-byte delays are taken in a row. max_length is decremented
+ before each of these searches. For quality below
+ MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH sr2.len starts at
+ min(sr.len - 1, max_length) instead of 0.
+ NOTE(review): presumably a length the hasher must beat - confirm in
+ FN(FindLongestMatch). */
2250
+ int delayed_backward_references_in_row = 0;
2251
+ --max_length;
2252
+ for (;; --max_length) {
2253
+ const score_t cost_diff_lazy = 175;
2254
+ HasherSearchResult sr2;
2255
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
2256
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
2257
+ sr2.len_code_delta = 0;
2258
+ sr2.distance = 0;
2259
+ sr2.score = kMinScore;
2260
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
2261
+ dictionary_start = BROTLI_MIN(size_t,
2262
+ position + 1 + position_offset, max_backward_limit);
2263
+ if (params->dictionary.contextual.context_based) {
2264
+ p2 = p1;
2265
+ p1 = ringbuffer[position & ringbuffer_mask];
2266
+ dict_id = params->dictionary.contextual.context_map[
2267
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2268
+ }
2269
+ FN(FindLongestMatch)(privat,
2270
+ params->dictionary.contextual.dict[dict_id],
2271
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
2272
+ max_distance, dictionary_start + gap, params->dist.max_distance,
2273
+ &sr2);
2274
+ if (ENABLE_COMPOUND_DICTIONARY) {
2275
+ LookupCompoundDictionaryMatch(
2276
+ &params->dictionary.compound, ringbuffer,
2277
+ ringbuffer_mask, dist_cache, position + 1, max_length,
2278
+ dictionary_start, params->dist.max_distance, &sr2);
2279
+ }
2280
+ if (sr2.score >= sr.score + cost_diff_lazy) {
2281
+ /* Ok, let's just write one byte for now and start a match from the
2282
+ next byte. The byte stepped over is added to insert_length. */
2283
+ ++position;
2284
+ ++insert_length;
2285
+ sr = sr2;
2286
+ if (++delayed_backward_references_in_row < 4 &&
2287
+ position + FN(HashTypeLength)() < pos_end) {
2288
+ continue;
2289
+ }
2290
+ }
2291
+ break;
2292
+ }
2293
+ apply_random_heuristics =
2294
+ position + 2 * sr.len + random_heuristics_window_size;
2295
+ dictionary_start = BROTLI_MIN(size_t,
2296
+ position + position_offset, max_backward_limit);
2297
+ {
2298
+ /* The first 16 codes are special short-codes,
2299
+ and the minimum offset is 1.
+ dist_cache is rotated only when sr.distance does not exceed
+ dictionary_start + gap and distance_code is non-zero.
+ NOTE(review): presumably code 0 means "same as dist_cache[0]" -
+ confirm in ComputeDistanceCode. */
2300
+ size_t distance_code = ComputeDistanceCode(
2301
+ sr.distance, dictionary_start + gap, dist_cache);
2302
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
2303
+ dist_cache[3] = dist_cache[2];
2304
+ dist_cache[2] = dist_cache[1];
2305
+ dist_cache[1] = dist_cache[0];
2306
+ dist_cache[0] = (int)sr.distance;
2307
+ FN(PrepareDistanceCache)(privat, dist_cache);
2308
+ }
2309
+ InitCommand(commands++, &params->dist, insert_length,
2310
+ sr.len, sr.len_code_delta, distance_code);
2311
+ }
2312
+ *num_literals += insert_length;
2313
+ insert_length = 0;
2314
+ /* Put the hash keys into the table, if there are enough bytes left.
2315
+ Depending on the hasher implementation, it can push all positions
2316
+ in the given range or only a subset of them.
2317
+ Avoid hash poisoning with RLE data.
+ range_end never exceeds store_end (computed on entry from
+ FN(StoreLookahead)()). When sr.distance < (sr.len >> 2) the start of
+ the stored range moves up to position + sr.len - 4 * sr.distance,
+ kept within [position + 2, range_end]. */
2318
+ {
2319
+ size_t range_start = position + 2;
2320
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
2321
+ if (sr.distance < (sr.len >> 2)) {
2322
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
2323
+ range_start, position + sr.len - (sr.distance << 2)));
2324
+ }
2325
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
2326
+ range_end);
2327
+ }
2328
+ position += sr.len;
2329
+ } else {
2330
+ ++insert_length;
2331
+ ++position;
2332
+ /* If we have not seen matches for a long time, we can skip some
2333
+ match lookups. Unsuccessful match lookups are very very expensive
2334
+ and this kind of a heuristic speeds up compression quite
2335
+ a lot. */
2336
+ if (position > apply_random_heuristics) {
2337
+ /* Going through uncompressible data, jump. */
2338
+ if (position >
2339
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
2340
+ /* It is quite a long time since we saw a copy, so we assume
2341
+ that this data is not compressible, and store hashes less
2342
+ often. Hashes of non compressible data are less likely to
2343
+ turn out to be useful in the future, too, so we store less of
2344
+ them to not to flood out the hash table of good compressible
2345
+ data.
+ This branch steps 4 positions at a time towards
+ pos_jump = min(position + 16, pos_end - kMargin), storing one
+ hash per step; the else branch steps by 2 towards position + 8.
+ Every position stepped over is counted in insert_length. */
2346
+ const size_t kMargin =
2347
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
2348
+ size_t pos_jump =
2349
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
2350
+ for (; position < pos_jump; position += 4) {
2351
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2352
+ insert_length += 4;
2353
+ }
2354
+ } else {
2355
+ const size_t kMargin =
2356
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
2357
+ size_t pos_jump =
2358
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
2359
+ for (; position < pos_jump; position += 2) {
2360
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2361
+ insert_length += 2;
2362
+ }
2363
+ }
2364
+ }
2365
+ }
2366
+ }
2367
+ insert_length += pos_end - position;
2368
+ *last_insert_len = insert_length;
2369
+ *num_commands += (size_t)(commands - orig_commands);
2370
+ }
2371
+ #undef HASHER
2372
+
2373
+ #undef ENABLE_COMPOUND_DICTIONARY
2374
+ #undef PREFIX
2375
+ #define PREFIX() D /* NOTE(review): presumably consumed by the EXPORT_FN/FN name-building macros, whose definitions are not visible here - confirm. */
2376
+ #define ENABLE_COMPOUND_DICTIONARY 1 /* Makes the `if (ENABLE_COMPOUND_DICTIONARY)` branches in the CreateBackwardReferences copies that follow call LookupCompoundDictionaryMatch. */
2377
+
2378
+ #define HASHER() H5
2379
+ /* NOLINTNEXTLINE(build/include) */
2380
+ /* NOLINT(build/header_guard) */
2381
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2382
+
2383
+ Distributed under MIT license.
2384
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
2385
+ */
2386
+
2387
+ /* template parameters: EXPORT_FN, FN
+
+ This copy is compiled with HASHER() defined as H5 (see the #define above)
+ and with ENABLE_COMPOUND_DICTIONARY defined as 1 shortly before it, so the
+ LookupCompoundDictionaryMatch calls below are active.
+
+ EXPORT_FN(CreateBackwardReferences) scans the input from `position` up to
+ `position + num_bytes`, reading bytes as ringbuffer[pos & ringbuffer_mask].
+ At each position FN(FindLongestMatch) is asked for a match and
+ LookupCompoundDictionaryMatch is then given the same result to update. A
+ result scoring above kMinScore is written out through InitCommand together
+ with the literals counted so far; otherwise the byte is counted as one more
+ pending literal. Bytes still pending when the loop ends are reported back
+ through *last_insert_len.
+
+ Parameters:
+ num_bytes, position: length and start of the range to process.
+ ringbuffer, ringbuffer_mask: input bytes and the mask applied to a
+ position before indexing.
+ literal_context_lut: used only when
+ params->dictionary.contextual.context_based is set, to select dict_id
+ from the two bytes preceding the search position.
+ params: read only; lgwin, stream_offset, quality, dist and dictionary
+ are consulted.
+ hasher: supplies `privat`, the state handed to the FN(...) lookup and
+ store helpers.
+ dist_cache: passed to the lookups; entries 0..3 are shifted and entry 0
+ is overwritten with the new distance when a command qualifies (see the
+ comment at the ComputeDistanceCode call).
+ last_insert_len: in - literals already pending on entry; out - literals
+ pending on return.
+ commands: output; one Command is written per accepted match.
+ NOTE(review): no capacity check is made here - presumably the caller
+ sizes the array; confirm.
+ num_commands, num_literals: increased by the number of commands written
+ and by the literals attached to those commands. */
2388
+
2389
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
2390
+ size_t num_bytes, size_t position,
2391
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
2392
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
2393
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
2394
+ Command* commands, size_t* num_commands, size_t* num_literals) {
2395
+ HASHER()* privat = &hasher->privat.FN(_);
2396
+ /* Set maximum distance, see section 9.1. of the spec.
+ max_backward_limit caps max_distance and dictionary_start below;
+ position_offset (params->stream_offset) is added to the position when
+ dictionary_start is computed. */
2397
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
2398
+ const size_t position_offset = params->stream_offset;
2399
+
2400
+ const Command* const orig_commands = commands;
2401
+ size_t insert_length = *last_insert_len;
2402
+ const size_t pos_end = position + num_bytes;
2403
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
2404
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
2405
+
2406
+ /* For speed up heuristics for random data.
+ apply_random_heuristics is the position past which the no-match branch
+ starts skipping ahead; it is moved forward every time a command is
+ emitted. `gap` is the compound dictionary's total_size and is added to
+ dictionary_start wherever that value is passed to FN(FindLongestMatch)
+ or ComputeDistanceCode. */
2407
+ const size_t random_heuristics_window_size =
2408
+ LiteralSpreeLengthForSparseSearch(params);
2409
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
2410
+ const size_t gap = params->dictionary.compound.total_size;
2411
+
2412
+ /* Minimum score to accept a backward reference. Search results are seeded
+ with this score, so only a strictly higher score counts as a match. */
2413
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
2414
+
2415
+ FN(PrepareDistanceCache)(privat, dist_cache);
2416
+
2417
+ while (position + FN(HashTypeLength)() < pos_end) {
2418
+ size_t max_length = pos_end - position;
2419
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
2420
+ size_t dictionary_start = BROTLI_MIN(size_t,
2421
+ position + position_offset, max_backward_limit);
2422
+ HasherSearchResult sr;
2423
+ int dict_id = 0;
2424
+ uint8_t p1 = 0;
2425
+ uint8_t p2 = 0;
2426
+ if (params->dictionary.contextual.context_based) {
2427
+ p1 = position >= 1 ?
2428
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
2429
+ p2 = position >= 2 ?
2430
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
2431
+ dict_id = params->dictionary.contextual.context_map[
2432
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2433
+ }
2434
+ sr.len = 0;
2435
+ sr.len_code_delta = 0;
2436
+ sr.distance = 0;
2437
+ sr.score = kMinScore;
2438
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
2439
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
2440
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
2441
+ if (ENABLE_COMPOUND_DICTIONARY) {
2442
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
2443
+ ringbuffer_mask, dist_cache, position, max_length,
2444
+ dictionary_start, params->dist.max_distance, &sr);
2445
+ }
2446
+ if (sr.score > kMinScore) {
2447
+ /* Found a match. Let's look for something even better ahead.
+ The loop below searches again one byte further on (sr2). sr2 replaces
+ sr only if its score is at least cost_diff_lazy higher, and at most 4
+ such one-byte delays are taken in a row. max_length is decremented
+ before each of these searches. For quality below
+ MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH sr2.len starts at
+ min(sr.len - 1, max_length) instead of 0.
+ NOTE(review): presumably a length the hasher must beat - confirm in
+ FN(FindLongestMatch). */
2448
+ int delayed_backward_references_in_row = 0;
2449
+ --max_length;
2450
+ for (;; --max_length) {
2451
+ const score_t cost_diff_lazy = 175;
2452
+ HasherSearchResult sr2;
2453
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
2454
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
2455
+ sr2.len_code_delta = 0;
2456
+ sr2.distance = 0;
2457
+ sr2.score = kMinScore;
2458
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
2459
+ dictionary_start = BROTLI_MIN(size_t,
2460
+ position + 1 + position_offset, max_backward_limit);
2461
+ if (params->dictionary.contextual.context_based) {
2462
+ p2 = p1;
2463
+ p1 = ringbuffer[position & ringbuffer_mask];
2464
+ dict_id = params->dictionary.contextual.context_map[
2465
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2466
+ }
2467
+ FN(FindLongestMatch)(privat,
2468
+ params->dictionary.contextual.dict[dict_id],
2469
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
2470
+ max_distance, dictionary_start + gap, params->dist.max_distance,
2471
+ &sr2);
2472
+ if (ENABLE_COMPOUND_DICTIONARY) {
2473
+ LookupCompoundDictionaryMatch(
2474
+ &params->dictionary.compound, ringbuffer,
2475
+ ringbuffer_mask, dist_cache, position + 1, max_length,
2476
+ dictionary_start, params->dist.max_distance, &sr2);
2477
+ }
2478
+ if (sr2.score >= sr.score + cost_diff_lazy) {
2479
+ /* Ok, let's just write one byte for now and start a match from the
2480
+ next byte. The byte stepped over is added to insert_length. */
2481
+ ++position;
2482
+ ++insert_length;
2483
+ sr = sr2;
2484
+ if (++delayed_backward_references_in_row < 4 &&
2485
+ position + FN(HashTypeLength)() < pos_end) {
2486
+ continue;
2487
+ }
2488
+ }
2489
+ break;
2490
+ }
2491
+ apply_random_heuristics =
2492
+ position + 2 * sr.len + random_heuristics_window_size;
2493
+ dictionary_start = BROTLI_MIN(size_t,
2494
+ position + position_offset, max_backward_limit);
2495
+ {
2496
+ /* The first 16 codes are special short-codes,
2497
+ and the minimum offset is 1.
+ dist_cache is rotated only when sr.distance does not exceed
+ dictionary_start + gap and distance_code is non-zero.
+ NOTE(review): presumably code 0 means "same as dist_cache[0]" -
+ confirm in ComputeDistanceCode. */
2498
+ size_t distance_code = ComputeDistanceCode(
2499
+ sr.distance, dictionary_start + gap, dist_cache);
2500
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
2501
+ dist_cache[3] = dist_cache[2];
2502
+ dist_cache[2] = dist_cache[1];
2503
+ dist_cache[1] = dist_cache[0];
2504
+ dist_cache[0] = (int)sr.distance;
2505
+ FN(PrepareDistanceCache)(privat, dist_cache);
2506
+ }
2507
+ InitCommand(commands++, &params->dist, insert_length,
2508
+ sr.len, sr.len_code_delta, distance_code);
2509
+ }
2510
+ *num_literals += insert_length;
2511
+ insert_length = 0;
2512
+ /* Put the hash keys into the table, if there are enough bytes left.
2513
+ Depending on the hasher implementation, it can push all positions
2514
+ in the given range or only a subset of them.
2515
+ Avoid hash poisoning with RLE data.
+ range_end never exceeds store_end (computed on entry from
+ FN(StoreLookahead)()). When sr.distance < (sr.len >> 2) the start of
+ the stored range moves up to position + sr.len - 4 * sr.distance,
+ kept within [position + 2, range_end]. */
2516
+ {
2517
+ size_t range_start = position + 2;
2518
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
2519
+ if (sr.distance < (sr.len >> 2)) {
2520
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
2521
+ range_start, position + sr.len - (sr.distance << 2)));
2522
+ }
2523
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
2524
+ range_end);
2525
+ }
2526
+ position += sr.len;
2527
+ } else {
2528
+ ++insert_length;
2529
+ ++position;
2530
+ /* If we have not seen matches for a long time, we can skip some
2531
+ match lookups. Unsuccessful match lookups are very very expensive
2532
+ and this kind of a heuristic speeds up compression quite
2533
+ a lot. */
2534
+ if (position > apply_random_heuristics) {
2535
+ /* Going through uncompressible data, jump. */
2536
+ if (position >
2537
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
2538
+ /* It is quite a long time since we saw a copy, so we assume
2539
+ that this data is not compressible, and store hashes less
2540
+ often. Hashes of non compressible data are less likely to
2541
+ turn out to be useful in the future, too, so we store less of
2542
+ them to not to flood out the hash table of good compressible
2543
+ data.
+ This branch steps 4 positions at a time towards
+ pos_jump = min(position + 16, pos_end - kMargin), storing one
+ hash per step; the else branch steps by 2 towards position + 8.
+ Every position stepped over is counted in insert_length. */
2544
+ const size_t kMargin =
2545
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
2546
+ size_t pos_jump =
2547
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
2548
+ for (; position < pos_jump; position += 4) {
2549
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2550
+ insert_length += 4;
2551
+ }
2552
+ } else {
2553
+ const size_t kMargin =
2554
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
2555
+ size_t pos_jump =
2556
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
2557
+ for (; position < pos_jump; position += 2) {
2558
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2559
+ insert_length += 2;
2560
+ }
2561
+ }
2562
+ }
2563
+ }
2564
+ }
2565
+ insert_length += pos_end - position;
2566
+ *last_insert_len = insert_length;
2567
+ *num_commands += (size_t)(commands - orig_commands);
2568
+ }
2569
+ #undef HASHER
2570
+ #define HASHER() H6
2571
+ /* NOLINTNEXTLINE(build/include) */
2572
+ /* NOLINT(build/header_guard) */
2573
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2574
+
2575
+ Distributed under MIT license.
2576
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
2577
+ */
2578
+
2579
+ /* template parameters: EXPORT_FN, FN */
2580
+
2581
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
2582
+ size_t num_bytes, size_t position,
2583
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
2584
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
2585
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
2586
+ Command* commands, size_t* num_commands, size_t* num_literals) {
2587
+ HASHER()* privat = &hasher->privat.FN(_);
2588
+ /* Set maximum distance, see section 9.1. of the spec. */
2589
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
2590
+ const size_t position_offset = params->stream_offset;
2591
+
2592
+ const Command* const orig_commands = commands;
2593
+ size_t insert_length = *last_insert_len;
2594
+ const size_t pos_end = position + num_bytes;
2595
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
2596
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
2597
+
2598
+ /* For speed up heuristics for random data. */
2599
+ const size_t random_heuristics_window_size =
2600
+ LiteralSpreeLengthForSparseSearch(params);
2601
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
2602
+ const size_t gap = params->dictionary.compound.total_size;
2603
+
2604
+ /* Minimum score to accept a backward reference. */
2605
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
2606
+
2607
+ FN(PrepareDistanceCache)(privat, dist_cache);
2608
+
2609
+ while (position + FN(HashTypeLength)() < pos_end) {
2610
+ size_t max_length = pos_end - position;
2611
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
2612
+ size_t dictionary_start = BROTLI_MIN(size_t,
2613
+ position + position_offset, max_backward_limit);
2614
+ HasherSearchResult sr;
2615
+ int dict_id = 0;
2616
+ uint8_t p1 = 0;
2617
+ uint8_t p2 = 0;
2618
+ if (params->dictionary.contextual.context_based) {
2619
+ p1 = position >= 1 ?
2620
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
2621
+ p2 = position >= 2 ?
2622
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
2623
+ dict_id = params->dictionary.contextual.context_map[
2624
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2625
+ }
2626
+ sr.len = 0;
2627
+ sr.len_code_delta = 0;
2628
+ sr.distance = 0;
2629
+ sr.score = kMinScore;
2630
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
2631
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
2632
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
2633
+ if (ENABLE_COMPOUND_DICTIONARY) {
2634
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
2635
+ ringbuffer_mask, dist_cache, position, max_length,
2636
+ dictionary_start, params->dist.max_distance, &sr);
2637
+ }
2638
+ if (sr.score > kMinScore) {
2639
+ /* Found a match. Let's look for something even better ahead. */
2640
+ int delayed_backward_references_in_row = 0;
2641
+ --max_length;
2642
+ for (;; --max_length) {
2643
+ const score_t cost_diff_lazy = 175;
2644
+ HasherSearchResult sr2;
2645
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
2646
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
2647
+ sr2.len_code_delta = 0;
2648
+ sr2.distance = 0;
2649
+ sr2.score = kMinScore;
2650
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
2651
+ dictionary_start = BROTLI_MIN(size_t,
2652
+ position + 1 + position_offset, max_backward_limit);
2653
+ if (params->dictionary.contextual.context_based) {
2654
+ p2 = p1;
2655
+ p1 = ringbuffer[position & ringbuffer_mask];
2656
+ dict_id = params->dictionary.contextual.context_map[
2657
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2658
+ }
2659
+ FN(FindLongestMatch)(privat,
2660
+ params->dictionary.contextual.dict[dict_id],
2661
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
2662
+ max_distance, dictionary_start + gap, params->dist.max_distance,
2663
+ &sr2);
2664
+ if (ENABLE_COMPOUND_DICTIONARY) {
2665
+ LookupCompoundDictionaryMatch(
2666
+ &params->dictionary.compound, ringbuffer,
2667
+ ringbuffer_mask, dist_cache, position + 1, max_length,
2668
+ dictionary_start, params->dist.max_distance, &sr2);
2669
+ }
2670
+ if (sr2.score >= sr.score + cost_diff_lazy) {
2671
+ /* Ok, let's just write one byte for now and start a match from the
2672
+ next byte. */
2673
+ ++position;
2674
+ ++insert_length;
2675
+ sr = sr2;
2676
+ if (++delayed_backward_references_in_row < 4 &&
2677
+ position + FN(HashTypeLength)() < pos_end) {
2678
+ continue;
2679
+ }
2680
+ }
2681
+ break;
2682
+ }
2683
+ apply_random_heuristics =
2684
+ position + 2 * sr.len + random_heuristics_window_size;
2685
+ dictionary_start = BROTLI_MIN(size_t,
2686
+ position + position_offset, max_backward_limit);
2687
+ {
2688
+ /* The first 16 codes are special short-codes,
2689
+ and the minimum offset is 1. */
2690
+ size_t distance_code = ComputeDistanceCode(
2691
+ sr.distance, dictionary_start + gap, dist_cache);
2692
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
2693
+ dist_cache[3] = dist_cache[2];
2694
+ dist_cache[2] = dist_cache[1];
2695
+ dist_cache[1] = dist_cache[0];
2696
+ dist_cache[0] = (int)sr.distance;
2697
+ FN(PrepareDistanceCache)(privat, dist_cache);
2698
+ }
2699
+ InitCommand(commands++, &params->dist, insert_length,
2700
+ sr.len, sr.len_code_delta, distance_code);
2701
+ }
2702
+ *num_literals += insert_length;
2703
+ insert_length = 0;
2704
+ /* Put the hash keys into the table, if there are enough bytes left.
2705
+ Depending on the hasher implementation, it can push all positions
2706
+ in the given range or only a subset of them.
2707
+ Avoid hash poisoning with RLE data. */
2708
+ {
2709
+ size_t range_start = position + 2;
2710
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
2711
+ if (sr.distance < (sr.len >> 2)) {
2712
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
2713
+ range_start, position + sr.len - (sr.distance << 2)));
2714
+ }
2715
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
2716
+ range_end);
2717
+ }
2718
+ position += sr.len;
2719
+ } else {
2720
+ ++insert_length;
2721
+ ++position;
2722
+ /* If we have not seen matches for a long time, we can skip some
2723
+ match lookups. Unsuccessful match lookups are very very expensive
2724
+ and this kind of a heuristic speeds up compression quite
2725
+ a lot. */
2726
+ if (position > apply_random_heuristics) {
2727
+ /* Going through uncompressible data, jump. */
2728
+ if (position >
2729
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
2730
+ /* It is quite a long time since we saw a copy, so we assume
2731
+ that this data is not compressible, and store hashes less
2732
+ often. Hashes of non compressible data are less likely to
2733
+ turn out to be useful in the future, too, so we store less of
2734
+ them to not to flood out the hash table of good compressible
2735
+ data. */
2736
+ const size_t kMargin =
2737
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
2738
+ size_t pos_jump =
2739
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
2740
+ for (; position < pos_jump; position += 4) {
2741
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2742
+ insert_length += 4;
2743
+ }
2744
+ } else {
2745
+ const size_t kMargin =
2746
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
2747
+ size_t pos_jump =
2748
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
2749
+ for (; position < pos_jump; position += 2) {
2750
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2751
+ insert_length += 2;
2752
+ }
2753
+ }
2754
+ }
2755
+ }
2756
+ }
2757
+ insert_length += pos_end - position;
2758
+ *last_insert_len = insert_length;
2759
+ *num_commands += (size_t)(commands - orig_commands);
2760
+ }
2761
+ #undef HASHER
2762
+ #define HASHER() H40
2763
+ /* NOLINTNEXTLINE(build/include) */
2764
+ /* NOLINT(build/header_guard) */
2765
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2766
+
2767
+ Distributed under MIT license.
2768
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
2769
+ */
2770
+
2771
+ /* template parameters: EXPORT_FN, FN. Instantiated here with HASHER() defined as H40 (see the #define above). Walks ringbuffer positions [position, position + num_bytes), calls FN(FindLongestMatch) at each searched position, and appends one Command per accepted match to commands. Adds each command's insert length to *num_literals and the number of appended commands to *num_commands; the pending insert length is read from and written back to *last_insert_len, and dist_cache is updated in place. */
2771
+
2772
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
2773
+ size_t num_bytes, size_t position,
2774
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
2775
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
2776
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
2777
+ Command* commands, size_t* num_commands, size_t* num_literals) {
2778
+ HASHER()* privat = &hasher->privat.FN(_); /* Hasher state selected by the FN/HASHER template macros. */
2779
+ /* Set maximum distance, see section 9.1. of the spec. */
2780
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
2781
+ const size_t position_offset = params->stream_offset; /* Added to position when computing dictionary_start below. */
2782
+
2783
+ const Command* const orig_commands = commands; /* Kept to count the commands appended by this call. */
2784
+ size_t insert_length = *last_insert_len; /* Start from the insert length carried in by the caller. */
2785
+ const size_t pos_end = position + num_bytes; /* One past the last byte to process. */
2786
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
2787
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* Upper bound for FN(StoreRange) below; equals position when num_bytes < StoreLookahead(). */
2788
+
2789
+ /* Speed-up heuristics for random data. */
2790
+ const size_t random_heuristics_window_size =
2791
+ LiteralSpreeLengthForSparseSearch(params);
2792
+ size_t apply_random_heuristics = position + random_heuristics_window_size; /* Position beyond which the no-match skipping below kicks in. */
2793
+ const size_t gap = params->dictionary.compound.total_size; /* Added to dictionary_start for FindLongestMatch and distance coding. */
2794
+
2795
+ /* Minimum score to accept a backward reference. */
2796
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
2797
+
2798
+ FN(PrepareDistanceCache)(privat, dist_cache);
2799
+
2800
+ while (position + FN(HashTypeLength)() < pos_end) { /* Search only while more than HashTypeLength() bytes remain. */
2801
+ size_t max_length = pos_end - position; /* Bytes left before pos_end. */
2802
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
2803
+ size_t dictionary_start = BROTLI_MIN(size_t,
2804
+ position + position_offset, max_backward_limit);
2805
+ HasherSearchResult sr;
2806
+ int dict_id = 0;
2807
+ uint8_t p1 = 0;
2808
+ uint8_t p2 = 0;
2809
+ if (params->dictionary.contextual.context_based) { /* Choose the dictionary from the two preceding bytes (0 when not available). */
2810
+ p1 = position >= 1 ?
2811
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
2812
+ p2 = position >= 2 ?
2813
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
2814
+ dict_id = params->dictionary.contextual.context_map[
2815
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2816
+ }
2817
+ sr.len = 0;
2818
+ sr.len_code_delta = 0;
2819
+ sr.distance = 0;
2820
+ sr.score = kMinScore; /* Only a result scoring above kMinScore is treated as a match below. */
2821
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
2822
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
2823
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
2824
+ if (ENABLE_COMPOUND_DICTIONARY) {
2825
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
2826
+ ringbuffer_mask, dist_cache, position, max_length,
2827
+ dictionary_start, params->dist.max_distance, &sr);
2828
+ }
2829
+ if (sr.score > kMinScore) {
2830
+ /* Found a match. Let's look for something even better ahead. */
2831
+ int delayed_backward_references_in_row = 0; /* Consecutive one-byte deferrals; the loop stops once this reaches 4. */
2832
+ --max_length; /* The candidate at position + 1 has one byte fewer before pos_end. */
2833
+ for (;; --max_length) {
2834
+ const score_t cost_diff_lazy = 175; /* Score gain required to prefer the match at position + 1. */
2835
+ HasherSearchResult sr2;
2836
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
2837
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* Below that quality, pre-seed sr2.len from the current match (bounded by max_length); otherwise start at 0. */
2838
+ sr2.len_code_delta = 0;
2839
+ sr2.distance = 0;
2840
+ sr2.score = kMinScore;
2841
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
2842
+ dictionary_start = BROTLI_MIN(size_t,
2843
+ position + 1 + position_offset, max_backward_limit);
2844
+ if (params->dictionary.contextual.context_based) {
2845
+ p2 = p1; /* Slide the two-byte context forward by one byte. */
2846
+ p1 = ringbuffer[position & ringbuffer_mask];
2847
+ dict_id = params->dictionary.contextual.context_map[
2848
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
2849
+ }
2850
+ FN(FindLongestMatch)(privat,
2851
+ params->dictionary.contextual.dict[dict_id],
2852
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
2853
+ max_distance, dictionary_start + gap, params->dist.max_distance,
2854
+ &sr2);
2855
+ if (ENABLE_COMPOUND_DICTIONARY) {
2856
+ LookupCompoundDictionaryMatch(
2857
+ &params->dictionary.compound, ringbuffer,
2858
+ ringbuffer_mask, dist_cache, position + 1, max_length,
2859
+ dictionary_start, params->dist.max_distance, &sr2);
2860
+ }
2861
+ if (sr2.score >= sr.score + cost_diff_lazy) {
2862
+ /* Ok, let's just write one byte for now and start a match from the
2863
+ next byte. */
2864
+ ++position;
2865
+ ++insert_length;
2866
+ sr = sr2;
2867
+ if (++delayed_backward_references_in_row < 4 &&
2868
+ position + FN(HashTypeLength)() < pos_end) {
2869
+ continue;
2870
+ }
2871
+ }
2872
+ break; /* Done: sr now holds the match to emit. */
2873
+ }
2874
+ apply_random_heuristics =
2875
+ position + 2 * sr.len + random_heuristics_window_size; /* Move the skipping threshold beyond this match. */
2876
+ dictionary_start = BROTLI_MIN(size_t,
2877
+ position + position_offset, max_backward_limit);
2878
+ {
2879
+ /* The first 16 codes are special short-codes,
2880
+ and the minimum offset is 1. */
2881
+ size_t distance_code = ComputeDistanceCode(
2882
+ sr.distance, dictionary_start + gap, dist_cache);
2883
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* Shift dist_cache down one slot and record sr.distance as the newest entry. */
2884
+ dist_cache[3] = dist_cache[2];
2885
+ dist_cache[2] = dist_cache[1];
2886
+ dist_cache[1] = dist_cache[0];
2887
+ dist_cache[0] = (int)sr.distance;
2888
+ FN(PrepareDistanceCache)(privat, dist_cache);
2889
+ }
2890
+ InitCommand(commands++, &params->dist, insert_length,
2891
+ sr.len, sr.len_code_delta, distance_code); /* Write the command and advance the output cursor. */
2892
+ }
2893
+ *num_literals += insert_length;
2894
+ insert_length = 0;
2895
+ /* Put the hash keys into the table, if there are enough bytes left.
2896
+ Depending on the hasher implementation, it can push all positions
2897
+ in the given range or only a subset of them.
2898
+ Avoid hash poisoning with RLE data. */
2899
+ {
2900
+ size_t range_start = position + 2;
2901
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
2902
+ if (sr.distance < (sr.len >> 2)) { /* Distance is short relative to length: store at most the last 4 * distance positions of the match. */
2903
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
2904
+ range_start, position + sr.len - (sr.distance << 2)));
2905
+ }
2906
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
2907
+ range_end);
2908
+ }
2909
+ position += sr.len; /* Continue after the matched bytes. */
2910
+ } else {
2911
+ ++insert_length; /* No acceptable match: this byte joins the pending insert. */
2912
+ ++position;
2913
+ /* If we have not seen matches for a long time, we can skip some
2914
+ match lookups. Unsuccessful match lookups are very very expensive
2915
+ and this kind of a heuristic speeds up compression quite
2916
+ a lot. */
2917
+ if (position > apply_random_heuristics) {
2918
+ /* Going through uncompressible data, jump. */
2919
+ if (position >
2920
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
2921
+ /* It is quite a long time since we saw a copy, so we assume
2922
+ that this data is not compressible, and store hashes less
2923
+ often. Hashes of non compressible data are less likely to
2924
+ turn out to be useful in the future, too, so we store less of
2925
+ them so as not to flood out the hash table of good compressible
2926
+ data. */
2927
+ const size_t kMargin =
2928
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
2929
+ size_t pos_jump =
2930
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
2931
+ for (; position < pos_jump; position += 4) { /* Store one hash, then advance 4 bytes without searching. */
2932
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2933
+ insert_length += 4;
2934
+ }
2935
+ } else {
2936
+ const size_t kMargin =
2937
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
2938
+ size_t pos_jump =
2939
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
2940
+ for (; position < pos_jump; position += 2) { /* Store one hash, then advance 2 bytes without searching. */
2941
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
2942
+ insert_length += 2;
2943
+ }
2944
+ }
2945
+ }
2946
+ }
2947
+ }
2948
+ insert_length += pos_end - position; /* Bytes not consumed by the loop are added to the pending insert length. */
2949
+ *last_insert_len = insert_length;
2950
+ *num_commands += (size_t)(commands - orig_commands); /* Number of commands appended by this call. */
2951
+ }
2953
+ #undef HASHER
2954
+ #define HASHER() H41
2955
+ /* NOLINTNEXTLINE(build/include) */
2956
+ /* NOLINT(build/header_guard) */
2957
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2958
+
2959
+ Distributed under MIT license.
2960
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
2961
+ */
2962
+
2963
+ /* template parameters: EXPORT_FN, FN. Instantiated here with HASHER() defined as H41 (see the #define above). Walks ringbuffer positions [position, position + num_bytes), calls FN(FindLongestMatch) at each searched position, and appends one Command per accepted match to commands. Adds each command's insert length to *num_literals and the number of appended commands to *num_commands; the pending insert length is read from and written back to *last_insert_len, and dist_cache is updated in place. */
2963
+
2964
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
2965
+ size_t num_bytes, size_t position,
2966
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
2967
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
2968
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
2969
+ Command* commands, size_t* num_commands, size_t* num_literals) {
2970
+ HASHER()* privat = &hasher->privat.FN(_); /* Hasher state selected by the FN/HASHER template macros. */
2971
+ /* Set maximum distance, see section 9.1. of the spec. */
2972
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
2973
+ const size_t position_offset = params->stream_offset; /* Added to position when computing dictionary_start below. */
2974
+
2975
+ const Command* const orig_commands = commands; /* Kept to count the commands appended by this call. */
2976
+ size_t insert_length = *last_insert_len; /* Start from the insert length carried in by the caller. */
2977
+ const size_t pos_end = position + num_bytes; /* One past the last byte to process. */
2978
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
2979
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* Upper bound for FN(StoreRange) below; equals position when num_bytes < StoreLookahead(). */
2980
+
2981
+ /* Speed-up heuristics for random data. */
2982
+ const size_t random_heuristics_window_size =
2983
+ LiteralSpreeLengthForSparseSearch(params);
2984
+ size_t apply_random_heuristics = position + random_heuristics_window_size; /* Position beyond which the no-match skipping below kicks in. */
2985
+ const size_t gap = params->dictionary.compound.total_size; /* Added to dictionary_start for FindLongestMatch and distance coding. */
2986
+
2987
+ /* Minimum score to accept a backward reference. */
2988
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
2989
+
2990
+ FN(PrepareDistanceCache)(privat, dist_cache);
2991
+
2992
+ while (position + FN(HashTypeLength)() < pos_end) { /* Search only while more than HashTypeLength() bytes remain. */
2993
+ size_t max_length = pos_end - position; /* Bytes left before pos_end. */
2994
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
2995
+ size_t dictionary_start = BROTLI_MIN(size_t,
2996
+ position + position_offset, max_backward_limit);
2997
+ HasherSearchResult sr;
2998
+ int dict_id = 0;
2999
+ uint8_t p1 = 0;
3000
+ uint8_t p2 = 0;
3001
+ if (params->dictionary.contextual.context_based) { /* Choose the dictionary from the two preceding bytes (0 when not available). */
3002
+ p1 = position >= 1 ?
3003
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
3004
+ p2 = position >= 2 ?
3005
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
3006
+ dict_id = params->dictionary.contextual.context_map[
3007
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3008
+ }
3009
+ sr.len = 0;
3010
+ sr.len_code_delta = 0;
3011
+ sr.distance = 0;
3012
+ sr.score = kMinScore; /* Only a result scoring above kMinScore is treated as a match below. */
3013
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
3014
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
3015
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
3016
+ if (ENABLE_COMPOUND_DICTIONARY) {
3017
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
3018
+ ringbuffer_mask, dist_cache, position, max_length,
3019
+ dictionary_start, params->dist.max_distance, &sr);
3020
+ }
3021
+ if (sr.score > kMinScore) {
3022
+ /* Found a match. Let's look for something even better ahead. */
3023
+ int delayed_backward_references_in_row = 0; /* Consecutive one-byte deferrals; the loop stops once this reaches 4. */
3024
+ --max_length; /* The candidate at position + 1 has one byte fewer before pos_end. */
3025
+ for (;; --max_length) {
3026
+ const score_t cost_diff_lazy = 175; /* Score gain required to prefer the match at position + 1. */
3027
+ HasherSearchResult sr2;
3028
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
3029
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* Below that quality, pre-seed sr2.len from the current match (bounded by max_length); otherwise start at 0. */
3030
+ sr2.len_code_delta = 0;
3031
+ sr2.distance = 0;
3032
+ sr2.score = kMinScore;
3033
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
3034
+ dictionary_start = BROTLI_MIN(size_t,
3035
+ position + 1 + position_offset, max_backward_limit);
3036
+ if (params->dictionary.contextual.context_based) {
3037
+ p2 = p1; /* Slide the two-byte context forward by one byte. */
3038
+ p1 = ringbuffer[position & ringbuffer_mask];
3039
+ dict_id = params->dictionary.contextual.context_map[
3040
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3041
+ }
3042
+ FN(FindLongestMatch)(privat,
3043
+ params->dictionary.contextual.dict[dict_id],
3044
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
3045
+ max_distance, dictionary_start + gap, params->dist.max_distance,
3046
+ &sr2);
3047
+ if (ENABLE_COMPOUND_DICTIONARY) {
3048
+ LookupCompoundDictionaryMatch(
3049
+ &params->dictionary.compound, ringbuffer,
3050
+ ringbuffer_mask, dist_cache, position + 1, max_length,
3051
+ dictionary_start, params->dist.max_distance, &sr2);
3052
+ }
3053
+ if (sr2.score >= sr.score + cost_diff_lazy) {
3054
+ /* Ok, let's just write one byte for now and start a match from the
3055
+ next byte. */
3056
+ ++position;
3057
+ ++insert_length;
3058
+ sr = sr2;
3059
+ if (++delayed_backward_references_in_row < 4 &&
3060
+ position + FN(HashTypeLength)() < pos_end) {
3061
+ continue;
3062
+ }
3063
+ }
3064
+ break; /* Done: sr now holds the match to emit. */
3065
+ }
3066
+ apply_random_heuristics =
3067
+ position + 2 * sr.len + random_heuristics_window_size; /* Move the skipping threshold beyond this match. */
3068
+ dictionary_start = BROTLI_MIN(size_t,
3069
+ position + position_offset, max_backward_limit);
3070
+ {
3071
+ /* The first 16 codes are special short-codes,
3072
+ and the minimum offset is 1. */
3073
+ size_t distance_code = ComputeDistanceCode(
3074
+ sr.distance, dictionary_start + gap, dist_cache);
3075
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* Shift dist_cache down one slot and record sr.distance as the newest entry. */
3076
+ dist_cache[3] = dist_cache[2];
3077
+ dist_cache[2] = dist_cache[1];
3078
+ dist_cache[1] = dist_cache[0];
3079
+ dist_cache[0] = (int)sr.distance;
3080
+ FN(PrepareDistanceCache)(privat, dist_cache);
3081
+ }
3082
+ InitCommand(commands++, &params->dist, insert_length,
3083
+ sr.len, sr.len_code_delta, distance_code); /* Write the command and advance the output cursor. */
3084
+ }
3085
+ *num_literals += insert_length;
3086
+ insert_length = 0;
3087
+ /* Put the hash keys into the table, if there are enough bytes left.
3088
+ Depending on the hasher implementation, it can push all positions
3089
+ in the given range or only a subset of them.
3090
+ Avoid hash poisoning with RLE data. */
3091
+ {
3092
+ size_t range_start = position + 2;
3093
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
3094
+ if (sr.distance < (sr.len >> 2)) { /* Distance is short relative to length: store at most the last 4 * distance positions of the match. */
3095
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
3096
+ range_start, position + sr.len - (sr.distance << 2)));
3097
+ }
3098
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
3099
+ range_end);
3100
+ }
3101
+ position += sr.len; /* Continue after the matched bytes. */
3102
+ } else {
3103
+ ++insert_length; /* No acceptable match: this byte joins the pending insert. */
3104
+ ++position;
3105
+ /* If we have not seen matches for a long time, we can skip some
3106
+ match lookups. Unsuccessful match lookups are very very expensive
3107
+ and this kind of a heuristic speeds up compression quite
3108
+ a lot. */
3109
+ if (position > apply_random_heuristics) {
3110
+ /* Going through uncompressible data, jump. */
3111
+ if (position >
3112
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
3113
+ /* It is quite a long time since we saw a copy, so we assume
3114
+ that this data is not compressible, and store hashes less
3115
+ often. Hashes of non compressible data are less likely to
3116
+ turn out to be useful in the future, too, so we store less of
3117
+ them so as not to flood out the hash table of good compressible
3118
+ data. */
3119
+ const size_t kMargin =
3120
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
3121
+ size_t pos_jump =
3122
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
3123
+ for (; position < pos_jump; position += 4) { /* Store one hash, then advance 4 bytes without searching. */
3124
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3125
+ insert_length += 4;
3126
+ }
3127
+ } else {
3128
+ const size_t kMargin =
3129
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
3130
+ size_t pos_jump =
3131
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
3132
+ for (; position < pos_jump; position += 2) { /* Store one hash, then advance 2 bytes without searching. */
3133
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3134
+ insert_length += 2;
3135
+ }
3136
+ }
3137
+ }
3138
+ }
3139
+ }
3140
+ insert_length += pos_end - position; /* Bytes not consumed by the loop are added to the pending insert length. */
3141
+ *last_insert_len = insert_length;
3142
+ *num_commands += (size_t)(commands - orig_commands); /* Number of commands appended by this call. */
3143
+ }
3145
+ #undef HASHER
3146
+ #define HASHER() H42
3147
+ /* NOLINTNEXTLINE(build/include) */
3148
+ /* NOLINT(build/header_guard) */
3149
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3150
+
3151
+ Distributed under MIT license.
3152
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
3153
+ */
3154
+
3155
+ /* template parameters: EXPORT_FN, FN. Instantiated here with HASHER() defined as H42 (see the #define above). Walks ringbuffer positions [position, position + num_bytes), calls FN(FindLongestMatch) at each searched position, and appends one Command per accepted match to commands. Adds each command's insert length to *num_literals and the number of appended commands to *num_commands; the pending insert length is read from and written back to *last_insert_len, and dist_cache is updated in place. */
3155
+
3156
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
3157
+ size_t num_bytes, size_t position,
3158
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
3159
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
3160
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
3161
+ Command* commands, size_t* num_commands, size_t* num_literals) {
3162
+ HASHER()* privat = &hasher->privat.FN(_); /* Hasher state selected by the FN/HASHER template macros. */
3163
+ /* Set maximum distance, see section 9.1. of the spec. */
3164
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
3165
+ const size_t position_offset = params->stream_offset; /* Added to position when computing dictionary_start below. */
3166
+
3167
+ const Command* const orig_commands = commands; /* Kept to count the commands appended by this call. */
3168
+ size_t insert_length = *last_insert_len; /* Start from the insert length carried in by the caller. */
3169
+ const size_t pos_end = position + num_bytes; /* One past the last byte to process. */
3170
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
3171
+ position + num_bytes - FN(StoreLookahead)() + 1 : position; /* Upper bound for FN(StoreRange) below; equals position when num_bytes < StoreLookahead(). */
3172
+
3173
+ /* Speed-up heuristics for random data. */
3174
+ const size_t random_heuristics_window_size =
3175
+ LiteralSpreeLengthForSparseSearch(params);
3176
+ size_t apply_random_heuristics = position + random_heuristics_window_size; /* Position beyond which the no-match skipping below kicks in. */
3177
+ const size_t gap = params->dictionary.compound.total_size; /* Added to dictionary_start for FindLongestMatch and distance coding. */
3178
+
3179
+ /* Minimum score to accept a backward reference. */
3180
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
3181
+
3182
+ FN(PrepareDistanceCache)(privat, dist_cache);
3183
+
3184
+ while (position + FN(HashTypeLength)() < pos_end) { /* Search only while more than HashTypeLength() bytes remain. */
3185
+ size_t max_length = pos_end - position; /* Bytes left before pos_end. */
3186
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
3187
+ size_t dictionary_start = BROTLI_MIN(size_t,
3188
+ position + position_offset, max_backward_limit);
3189
+ HasherSearchResult sr;
3190
+ int dict_id = 0;
3191
+ uint8_t p1 = 0;
3192
+ uint8_t p2 = 0;
3193
+ if (params->dictionary.contextual.context_based) { /* Choose the dictionary from the two preceding bytes (0 when not available). */
3194
+ p1 = position >= 1 ?
3195
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
3196
+ p2 = position >= 2 ?
3197
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
3198
+ dict_id = params->dictionary.contextual.context_map[
3199
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3200
+ }
3201
+ sr.len = 0;
3202
+ sr.len_code_delta = 0;
3203
+ sr.distance = 0;
3204
+ sr.score = kMinScore; /* Only a result scoring above kMinScore is treated as a match below. */
3205
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
3206
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
3207
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
3208
+ if (ENABLE_COMPOUND_DICTIONARY) {
3209
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
3210
+ ringbuffer_mask, dist_cache, position, max_length,
3211
+ dictionary_start, params->dist.max_distance, &sr);
3212
+ }
3213
+ if (sr.score > kMinScore) {
3214
+ /* Found a match. Let's look for something even better ahead. */
3215
+ int delayed_backward_references_in_row = 0; /* Consecutive one-byte deferrals; the loop stops once this reaches 4. */
3216
+ --max_length; /* The candidate at position + 1 has one byte fewer before pos_end. */
3217
+ for (;; --max_length) {
3218
+ const score_t cost_diff_lazy = 175; /* Score gain required to prefer the match at position + 1. */
3219
+ HasherSearchResult sr2;
3220
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
3221
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0; /* Below that quality, pre-seed sr2.len from the current match (bounded by max_length); otherwise start at 0. */
3222
+ sr2.len_code_delta = 0;
3223
+ sr2.distance = 0;
3224
+ sr2.score = kMinScore;
3225
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
3226
+ dictionary_start = BROTLI_MIN(size_t,
3227
+ position + 1 + position_offset, max_backward_limit);
3228
+ if (params->dictionary.contextual.context_based) {
3229
+ p2 = p1; /* Slide the two-byte context forward by one byte. */
3230
+ p1 = ringbuffer[position & ringbuffer_mask];
3231
+ dict_id = params->dictionary.contextual.context_map[
3232
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3233
+ }
3234
+ FN(FindLongestMatch)(privat,
3235
+ params->dictionary.contextual.dict[dict_id],
3236
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
3237
+ max_distance, dictionary_start + gap, params->dist.max_distance,
3238
+ &sr2);
3239
+ if (ENABLE_COMPOUND_DICTIONARY) {
3240
+ LookupCompoundDictionaryMatch(
3241
+ &params->dictionary.compound, ringbuffer,
3242
+ ringbuffer_mask, dist_cache, position + 1, max_length,
3243
+ dictionary_start, params->dist.max_distance, &sr2);
3244
+ }
3245
+ if (sr2.score >= sr.score + cost_diff_lazy) {
3246
+ /* Ok, let's just write one byte for now and start a match from the
3247
+ next byte. */
3248
+ ++position;
3249
+ ++insert_length;
3250
+ sr = sr2;
3251
+ if (++delayed_backward_references_in_row < 4 &&
3252
+ position + FN(HashTypeLength)() < pos_end) {
3253
+ continue;
3254
+ }
3255
+ }
3256
+ break; /* Done: sr now holds the match to emit. */
3257
+ }
3258
+ apply_random_heuristics =
3259
+ position + 2 * sr.len + random_heuristics_window_size; /* Move the skipping threshold beyond this match. */
3260
+ dictionary_start = BROTLI_MIN(size_t,
3261
+ position + position_offset, max_backward_limit);
3262
+ {
3263
+ /* The first 16 codes are special short-codes,
3264
+ and the minimum offset is 1. */
3265
+ size_t distance_code = ComputeDistanceCode(
3266
+ sr.distance, dictionary_start + gap, dist_cache);
3267
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) { /* Shift dist_cache down one slot and record sr.distance as the newest entry. */
3268
+ dist_cache[3] = dist_cache[2];
3269
+ dist_cache[2] = dist_cache[1];
3270
+ dist_cache[1] = dist_cache[0];
3271
+ dist_cache[0] = (int)sr.distance;
3272
+ FN(PrepareDistanceCache)(privat, dist_cache);
3273
+ }
3274
+ InitCommand(commands++, &params->dist, insert_length,
3275
+ sr.len, sr.len_code_delta, distance_code); /* Write the command and advance the output cursor. */
3276
+ }
3277
+ *num_literals += insert_length;
3278
+ insert_length = 0;
3279
+ /* Put the hash keys into the table, if there are enough bytes left.
3280
+ Depending on the hasher implementation, it can push all positions
3281
+ in the given range or only a subset of them.
3282
+ Avoid hash poisoning with RLE data. */
3283
+ {
3284
+ size_t range_start = position + 2;
3285
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
3286
+ if (sr.distance < (sr.len >> 2)) { /* Distance is short relative to length: store at most the last 4 * distance positions of the match. */
3287
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
3288
+ range_start, position + sr.len - (sr.distance << 2)));
3289
+ }
3290
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
3291
+ range_end);
3292
+ }
3293
+ position += sr.len; /* Continue after the matched bytes. */
3294
+ } else {
3295
+ ++insert_length; /* No acceptable match: this byte joins the pending insert. */
3296
+ ++position;
3297
+ /* If we have not seen matches for a long time, we can skip some
3298
+ match lookups. Unsuccessful match lookups are very very expensive
3299
+ and this kind of a heuristic speeds up compression quite
3300
+ a lot. */
3301
+ if (position > apply_random_heuristics) {
3302
+ /* Going through uncompressible data, jump. */
3303
+ if (position >
3304
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
3305
+ /* It is quite a long time since we saw a copy, so we assume
3306
+ that this data is not compressible, and store hashes less
3307
+ often. Hashes of non compressible data are less likely to
3308
+ turn out to be useful in the future, too, so we store less of
3309
+ them so as not to flood out the hash table of good compressible
3310
+ data. */
3311
+ const size_t kMargin =
3312
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
3313
+ size_t pos_jump =
3314
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
3315
+ for (; position < pos_jump; position += 4) { /* Store one hash, then advance 4 bytes without searching. */
3316
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3317
+ insert_length += 4;
3318
+ }
3319
+ } else {
3320
+ const size_t kMargin =
3321
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
3322
+ size_t pos_jump =
3323
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
3324
+ for (; position < pos_jump; position += 2) { /* Store one hash, then advance 2 bytes without searching. */
3325
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3326
+ insert_length += 2;
3327
+ }
3328
+ }
3329
+ }
3330
+ }
3331
+ }
3332
+ insert_length += pos_end - position; /* Bytes not consumed by the loop are added to the pending insert length. */
3333
+ *last_insert_len = insert_length;
3334
+ *num_commands += (size_t)(commands - orig_commands); /* Number of commands appended by this call. */
3335
+ }
3337
+ #undef HASHER
3338
+ #define HASHER() H55
3339
+ /* NOLINTNEXTLINE(build/include) */
3340
+ /* NOLINT(build/header_guard) */
3341
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3342
+
3343
+ Distributed under MIT license.
3344
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
3345
+ */
3346
+
3347
+ /* template parameters: EXPORT_FN, FN */
3348
+
3349
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
3350
+ size_t num_bytes, size_t position,
3351
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
3352
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
3353
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
3354
+ Command* commands, size_t* num_commands, size_t* num_literals) {
3355
+ HASHER()* privat = &hasher->privat.FN(_);
3356
+ /* Set maximum distance, see section 9.1. of the spec. */
3357
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
3358
+ const size_t position_offset = params->stream_offset;
3359
+
3360
+ const Command* const orig_commands = commands;
3361
+ size_t insert_length = *last_insert_len;
3362
+ const size_t pos_end = position + num_bytes;
3363
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
3364
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
3365
+
3366
+ /* For speed up heuristics for random data. */
3367
+ const size_t random_heuristics_window_size =
3368
+ LiteralSpreeLengthForSparseSearch(params);
3369
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
3370
+ const size_t gap = params->dictionary.compound.total_size;
3371
+
3372
+ /* Minimum score to accept a backward reference. */
3373
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
3374
+
3375
+ FN(PrepareDistanceCache)(privat, dist_cache);
3376
+
3377
+ while (position + FN(HashTypeLength)() < pos_end) {
3378
+ size_t max_length = pos_end - position;
3379
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
3380
+ size_t dictionary_start = BROTLI_MIN(size_t,
3381
+ position + position_offset, max_backward_limit);
3382
+ HasherSearchResult sr;
3383
+ int dict_id = 0;
3384
+ uint8_t p1 = 0;
3385
+ uint8_t p2 = 0;
3386
+ if (params->dictionary.contextual.context_based) {
3387
+ p1 = position >= 1 ?
3388
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
3389
+ p2 = position >= 2 ?
3390
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
3391
+ dict_id = params->dictionary.contextual.context_map[
3392
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3393
+ }
3394
+ sr.len = 0;
3395
+ sr.len_code_delta = 0;
3396
+ sr.distance = 0;
3397
+ sr.score = kMinScore;
3398
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
3399
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
3400
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
3401
+ if (ENABLE_COMPOUND_DICTIONARY) {
3402
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
3403
+ ringbuffer_mask, dist_cache, position, max_length,
3404
+ dictionary_start, params->dist.max_distance, &sr);
3405
+ }
3406
+ if (sr.score > kMinScore) {
3407
+ /* Found a match. Let's look for something even better ahead. */
3408
+ int delayed_backward_references_in_row = 0;
3409
+ --max_length;
3410
+ for (;; --max_length) {
3411
+ const score_t cost_diff_lazy = 175;
3412
+ HasherSearchResult sr2;
3413
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
3414
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
3415
+ sr2.len_code_delta = 0;
3416
+ sr2.distance = 0;
3417
+ sr2.score = kMinScore;
3418
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
3419
+ dictionary_start = BROTLI_MIN(size_t,
3420
+ position + 1 + position_offset, max_backward_limit);
3421
+ if (params->dictionary.contextual.context_based) {
3422
+ p2 = p1;
3423
+ p1 = ringbuffer[position & ringbuffer_mask];
3424
+ dict_id = params->dictionary.contextual.context_map[
3425
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3426
+ }
3427
+ FN(FindLongestMatch)(privat,
3428
+ params->dictionary.contextual.dict[dict_id],
3429
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
3430
+ max_distance, dictionary_start + gap, params->dist.max_distance,
3431
+ &sr2);
3432
+ if (ENABLE_COMPOUND_DICTIONARY) {
3433
+ LookupCompoundDictionaryMatch(
3434
+ &params->dictionary.compound, ringbuffer,
3435
+ ringbuffer_mask, dist_cache, position + 1, max_length,
3436
+ dictionary_start, params->dist.max_distance, &sr2);
3437
+ }
3438
+ if (sr2.score >= sr.score + cost_diff_lazy) {
3439
+ /* Ok, let's just write one byte for now and start a match from the
3440
+ next byte. */
3441
+ ++position;
3442
+ ++insert_length;
3443
+ sr = sr2;
3444
+ if (++delayed_backward_references_in_row < 4 &&
3445
+ position + FN(HashTypeLength)() < pos_end) {
3446
+ continue;
3447
+ }
3448
+ }
3449
+ break;
3450
+ }
3451
+ apply_random_heuristics =
3452
+ position + 2 * sr.len + random_heuristics_window_size;
3453
+ dictionary_start = BROTLI_MIN(size_t,
3454
+ position + position_offset, max_backward_limit);
3455
+ {
3456
+ /* The first 16 codes are special short-codes,
3457
+ and the minimum offset is 1. */
3458
+ size_t distance_code = ComputeDistanceCode(
3459
+ sr.distance, dictionary_start + gap, dist_cache);
3460
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
3461
+ dist_cache[3] = dist_cache[2];
3462
+ dist_cache[2] = dist_cache[1];
3463
+ dist_cache[1] = dist_cache[0];
3464
+ dist_cache[0] = (int)sr.distance;
3465
+ FN(PrepareDistanceCache)(privat, dist_cache);
3466
+ }
3467
+ InitCommand(commands++, &params->dist, insert_length,
3468
+ sr.len, sr.len_code_delta, distance_code);
3469
+ }
3470
+ *num_literals += insert_length;
3471
+ insert_length = 0;
3472
+ /* Put the hash keys into the table, if there are enough bytes left.
3473
+ Depending on the hasher implementation, it can push all positions
3474
+ in the given range or only a subset of them.
3475
+ Avoid hash poisoning with RLE data. */
3476
+ {
3477
+ size_t range_start = position + 2;
3478
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
3479
+ if (sr.distance < (sr.len >> 2)) {
3480
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
3481
+ range_start, position + sr.len - (sr.distance << 2)));
3482
+ }
3483
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
3484
+ range_end);
3485
+ }
3486
+ position += sr.len;
3487
+ } else {
3488
+ ++insert_length;
3489
+ ++position;
3490
+ /* If we have not seen matches for a long time, we can skip some
3491
+ match lookups. Unsuccessful match lookups are very very expensive
3492
+ and this kind of a heuristic speeds up compression quite
3493
+ a lot. */
3494
+ if (position > apply_random_heuristics) {
3495
+ /* Going through uncompressible data, jump. */
3496
+ if (position >
3497
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
3498
+ /* It is quite a long time since we saw a copy, so we assume
3499
+ that this data is not compressible, and store hashes less
3500
+ often. Hashes of non compressible data are less likely to
3501
+ turn out to be useful in the future, too, so we store less of
3502
+ them to not to flood out the hash table of good compressible
3503
+ data. */
3504
+ const size_t kMargin =
3505
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
3506
+ size_t pos_jump =
3507
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
3508
+ for (; position < pos_jump; position += 4) {
3509
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3510
+ insert_length += 4;
3511
+ }
3512
+ } else {
3513
+ const size_t kMargin =
3514
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
3515
+ size_t pos_jump =
3516
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
3517
+ for (; position < pos_jump; position += 2) {
3518
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3519
+ insert_length += 2;
3520
+ }
3521
+ }
3522
+ }
3523
+ }
3524
+ }
3525
+ insert_length += pos_end - position;
3526
+ *last_insert_len = insert_length;
3527
+ *num_commands += (size_t)(commands - orig_commands);
3528
+ }
3529
+ #undef HASHER
3530
+ #define HASHER() H65
3531
+ /* NOLINTNEXTLINE(build/include) */
3532
+ /* NOLINT(build/header_guard) */
3533
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3534
+
3535
+ Distributed under MIT license.
3536
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
3537
+ */
3538
+
3539
+ /* template parameters: EXPORT_FN, FN. This copy is expanded with HASHER() defined as H65. The function scans [position, position + num_bytes) of the ring buffer for backward references, writes one Command per accepted match through `commands`, and updates dist_cache, *last_insert_len, *num_commands and *num_literals. */
3540
+
3541
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
3542
+ size_t num_bytes, size_t position,
3543
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
3544
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
3545
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
3546
+ Command* commands, size_t* num_commands, size_t* num_literals) {
3547
+ HASHER()* privat = &hasher->privat.FN(_);  /* state of the concrete hasher type named by HASHER() */
3548
+ /* Set maximum distance, see section 9.1. of the spec. */
3549
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
3550
+ const size_t position_offset = params->stream_offset;
3551
+
3552
+ const Command* const orig_commands = commands;  /* kept to count emitted commands at the end */
3553
+ size_t insert_length = *last_insert_len;  /* literals already pending when this call starts */
3554
+ const size_t pos_end = position + num_bytes;
3555
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
3556
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;  /* upper bound for StoreRange; equals `position` when fewer than StoreLookahead() bytes are given */
3557
+
3558
+ /* For speed up heuristics for random data. */
3559
+ const size_t random_heuristics_window_size =
3560
+ LiteralSpreeLengthForSparseSearch(params);
3561
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
3562
+ const size_t gap = params->dictionary.compound.total_size;  /* added to dictionary_start for FindLongestMatch and ComputeDistanceCode */
3563
+
3564
+ /* Minimum score to accept a backward reference. */
3565
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
3566
+
3567
+ FN(PrepareDistanceCache)(privat, dist_cache);
3568
+
3569
+ while (position + FN(HashTypeLength)() < pos_end) {
3570
+ size_t max_length = pos_end - position;
3571
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
3572
+ size_t dictionary_start = BROTLI_MIN(size_t,
3573
+ position + position_offset, max_backward_limit);
3574
+ HasherSearchResult sr;
3575
+ int dict_id = 0;
3576
+ uint8_t p1 = 0;
3577
+ uint8_t p2 = 0;
3578
+ if (params->dictionary.contextual.context_based) {  /* pick the dictionary from the two preceding bytes (0 when not available) */
3579
+ p1 = position >= 1 ?
3580
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
3581
+ p2 = position >= 2 ?
3582
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
3583
+ dict_id = params->dictionary.contextual.context_map[
3584
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3585
+ }
3586
+ sr.len = 0;  /* start from "no match"; only a score above kMinScore is accepted below */
3587
+ sr.len_code_delta = 0;
3588
+ sr.distance = 0;
3589
+ sr.score = kMinScore;
3590
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
3591
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
3592
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
3593
+ if (ENABLE_COMPOUND_DICTIONARY) {
3594
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
3595
+ ringbuffer_mask, dist_cache, position, max_length,
3596
+ dictionary_start, params->dist.max_distance, &sr);
3597
+ }
3598
+ if (sr.score > kMinScore) {
3599
+ /* Found a match. Let's look for something even better ahead. */
3600
+ int delayed_backward_references_in_row = 0;
3601
+ --max_length;
3602
+ for (;; --max_length) {
3603
+ const score_t cost_diff_lazy = 175;  /* a match at position + 1 must score at least this much higher to replace the current one */
3604
+ HasherSearchResult sr2;
3605
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
3606
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;  /* at lower quality the search is seeded with sr.len - 1, capped by max_length */
3607
+ sr2.len_code_delta = 0;
3608
+ sr2.distance = 0;
3609
+ sr2.score = kMinScore;
3610
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
3611
+ dictionary_start = BROTLI_MIN(size_t,
3612
+ position + 1 + position_offset, max_backward_limit);
3613
+ if (params->dictionary.contextual.context_based) {
3614
+ p2 = p1;  /* shift the context by one byte: the byte at `position` becomes p1 */
3615
+ p1 = ringbuffer[position & ringbuffer_mask];
3616
+ dict_id = params->dictionary.contextual.context_map[
3617
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
3618
+ }
3619
+ FN(FindLongestMatch)(privat,
3620
+ params->dictionary.contextual.dict[dict_id],
3621
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
3622
+ max_distance, dictionary_start + gap, params->dist.max_distance,
3623
+ &sr2);
3624
+ if (ENABLE_COMPOUND_DICTIONARY) {
3625
+ LookupCompoundDictionaryMatch(
3626
+ &params->dictionary.compound, ringbuffer,
3627
+ ringbuffer_mask, dist_cache, position + 1, max_length,
3628
+ dictionary_start, params->dist.max_distance, &sr2);
3629
+ }
3630
+ if (sr2.score >= sr.score + cost_diff_lazy) {
3631
+ /* Ok, let's just write one byte for now and start a match from the
3632
+ next byte. */
3633
+ ++position;
3634
+ ++insert_length;
3635
+ sr = sr2;
3636
+ if (++delayed_backward_references_in_row < 4 &&
3637
+ position + FN(HashTypeLength)() < pos_end) {  /* retry one byte later at most 4 times in a row, and only while the outer loop condition still holds */
3638
+ continue;
3639
+ }
3640
+ }
3641
+ break;
3642
+ }
3643
+ apply_random_heuristics =
3644
+ position + 2 * sr.len + random_heuristics_window_size;  /* a match was found, so push the sparse-search threshold further ahead */
3645
+ dictionary_start = BROTLI_MIN(size_t,
3646
+ position + position_offset, max_backward_limit);  /* recomputed because `position` may have advanced in the lazy loop */
3647
+ {
3648
+ /* The first 16 codes are special short-codes,
3649
+ and the minimum offset is 1. */
3650
+ size_t distance_code = ComputeDistanceCode(
3651
+ sr.distance, dictionary_start + gap, dist_cache);
3652
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {  /* NOTE(review): presumably code 0 means "same as last distance", so the cache is left alone - confirm in ComputeDistanceCode */
3653
+ dist_cache[3] = dist_cache[2];
3654
+ dist_cache[2] = dist_cache[1];
3655
+ dist_cache[1] = dist_cache[0];
3656
+ dist_cache[0] = (int)sr.distance;
3657
+ FN(PrepareDistanceCache)(privat, dist_cache);
3658
+ }
3659
+ InitCommand(commands++, &params->dist, insert_length,
3660
+ sr.len, sr.len_code_delta, distance_code);
3661
+ }
3662
+ *num_literals += insert_length;
3663
+ insert_length = 0;
3664
+ /* Put the hash keys into the table, if there are enough bytes left.
3665
+ Depending on the hasher implementation, it can push all positions
3666
+ in the given range or only a subset of them.
3667
+ Avoid hash poisoning with RLE data. */
3668
+ {
3669
+ size_t range_start = position + 2;
3670
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
3671
+ if (sr.distance < (sr.len >> 2)) {  /* distance is under a quarter of the length: store only the last 4 * distance positions of the copy, clamped to [position + 2, range_end] */
3672
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
3673
+ range_start, position + sr.len - (sr.distance << 2)));
3674
+ }
3675
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
3676
+ range_end);
3677
+ }
3678
+ position += sr.len;
3679
+ } else {
3680
+ ++insert_length;
3681
+ ++position;
3682
+ /* If we have not seen matches for a long time, we can skip some
3683
+ match lookups. Unsuccessful match lookups are very very expensive
3684
+ and this kind of a heuristic speeds up compression quite
3685
+ a lot. */
3686
+ if (position > apply_random_heuristics) {
3687
+ /* Going through uncompressible data, jump. */
3688
+ if (position >
3689
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
3690
+ /* It is quite a long time since we saw a copy, so we assume
3691
+ that this data is not compressible, and store hashes less
3692
+ often. Hashes of non compressible data are less likely to
3693
+ turn out to be useful in the future, too, so we store less of
3694
+ them to not to flood out the hash table of good compressible
3695
+ data. */
3696
+ const size_t kMargin =
3697
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
3698
+ size_t pos_jump =
3699
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
3700
+ for (; position < pos_jump; position += 4) {  /* store one hash per 4 positions; all 4 bytes are counted as literals */
3701
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3702
+ insert_length += 4;
3703
+ }
3704
+ } else {
3705
+ const size_t kMargin =
3706
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
3707
+ size_t pos_jump =
3708
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
3709
+ for (; position < pos_jump; position += 2) {  /* milder variant: one hash per 2 positions, both bytes counted as literals */
3710
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
3711
+ insert_length += 2;
3712
+ }
3713
+ }
3714
+ }
3715
+ }
3716
+ }
3717
+ insert_length += pos_end - position;  /* bytes after the last command are reported as pending literals */
3718
+ *last_insert_len = insert_length;
3719
+ *num_commands += (size_t)(commands - orig_commands);
3720
+ }
3721
+ #undef HASHER
3722
+
3723
+ #undef ENABLE_COMPOUND_DICTIONARY
3724
+ #undef PREFIX
3725
+
3726
+ #undef EXPORT_FN
3727
+ #undef FN
3728
+ #undef CAT
3729
+ #undef EXPAND_CAT
3730
+
3731
+ void duckdb_brotli::BrotliCreateBackwardReferences(size_t num_bytes,
3732
+ size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
3733
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
3734
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
3735
+ Command* commands, size_t* num_commands, size_t* num_literals) {  /* Dispatches on params->hasher.type to the matching CreateBackwardReferences instantiation, forwarding every argument unchanged. */
3736
+ if (params->dictionary.compound.num_chunks != 0) {  /* compound dictionary has chunks: use the CreateBackwardReferencesDH<N> variants */
3737
+ switch (params->hasher.type) {
3738
+ #define CASE_(N) \
3739
+ case N: \
3740
+ CreateBackwardReferencesDH ## N(num_bytes, \
3741
+ position, ringbuffer, ringbuffer_mask, \
3742
+ literal_context_lut, params, hasher, dist_cache, \
3743
+ last_insert_len, commands, num_commands, num_literals); \
3744
+ return;
3745
+ CASE_(5)
3746
+ CASE_(6)
3747
+ CASE_(40)
3748
+ CASE_(41)
3749
+ CASE_(42)
3750
+ CASE_(55)
3751
+ CASE_(65)
3752
+ #undef CASE_
3753
+ default:
3754
+ BROTLI_DCHECK(false);  /* NOTE(review): if BROTLI_DCHECK does not abort in this build, the `break` below continues into the non-dictionary dispatch - confirm this is intended */
3755
+ break;
3756
+ }
3757
+ }
3758
+
3759
+ switch (params->hasher.type) {  /* no compound dictionary chunks (or unhandled type above): use the CreateBackwardReferencesNH<N> variants */
3760
+ #define CASE_(N) \
3761
+ case N: \
3762
+ CreateBackwardReferencesNH ## N(num_bytes, \
3763
+ position, ringbuffer, ringbuffer_mask, \
3764
+ literal_context_lut, params, hasher, dist_cache, \
3765
+ last_insert_len, commands, num_commands, num_literals); \
3766
+ return;
3767
+ FOR_GENERIC_HASHERS(CASE_)  /* the list of hasher ids comes from this macro, which is defined outside this view */
3768
+ #undef CASE_
3769
+ default:
3770
+ BROTLI_DCHECK(false);
3771
+ break;
3772
+ }
3773
+ }
3774
+
3775
+