html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -8,12 +8,7 @@ use crate::Info;
8
8
  pub(crate) struct UnfilteringBuffer {
9
9
  /// Vec containing the uncompressed image data currently being processed.
10
10
  data_stream: Vec<u8>,
11
- /// Index in `data_stream` where the previous row starts.
12
- /// This excludes the filter type byte - it points at the first byte of actual pixel data.
13
- /// The pixel data is already-`unfilter`-ed.
14
- ///
15
- /// If `prev_start == current_start` then it means that there is no previous row.
16
- prev_start: usize,
11
+ prev_row: PrevRow,
17
12
  /// Index in `data_stream` where the current row starts.
18
13
  /// This points at the filter type byte of the current row (i.e. the actual pixel data starts at `current_start + 1`)
19
14
  /// The pixel data is not-yet-`unfilter`-ed.
@@ -26,6 +21,10 @@ pub(crate) struct UnfilteringBuffer {
26
21
  available: usize,
27
22
  /// The number of bytes before we shift the buffer back.
28
23
  shift_back_limit: usize,
24
+ /// How many bytes are left to decompress into this buffer for the current frame.
25
+ remaining_bytes: u64,
26
+ /// To avoid always allocating a new vector in `fn unfilter_curr_row_using_scratch_buffer`.
27
+ scratch_buffer: Vec<u8>,
29
28
  }
30
29
 
31
30
  impl UnfilteringBuffer {
@@ -34,9 +33,14 @@ impl UnfilteringBuffer {
34
33
  /// Asserts in debug builds that all the invariants hold. No-op in release
35
34
  /// builds. Intended to be called after creating or mutating `self` to
36
35
  /// ensure that the final state preserves the invariants.
36
+ #[cfg(not(debug_assertions))]
37
+ fn debug_assert_invariants(&self) {}
38
+ #[cfg(debug_assertions)]
37
39
  fn debug_assert_invariants(&self) {
38
- debug_assert!(self.prev_start <= self.current_start);
39
- debug_assert!(self.current_start <= self.available);
40
+ if let PrevRow::InPlace(prev_start) = &self.prev_row {
41
+ debug_assert!(*prev_start <= self.current_start);
42
+ }
43
+ debug_assert!(self.current_start <= self.filled);
40
44
  debug_assert!(self.available <= self.filled);
41
45
  debug_assert!(self.filled <= self.data_stream.len());
42
46
  }
@@ -87,11 +91,13 @@ impl UnfilteringBuffer {
87
91
 
88
92
  let result = Self {
89
93
  data_stream: Vec::with_capacity(data_stream_capacity),
90
- prev_start: 0,
94
+ prev_row: PrevRow::None,
91
95
  current_start: 0,
92
96
  filled: 0,
93
97
  available: 0,
94
98
  shift_back_limit,
99
+ remaining_bytes: u64::MAX,
100
+ scratch_buffer: Vec::new(),
95
101
  };
96
102
 
97
103
  result.debug_assert_invariants();
@@ -101,38 +107,72 @@ impl UnfilteringBuffer {
101
107
  /// Called to indicate that there is no previous row (e.g. when the current
102
108
  /// row is the first scanline of a given Adam7 pass).
103
109
  pub fn reset_prev_row(&mut self) {
104
- self.prev_start = self.current_start;
110
+ // Stash a previously allocated buffer (for potential reuse later)
111
+ // rather than throwing it away when resetting `self.prev_row`.
112
+ if let PrevRow::Scratch(buf) = &mut self.prev_row {
113
+ self.scratch_buffer = std::mem::take(buf);
114
+ }
115
+
116
+ self.prev_row = PrevRow::None;
105
117
  self.debug_assert_invariants();
106
118
  }
107
119
 
108
- pub fn reset_all(&mut self) {
120
+ pub fn start_frame(&mut self, frame_bytes: u64) {
109
121
  self.data_stream.clear();
110
- self.prev_start = 0;
122
+ self.prev_row = PrevRow::None;
111
123
  self.current_start = 0;
112
124
  self.filled = 0;
113
125
  self.available = 0;
126
+ self.remaining_bytes = frame_bytes;
127
+ }
128
+
129
+ pub fn remaining_bytes(&self) -> u64 {
130
+ self.remaining_bytes
114
131
  }
115
132
 
116
133
  /// Returns the previous (already `unfilter`-ed) row.
117
134
  pub fn prev_row(&self) -> &[u8] {
118
- &self.data_stream[self.prev_start..self.current_start]
135
+ self.prev_row
136
+ .as_slice(&self.data_stream[..self.current_start])
137
+ }
138
+
139
+ /// Returns how many bytes of the current row are present in the mutable
140
+ /// part of the buffer (32kB most recently decompressed bytes are read-only
141
+ /// to retain the "lookback" window as needed for inflate algorithm). If a
142
+ /// full row is mutable, then it may be unfiltered using
143
+ /// `unfilter_curr_row_in_place`.
144
+ ///
145
+ /// See also `readable_len_of_curr_row`.
146
+ pub fn mutable_len_of_curr_row(&self) -> usize {
147
+ self.available.saturating_sub(self.current_start)
119
148
  }
120
149
 
121
- /// Returns how many bytes of the current row are present in the buffer.
122
- pub fn curr_row_len(&self) -> usize {
123
- self.available - self.current_start
150
+ /// Returns how many bytes of the current row have been already
151
+ /// decompressed. If a full row is available, then it may be unfiltered
152
+ /// using `unfilter_curr_row_using_scratch_buffer`.
153
+ ///
154
+ /// See also `mutable_len_of_curr_row`.
155
+ pub fn readable_len_of_curr_row(&self) -> usize {
156
+ self.filled - self.current_start
124
157
  }
125
158
 
126
- /// Returns a `&mut Vec<u8>` suitable for passing to
127
- /// `ReadDecoder.decode_image_data` or `StreamingDecoder.update`.
159
+ /// Runs `f` on the underlying buffer.
128
160
  ///
129
161
  /// Invariants of `self` depend on the assumption that the caller will only
130
162
  /// append new bytes to the returned vector (which is indeed the behavior of
131
163
  /// `ReadDecoder` and `StreamingDecoder`). TODO: Consider protecting the
132
164
  /// invariants by returning an append-only view of the vector
133
165
  /// (`FnMut(&[u8])`??? or maybe `std::io::Write`???).
134
- pub fn as_unfilled_buffer(&mut self) -> UnfilterBuf<'_> {
135
- if self.prev_start >= self.shift_back_limit
166
+ pub fn with_unfilled_buffer<F, T>(&mut self, f: F) -> T
167
+ where
168
+ F: FnOnce(&mut UnfilterBuf<'_>) -> T,
169
+ {
170
+ // Potentially shift the buffer left to avoid unbounded growth.
171
+ let discard_size = self.available.min(match &self.prev_row {
172
+ PrevRow::None | PrevRow::Scratch(_) => self.current_start,
173
+ PrevRow::InPlace(prev_start) => *prev_start,
174
+ });
175
+ if discard_size >= self.shift_back_limit
136
176
  // Avoid the shift back if the buffer is still very empty. Consider how we got here: a
137
177
  // previous decompression filled the buffer, then we unfiltered, we're now refilling
138
178
  // the buffer again. The condition implies, the previous decompression filled at most
@@ -140,62 +180,140 @@ impl UnfilteringBuffer {
140
180
  // attempt will not yet be limited by the buffer length.
141
181
  && self.filled >= self.data_stream.len() / 2
142
182
  {
143
- // We have to relocate the data to the start of the buffer. Benchmarking suggests that
144
- // the codegen for an unbounded range is better / different than the one for a bounded
145
- // range. We prefer the former if the data overhead is not too high. `16` was
146
- // determined experimentally and might be system (memory) dependent. There's also the
147
- // question if we could be a little smarter and avoid crossing page boundaries when
148
- // that is not required. Alas, microbenchmarking TBD.
149
- if let Some(16..) = self.data_stream.len().checked_sub(self.filled) {
150
- self.data_stream
151
- .copy_within(self.prev_start..self.filled, 0);
152
- } else {
153
- self.data_stream.copy_within(self.prev_start.., 0);
154
- }
155
-
156
- // The data kept its relative position to `filled` which now lands exactly at
157
- // the distance between prev_start and filled.
158
- self.current_start -= self.prev_start;
159
- self.available -= self.prev_start;
160
- self.filled -= self.prev_start;
161
- self.prev_start = 0;
183
+ self.shift_buffer_left(discard_size);
162
184
  }
163
185
 
164
186
  if self.filled + Self::GROWTH_BYTES > self.data_stream.len() {
165
187
  self.data_stream.resize(self.filled + Self::GROWTH_BYTES, 0);
166
188
  }
167
189
 
168
- UnfilterBuf {
190
+ if self.remaining_bytes < usize::MAX as u64
191
+ && self.filled.saturating_add(self.remaining_bytes as usize) < self.data_stream.len()
192
+ {
193
+ self.data_stream
194
+ .resize(self.filled + self.remaining_bytes as usize, 0);
195
+ }
196
+
197
+ let old_filled = self.filled;
198
+ let ret = f(&mut UnfilterBuf {
169
199
  buffer: &mut self.data_stream,
170
200
  filled: &mut self.filled,
171
201
  available: &mut self.available,
202
+ });
203
+ assert!(self.filled >= old_filled);
204
+ self.remaining_bytes -= (self.filled - old_filled) as u64;
205
+
206
+ if self.remaining_bytes == 0 {
207
+ self.available = self.filled;
172
208
  }
209
+
210
+ self.debug_assert_invariants();
211
+ ret
212
+ }
213
+
214
+ /// Shifts the contents of `self.data_stream` left,
215
+ /// discarding the first `discard_size` bytes.
216
+ fn shift_buffer_left(&mut self, discard_size: usize) {
217
+ // Violating this assertion will clobber the immutable "lookback"
218
+ // window that needs to be maintained for decompressor.
219
+ assert!(discard_size <= self.available);
220
+
221
+ // We have to relocate the data to the start of the buffer. Benchmarking suggests that
222
+ // the codegen for an unbounded range is better / different than the one for a bounded
223
+ // range. We prefer the former if the data overhead is not too high. `16` was
224
+ // determined experimentally and might be system (memory) dependent. There's also the
225
+ // question if we could be a little smarter and avoid crossing page boundaries when
226
+ // that is not required. Alas, microbenchmarking TBD.
227
+ if let Some(16..) = self.data_stream.len().checked_sub(self.filled) {
228
+ self.data_stream.copy_within(discard_size..self.filled, 0);
229
+ } else {
230
+ self.data_stream.copy_within(discard_size.., 0);
231
+ }
232
+
233
+ // The data kept its relative position to `filled` which now lands exactly at
234
+ // the distance between `discard_size` and the old `filled`.
235
+ self.current_start -= discard_size;
236
+ self.available -= discard_size;
237
+ self.filled -= discard_size;
238
+ match &mut self.prev_row {
239
+ PrevRow::None | PrevRow::Scratch(_) => (),
240
+ PrevRow::InPlace(prev_start) => *prev_start -= discard_size,
241
+ }
242
+ }
243
+
244
+ fn curr_row_filter(&self) -> Result<RowFilter, DecodingError> {
245
+ let filter = self.data_stream[self.current_start];
246
+ RowFilter::from_u8(filter).ok_or(DecodingError::Format(
247
+ FormatErrorInner::UnknownFilterMethod(filter).into(),
248
+ ))
173
249
  }
174
250
 
175
- /// Runs `unfilter` on the current row, and then shifts rows so that the current row becomes the previous row.
251
+ /// Runs `unfilter` on the current row, and then shifts rows so that the
252
+ /// current row becomes the previous row.
176
253
  ///
177
- /// Will panic if `self.curr_row_len() < rowlen`.
178
- pub fn unfilter_curr_row(
254
+ /// `unfilter` will mutate the current row in-place, and therefore the
255
+ /// caller should first consult `mutable_len_of_curr_row` to check if all
256
+ /// bytes of the current row are indeed mutable.
257
+ pub fn unfilter_curr_row_in_place(
179
258
  &mut self,
180
259
  rowlen: usize,
181
260
  bpp: BytesPerPixel,
182
261
  ) -> Result<(), DecodingError> {
183
- debug_assert!(rowlen >= 2); // 1 byte for `FilterType` and at least 1 byte of pixel data.
262
+ debug_assert!(rowlen >= 2); // 1 byte for `RowFilter` and at least 1 byte of pixel data.
263
+
264
+ // Violating the assertion below would clobber the bytes in the
265
+ // "lookback" window.
266
+ debug_assert!(self.mutable_len_of_curr_row() >= rowlen);
184
267
 
268
+ let filter = self.curr_row_filter()?;
185
269
  let (prev, row) = self.data_stream.split_at_mut(self.current_start);
186
- let prev: &[u8] = &prev[self.prev_start..];
270
+ let prev: &[u8] = self.prev_row.as_slice(prev);
271
+ let row = &mut row[1..rowlen]; // Skip the `RowFilter` byte.
272
+ debug_assert!(prev.is_empty() || prev.len() == row.len());
187
273
 
188
- debug_assert!(prev.is_empty() || prev.len() == (rowlen - 1));
274
+ unfilter(filter, bpp, prev, row);
275
+
276
+ self.reset_prev_row();
277
+ self.prev_row = PrevRow::InPlace(self.current_start + 1);
278
+ self.current_start += rowlen;
279
+ self.debug_assert_invariants();
189
280
 
190
- // Get the filter type.
191
- let filter = RowFilter::from_u8(row[0]).ok_or(DecodingError::Format(
192
- FormatErrorInner::UnknownFilterMethod(row[0]).into(),
193
- ))?;
194
- let row = &mut row[1..rowlen];
281
+ Ok(())
282
+ }
195
283
 
196
- unfilter(filter, bpp, prev, row);
284
+ /// Runs `unfilter` on the current row, and then shifts rows so that the
285
+ /// current row becomes the previous row.
286
+ ///
287
+ /// Before running `unfilter`, the contents of the current row will be
288
+ /// copied into a scratch buffer. This allows unfiltering to happen even
289
+ /// if `mutable_len_of_curr_row < rowlen` (e.g. when handling partial
290
+ /// or not-yet-complete input streams).
291
+ pub fn unfilter_curr_row_using_scratch_buffer(
292
+ &mut self,
293
+ rowlen: usize,
294
+ bpp: BytesPerPixel,
295
+ ) -> Result<(), DecodingError> {
296
+ debug_assert!(rowlen >= 2); // 1 byte for `RowFilter` and at least 1 byte of pixel data.
297
+ debug_assert!(self.readable_len_of_curr_row() >= rowlen);
298
+
299
+ // If `mutable_len_of_curr_row >= rowlen`, then `unfilter_curr_row_in_place`
300
+ // should have been called instead (to avoid the cost of `copy_from_slice` below).
301
+ debug_assert!(self.mutable_len_of_curr_row() < rowlen);
302
+
303
+ let filter = self.curr_row_filter()?;
304
+
305
+ let mut row = std::mem::take(&mut self.scratch_buffer);
306
+ row.resize(rowlen - 1, 0);
307
+ row.as_mut_slice()
308
+ .copy_from_slice(&self.data_stream[self.current_start + 1..][..rowlen - 1]);
309
+
310
+ let prev = self.prev_row();
311
+ debug_assert!(prev.is_empty() || prev.len() == (rowlen - 1));
312
+
313
+ unfilter(filter, bpp, prev, &mut row);
197
314
 
198
- self.prev_start = self.current_start + 1;
315
+ self.reset_prev_row();
316
+ self.prev_row = PrevRow::Scratch(row);
199
317
  self.current_start += rowlen;
200
318
 
201
319
  self.debug_assert_invariants();
@@ -204,6 +322,45 @@ impl UnfilteringBuffer {
204
322
  }
205
323
  }
206
324
 
325
+ /// An already `unfilter`-ed, previous row.
326
+ ///
327
+ /// The data excludes the `RowFilter` byte - it only includes the actual pixel data.
328
+ enum PrevRow {
329
+ /// No unfiltered row.
330
+ ///
331
+ /// `None` is the value of `UnfilteringBuffer::prev_row` before any row has
332
+ /// been unfiltered (or at the start of a new interlace pass).
333
+ None,
334
+
335
+ /// Offset of `UnfilteringBuffer::data_stream` where the unfiltered row
336
+ /// starts.
337
+ ///
338
+ /// `PrevRow::InPlace(_)` is used by `unfilter_curr_row_in_place`
339
+ /// when setting `UnfilteringBuffer::prev_row`.
340
+ InPlace(usize),
341
+
342
+ /// Separate scratch buffer containing the unfiltered row data.
343
+ ///
344
+ /// `PrevRow::Scratch(_)` is used by
345
+ /// `unfilter_curr_row_using_scratch_buffer`
346
+ /// when setting `UnfilteringBuffer::prev_row`.
347
+ Scratch(Vec<u8>),
348
+ }
349
+
350
+ impl PrevRow {
351
+ /// Returns the previous unfiltered row as a slice of bytes.
352
+ ///
353
+ /// `buf` should refer to the `..current_start` portion of
354
+ /// `UnfilteringBuffer::data_stream`.
355
+ fn as_slice<'a>(&'a self, buf: &'a [u8]) -> &'a [u8] {
356
+ match self {
357
+ PrevRow::None => &[],
358
+ PrevRow::InPlace(prev_start) => &buf[*prev_start..],
359
+ PrevRow::Scratch(scratch) => scratch.as_slice(),
360
+ }
361
+ }
362
+ }
363
+
207
364
  fn checked_next_multiple_of(val: usize, factor: usize) -> Option<usize> {
208
365
  if factor == 0 {
209
366
  return None;
@@ -2,38 +2,56 @@ use super::{stream::FormatErrorInner, unfiltering_buffer::UnfilteringBuffer, Dec
2
2
 
3
3
  use fdeflate::Decompressor;
4
4
 
5
- /// An inplace buffer for decompression and filtering of PNG rowlines.
5
+ /// [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that
6
+ /// "deflate/inflate compression with a sliding window (which is an upper bound on the
7
+ /// distances appearing in the deflate stream) of at most 32768 bytes".
6
8
  ///
7
- /// The underlying data structure is a vector, with additional markers denoting a region of bytes
8
- /// that are utilized by the decompression but not yet available to arbitrary modifications. The
9
- /// caller can still shift around data between calls to the stream decompressor as long as the data
10
- /// in the marked region is not modified and the indices adjusted accordingly. See
11
- /// [`UnfilterRegion`] that contains these markers.
9
+ /// `fdeflate` requires that we keep this many most recently decompressed bytes in the
10
+ /// `out_buffer` - this allows referring back to them when handling "length and distance
11
+ /// codes" in the deflate stream.
12
+ const LOOKBACK_SIZE: usize = 32768;
13
+
14
+ /// A buffer for decompression and in-place filtering of PNG rowlines.
12
15
  ///
13
- /// Violating the invariants, i.e. modifying bytes in the marked region, results in absurdly wacky
14
- /// decompression output or panics but not undefined behavior.
16
+ /// The underlying data structure is a vector, with additional markers dividing
17
+ /// the vector into specific regions of bytes - see [`UnfilterRegion`] for more
18
+ /// details.
15
19
  pub struct UnfilterBuf<'data> {
16
- /// The data container. Starts with arbitrary data unrelated to the decoder, a slice of decoder
17
- /// private data followed by free space for further decoder output. The regions are delimited
18
- /// by `filled` and `available` which must be updated accordingly.
19
- pub(crate) buffer: &'data mut Vec<u8>,
20
- /// Where we record changes to the out position.
21
- pub(crate) filled: &'data mut usize,
22
- /// Where we record changes to the available byte.
20
+ /// The data container.
21
+ pub(crate) buffer: &'data mut [u8],
22
+ /// The past-the-end index of the region that is allowed to be modified.
23
23
  pub(crate) available: &'data mut usize,
24
+ /// The past-the-end index of the region with decompressed bytes.
25
+ pub(crate) filled: &'data mut usize,
24
26
  }
25
27
 
26
- /// A region into a buffer utilized as a [`UnfilterBuf`].
28
+ /// `UnfilterRegion` divides a `Vec<u8>` buffer into three consecutive regions:
29
+ ///
30
+ /// * `vector[0..available]` - bytes that may be mutated (this typically means
31
+ /// bytes that were decompressed earlier, but the user of the buffer may also use
32
+ /// this region for storing other data)
33
+ /// * `vector[available..filled]` - already decompressed bytes that need to be
34
+ /// preserved. (Future decompressor calls may reference and copy bytes from
35
+ /// this region. The maximum `filled - available` "look back" distance for
36
+ /// [PNG compression method 0](https://www.w3.org/TR/png-3/#10CompressionCM0)
37
+ /// is 32768 bytes)
38
+ /// * `vector[filled..]` - buffer where future decompressor calls can write
39
+ /// additional decompressed bytes
40
+ ///
41
+ /// Even though only `vector[0..available]` bytes can be mutated, it is allowed
42
+ /// to "shift" or "move" the contents of vector, as long as the:
43
+ ///
44
+ /// * `vector[available..filled]` bytes are preserved
45
+ /// * `available` and `filled` offsets are updated
27
46
  ///
28
- /// The span of data denoted by `filled..available` is the region of bytes that must be preserved
29
- /// for use by the decompression algorithm. It may be moved, e.g. by subtracting the same amount
30
- /// from both of these fields. Always ensure that `filled <= available`, the library does not
31
- /// violate this invariant when modifying this struct as an [`UnfilterBuf`].
47
+ /// Violating the invariants described above (e.g. mutating the bytes in the
48
+ /// `vector[available..filled]` region) may result in absurdly wacky
49
+ /// decompression output or panics, but not undefined behavior.
32
50
  #[derive(Default, Clone, Copy)]
33
51
  pub struct UnfilterRegion {
34
- /// The past-the-end index of byte that are allowed to be modified.
52
+ /// The past-the-end index of the region that is allowed to be modified.
35
53
  pub available: usize,
36
- /// The past-the-end of bytes that have been written to.
54
+ /// The past-the-end index of the region with decompressed bytes.
37
55
  pub filled: usize,
38
56
  }
39
57
 
@@ -52,15 +70,6 @@ pub(super) struct ZlibStream {
52
70
  }
53
71
 
54
72
  impl ZlibStream {
55
- // [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that
56
- // "deflate/inflate compression with a sliding window (which is an upper bound on the
57
- // distances appearing in the deflate stream) of at most 32768 bytes".
58
- //
59
- // `fdeflate` requires that we keep this many most recently decompressed bytes in the
60
- // `out_buffer` - this allows referring back to them when handling "length and distance
61
- // codes" in the deflate stream).
62
- const LOOKBACK_SIZE: usize = 32768;
63
-
64
73
  pub(crate) fn new() -> Self {
65
74
  ZlibStream {
66
75
  state: Box::new(Decompressor::new()),
@@ -113,68 +122,72 @@ impl ZlibStream {
113
122
  self.state.ignore_adler32();
114
123
  }
115
124
 
116
- let (buffer, filled) = image_data.borrow_mut();
117
- let output_limit = (filled + UnfilteringBuffer::GROWTH_BYTES).min(buffer.len());
118
- let (in_consumed, out_consumed) = self
119
- .state
120
- .read(data, &mut buffer[..output_limit], filled, false)
121
- .map_err(|err| {
122
- DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
123
- })?;
124
-
125
+ let in_consumed = image_data.decompress(&mut self.state, data)?;
125
126
  self.started = true;
126
- let filled = filled + out_consumed;
127
- image_data.filled(filled);
128
-
129
- if self.state.is_done() {
130
- image_data.commit(filled);
131
- } else {
132
- // See [`Self::LOOKBACK_SIZE`].
133
- image_data.commit(filled.saturating_sub(Self::LOOKBACK_SIZE));
134
- }
135
127
 
136
128
  Ok(in_consumed)
137
129
  }
138
130
 
139
- /// Called after all consecutive IDAT chunks were handled.
131
+ /// Output any remaining buffered data within the decompressor.
140
132
  ///
141
- /// The compressed stream can be split on arbitrary byte boundaries. This enables some cleanup
142
- /// within the decompressor and flushing additional data which may have been kept back in case
143
- /// more data were passed to it.
144
- pub(crate) fn finish_compressed_chunks(
133
+ /// Returns `Ok(true)` if all data has been decompressed and there is no
134
+ /// more data that will be produced, or `Ok(false)` if there's potentially
135
+ /// more output.
136
+ ///
137
+ /// Returns `Err` if the zlib stream is corrupt or truncated too early.
138
+ pub(crate) fn finish(
145
139
  &mut self,
146
140
  image_data: &mut UnfilterBuf<'_>,
147
- ) -> Result<(), DecodingError> {
148
- if !self.started {
149
- return Ok(());
141
+ ) -> Result<bool, DecodingError> {
142
+ if !self.started || self.state.is_done() {
143
+ return Ok(true);
150
144
  }
151
145
 
152
- if self.state.is_done() {
153
- // We can end up here only after the [`decompress`] call above has detected the state
154
- // to be done, too. In this case the filled and committed amount of data are already
155
- // equal to each other. So neither of them needs to be touched in any way.
156
- return Ok(());
146
+ // If the zlib stream isn't done but we've already output all the pixel
147
+ // data needed, then either there's too much compressed data or the
148
+ // checksum is missing. Those aren't allowed by the spec, but libpng
149
+ // generally doesn't treat them as fatal.
150
+ if *image_data.filled == image_data.buffer.len() {
151
+ return Ok(true);
157
152
  }
158
153
 
159
- let (_, mut filled) = image_data.borrow_mut();
160
- while !self.state.is_done() {
161
- let (buffer, _) = image_data.borrow_mut();
162
- let (_in_consumed, out_consumed) =
163
- self.state.read(&[], buffer, filled, true).map_err(|err| {
164
- DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
165
- })?;
166
-
167
- filled += out_consumed;
154
+ let (_, out_consumed) = self
155
+ .state
156
+ .read(
157
+ &[],
158
+ &mut image_data.buffer[*image_data.available..],
159
+ *image_data.filled - *image_data.available,
160
+ false,
161
+ )
162
+ .map_err(|err| {
163
+ DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
164
+ })?;
165
+ *image_data.filled += out_consumed;
168
166
 
169
- if !self.state.is_done() {
170
- image_data.flush_allocate();
171
- }
167
+ if self.state.is_done() {
168
+ *image_data.available = *image_data.filled;
169
+ return Ok(true);
172
170
  }
173
171
 
174
- image_data.filled(filled);
175
- image_data.commit(filled);
172
+ // More output is only possible if zlib stream hasn't finished and the
173
+ // output buffer *is* full. (Empty space in the output buffer tells us
174
+ // there wasn't more data to write into it.)
175
+ if *image_data.filled == image_data.buffer.len() {
176
+ *image_data.available =
177
+ (*image_data.available).max(image_data.filled.saturating_sub(LOOKBACK_SIZE));
178
+ return Ok(false);
179
+ }
176
180
 
177
- Ok(())
181
+ // The zlib stream was truncated before the end of the pixel data. This
182
+ // would ordinarily be caught within fdeflate if we'd passed
183
+ // end_of_input=true. But we intentionally don't pass that flag so that
184
+ // we're able to drain all available pixel data first.
185
+ Err(DecodingError::Format(
186
+ FormatErrorInner::CorruptFlateStream {
187
+ err: fdeflate::DecompressionError::InsufficientInput,
188
+ }
189
+ .into(),
190
+ ))
178
191
  }
179
192
  }
180
193
 
@@ -184,7 +197,11 @@ impl UnfilterRegion {
184
197
  /// Pass the wrapped buffer to
185
198
  /// [`StreamingDecoder::update`][`super::stream::StreamingDecoder::update`] to fill it with
186
199
  /// data and update the region indices.
200
+ ///
201
+ /// May panic if invariants of [`UnfilterRegion`] are violated.
187
202
  pub fn as_buf<'data>(&'data mut self, buffer: &'data mut Vec<u8>) -> UnfilterBuf<'data> {
203
+ assert!(self.available <= self.filled);
204
+ assert!(self.filled <= buffer.len());
188
205
  UnfilterBuf {
189
206
  buffer,
190
207
  filled: &mut self.filled,
@@ -194,20 +211,43 @@ impl UnfilterRegion {
194
211
  }
195
212
 
196
213
  impl UnfilterBuf<'_> {
197
- pub(crate) fn borrow_mut(&mut self) -> (&mut [u8], usize) {
198
- (self.buffer, *self.filled)
199
- }
200
-
201
- pub(crate) fn filled(&mut self, filled: usize) {
202
- *self.filled = filled;
203
- }
214
+ /// Pushes `input` into `fdeflate` crate and appends decompressed bytes to `self.buffer`
215
+ /// (adjusting `self.filled` and `self.available` depending on how many bytes have been
216
+ /// decompressed).
217
+ ///
218
+ /// Returns how many bytes of `input` have been consumed.
219
+ #[inline]
220
+ fn decompress(
221
+ &mut self,
222
+ decompressor: &mut fdeflate::Decompressor,
223
+ input: &[u8],
224
+ ) -> Result<usize, DecodingError> {
225
+ let output_limit = (*self.filled + UnfilteringBuffer::GROWTH_BYTES).min(self.buffer.len());
226
+ let (in_consumed, out_consumed) = decompressor
227
+ .read(
228
+ input,
229
+ &mut self.buffer[*self.available..output_limit],
230
+ *self.filled - *self.available,
231
+ false,
232
+ )
233
+ .map_err(|err| {
234
+ DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
235
+ })?;
204
236
 
205
- pub(crate) fn commit(&mut self, howmany: usize) {
206
- *self.available = howmany;
207
- }
237
+ *self.filled += out_consumed;
238
+ if decompressor.is_done() {
239
+ *self.available = *self.filled;
240
+ } else if let Some(new_available) = self.filled.checked_sub(LOOKBACK_SIZE) {
241
+ // The decompressed data may have started in the middle of the buffer,
242
+ // so ensure that `self.available` never goes backward. This is needed
243
+ // to avoid miscommunicating the size of the "look-back" window when calling
244
+ // `fdeflate::Decompressor::read` a bit earlier and passing
245
+ // `&mut self.buffer[*self.available..output_limit]`.
246
+ if new_available > *self.available {
247
+ *self.available = new_available;
248
+ }
249
+ }
208
250
 
209
- pub(crate) fn flush_allocate(&mut self) {
210
- let len = self.buffer.len() + 32 * 1024;
211
- self.buffer.resize(len, 0);
251
+ Ok(in_consumed)
212
252
  }
213
253
  }