html-to-markdown 2.24.6 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/ext/html-to-markdown-rb/native/Cargo.lock +3 -26
  4. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  5. data/lib/html_to_markdown/version.rb +1 -1
  6. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  7. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  8. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  9. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +53 -91
  10. data/rust-vendor/png/.cargo-checksum.json +1 -1
  11. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  12. data/rust-vendor/png/CHANGES.md +44 -0
  13. data/rust-vendor/png/Cargo.lock +124 -171
  14. data/rust-vendor/png/Cargo.toml +1 -1
  15. data/rust-vendor/png/Cargo.toml.orig +1 -1
  16. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  17. data/rust-vendor/png/benches/unfilter.rs +3 -3
  18. data/rust-vendor/png/src/adam7.rs +17 -10
  19. data/rust-vendor/png/src/common.rs +8 -8
  20. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  21. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  22. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  23. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  24. data/rust-vendor/png/src/encoder.rs +4 -2
  25. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  26. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  27. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  28. data/rust-vendor/png/src/filter/simd.rs +308 -0
  29. data/rust-vendor/png/src/lib.rs +1 -0
  30. metadata +7 -177
  31. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  32. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  33. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  34. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  35. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  36. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  37. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  38. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  39. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  40. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  41. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  42. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  43. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  44. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  45. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  46. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  47. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  48. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  49. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  50. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  51. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  52. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  53. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  54. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  55. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  56. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  57. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  58. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  59. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  60. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  61. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  62. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  63. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  64. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  65. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  66. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  67. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  68. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  69. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  70. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  71. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  72. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  153. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  154. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  155. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  156. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  157. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  158. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  159. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  160. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  161. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  162. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  163. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  164. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  165. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  166. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  167. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  168. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  169. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  170. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  171. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  172. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  173. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  174. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  175. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  176. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  177. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  178. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  179. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  180. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  181. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  182. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  183. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  184. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  185. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  186. data/rust-vendor/xml5ever/README.md +0 -72
  187. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  188. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  189. data/rust-vendor/xml5ever/examples/README.md +0 -223
  190. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  191. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  192. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  193. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  194. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  195. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  196. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  197. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  198. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  199. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  200. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  201. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  202. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  203. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -270,8 +270,8 @@ pub(crate) enum FormatErrorInner {
270
270
  UnexpectedRestartOfDataChunkSequence {
271
271
  kind: ChunkType,
272
272
  },
273
- /// Failure to parse a chunk, because the chunk didn't contain enough bytes.
274
- ChunkTooShort {
273
+ /// Failure to parse a chunk, because the chunk had the wrong number of bytes.
274
+ ChunkLengthWrong {
275
275
  kind: ChunkType,
276
276
  },
277
277
  UnrecognizedCriticalChunk {
@@ -415,8 +415,8 @@ impl fmt::Display for FormatError {
415
415
  UnexpectedRestartOfDataChunkSequence { kind } => {
416
416
  write!(fmt, "Unexpected restart of {:?} chunk sequence", kind)
417
417
  }
418
- ChunkTooShort { kind } => {
419
- write!(fmt, "Chunk is too short: {:?}", kind)
418
+ ChunkLengthWrong { kind } => {
419
+ write!(fmt, "Chunk length wrong: {:?}", kind)
420
420
  }
421
421
  UnrecognizedCriticalChunk { type_str } => {
422
422
  write!(fmt, "Unrecognized critical chunk: {:?}", type_str)
@@ -570,6 +570,16 @@ struct ChunkState {
570
570
 
571
571
  /// Non-decoded bytes in the chunk.
572
572
  raw_bytes: Vec<u8>,
573
+
574
+ /// Whether this chunk should be skipped or decoded.
575
+ action: ChunkAction,
576
+ }
577
+
578
+ #[derive(Debug, PartialEq)]
579
+ enum ChunkAction {
580
+ Process,
581
+ Skip,
582
+ Reject,
573
583
  }
574
584
 
575
585
  impl StreamingDecoder {
@@ -591,6 +601,7 @@ impl StreamingDecoder {
591
601
  crc: Crc32::new(),
592
602
  remaining: 0,
593
603
  raw_bytes: Vec::with_capacity(CHUNK_BUFFER_SIZE),
604
+ action: ChunkAction::Process,
594
605
  },
595
606
  inflater,
596
607
  info: None,
@@ -720,8 +731,7 @@ impl StreamingDecoder {
720
731
  // values is that they occur fairly frequently and special-casing them results
721
732
  // in performance gains.
722
733
  const CONSUMED_BYTES: usize = 4;
723
- self.parse_u32(kind, &buf[0..4], image_data)
724
- .map(|decoded| (CONSUMED_BYTES, decoded))
734
+ self.parse_u32(kind, &buf[0..4], image_data, CONSUMED_BYTES)
725
735
  } else {
726
736
  let remaining_count = 4 - accumulated_count;
727
737
  let consumed_bytes = {
@@ -741,8 +751,7 @@ impl StreamingDecoder {
741
751
  Ok((consumed_bytes, Decoded::Nothing))
742
752
  } else {
743
753
  debug_assert_eq!(accumulated_count, 4);
744
- self.parse_u32(kind, &bytes, image_data)
745
- .map(|decoded| (consumed_bytes, decoded))
754
+ self.parse_u32(kind, &bytes, image_data, consumed_bytes)
746
755
  }
747
756
  }
748
757
  }
@@ -757,30 +766,33 @@ impl StreamingDecoder {
757
766
  remaining,
758
767
  raw_bytes,
759
768
  type_: _,
769
+ action,
760
770
  } = &mut self.current_chunk;
761
771
 
762
- if raw_bytes.len() == raw_bytes.capacity() {
763
- if self.limits.bytes == 0 {
764
- return Err(DecodingError::LimitsExceeded);
765
- }
766
-
767
- // Double the size of the Vec, but not beyond the allocation limit.
768
- debug_assert!(raw_bytes.capacity() > 0);
769
- let reserve_size = raw_bytes.capacity().min(self.limits.bytes);
770
-
771
- self.limits.reserve_bytes(reserve_size)?;
772
- raw_bytes.reserve_exact(reserve_size);
773
- }
774
-
775
772
  let buf_avail = raw_bytes.capacity() - raw_bytes.len();
776
773
  let bytes_avail = min(buf.len(), buf_avail);
777
774
  let n = min(*remaining, bytes_avail as u32);
778
-
779
775
  let buf = &buf[..n as usize];
776
+
780
777
  if !self.decode_options.ignore_crc {
781
778
  crc.update(buf);
782
779
  }
783
- raw_bytes.extend_from_slice(buf);
780
+
781
+ if *action == ChunkAction::Process {
782
+ if raw_bytes.len() == raw_bytes.capacity() {
783
+ if self.limits.bytes == 0 {
784
+ return Err(DecodingError::LimitsExceeded);
785
+ }
786
+
787
+ // Double the size of the Vec, but not beyond the allocation limit.
788
+ debug_assert!(raw_bytes.capacity() > 0);
789
+ let reserve_size = raw_bytes.capacity().min(self.limits.bytes);
790
+
791
+ self.limits.reserve_bytes(reserve_size)?;
792
+ raw_bytes.reserve_exact(reserve_size);
793
+ }
794
+ raw_bytes.extend_from_slice(buf);
795
+ }
784
796
 
785
797
  *remaining -= n;
786
798
  if *remaining == 0 {
@@ -823,7 +835,8 @@ impl StreamingDecoder {
823
835
  kind: U32ValueKind,
824
836
  u32_be_bytes: &[u8],
825
837
  image_data: Option<&mut UnfilterBuf<'_>>,
826
- ) -> Result<Decoded, DecodingError> {
838
+ consumed_bytes: usize,
839
+ ) -> Result<(usize, Decoded), DecodingError> {
827
840
  debug_assert_eq!(u32_be_bytes.len(), 4);
828
841
  let bytes = u32_be_bytes.try_into().unwrap();
829
842
  let val = u32::from_be_bytes(bytes);
@@ -832,7 +845,7 @@ impl StreamingDecoder {
832
845
  U32ValueKind::Signature1stU32 => {
833
846
  if bytes == [137, 80, 78, 71] {
834
847
  self.state = Some(State::new_u32(U32ValueKind::Signature2ndU32));
835
- Ok(Decoded::Nothing)
848
+ Ok((consumed_bytes, Decoded::Nothing))
836
849
  } else {
837
850
  Err(DecodingError::Format(
838
851
  FormatErrorInner::InvalidSignature.into(),
@@ -842,7 +855,7 @@ impl StreamingDecoder {
842
855
  U32ValueKind::Signature2ndU32 => {
843
856
  if bytes == [13, 10, 26, 10] {
844
857
  self.state = Some(State::new_u32(U32ValueKind::Length));
845
- Ok(Decoded::Nothing)
858
+ Ok((consumed_bytes, Decoded::Nothing))
846
859
  } else {
847
860
  Err(DecodingError::Format(
848
861
  FormatErrorInner::InvalidSignature.into(),
@@ -851,7 +864,7 @@ impl StreamingDecoder {
851
864
  }
852
865
  U32ValueKind::Length => {
853
866
  self.state = Some(State::new_u32(U32ValueKind::Type { length: val }));
854
- Ok(Decoded::Nothing)
867
+ Ok((consumed_bytes, Decoded::Nothing))
855
868
  }
856
869
  U32ValueKind::Type { length } => {
857
870
  let type_str = ChunkType(bytes);
@@ -863,20 +876,44 @@ impl StreamingDecoder {
863
876
  if type_str != self.current_chunk.type_
864
877
  && (self.current_chunk.type_ == IDAT || self.current_chunk.type_ == chunk::fdAT)
865
878
  {
866
- self.current_chunk.type_ = type_str;
867
- if let Some(image_data) = image_data {
868
- self.inflater.finish_compressed_chunks(image_data)?;
869
- }
879
+ let finished = match image_data {
880
+ Some(image_data) => self.inflater.finish(image_data)?,
881
+ None => true,
882
+ };
870
883
 
871
- self.ready_for_idat_chunks = false;
872
- self.ready_for_fdat_chunks = false;
884
+ // We ended up handling IDAT/fdAT data rather than the chunk
885
+ // type header actually received. Thus rewind `self.state` to
886
+ // what it was before this function was called.
873
887
  self.state = Some(State::U32 {
874
888
  kind,
875
889
  bytes,
876
- accumulated_count: 4,
890
+ accumulated_count: 4 - consumed_bytes,
877
891
  });
878
- return Ok(Decoded::ImageDataFlushed);
892
+
893
+ if finished {
894
+ // We've processed all the image data necessary. Update
895
+ // `current_chunk.type_`so this codepath isn't taken
896
+ // again next time.
897
+ self.current_chunk.type_ = type_str;
898
+ self.ready_for_idat_chunks = false;
899
+ self.ready_for_fdat_chunks = false;
900
+ return Ok((0, Decoded::ImageDataFlushed));
901
+ } else {
902
+ // Report that we processed some image data without
903
+ // consuming any input. This gives the caller a chance
904
+ // to grow the output buffer and call us again.
905
+ return Ok((0, Decoded::ImageData));
906
+ }
907
+ }
908
+
909
+ self.current_chunk.type_ = type_str;
910
+ if !self.decode_options.ignore_crc {
911
+ self.current_chunk.crc.reset();
912
+ self.current_chunk.crc.update(&type_str.0);
879
913
  }
914
+ self.current_chunk.remaining = length;
915
+ self.current_chunk.raw_bytes.clear();
916
+
880
917
  self.state = match type_str {
881
918
  chunk::fdAT => {
882
919
  if !self.ready_for_fdat_chunks {
@@ -892,6 +929,7 @@ impl StreamingDecoder {
892
929
  FormatErrorInner::FdatShorterThanFourBytes.into(),
893
930
  ));
894
931
  }
932
+ self.current_chunk.action = ChunkAction::Process;
895
933
  Some(State::new_u32(U32ValueKind::ApngSequenceNumber))
896
934
  }
897
935
  IDAT => {
@@ -904,18 +942,12 @@ impl StreamingDecoder {
904
942
  ));
905
943
  }
906
944
  self.have_idat = true;
945
+ self.current_chunk.action = ChunkAction::Process;
907
946
  Some(State::ImageData(type_str))
908
947
  }
909
- _ => Some(State::ReadChunkData(type_str)),
948
+ _ => Some(self.start_chunk(type_str, length)?),
910
949
  };
911
- self.current_chunk.type_ = type_str;
912
- if !self.decode_options.ignore_crc {
913
- self.current_chunk.crc.reset();
914
- self.current_chunk.crc.update(&type_str.0);
915
- }
916
- self.current_chunk.remaining = length;
917
- self.current_chunk.raw_bytes.clear();
918
- Ok(Decoded::ChunkBegin(length, type_str))
950
+ Ok((consumed_bytes, Decoded::ChunkBegin(length, type_str)))
919
951
  }
920
952
  U32ValueKind::Crc(type_str) => {
921
953
  // If ignore_crc is set, do not calculate CRC. We set
@@ -928,21 +960,36 @@ impl StreamingDecoder {
928
960
  };
929
961
 
930
962
  if val == sum || CHECKSUM_DISABLED {
931
- // A fatal error in chunk parsing leaves the decoder in state 'None' to enforce
932
- // that parsing can't continue after an error.
933
- debug_assert!(self.state.is_none());
934
- let decoded = self.parse_chunk(type_str)?;
935
-
936
- if type_str != IEND {
937
- self.state = Some(State::new_u32(U32ValueKind::Length));
963
+ match self.current_chunk.action {
964
+ ChunkAction::Process => {
965
+ // A fatal error in chunk parsing leaves the decoder in state 'None' to enforce
966
+ // that parsing can't continue after an error.
967
+ debug_assert!(self.state.is_none());
968
+ let decoded = self.parse_chunk(type_str)?;
969
+
970
+ if type_str != IEND {
971
+ self.state = Some(State::new_u32(U32ValueKind::Length));
972
+ }
973
+ Ok((consumed_bytes, decoded))
974
+ }
975
+ ChunkAction::Skip => {
976
+ self.state = Some(State::new_u32(U32ValueKind::Length));
977
+ Ok((
978
+ consumed_bytes,
979
+ Decoded::SkippedAncillaryChunk(self.current_chunk.type_),
980
+ ))
981
+ }
982
+ ChunkAction::Reject => {
983
+ self.state = Some(State::new_u32(U32ValueKind::Length));
984
+ Ok((consumed_bytes, Decoded::BadAncillaryChunk(type_str)))
985
+ }
938
986
  }
939
- Ok(decoded)
940
987
  } else if self.decode_options.skip_ancillary_crc_failures
941
988
  && !chunk::is_critical(type_str)
942
989
  {
943
990
  // Ignore ancillary chunk with invalid CRC
944
991
  self.state = Some(State::new_u32(U32ValueKind::Length));
945
- Ok(Decoded::BadAncillaryChunk(type_str))
992
+ Ok((consumed_bytes, Decoded::BadAncillaryChunk(type_str)))
946
993
  } else {
947
994
  Err(DecodingError::Format(
948
995
  FormatErrorInner::CrcMismatch {
@@ -983,9 +1030,67 @@ impl StreamingDecoder {
983
1030
  }
984
1031
 
985
1032
  self.state = Some(State::ImageData(chunk::fdAT));
986
- Ok(Decoded::Nothing)
1033
+ Ok((consumed_bytes, Decoded::Nothing))
1034
+ }
1035
+ }
1036
+ }
1037
+
1038
+ fn start_chunk(&mut self, type_str: ChunkType, length: u32) -> Result<State, DecodingError> {
1039
+ let target_length = match type_str {
1040
+ IHDR => 13..=13,
1041
+ chunk::PLTE => 3..=768,
1042
+ chunk::IEND => 0..=0,
1043
+ chunk::sBIT => 1..=4,
1044
+ chunk::tRNS => 1..=256,
1045
+ chunk::pHYs => 9..=9,
1046
+ chunk::gAMA => 4..=4,
1047
+ chunk::acTL => 8..=8,
1048
+ chunk::fcTL => 26..=26,
1049
+ chunk::cHRM => 32..=32,
1050
+ chunk::sRGB => 1..=1,
1051
+ chunk::cICP => 4..=4,
1052
+ chunk::mDCV => 24..=24,
1053
+ chunk::cLLI => 8..=8,
1054
+ chunk::bKGD => 1..=6,
1055
+
1056
+ // Unbounded size chunks
1057
+ chunk::eXIf => 0..=u32::MAX >> 1, // TODO: allow skipping.
1058
+ chunk::iCCP if !self.decode_options.ignore_iccp_chunk => 0..=u32::MAX >> 1,
1059
+ chunk::tEXt if !self.decode_options.ignore_text_chunk => 0..=u32::MAX >> 1,
1060
+ chunk::zTXt if !self.decode_options.ignore_text_chunk => 0..=u32::MAX >> 1,
1061
+ chunk::iTXt if !self.decode_options.ignore_text_chunk => 0..=u32::MAX >> 1,
1062
+
1063
+ chunk::IDAT | chunk::fdAT => unreachable!(),
1064
+
1065
+ _ if is_critical(type_str) => {
1066
+ return Err(DecodingError::Format(
1067
+ FormatErrorInner::UnrecognizedCriticalChunk { type_str }.into(),
1068
+ ));
1069
+ }
1070
+ _ => {
1071
+ self.current_chunk.action = ChunkAction::Skip;
1072
+ return Ok(State::ReadChunkData(type_str));
1073
+ }
1074
+ };
1075
+
1076
+ if !target_length.contains(&length) {
1077
+ // Uncomment to detect unexpected chunk lengths during testing.
1078
+ // panic!("chunk type_str={type_str:?} has length={length}, target_length={target_length:?}");
1079
+ match type_str {
1080
+ IHDR | chunk::PLTE | chunk::IEND | chunk::fcTL => {
1081
+ return Err(DecodingError::Format(
1082
+ FormatErrorInner::ChunkLengthWrong { kind: type_str }.into(),
1083
+ ));
1084
+ }
1085
+ _ => {
1086
+ self.current_chunk.action = ChunkAction::Reject;
1087
+ }
987
1088
  }
1089
+ } else {
1090
+ self.current_chunk.action = ChunkAction::Process;
988
1091
  }
1092
+
1093
+ Ok(State::ReadChunkData(type_str))
989
1094
  }
990
1095
 
991
1096
  fn parse_chunk(&mut self, type_str: ChunkType) -> Result<Decoded, DecodingError> {
@@ -1014,22 +1119,16 @@ impl StreamingDecoder {
1014
1119
  chunk::bKGD => self.parse_bkgd(),
1015
1120
 
1016
1121
  // Ancillary chunks with unbounded size.
1017
- chunk::eXIf => self.parse_exif(), // TODO: allow skipping.
1018
- chunk::iCCP if !self.decode_options.ignore_iccp_chunk => self.parse_iccp(),
1019
- chunk::tEXt if !self.decode_options.ignore_text_chunk => self.parse_text(),
1020
- chunk::zTXt if !self.decode_options.ignore_text_chunk => self.parse_ztxt(),
1021
- chunk::iTXt if !self.decode_options.ignore_text_chunk => self.parse_itxt(),
1122
+ chunk::eXIf => self.parse_exif(),
1123
+ chunk::iCCP => self.parse_iccp(),
1124
+ chunk::tEXt => self.parse_text(),
1125
+ chunk::zTXt => self.parse_ztxt(),
1126
+ chunk::iTXt => self.parse_itxt(),
1022
1127
 
1023
1128
  // Unrecognized chunks.
1024
- _ => {
1025
- if is_critical(type_str) {
1026
- return Err(DecodingError::Format(
1027
- FormatErrorInner::UnrecognizedCriticalChunk { type_str }.into(),
1028
- ));
1029
- } else {
1030
- return Ok(Decoded::SkippedAncillaryChunk(type_str));
1031
- }
1032
- }
1129
+ _ => unreachable!(
1130
+ "Unrecognized chunk {type_str:?} should have been caught in start_chunk"
1131
+ ),
1033
1132
  };
1034
1133
 
1035
1134
  parse_result = parse_result.map_err(|e| {
@@ -1040,7 +1139,7 @@ impl StreamingDecoder {
1040
1139
  // (potentially recoverable) `IoError` / `UnexpectedEof`.
1041
1140
  DecodingError::IoError(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
1042
1141
  let fmt_err: FormatError =
1043
- FormatErrorInner::ChunkTooShort { kind: type_str }.into();
1142
+ FormatErrorInner::ChunkLengthWrong { kind: type_str }.into();
1044
1143
  fmt_err.into()
1045
1144
  }
1046
1145
  e => e,
@@ -1094,7 +1193,7 @@ impl StreamingDecoder {
1094
1193
  0
1095
1194
  });
1096
1195
  self.inflater.reset();
1097
- self.ready_for_fdat_chunks = true;
1196
+ self.ready_for_fdat_chunks = self.have_idat;
1098
1197
  let fc = FrameControl {
1099
1198
  sequence_number: next_seq_no,
1100
1199
  width: buf.read_be()?,
@@ -1135,10 +1234,15 @@ impl StreamingDecoder {
1135
1234
  }
1136
1235
 
1137
1236
  fn parse_actl(&mut self) -> Result<(), DecodingError> {
1237
+ let info = self.info.as_mut().unwrap();
1138
1238
  if self.have_idat {
1139
1239
  Err(DecodingError::Format(
1140
1240
  FormatErrorInner::AfterIdat { kind: chunk::acTL }.into(),
1141
1241
  ))
1242
+ } else if info.animation_control.is_some() {
1243
+ Err(DecodingError::Format(
1244
+ FormatErrorInner::DuplicateChunk { kind: chunk::acTL }.into(),
1245
+ ))
1142
1246
  } else {
1143
1247
  let mut buf = &self.current_chunk.raw_bytes[..];
1144
1248
  let actl = AnimationControl {
@@ -1150,7 +1254,16 @@ impl StreamingDecoder {
1150
1254
  if actl.num_frames == 0 {
1151
1255
  return Ok(());
1152
1256
  }
1153
- self.info.as_mut().unwrap().animation_control = Some(actl);
1257
+
1258
+ // The spec also says that the number of frames and number of plays should be limited
1259
+ // to (2^31)-1. Same as the other condition we enforce it by ignoring the chunk.
1260
+ // Another option may be saturation which would lose us some frames but encourage
1261
+ // rather dubious handling.
1262
+ if actl.num_frames > 0x7FFFFFFF || actl.num_plays > 0x7FFFFFFF {
1263
+ return Ok(());
1264
+ }
1265
+
1266
+ info.animation_control = Some(actl);
1154
1267
  Ok(())
1155
1268
  }
1156
1269
  }
@@ -1163,8 +1276,6 @@ impl StreamingDecoder {
1163
1276
  FormatErrorInner::DuplicateChunk { kind: chunk::PLTE }.into(),
1164
1277
  ))
1165
1278
  } else {
1166
- self.limits
1167
- .reserve_bytes(self.current_chunk.raw_bytes.len())?;
1168
1279
  info.palette = Some(Cow::Owned(self.current_chunk.raw_bytes.clone()));
1169
1280
  Ok(())
1170
1281
  }
@@ -1197,8 +1308,6 @@ impl StreamingDecoder {
1197
1308
  } else {
1198
1309
  bit_depth
1199
1310
  };
1200
- self.limits
1201
- .reserve_bytes(self.current_chunk.raw_bytes.len())?;
1202
1311
  let vec = self.current_chunk.raw_bytes.clone();
1203
1312
  let len = vec.len();
1204
1313
 
@@ -1245,8 +1354,6 @@ impl StreamingDecoder {
1245
1354
  ));
1246
1355
  }
1247
1356
  let (color_type, bit_depth) = { (info.color_type, info.bit_depth as u8) };
1248
- self.limits
1249
- .reserve_bytes(self.current_chunk.raw_bytes.len())?;
1250
1357
  let mut vec = self.current_chunk.raw_bytes.clone();
1251
1358
  let len = vec.len();
1252
1359
  match color_type {
@@ -1842,7 +1949,7 @@ impl StreamingDecoder {
1842
1949
  let vec = self.current_chunk.raw_bytes.clone();
1843
1950
  if vec.len() != expected {
1844
1951
  return Err(DecodingError::Format(
1845
- FormatErrorInner::ChunkTooShort { kind: chunk::bKGD }.into(),
1952
+ FormatErrorInner::ChunkLengthWrong { kind: chunk::bKGD }.into(),
1846
1953
  ));
1847
1954
  }
1848
1955
 
@@ -2390,6 +2497,50 @@ mod tests {
2390
2497
  write_fctl(w, &fctl);
2391
2498
  }
2392
2499
 
2500
+ #[test]
2501
+ fn test_fdat_chunk_without_idat() {
2502
+ let png = {
2503
+ let width = 1;
2504
+ let mut png = Vec::new();
2505
+ write_png_sig(&mut png);
2506
+ write_rgba8_ihdr_with_width(&mut png, width);
2507
+ let image_data = generate_rgba8_with_width_and_height(width, width);
2508
+ write_actl(
2509
+ &mut png,
2510
+ &crate::AnimationControl {
2511
+ num_frames: 2,
2512
+ num_plays: 1,
2513
+ },
2514
+ );
2515
+ let mut fctl = crate::FrameControl {
2516
+ sequence_number: 0,
2517
+ width,
2518
+ height: width,
2519
+ ..Default::default()
2520
+ };
2521
+ write_fctl(&mut png, &fctl);
2522
+ fctl.sequence_number = 1;
2523
+ write_fctl(&mut png, &fctl);
2524
+ write_fdat(&mut png, 1, &image_data[..]);
2525
+ write_iend(&mut png);
2526
+ png
2527
+ };
2528
+ let decoder = Decoder::new(Cursor::new(&png));
2529
+ let Err(err) = decoder.read_info() else {
2530
+ panic!("Expected an error")
2531
+ };
2532
+ assert!(matches!(&err, DecodingError::Format(_)));
2533
+ assert_eq!(
2534
+ "Unexpected restart of ChunkType { type: fdAT, \
2535
+ critical: false, \
2536
+ private: true, \
2537
+ reserved: false, \
2538
+ safecopy: false \
2539
+ } chunk sequence",
2540
+ format!("{err}"),
2541
+ );
2542
+ }
2543
+
2393
2544
  #[test]
2394
2545
  fn test_fdat_chunk_payload_length_0() {
2395
2546
  let mut png = Vec::new();
@@ -3210,4 +3361,38 @@ mod tests {
3210
3361
  assert_eq!(info.width, SIZE);
3211
3362
  assert_eq!(info.uncompressed_latin1_text.len(), 0);
3212
3363
  }
3364
+
3365
+ /// This is a regression test for https://crbug.com/451710590.
3366
+ #[test]
3367
+ fn test_duplicate_actl_chunk() {
3368
+ let width = 16;
3369
+ let frame_data = generate_rgba8_with_width_and_height(width, width);
3370
+
3371
+ let mut png = Vec::new();
3372
+ write_png_sig(&mut png);
3373
+ write_rgba8_ihdr_with_width(&mut png, width);
3374
+ write_actl(
3375
+ &mut png,
3376
+ &crate::AnimationControl {
3377
+ num_frames: 2,
3378
+ num_plays: 123,
3379
+ },
3380
+ );
3381
+ write_actl(
3382
+ &mut png,
3383
+ &crate::AnimationControl {
3384
+ num_frames: 1, // <- should be ignored
3385
+ num_plays: 456,
3386
+ },
3387
+ );
3388
+ write_chunk(&mut png, b"IDAT", &frame_data);
3389
+ write_iend(&mut png);
3390
+
3391
+ let reader = Decoder::new(Cursor::new(png)).read_info().unwrap();
3392
+ let Some(actl) = reader.info().animation_control.as_ref() else {
3393
+ panic!("No `animation_control`?")
3394
+ };
3395
+ assert_eq!(actl.num_frames, 2);
3396
+ assert_eq!(actl.num_plays, 123);
3397
+ }
3213
3398
  }