html-to-markdown 2.24.6 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/ext/html-to-markdown-rb/native/Cargo.lock +3 -26
  4. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  5. data/lib/html_to_markdown/version.rb +1 -1
  6. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  7. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  8. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  9. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +53 -91
  10. data/rust-vendor/png/.cargo-checksum.json +1 -1
  11. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  12. data/rust-vendor/png/CHANGES.md +44 -0
  13. data/rust-vendor/png/Cargo.lock +124 -171
  14. data/rust-vendor/png/Cargo.toml +1 -1
  15. data/rust-vendor/png/Cargo.toml.orig +1 -1
  16. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  17. data/rust-vendor/png/benches/unfilter.rs +3 -3
  18. data/rust-vendor/png/src/adam7.rs +17 -10
  19. data/rust-vendor/png/src/common.rs +8 -8
  20. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  21. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  22. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  23. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  24. data/rust-vendor/png/src/encoder.rs +4 -2
  25. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  26. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  27. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  28. data/rust-vendor/png/src/filter/simd.rs +308 -0
  29. data/rust-vendor/png/src/lib.rs +1 -0
  30. metadata +7 -177
  31. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  32. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  33. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  34. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  35. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  36. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  37. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  38. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  39. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  40. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  41. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  42. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  43. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  44. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  45. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  46. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  47. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  48. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  49. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  50. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  51. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  52. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  53. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  54. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  55. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  56. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  57. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  58. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  59. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  60. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  61. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  62. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  63. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  64. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  65. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  66. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  67. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  68. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  69. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  70. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  71. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  72. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  153. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  154. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  155. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  156. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  157. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  158. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  159. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  160. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  161. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  162. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  163. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  164. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  165. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  166. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  167. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  168. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  169. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  170. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  171. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  172. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  173. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  174. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  175. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  176. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  177. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  178. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  179. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  180. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  181. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  182. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  183. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  184. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  185. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  186. data/rust-vendor/xml5ever/README.md +0 -72
  187. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  188. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  189. data/rust-vendor/xml5ever/examples/README.md +0 -223
  190. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  191. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  192. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  193. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  194. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  195. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  196. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  197. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  198. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  199. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  200. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  201. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  202. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  203. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,216 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- use crate::tree_builder::NamespaceMap;
11
- use crate::QualName;
12
- pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
13
- use std::io::{self, Write};
14
-
15
- #[derive(Clone)]
16
- /// Struct for setting serializer options.
17
- pub struct SerializeOpts {
18
- /// Serialize the root node? Default: ChildrenOnly
19
- pub traversal_scope: TraversalScope,
20
- }
21
-
22
- impl Default for SerializeOpts {
23
- fn default() -> SerializeOpts {
24
- SerializeOpts {
25
- traversal_scope: TraversalScope::ChildrenOnly(None),
26
- }
27
- }
28
- }
29
-
30
- /// Method for serializing generic node to a given writer.
31
- pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
32
- where
33
- Wr: Write,
34
- T: Serialize,
35
- {
36
- let mut ser = XmlSerializer::new(writer);
37
- node.serialize(&mut ser, opts.traversal_scope)
38
- }
39
-
40
- /// Struct used for serializing nodes into a text that other XML
41
- /// parses can read.
42
- ///
43
- /// Serializer contains a set of functions (start_elem, end_elem...)
44
- /// that make parsing nodes easier.
45
- pub struct XmlSerializer<Wr> {
46
- writer: Wr,
47
- namespace_stack: NamespaceMapStack,
48
- }
49
-
50
- #[derive(Debug)]
51
- struct NamespaceMapStack(Vec<NamespaceMap>);
52
-
53
- impl NamespaceMapStack {
54
- fn new() -> NamespaceMapStack {
55
- NamespaceMapStack(vec![])
56
- }
57
-
58
- fn push(&mut self, namespace: NamespaceMap) {
59
- self.0.push(namespace);
60
- }
61
-
62
- fn pop(&mut self) {
63
- self.0.pop();
64
- }
65
- }
66
-
67
- /// Writes given text into the Serializer, escaping it,
68
- /// depending on where the text is written inside the tag or attribute value.
69
- ///
70
- /// For example
71
- ///```text
72
- /// <tag>'&-quotes'</tag> becomes <tag>'&amp;-quotes'</tag>
73
- /// <tag = "'&-quotes'"> becomes <tag = "&apos;&amp;-quotes&apos;"
74
- ///```
75
- fn write_to_buf_escaped<W: Write>(writer: &mut W, text: &str, attr_mode: bool) -> io::Result<()> {
76
- for c in text.chars() {
77
- match c {
78
- '&' => writer.write_all(b"&amp;"),
79
- '\'' if attr_mode => writer.write_all(b"&apos;"),
80
- '"' if attr_mode => writer.write_all(b"&quot;"),
81
- '<' if !attr_mode => writer.write_all(b"&lt;"),
82
- '>' if !attr_mode => writer.write_all(b"&gt;"),
83
- c => writer.write_fmt(format_args!("{c}")),
84
- }?;
85
- }
86
- Ok(())
87
- }
88
-
89
- #[inline]
90
- fn write_qual_name<W: Write>(writer: &mut W, name: &QualName) -> io::Result<()> {
91
- if let Some(ref prefix) = name.prefix {
92
- writer.write_all(prefix.as_bytes())?;
93
- writer.write_all(b":")?;
94
- }
95
-
96
- writer.write_all(name.local.as_bytes())?;
97
- Ok(())
98
- }
99
-
100
- impl<Wr: Write> XmlSerializer<Wr> {
101
- /// Creates a new Serializier from a writer and given serialization options.
102
- pub fn new(writer: Wr) -> Self {
103
- XmlSerializer {
104
- writer,
105
- namespace_stack: NamespaceMapStack::new(),
106
- }
107
- }
108
-
109
- #[inline(always)]
110
- fn qual_name(&mut self, name: &QualName) -> io::Result<()> {
111
- self.find_or_insert_ns(name);
112
- write_qual_name(&mut self.writer, name)
113
- }
114
-
115
- #[inline(always)]
116
- fn qual_attr_name(&mut self, name: &QualName) -> io::Result<()> {
117
- self.find_or_insert_ns(name);
118
- write_qual_name(&mut self.writer, name)
119
- }
120
-
121
- fn find_uri(&self, name: &QualName) -> bool {
122
- let mut found = false;
123
- for stack in self.namespace_stack.0.iter().rev() {
124
- if let Some(Some(el)) = stack.get(&name.prefix) {
125
- found = *el == name.ns;
126
- break;
127
- }
128
- }
129
- found
130
- }
131
-
132
- fn find_or_insert_ns(&mut self, name: &QualName) {
133
- if (name.prefix.is_some() || !name.ns.is_empty()) && !self.find_uri(name) {
134
- if let Some(last_ns) = self.namespace_stack.0.last_mut() {
135
- last_ns.insert(name);
136
- }
137
- }
138
- }
139
- }
140
-
141
- impl<Wr: Write> Serializer for XmlSerializer<Wr> {
142
- /// Serializes given start element into text. Start element contains
143
- /// qualified name and an attributes iterator.
144
- fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
145
- where
146
- AttrIter: Iterator<Item = AttrRef<'a>>,
147
- {
148
- self.namespace_stack.push(NamespaceMap::empty());
149
-
150
- self.writer.write_all(b"<")?;
151
- self.qual_name(&name)?;
152
- if let Some(current_namespace) = self.namespace_stack.0.last() {
153
- for (prefix, url_opt) in current_namespace.get_scope_iter() {
154
- self.writer.write_all(b" xmlns")?;
155
- if let Some(ref prefix) = *prefix {
156
- self.writer.write_all(b":")?;
157
- self.writer.write_all(prefix.as_bytes())?;
158
- }
159
-
160
- self.writer.write_all(b"=\"")?;
161
- let url = if let Some(ref a) = *url_opt {
162
- a.as_bytes()
163
- } else {
164
- b""
165
- };
166
- self.writer.write_all(url)?;
167
- self.writer.write_all(b"\"")?;
168
- }
169
- }
170
- for (name, value) in attrs {
171
- self.writer.write_all(b" ")?;
172
- self.qual_attr_name(name)?;
173
- self.writer.write_all(b"=\"")?;
174
- write_to_buf_escaped(&mut self.writer, value, true)?;
175
- self.writer.write_all(b"\"")?;
176
- }
177
- self.writer.write_all(b">")?;
178
- Ok(())
179
- }
180
-
181
- /// Serializes given end element into text.
182
- fn end_elem(&mut self, name: QualName) -> io::Result<()> {
183
- self.namespace_stack.pop();
184
- self.writer.write_all(b"</")?;
185
- self.qual_name(&name)?;
186
- self.writer.write_all(b">")
187
- }
188
-
189
- /// Serializes comment into text.
190
- fn write_comment(&mut self, text: &str) -> io::Result<()> {
191
- self.writer.write_all(b"<!--")?;
192
- self.writer.write_all(text.as_bytes())?;
193
- self.writer.write_all(b"-->")
194
- }
195
-
196
- /// Serializes given doctype
197
- fn write_doctype(&mut self, name: &str) -> io::Result<()> {
198
- self.writer.write_all(b"<!DOCTYPE ")?;
199
- self.writer.write_all(name.as_bytes())?;
200
- self.writer.write_all(b">")
201
- }
202
-
203
- /// Serializes text for a node or an attributes.
204
- fn write_text(&mut self, text: &str) -> io::Result<()> {
205
- write_to_buf_escaped(&mut self.writer, text, false)
206
- }
207
-
208
- /// Serializes given processing instruction.
209
- fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
210
- self.writer.write_all(b"<?")?;
211
- self.writer.write_all(target.as_bytes())?;
212
- self.writer.write_all(b" ")?;
213
- self.writer.write_all(data.as_bytes())?;
214
- self.writer.write_all(b"?>")
215
- }
216
- }
@@ -1,456 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- use super::{TokenSink, XmlTokenizer};
11
- use crate::data;
12
- use crate::tendril::StrTendril;
13
- use log::debug;
14
- use markup5ever::buffer_queue::BufferQueue;
15
- use std::borrow::Cow::{self, Borrowed};
16
- use std::char::from_u32;
17
-
18
- use self::State::*;
19
- pub use self::Status::*;
20
-
21
- //§ tokenizing-character-references
22
- pub struct CharRef {
23
- /// The resulting character(s)
24
- pub chars: [char; 2],
25
-
26
- /// How many slots in `chars` are valid?
27
- pub num_chars: u8,
28
- }
29
-
30
- pub enum Status {
31
- Stuck,
32
- Progress,
33
- Done,
34
- }
35
-
36
- #[derive(Debug)]
37
- enum State {
38
- Begin,
39
- Octothorpe,
40
- Numeric(u32), // base
41
- NumericSemicolon,
42
- Named,
43
- BogusName,
44
- }
45
-
46
- pub struct CharRefTokenizer {
47
- state: State,
48
- addnl_allowed: Option<char>,
49
- result: Option<CharRef>,
50
-
51
- num: u32,
52
- num_too_big: bool,
53
- seen_digit: bool,
54
- hex_marker: Option<char>,
55
-
56
- name_buf_opt: Option<StrTendril>,
57
- name_match: Option<(u32, u32)>,
58
- name_len: usize,
59
- }
60
-
61
- impl CharRefTokenizer {
62
- // NB: We assume that we have an additional allowed character iff we're
63
- // tokenizing in an attribute value.
64
- pub fn new(addnl_allowed: Option<char>) -> CharRefTokenizer {
65
- CharRefTokenizer {
66
- state: Begin,
67
- addnl_allowed,
68
- result: None,
69
- num: 0,
70
- num_too_big: false,
71
- seen_digit: false,
72
- hex_marker: None,
73
- name_buf_opt: None,
74
- name_match: None,
75
- name_len: 0,
76
- }
77
- }
78
-
79
- // A CharRefTokenizer can only tokenize one character reference,
80
- // so this method consumes the tokenizer.
81
- pub fn get_result(self) -> CharRef {
82
- self.result.expect("get_result called before done")
83
- }
84
-
85
- fn name_buf(&self) -> &StrTendril {
86
- self.name_buf_opt
87
- .as_ref()
88
- .expect("name_buf missing in named character reference")
89
- }
90
-
91
- fn name_buf_mut(&mut self) -> &mut StrTendril {
92
- self.name_buf_opt
93
- .as_mut()
94
- .expect("name_buf missing in named character reference")
95
- }
96
-
97
- fn finish_none(&mut self) -> Status {
98
- self.result = Some(CharRef {
99
- chars: ['\0', '\0'],
100
- num_chars: 0,
101
- });
102
- Done
103
- }
104
-
105
- fn finish_one(&mut self, c: char) -> Status {
106
- self.result = Some(CharRef {
107
- chars: [c, '\0'],
108
- num_chars: 1,
109
- });
110
- Done
111
- }
112
- }
113
-
114
- impl CharRefTokenizer {
115
- pub fn step<Sink: TokenSink>(
116
- &mut self,
117
- tokenizer: &XmlTokenizer<Sink>,
118
- input: &BufferQueue,
119
- ) -> Status {
120
- if self.result.is_some() {
121
- return Done;
122
- }
123
-
124
- debug!("char ref tokenizer stepping in state {:?}", self.state);
125
- match self.state {
126
- Begin => self.do_begin(tokenizer, input),
127
- Octothorpe => self.do_octothorpe(tokenizer, input),
128
- Numeric(base) => self.do_numeric(tokenizer, base, input),
129
- NumericSemicolon => self.do_numeric_semicolon(tokenizer, input),
130
- Named => self.do_named(tokenizer, input),
131
- BogusName => self.do_bogus_name(tokenizer, input),
132
- }
133
- }
134
-
135
- fn do_begin<Sink: TokenSink>(
136
- &mut self,
137
- tokenizer: &XmlTokenizer<Sink>,
138
- input: &BufferQueue,
139
- ) -> Status {
140
- match tokenizer.peek(input) {
141
- Some('\t' | '\n' | '\x0C' | ' ' | '<' | '&') => self.finish_none(),
142
- Some(c) if Some(c) == self.addnl_allowed => self.finish_none(),
143
- Some('#') => {
144
- tokenizer.discard_char(input);
145
- self.state = Octothorpe;
146
- Progress
147
- },
148
- Some(_) => {
149
- self.state = Named;
150
- self.name_buf_opt = Some(StrTendril::new());
151
- Progress
152
- },
153
- None => Stuck,
154
- }
155
- }
156
-
157
- fn do_octothorpe<Sink: TokenSink>(
158
- &mut self,
159
- tokenizer: &XmlTokenizer<Sink>,
160
- input: &BufferQueue,
161
- ) -> Status {
162
- match tokenizer.peek(input) {
163
- Some(c @ ('x' | 'X')) => {
164
- tokenizer.discard_char(input);
165
- self.hex_marker = Some(c);
166
- self.state = Numeric(16);
167
- },
168
- Some(_) => {
169
- self.hex_marker = None;
170
- self.state = Numeric(10);
171
- },
172
- None => return Stuck,
173
- }
174
- Progress
175
- }
176
-
177
- fn do_numeric<Sink: TokenSink>(
178
- &mut self,
179
- tokenizer: &XmlTokenizer<Sink>,
180
- base: u32,
181
- input: &BufferQueue,
182
- ) -> Status {
183
- let Some(c) = tokenizer.peek(input) else {
184
- return Stuck;
185
- };
186
- match c.to_digit(base) {
187
- Some(n) => {
188
- tokenizer.discard_char(input);
189
- self.num = self.num.wrapping_mul(base);
190
- if self.num > 0x10FFFF {
191
- // We might overflow, and the character is definitely invalid.
192
- // We still parse digits and semicolon, but don't use the result.
193
- self.num_too_big = true;
194
- }
195
- self.num = self.num.wrapping_add(n);
196
- self.seen_digit = true;
197
- Progress
198
- },
199
-
200
- None if !self.seen_digit => self.unconsume_numeric(tokenizer, input),
201
-
202
- None => {
203
- self.state = NumericSemicolon;
204
- Progress
205
- },
206
- }
207
- }
208
-
209
- fn do_numeric_semicolon<Sink: TokenSink>(
210
- &mut self,
211
- tokenizer: &XmlTokenizer<Sink>,
212
- input: &BufferQueue,
213
- ) -> Status {
214
- match tokenizer.peek(input) {
215
- Some(';') => tokenizer.discard_char(input),
216
- Some(_) => tokenizer.emit_error(Borrowed(
217
- "Semicolon missing after numeric character reference",
218
- )),
219
- None => return Stuck,
220
- };
221
- self.finish_numeric(tokenizer)
222
- }
223
-
224
- fn unconsume_numeric<Sink: TokenSink>(
225
- &mut self,
226
- tokenizer: &XmlTokenizer<Sink>,
227
- input: &BufferQueue,
228
- ) -> Status {
229
- let mut unconsume = StrTendril::from_char('#');
230
- if let Some(c) = self.hex_marker {
231
- unconsume.push_char(c);
232
- }
233
-
234
- tokenizer.unconsume(input, unconsume);
235
- tokenizer.emit_error(Borrowed("Numeric character reference without digits"));
236
- self.finish_none()
237
- }
238
-
239
- fn finish_numeric<Sink: TokenSink>(&mut self, tokenizer: &XmlTokenizer<Sink>) -> Status {
240
- fn conv(n: u32) -> char {
241
- from_u32(n).expect("invalid char missed by error handling cases")
242
- }
243
-
244
- let (c, error) = match self.num {
245
- n if (n > 0x10FFFF) || self.num_too_big => ('\u{fffd}', true),
246
- 0x00 | 0xD800..=0xDFFF => ('\u{fffd}', true),
247
-
248
- 0x80..=0x9F => match data::C1_REPLACEMENTS[(self.num - 0x80) as usize] {
249
- Some(c) => (c, true),
250
- None => (conv(self.num), true),
251
- },
252
-
253
- 0x01..=0x08 | 0x0B | 0x0D..=0x1F | 0x7F | 0xFDD0..=0xFDEF => (conv(self.num), true),
254
-
255
- n if (n & 0xFFFE) == 0xFFFE => (conv(n), true),
256
-
257
- n => (conv(n), false),
258
- };
259
-
260
- if error {
261
- let msg = if tokenizer.opts.exact_errors {
262
- Cow::from(format!(
263
- "Invalid numeric character reference value 0x{:06X}",
264
- self.num
265
- ))
266
- } else {
267
- Cow::from("Invalid numeric character reference")
268
- };
269
- tokenizer.emit_error(msg);
270
- }
271
-
272
- self.finish_one(c)
273
- }
274
-
275
- fn do_named<Sink: TokenSink>(
276
- &mut self,
277
- tokenizer: &XmlTokenizer<Sink>,
278
- input: &BufferQueue,
279
- ) -> Status {
280
- let Some(c) = tokenizer.get_char(input) else {
281
- return Stuck;
282
- };
283
- self.name_buf_mut().push_char(c);
284
- match data::NAMED_ENTITIES.get(&self.name_buf()[..]) {
285
- // We have either a full match or a prefix of one.
286
- Some(&m) => {
287
- if m.0 != 0 {
288
- // We have a full match, but there might be a longer one to come.
289
- self.name_match = Some(m);
290
- self.name_len = self.name_buf().len();
291
- }
292
- // Otherwise we just have a prefix match.
293
- Progress
294
- },
295
-
296
- // Can't continue the match.
297
- None => self.finish_named(tokenizer, Some(c), input),
298
- }
299
- }
300
-
301
- fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &XmlTokenizer<Sink>) {
302
- let msg = if tokenizer.opts.exact_errors {
303
- Cow::from(format!("Invalid character reference &{}", self.name_buf()))
304
- } else {
305
- Cow::from("Invalid character reference")
306
- };
307
- tokenizer.emit_error(msg);
308
- }
309
-
310
- fn unconsume_name<Sink: TokenSink>(
311
- &mut self,
312
- tokenizer: &XmlTokenizer<Sink>,
313
- input: &BufferQueue,
314
- ) {
315
- tokenizer.unconsume(input, self.name_buf_opt.take().unwrap());
316
- }
317
-
318
- fn finish_named<Sink: TokenSink>(
319
- &mut self,
320
- tokenizer: &XmlTokenizer<Sink>,
321
- end_char: Option<char>,
322
- input: &BufferQueue,
323
- ) -> Status {
324
- match self.name_match {
325
- None => {
326
- match end_char {
327
- Some(c) if c.is_ascii_alphanumeric() => {
328
- // Keep looking for a semicolon, to determine whether
329
- // we emit a parse error.
330
- self.state = BogusName;
331
- return Progress;
332
- },
333
-
334
- // Check length because &; is not a parse error.
335
- Some(';') if self.name_buf().len() > 1 => self.emit_name_error(tokenizer),
336
-
337
- _ => (),
338
- }
339
- self.unconsume_name(tokenizer, input);
340
- self.finish_none()
341
- },
342
-
343
- Some((c1, c2)) => {
344
- // We have a complete match, but we may have consumed
345
- // additional characters into self.name_buf. Usually
346
- // at least one, but several in cases like
347
- //
348
- // &not => match for U+00AC
349
- // &noti => valid prefix for &notin
350
- // &notit => can't continue match
351
-
352
- let name_len = self.name_len;
353
- assert!(name_len > 0);
354
- let last_matched = self.name_buf()[name_len - 1..].chars().next().unwrap();
355
-
356
- // There might not be a next character after the match, if
357
- // we had a full match and then hit EOF.
358
- let next_after = if name_len == self.name_buf().len() {
359
- None
360
- } else {
361
- Some(self.name_buf()[name_len..].chars().next().unwrap())
362
- };
363
-
364
- // "If the character reference is being consumed as part of an
365
- // attribute, and the last character matched is not a U+003B
366
- // SEMICOLON character (;), and the next character is either a
367
- // U+003D EQUALS SIGN character (=) or an alphanumeric ASCII
368
- // character, then, for historical reasons, all the characters
369
- // that were matched after the U+0026 AMPERSAND character (&)
370
- // must be unconsumed, and nothing is returned. However, if
371
- // this next character is in fact a U+003D EQUALS SIGN
372
- // character (=), then this is a parse error"
373
-
374
- let unconsume_all = match (self.addnl_allowed, last_matched, next_after) {
375
- (_, ';', _) => false,
376
- (Some(_), _, Some('=')) => {
377
- tokenizer.emit_error(Borrowed(
378
- "Equals sign after character reference in attribute",
379
- ));
380
- true
381
- },
382
- (Some(_), _, Some(c)) if c.is_ascii_alphanumeric() => true,
383
- _ => {
384
- tokenizer.emit_error(Borrowed(
385
- "Character reference does not end with semicolon",
386
- ));
387
- false
388
- },
389
- };
390
-
391
- if unconsume_all {
392
- self.unconsume_name(tokenizer, input);
393
- self.finish_none()
394
- } else {
395
- tokenizer
396
- .unconsume(input, StrTendril::from_slice(&self.name_buf()[name_len..]));
397
- self.result = Some(CharRef {
398
- chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()],
399
- num_chars: if c2 == 0 { 1 } else { 2 },
400
- });
401
- Done
402
- }
403
- },
404
- }
405
- }
406
-
407
- fn do_bogus_name<Sink: TokenSink>(
408
- &mut self,
409
- tokenizer: &XmlTokenizer<Sink>,
410
- input: &BufferQueue,
411
- ) -> Status {
412
- let Some(c) = tokenizer.get_char(input) else {
413
- return Stuck;
414
- };
415
- self.name_buf_mut().push_char(c);
416
- match c {
417
- _ if c.is_ascii_alphanumeric() => return Progress,
418
- ';' => self.emit_name_error(tokenizer),
419
- _ => (),
420
- }
421
- self.unconsume_name(tokenizer, input);
422
- self.finish_none()
423
- }
424
-
425
- pub fn end_of_file<Sink: TokenSink>(
426
- &mut self,
427
- tokenizer: &XmlTokenizer<Sink>,
428
- input: &BufferQueue,
429
- ) {
430
- while self.result.is_none() {
431
- match self.state {
432
- Begin => drop(self.finish_none()),
433
-
434
- Numeric(_) if !self.seen_digit => drop(self.unconsume_numeric(tokenizer, input)),
435
-
436
- Numeric(_) | NumericSemicolon => {
437
- tokenizer.emit_error(Borrowed("EOF in numeric character reference"));
438
- self.finish_numeric(tokenizer);
439
- },
440
-
441
- Named => drop(self.finish_named(tokenizer, None, input)),
442
-
443
- BogusName => {
444
- self.unconsume_name(tokenizer, input);
445
- self.finish_none();
446
- },
447
-
448
- Octothorpe => {
449
- tokenizer.unconsume(input, StrTendril::from_slice("#"));
450
- tokenizer.emit_error(Borrowed("EOF after '#' in character reference"));
451
- self.finish_none();
452
- },
453
- }
454
- }
455
- }
456
- }