html-to-markdown 2.24.6 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/ext/html-to-markdown-rb/native/Cargo.lock +3 -26
  4. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  5. data/lib/html_to_markdown/version.rb +1 -1
  6. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  7. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  8. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  9. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +53 -91
  10. data/rust-vendor/png/.cargo-checksum.json +1 -1
  11. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  12. data/rust-vendor/png/CHANGES.md +44 -0
  13. data/rust-vendor/png/Cargo.lock +124 -171
  14. data/rust-vendor/png/Cargo.toml +1 -1
  15. data/rust-vendor/png/Cargo.toml.orig +1 -1
  16. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  17. data/rust-vendor/png/benches/unfilter.rs +3 -3
  18. data/rust-vendor/png/src/adam7.rs +17 -10
  19. data/rust-vendor/png/src/common.rs +8 -8
  20. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  21. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  22. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  23. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  24. data/rust-vendor/png/src/encoder.rs +4 -2
  25. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  26. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  27. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  28. data/rust-vendor/png/src/filter/simd.rs +308 -0
  29. data/rust-vendor/png/src/lib.rs +1 -0
  30. metadata +7 -177
  31. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  32. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  33. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  34. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  35. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  36. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  37. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  38. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  39. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  40. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  41. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  42. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  43. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  44. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  45. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  46. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  47. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  48. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  49. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  50. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  51. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  52. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  53. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  54. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  55. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  56. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  57. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  58. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  59. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  60. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  61. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  62. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  63. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  64. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  65. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  66. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  67. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  68. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  69. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  70. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  71. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  72. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  153. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  154. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  155. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  156. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  157. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  158. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  159. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  160. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  161. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  162. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  163. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  164. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  165. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  166. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  167. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  168. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  169. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  170. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  171. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  172. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  173. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  174. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  175. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  176. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  177. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  178. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  179. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  180. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  181. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  182. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  183. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  184. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  185. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  186. data/rust-vendor/xml5ever/README.md +0 -72
  187. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  188. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  189. data/rust-vendor/xml5ever/examples/README.md +0 -223
  190. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  191. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  192. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  193. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  194. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  195. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  196. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  197. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  198. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  199. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  200. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  201. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  202. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  203. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,84 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- enum QualNameState {
11
- BeforeName,
12
- InName,
13
- AfterColon,
14
- }
15
-
16
- pub struct QualNameTokenizer<'a> {
17
- state: QualNameState,
18
- slice: &'a [u8],
19
- valid_index: Option<u32>,
20
- curr_ind: usize,
21
- }
22
-
23
- impl QualNameTokenizer<'_> {
24
- pub fn new(tag: &[u8]) -> QualNameTokenizer<'_> {
25
- QualNameTokenizer {
26
- state: QualNameState::BeforeName,
27
- slice: tag,
28
- valid_index: None,
29
- curr_ind: 0,
30
- }
31
- }
32
-
33
- pub fn run(&mut self) -> Option<u32> {
34
- if !self.slice.is_empty() {
35
- loop {
36
- if !self.step() {
37
- break;
38
- }
39
- }
40
- }
41
- self.valid_index
42
- }
43
-
44
- fn incr(&mut self) -> bool {
45
- if self.curr_ind + 1 < self.slice.len() {
46
- self.curr_ind += 1;
47
- return true;
48
- }
49
- false
50
- }
51
-
52
- fn step(&mut self) -> bool {
53
- match self.state {
54
- QualNameState::BeforeName => self.do_before_name(),
55
- QualNameState::InName => self.do_in_name(),
56
- QualNameState::AfterColon => self.do_after_colon(),
57
- }
58
- }
59
-
60
- fn do_before_name(&mut self) -> bool {
61
- if self.slice[self.curr_ind] == b':' {
62
- false
63
- } else {
64
- self.state = QualNameState::InName;
65
- self.incr()
66
- }
67
- }
68
-
69
- fn do_in_name(&mut self) -> bool {
70
- if self.slice[self.curr_ind] == b':' && self.curr_ind + 1 < self.slice.len() {
71
- self.valid_index = Some(self.curr_ind as u32);
72
- self.state = QualNameState::AfterColon;
73
- }
74
- self.incr()
75
- }
76
-
77
- fn do_after_colon(&mut self) -> bool {
78
- if self.slice[self.curr_ind] == b':' {
79
- self.valid_index = None;
80
- return false;
81
- }
82
- self.incr()
83
- }
84
- }
@@ -1,167 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- //! Tokenizer states.
11
-
12
- /// Specifies either the public or system identifier from a [Document Type Declaration] (DTD).
13
- ///
14
- /// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration
15
- #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
16
- pub enum DoctypeKind {
17
- /// The public identifier.
18
- Public,
19
- /// The system identifier.
20
- System,
21
- }
22
-
23
- /// Specifies the different states an XML tokenizer will assume during parsing.
24
- #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
25
- pub enum XmlState {
26
- /// The initial state of the parser.
27
- ///
28
- /// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the html parser,
29
- /// except null codepoints do not cause errors.
30
- Data,
31
- /// Indicates that the parser has found a `<` character and will try to parse a tag.
32
- TagState,
33
- /// Indicates that the parser has consumed the `/` of a closing tag, like `</foo>`.
34
- EndTagState,
35
- /// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of `</foo>`.
36
- EndTagName,
37
- /// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow.
38
- EndTagNameAfter,
39
- /// Indicates that the parser has started parsing a [processing instruction] (PI).
40
- ///
41
- /// This state is reached after the initial `?` character has been consumed.
42
- ///
43
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
44
- Pi,
45
- /// Indicates that the parser is currently parsing the target of a [processing instruction].
46
- ///
47
- /// For example, the target of `<?xml-stylesheet type="text/xsl" href="style.xsl"?>` is `xml-stylesheet`.
48
- ///
49
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
50
- PiTarget,
51
- /// Indicates that the parser has finished parsing the target of a [processing instruction].
52
- ///
53
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
54
- PiTargetAfter,
55
- /// Indicates that the parser is currently parsing the data of a [processing instruction].
56
- ///
57
- /// The "data" refers to everything between the target and the closing `?` character.
58
- ///
59
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
60
- PiData,
61
- /// Indicates that the parser has parsed the closing `?` of a [processing instruction].
62
- ///
63
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
64
- PiAfter,
65
- /// Indicates that the parser has parsed the initial `!` of a markup declaration.
66
- ///
67
- /// Examples of such declarations include `<!ENTITY chap1 SYSTEM "chap1.xml">` or `<!-- Comment -->`.
68
- MarkupDecl,
69
- /// Indicates that the parser has parsed the start of a comment (`<!--`).
70
- CommentStart,
71
- /// Indicates that the parser has parsed the start of a comment and a `-` directly after it.
72
- CommentStartDash,
73
- /// Indicates that the parser is currently parsing the data within a comment.
74
- Comment,
75
- /// Indicates that the parser has parsed a `<` character within a comment.
76
- CommentLessThan,
77
- /// Indicates that the parser has parsed `<!` within a comment.
78
- CommentLessThanBang,
79
- /// Indicates that the parser has parsed `<!-` within a comment.
80
- CommentLessThanBangDash,
81
- /// Indicates that the parser has parsed `<!--` within a comment.
82
- CommentLessThanBangDashDash,
83
- /// Indicates that the parser has parsed two `-` characters within a comment which may or may not
84
- /// be the beginning of the comment end (`-->`).
85
- CommentEnd,
86
- /// Indicates that the parser has parsed a `-` character within a comment which may or may not
87
- /// be the beginning of the comment end (`-->`).
88
- CommentEndDash,
89
- /// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the
90
- /// end of the comment. Comments in XML can be closed with `--!>`.
91
- CommentEndBang,
92
- /// Indicates that the parser has parsed the beginning of a CDATA section (`<![CDATA[`).
93
- Cdata,
94
- /// Indicates that the parser has parsed a `]` character within a CDATA section, which may be part of
95
- /// the end of the section (`]]>`).
96
- CdataBracket,
97
- /// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of
98
- /// the end of the section (`]]>`).
99
- CdataEnd,
100
- /// Indicates that the parser is currently parsing the name of a tag, such as `foo` in `<foo>`.
101
- TagName,
102
- /// Indicates that the parser has parsed the `/` of a self-closing tag, such as `<foo/>`.
103
- TagEmpty,
104
- /// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or
105
- /// a `>`.
106
- TagAttrNameBefore,
107
- /// Indicates that the parser is currently parsing the name of an attribute within a tag, such as
108
- /// `bar` in `<foo bar=baz>`.
109
- TagAttrName,
110
- /// Indicates that the parser has finished parsing the name of an attribute.
111
- TagAttrNameAfter,
112
- /// Indicates that the parser is about to parse the value of an attribute.
113
- TagAttrValueBefore,
114
- /// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in
115
- /// `<foo bar=baz>`.
116
- ///
117
- /// Includes information about how the value is quoted, because the quotes before and after the attribute
118
- /// value need to match.
119
- TagAttrValue(AttrValueKind),
120
- /// Indicates that the parser has parsed the beginning of a document type definition (`<!DOCTYPE`).
121
- Doctype,
122
- /// Indicates that the parser expects to parse the name of the document type definition next.
123
- BeforeDoctypeName,
124
- /// Indicates that the parser is currently parsing the name of a document type definition, such as
125
- /// `html` in `<!DOCTYPE html>`.
126
- DoctypeName,
127
- /// Indicates that the parser has finished parsing the name of the document type definition and now optionally
128
- /// expects either a public or a system identifier.
129
- AfterDoctypeName,
130
- /// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`).
131
- AfterDoctypeKeyword(DoctypeKind),
132
- /// Indicates that the parser is about to parse the value of a public or system identifier within
133
- /// a document type definition, such as `foo` in
134
- /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
135
- BeforeDoctypeIdentifier(DoctypeKind),
136
- /// Indicates that the parser is currently parsing the value of a public or system identifier
137
- /// that is surrounded by double quotes, such as `foo` in
138
- /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
139
- DoctypeIdentifierDoubleQuoted(DoctypeKind),
140
- /// Indicates that the parser is currently parsing the value of a public or system identifier
141
- /// that is surrounded by single quotes, such as `foo` in
142
- /// `<!DOCTYPE html PUBLIC 'foo' 'bar'>`.
143
- DoctypeIdentifierSingleQuoted(DoctypeKind),
144
- /// Indicates that the parser has finished parsing either a public or system identifier within a
145
- /// document type definition.
146
- AfterDoctypeIdentifier(DoctypeKind),
147
- /// Indicates that the parser has finished parsing a public identifier and now expects
148
- /// a system identifier.
149
- BetweenDoctypePublicAndSystemIdentifiers,
150
- /// Indicates that the parser is currently parsing an ill-formed document type definition, such as
151
- /// `<!DOCTYPE html what-is-this>`.
152
- BogusDoctype,
153
- /// Indicates that the parser is currently parsing an ill-formed comment, such as
154
- /// `<? this is not what a comment should look like! >`.
155
- BogusComment,
156
- }
157
-
158
- /// Specifies how an attribute value is quoted, if at all.
159
- #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
160
- pub enum AttrValueKind {
161
- /// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`.
162
- Unquoted,
163
- /// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`.
164
- SingleQuoted,
165
- /// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`.
166
- DoubleQuoted,
167
- }