html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,84 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- enum QualNameState {
11
- BeforeName,
12
- InName,
13
- AfterColon,
14
- }
15
-
16
- pub struct QualNameTokenizer<'a> {
17
- state: QualNameState,
18
- slice: &'a [u8],
19
- valid_index: Option<u32>,
20
- curr_ind: usize,
21
- }
22
-
23
- impl QualNameTokenizer<'_> {
24
- pub fn new(tag: &[u8]) -> QualNameTokenizer<'_> {
25
- QualNameTokenizer {
26
- state: QualNameState::BeforeName,
27
- slice: tag,
28
- valid_index: None,
29
- curr_ind: 0,
30
- }
31
- }
32
-
33
- pub fn run(&mut self) -> Option<u32> {
34
- if !self.slice.is_empty() {
35
- loop {
36
- if !self.step() {
37
- break;
38
- }
39
- }
40
- }
41
- self.valid_index
42
- }
43
-
44
- fn incr(&mut self) -> bool {
45
- if self.curr_ind + 1 < self.slice.len() {
46
- self.curr_ind += 1;
47
- return true;
48
- }
49
- false
50
- }
51
-
52
- fn step(&mut self) -> bool {
53
- match self.state {
54
- QualNameState::BeforeName => self.do_before_name(),
55
- QualNameState::InName => self.do_in_name(),
56
- QualNameState::AfterColon => self.do_after_colon(),
57
- }
58
- }
59
-
60
- fn do_before_name(&mut self) -> bool {
61
- if self.slice[self.curr_ind] == b':' {
62
- false
63
- } else {
64
- self.state = QualNameState::InName;
65
- self.incr()
66
- }
67
- }
68
-
69
- fn do_in_name(&mut self) -> bool {
70
- if self.slice[self.curr_ind] == b':' && self.curr_ind + 1 < self.slice.len() {
71
- self.valid_index = Some(self.curr_ind as u32);
72
- self.state = QualNameState::AfterColon;
73
- }
74
- self.incr()
75
- }
76
-
77
- fn do_after_colon(&mut self) -> bool {
78
- if self.slice[self.curr_ind] == b':' {
79
- self.valid_index = None;
80
- return false;
81
- }
82
- self.incr()
83
- }
84
- }
@@ -1,167 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- //! Tokenizer states.
11
-
12
- /// Specifies either the public or system identifier from a [Document Type Declaration] (DTD).
13
- ///
14
- /// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration
15
- #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
16
- pub enum DoctypeKind {
17
- /// The public identifier.
18
- Public,
19
- /// The system identifier.
20
- System,
21
- }
22
-
23
- /// Specifies the different states an XML tokenizer will assume during parsing.
24
- #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
25
- pub enum XmlState {
26
- /// The initial state of the parser.
27
- ///
28
- /// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the html parser,
29
- /// except null codepoints do not cause errors.
30
- Data,
31
- /// Indicates that the parser has found a `<` character and will try to parse a tag.
32
- TagState,
33
- /// Indicates that the parser has consumed the `/` of a closing tag, like `</foo>`.
34
- EndTagState,
35
- /// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of `</foo>`.
36
- EndTagName,
37
- /// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow.
38
- EndTagNameAfter,
39
- /// Indicates that the parser has started parsing a [processing instruction] (PI).
40
- ///
41
- /// This state is reached after the initial `?` character has been consumed.
42
- ///
43
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
44
- Pi,
45
- /// Indicates that the parser is currently parsing the target of a [processing instruction].
46
- ///
47
- /// For example, the target of `<?xml-stylesheet type="text/xsl" href="style.xsl"?>` is `xml-stylesheet`.
48
- ///
49
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
50
- PiTarget,
51
- /// Indicates that the parser has finished parsing the target of a [processing instruction].
52
- ///
53
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
54
- PiTargetAfter,
55
- /// Indicates that the parser is currently parsing the data of a [processing instruction].
56
- ///
57
- /// The "data" refers to everything between the target and the closing `?` character.
58
- ///
59
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
60
- PiData,
61
- /// Indicates that the parser has parsed the closing `?` of a [processing instruction].
62
- ///
63
- /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
64
- PiAfter,
65
- /// Indicates that the parser has parsed the initial `!` of a markup declaration.
66
- ///
67
- /// Examples of such declarations include `<!ENTITY chap1 SYSTEM "chap1.xml">` or `<!-- Comment -->`.
68
- MarkupDecl,
69
- /// Indicates that the parser has parsed the start of a comment (`<!--`).
70
- CommentStart,
71
- /// Indicates that the parser has parsed the start of a comment and a `-` directly after it.
72
- CommentStartDash,
73
- /// Indicates that the parser is currently parsing the data within a comment.
74
- Comment,
75
- /// Indicates that the parser has parsed a `<` character within a comment.
76
- CommentLessThan,
77
- /// Indicates that the parser has parsed `<!` within a comment.
78
- CommentLessThanBang,
79
- /// Indicates that the parser has parsed `<!-` within a comment.
80
- CommentLessThanBangDash,
81
- /// Indicates that the parser has parsed `<!--` within a comment.
82
- CommentLessThanBangDashDash,
83
- /// Indicates that the parser has parsed two `-` characters within a comment which may or may not
84
- /// be the beginning of the comment end (`-->`).
85
- CommentEnd,
86
- /// Indicates that the parser has parsed a `-` character within a comment which may or may not
87
- /// be the beginning of the comment end (`-->`).
88
- CommentEndDash,
89
- /// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the
90
- /// end of the comment. Comments in XML can be closed with `--!>`.
91
- CommentEndBang,
92
- /// Indicates that the parser has parsed the beginning of a CDATA section (`<![CDATA[`).
93
- Cdata,
94
- /// Indicates that the parser has parsed a `]` character within a CDATA section, which may be part of
95
- /// the end of the section (`]]>`).
96
- CdataBracket,
97
- /// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of
98
- /// the end of the section (`]]>`).
99
- CdataEnd,
100
- /// Indicates that the parser is currently parsing the name of a tag, such as `foo` in `<foo>`.
101
- TagName,
102
- /// Indicates that the parser has parsed the `/` of a self-closing tag, such as `<foo/>`.
103
- TagEmpty,
104
- /// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or
105
- /// a `>`.
106
- TagAttrNameBefore,
107
- /// Indicates that the parser is currently parsing the name of an attribute within a tag, such as
108
- /// `bar` in `<foo bar=baz>`.
109
- TagAttrName,
110
- /// Indicates that the parser has finished parsing the name of an attribute.
111
- TagAttrNameAfter,
112
- /// Indicates that the parser is about to parse the value of an attribute.
113
- TagAttrValueBefore,
114
- /// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in
115
- /// `<foo bar=baz>`.
116
- ///
117
- /// Includes information about how the value is quoted, because the quotes before and after the attribute
118
- /// value need to match.
119
- TagAttrValue(AttrValueKind),
120
- /// Indicates that the parser has parsed the beginning of a document type definition (`<!DOCTYPE`).
121
- Doctype,
122
- /// Indicates that the parser expects to parse the name of the document type definition next.
123
- BeforeDoctypeName,
124
- /// Indicates that the parser is currently parsing the name of a document type definition, such as
125
- /// `html` in `<!DOCTYPE html>`.
126
- DoctypeName,
127
- /// Indicates that the parser has finished parsing the name of the document type definition and now optionally
128
- /// expects either a public or a system identifier.
129
- AfterDoctypeName,
130
- /// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`).
131
- AfterDoctypeKeyword(DoctypeKind),
132
- /// Indicates that the parser is about to parse the value of a public or system identifier within
133
- /// a document type definition, such as `foo` in
134
- /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
135
- BeforeDoctypeIdentifier(DoctypeKind),
136
- /// Indicates that the parser is currently parsing the value of a public or system identifier
137
- /// that is surrounded by double quotes, such as `foo` in
138
- /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
139
- DoctypeIdentifierDoubleQuoted(DoctypeKind),
140
- /// Indicates that the parser is currently parsing the value of a public or system identifier
141
- /// that is surrounded by single quotes, such as `foo` in
142
- /// `<!DOCTYPE html PUBLIC 'foo' 'bar'>`.
143
- DoctypeIdentifierSingleQuoted(DoctypeKind),
144
- /// Indicates that the parser has finished parsing either a public or system identifier within a
145
- /// document type definition.
146
- AfterDoctypeIdentifier(DoctypeKind),
147
- /// Indicates that the parser has finished parsing a public identifier and now expects
148
- /// a system identifier.
149
- BetweenDoctypePublicAndSystemIdentifiers,
150
- /// Indicates that the parser is currently parsing an ill-formed document type definition, such as
151
- /// `<!DOCTYPE html what-is-this>`.
152
- BogusDoctype,
153
- /// Indicates that the parser is currently parsing an ill-formed comment, such as
154
- /// `<? this is not what a comment should look like! >`.
155
- BogusComment,
156
- }
157
-
158
- /// Specifies how an attribute value is quoted, if at all.
159
- #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
160
- pub enum AttrValueKind {
161
- /// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`.
162
- Unquoted,
163
- /// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`.
164
- SingleQuoted,
165
- /// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`.
166
- DoubleQuoted,
167
- }