html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,298 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- extern crate markup5ever_rcdom as rcdom;
11
- #[macro_use]
12
- extern crate html5ever;
13
-
14
- mod foreach_html5lib_test;
15
- use foreach_html5lib_test::foreach_html5lib_test;
16
-
17
- use std::collections::{HashMap, HashSet};
18
- use std::ffi::OsStr;
19
- use std::io::BufRead;
20
- use std::path::Path;
21
- use std::{fs, io, iter, mem};
22
-
23
- use html5ever::tendril::{StrTendril, TendrilSink};
24
- use html5ever::{parse_document, parse_fragment, ParseOpts};
25
- use html5ever::{LocalName, QualName};
26
- use rcdom::{Handle, NodeData, RcDom};
27
- use util::runner::{run_all, Test};
28
-
29
- mod util {
30
- pub mod runner;
31
- }
32
-
33
- fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
34
- let mut tests = vec![];
35
- let mut test = HashMap::new();
36
- let mut key: Option<String> = None;
37
- let mut val = String::new();
38
-
39
- macro_rules! finish_val ( () => (
40
- match key.take() {
41
- None => (),
42
- Some(key) => {
43
- assert!(test.insert(key, mem::take(&mut val)).is_none());
44
- }
45
- }
46
- ));
47
-
48
- macro_rules! finish_test ( () => (
49
- if !test.is_empty() {
50
- tests.push(mem::take(&mut test));
51
- }
52
- ));
53
-
54
- loop {
55
- match lines.next() {
56
- None => break,
57
- Some(line) => {
58
- if let Some(rest) = line.strip_prefix('#') {
59
- finish_val!();
60
- if line == "#data" {
61
- finish_test!();
62
- }
63
- key = Some(rest.to_owned());
64
- } else {
65
- val.push_str(&line);
66
- val.push('\n');
67
- }
68
- },
69
- }
70
- }
71
-
72
- finish_val!();
73
- finish_test!();
74
- tests
75
- }
76
-
77
- fn serialize(buf: &mut String, indent: usize, handle: Handle) {
78
- buf.push('|');
79
- buf.extend(iter::repeat_n(" ", indent));
80
-
81
- let node = handle;
82
- match node.data {
83
- NodeData::Document => panic!("should not reach Document"),
84
-
85
- NodeData::Doctype {
86
- ref name,
87
- ref public_id,
88
- ref system_id,
89
- } => {
90
- buf.push_str("<!DOCTYPE ");
91
- buf.push_str(name);
92
- if !public_id.is_empty() || !system_id.is_empty() {
93
- buf.push_str(&format!(" \"{public_id}\" \"{system_id}\""));
94
- }
95
- buf.push_str(">\n");
96
- },
97
-
98
- NodeData::Text { ref contents } => {
99
- buf.push('"');
100
- buf.push_str(&contents.borrow());
101
- buf.push_str("\"\n");
102
- },
103
-
104
- NodeData::Comment { ref contents } => {
105
- buf.push_str("<!-- ");
106
- buf.push_str(contents);
107
- buf.push_str(" -->\n");
108
- },
109
-
110
- NodeData::Element {
111
- ref name,
112
- ref attrs,
113
- ..
114
- } => {
115
- buf.push('<');
116
- match name.ns {
117
- ns!(svg) => buf.push_str("svg "),
118
- ns!(mathml) => buf.push_str("math "),
119
- _ => (),
120
- }
121
- buf.push_str(&name.local);
122
- buf.push_str(">\n");
123
-
124
- let mut attrs = attrs.borrow().clone();
125
- attrs.sort_by(|x, y| x.name.local.cmp(&y.name.local));
126
- // FIXME: sort by UTF-16 code unit
127
-
128
- for attr in attrs.into_iter() {
129
- buf.push('|');
130
- buf.extend(iter::repeat_n(" ", indent + 2));
131
- match attr.name.ns {
132
- ns!(xlink) => buf.push_str("xlink "),
133
- ns!(xml) => buf.push_str("xml "),
134
- ns!(xmlns) => buf.push_str("xmlns "),
135
- _ => (),
136
- }
137
- buf.push_str(&format!("{}=\"{}\"\n", attr.name.local, attr.value));
138
- }
139
- },
140
-
141
- NodeData::ProcessingInstruction { .. } => unreachable!(),
142
- }
143
-
144
- for child in node.children.borrow().iter() {
145
- serialize(buf, indent + 2, child.clone());
146
- }
147
-
148
- if let NodeData::Element {
149
- ref template_contents,
150
- ..
151
- } = node.data
152
- {
153
- if let Some(ref content) = &*template_contents.borrow() {
154
- buf.push('|');
155
- buf.extend(iter::repeat_n(" ", indent + 2));
156
- buf.push_str("content\n");
157
- for child in content.children.borrow().iter() {
158
- serialize(buf, indent + 4, child.clone());
159
- }
160
- }
161
- }
162
- }
163
-
164
- fn make_test(
165
- tests: &mut Vec<Test>,
166
- ignores: &HashSet<String>,
167
- filename: &str,
168
- idx: usize,
169
- fields: HashMap<String, String>,
170
- ) {
171
- let scripting_flags = &[false, true];
172
- let scripting_flags = if fields.contains_key("script-off") {
173
- &scripting_flags[0..1]
174
- } else if fields.contains_key("script-on") {
175
- &scripting_flags[1..2]
176
- } else {
177
- &scripting_flags[0..2]
178
- };
179
- let name = format!("tb: {filename}-{idx}");
180
- for scripting_enabled in scripting_flags {
181
- let test = make_test_desc_with_scripting_flag(ignores, &name, &fields, *scripting_enabled);
182
- tests.push(test);
183
- }
184
- }
185
-
186
- fn make_test_desc_with_scripting_flag(
187
- ignores: &HashSet<String>,
188
- name: &str,
189
- fields: &HashMap<String, String>,
190
- scripting_enabled: bool,
191
- ) -> Test {
192
- let get_field = |key| {
193
- let field = fields.get(key).expect("missing field");
194
- field.trim_end_matches('\n').to_string()
195
- };
196
-
197
- let mut data = fields.get("data").expect("missing data").to_string();
198
- data.pop();
199
- let expected = get_field("document");
200
- let context = fields
201
- .get("document-fragment")
202
- .map(|field| context_name(field.trim_end_matches('\n')));
203
- let skip = ignores.contains(name);
204
- let mut name = name.to_owned();
205
- if scripting_enabled {
206
- name.push_str(" (scripting enabled)");
207
- } else {
208
- name.push_str(" (scripting disabled)");
209
- };
210
-
211
- Test {
212
- name,
213
- skip,
214
- test: Box::new(move || {
215
- // Do this here because Tendril isn't Send.
216
- let data = StrTendril::from_slice(&data);
217
- let mut opts: ParseOpts = Default::default();
218
- opts.tree_builder.scripting_enabled = scripting_enabled;
219
- let mut result = String::new();
220
- match context {
221
- None => {
222
- let dom = parse_document(RcDom::default(), opts).one(data.clone());
223
- for child in dom.document.children.borrow().iter() {
224
- serialize(&mut result, 1, child.clone());
225
- }
226
- },
227
- Some(ref context) => {
228
- let dom = parse_fragment(RcDom::default(), opts, context.clone(), vec![], true)
229
- .one(data.clone());
230
- // fragment case: serialize children of the html element
231
- // rather than children of the document
232
- let doc = &dom.document;
233
- let root = &doc.children.borrow()[0];
234
- for child in root.children.borrow().iter() {
235
- serialize(&mut result, 1, child.clone());
236
- }
237
- },
238
- };
239
- let len = result.len();
240
- result.truncate(len - 1); // drop the trailing newline
241
-
242
- if result != expected {
243
- panic!("\ninput: {data}\ngot:\n{result}\nexpected:\n{expected}\n");
244
- }
245
- }),
246
- }
247
- }
248
-
249
- fn context_name(context: &str) -> QualName {
250
- if let Some(cx) = context.strip_prefix("svg ") {
251
- QualName::new(None, ns!(svg), LocalName::from(cx))
252
- } else if let Some(cx) = context.strip_prefix("math ") {
253
- QualName::new(None, ns!(mathml), LocalName::from(cx))
254
- } else {
255
- QualName::new(None, ns!(html), LocalName::from(context))
256
- }
257
- }
258
-
259
- fn tests(src_dir: &Path, ignores: &HashSet<String>) -> Vec<Test> {
260
- let mut tests = vec![];
261
-
262
- foreach_html5lib_test(
263
- src_dir,
264
- "html5lib-tests/tree-construction",
265
- OsStr::new("dat"),
266
- |path, file| {
267
- let buf = io::BufReader::new(file);
268
- let lines = buf.lines().map(|res| res.expect("couldn't read"));
269
- let data = parse_tests(lines);
270
-
271
- for (i, test) in data.into_iter().enumerate() {
272
- make_test(
273
- &mut tests,
274
- ignores,
275
- path.file_name().unwrap().to_str().unwrap(),
276
- i,
277
- test,
278
- );
279
- }
280
- },
281
- );
282
-
283
- tests
284
- }
285
-
286
- fn main() {
287
- let src_dir = Path::new("./");
288
- let mut ignores = HashSet::new();
289
- {
290
- let f = fs::File::open(src_dir.join("data/test/ignore")).unwrap();
291
- let r = io::BufReader::new(f);
292
- for ln in r.lines() {
293
- ignores.insert(ln.unwrap().trim_end().to_string());
294
- }
295
- }
296
-
297
- run_all(tests(src_dir, &ignores));
298
- }
@@ -1,141 +0,0 @@
1
- use html5ever::driver;
2
- use html5ever::tendril::stream::TendrilSink;
3
- use html5ever::tendril::StrTendril;
4
- use html5ever::ExpandedName;
5
- use html5ever::QualName;
6
- use markup5ever::interface::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
7
- use markup5ever::{local_name, ns, Attribute};
8
- use markup5ever_rcdom::{Handle, RcDom};
9
- use std::borrow::Cow;
10
- use std::cell::{Cell, RefCell};
11
-
12
- pub struct LineCountingDOM {
13
- pub line_vec: RefCell<Vec<(QualName, u64)>>,
14
- pub current_line: Cell<u64>,
15
- pub rcdom: RcDom,
16
- }
17
-
18
- impl TreeSink for LineCountingDOM {
19
- type Output = Self;
20
- type ElemName<'a> = ExpandedName<'a>;
21
-
22
- fn finish(self) -> Self {
23
- self
24
- }
25
-
26
- type Handle = Handle;
27
-
28
- fn parse_error(&self, msg: Cow<'static, str>) {
29
- self.rcdom.parse_error(msg);
30
- }
31
-
32
- fn get_document(&self) -> Handle {
33
- self.rcdom.get_document()
34
- }
35
-
36
- fn get_template_contents(&self, target: &Handle) -> Handle {
37
- self.rcdom.get_template_contents(target)
38
- }
39
-
40
- fn set_quirks_mode(&self, mode: QuirksMode) {
41
- self.rcdom.set_quirks_mode(mode)
42
- }
43
-
44
- fn same_node(&self, x: &Handle, y: &Handle) -> bool {
45
- self.rcdom.same_node(x, y)
46
- }
47
-
48
- fn elem_name<'a>(&'a self, target: &'a Handle) -> ExpandedName<'a> {
49
- self.rcdom.elem_name(target)
50
- }
51
-
52
- fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
53
- self.line_vec
54
- .borrow_mut()
55
- .push((name.clone(), self.current_line.get()));
56
- self.rcdom.create_element(name, attrs, flags)
57
- }
58
-
59
- fn create_comment(&self, text: StrTendril) -> Handle {
60
- self.rcdom.create_comment(text)
61
- }
62
-
63
- fn create_pi(&self, target: StrTendril, content: StrTendril) -> Handle {
64
- self.rcdom.create_pi(target, content)
65
- }
66
-
67
- fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
68
- self.rcdom.append(parent, child)
69
- }
70
-
71
- fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
72
- self.rcdom.append_before_sibling(sibling, child)
73
- }
74
-
75
- fn append_based_on_parent_node(
76
- &self,
77
- element: &Handle,
78
- prev_element: &Handle,
79
- child: NodeOrText<Handle>,
80
- ) {
81
- self.rcdom
82
- .append_based_on_parent_node(element, prev_element, child)
83
- }
84
-
85
- fn append_doctype_to_document(
86
- &self,
87
- name: StrTendril,
88
- public_id: StrTendril,
89
- system_id: StrTendril,
90
- ) {
91
- self.rcdom
92
- .append_doctype_to_document(name, public_id, system_id);
93
- }
94
-
95
- fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec<Attribute>) {
96
- self.rcdom.add_attrs_if_missing(target, attrs);
97
- }
98
-
99
- fn remove_from_parent(&self, target: &Handle) {
100
- self.rcdom.remove_from_parent(target);
101
- }
102
-
103
- fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
104
- self.rcdom.reparent_children(node, new_parent);
105
- }
106
-
107
- fn mark_script_already_started(&self, target: &Handle) {
108
- self.rcdom.mark_script_already_started(target);
109
- }
110
-
111
- fn set_current_line(&self, line_number: u64) {
112
- self.current_line.set(line_number);
113
- }
114
- }
115
-
116
- #[test]
117
- fn check_four_lines() {
118
- // Input
119
- let sink = LineCountingDOM {
120
- line_vec: RefCell::new(vec![]),
121
- current_line: Cell::new(1),
122
- rcdom: RcDom::default(),
123
- };
124
- let mut result_tok = driver::parse_document(sink, Default::default());
125
- result_tok.process(StrTendril::from("<a>\n"));
126
- result_tok.process(StrTendril::from("</a>\n"));
127
- result_tok.process(StrTendril::from("<b>\n"));
128
- result_tok.process(StrTendril::from("</b>"));
129
- // Actual Output
130
- let actual = result_tok.finish();
131
- // Expected Output
132
- let expected = vec![
133
- (QualName::new(None, ns!(html), local_name!("html")), 1),
134
- (QualName::new(None, ns!(html), local_name!("head")), 1),
135
- (QualName::new(None, ns!(html), local_name!("body")), 1),
136
- (QualName::new(None, ns!(html), local_name!("a")), 1),
137
- (QualName::new(None, ns!(html), local_name!("b")), 3),
138
- ];
139
- // Assertion
140
- assert_eq!(*actual.line_vec.borrow(), expected);
141
- }
@@ -1,34 +0,0 @@
1
- // Copyright 2014-2017 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- use std::ffi::OsStr;
11
- use std::fs;
12
- use std::path::Path;
13
-
14
- pub fn foreach_xml5lib_test<Mk>(
15
- src_dir: &Path,
16
- subdir: &'static str,
17
- ext: &'static OsStr,
18
- mut mk: Mk,
19
- ) where
20
- Mk: FnMut(&Path, fs::File),
21
- {
22
- let mut test_dir_path = src_dir.to_path_buf();
23
- test_dir_path.push("xml5lib-tests");
24
- test_dir_path.push(subdir);
25
-
26
- let test_files = fs::read_dir(&test_dir_path).unwrap();
27
- for entry in test_files {
28
- let path = entry.unwrap().path();
29
- if path.extension() == Some(ext) {
30
- let file = fs::File::open(&path).unwrap();
31
- mk(&path, file);
32
- }
33
- }
34
- }
@@ -1,48 +0,0 @@
1
- // Copyright 2024 The html5ever Project Developers. See the
2
- // COPYRIGHT file at the top-level directory of this distribution.
3
- //
4
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
- // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
- // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
- // option. This file may not be copied, modified, or distributed
8
- // except according to those terms.
9
-
10
- use libtest_mimic::{Arguments, Trial};
11
-
12
- /// Simple container for storing tests for later execution
13
- pub struct Test {
14
- pub name: String,
15
- pub skip: bool,
16
- pub test: Box<dyn Fn() + Send + Sync>,
17
- }
18
-
19
- impl Test {
20
- /// Invoke the stored test function
21
- ///
22
- /// A status message is printed if the wrapped closure completes
23
- /// or is marked as skipped. The test should panic to report
24
- /// failure.
25
- pub fn run(&self) {
26
- print!("test {} ...", self.name);
27
- if self.skip {
28
- println!(" SKIPPED");
29
- } else {
30
- (self.test)();
31
- println!(" ok");
32
- }
33
- }
34
- }
35
-
36
- pub fn run_all(tests: Vec<Test>) {
37
- let mut harness_tests = Vec::new();
38
-
39
- for test in tests {
40
- let harness_test = Trial::test(test.name.clone(), move || {
41
- test.run();
42
- Ok(())
43
- });
44
- harness_tests.push(harness_test);
45
- }
46
- let args = Arguments::from_args();
47
- libtest_mimic::run(&args, harness_tests).exit();
48
- }
@@ -1,101 +0,0 @@
1
- use markup5ever_rcdom::{RcDom, SerializableHandle};
2
- use xml5ever::driver;
3
- use xml5ever::serialize;
4
- use xml5ever::tendril::TendrilSink;
5
-
6
- #[test]
7
- fn el_ns_serialize() {
8
- assert_eq_serialization(
9
- "<a:title xmlns:a=\"http://www.foo.org/\" value=\"test\">Test</a:title>",
10
- driver::parse_document(RcDom::default(), Default::default())
11
- .from_utf8()
12
- .one("<a:title xmlns:a=\"http://www.foo.org/\" value=\"test\">Test</title>".as_bytes()),
13
- );
14
- }
15
-
16
- #[test]
17
- fn nested_ns_serialize() {
18
- assert_eq_serialization("<a:x xmlns:a=\"http://www.foo.org/\" xmlns:b=\"http://www.bar.org/\" value=\"test\"><b:y/></a:x>",
19
- driver::parse_document(RcDom::default(), Default::default())
20
- .from_utf8()
21
- .one("<a:x xmlns:a=\"http://www.foo.org/\" xmlns:b=\"http://www.bar.org/\" value=\"test\"><b:y/></a:x>".as_bytes()));
22
- }
23
-
24
- #[test]
25
- fn def_ns_serialize() {
26
- assert_eq_serialization(
27
- "<table xmlns=\"html4\"><td></td></table>",
28
- driver::parse_document(RcDom::default(), Default::default())
29
- .from_utf8()
30
- .one("<table xmlns=\"html4\"><td></td></table>".as_bytes()),
31
- );
32
- }
33
-
34
- #[test]
35
- fn undefine_ns_serialize() {
36
- assert_eq_serialization(
37
- "<a:x xmlns:a=\"http://www.foo.org\"><a:y xmlns:a=\"\"><a:z/></a:y</a:x>",
38
- driver::parse_document(RcDom::default(), Default::default())
39
- .from_utf8()
40
- .one(
41
- "<a:x xmlns:a=\"http://www.foo.org\"><a:y xmlns:a=\"\"><a:z/></a:y</a:x>"
42
- .as_bytes(),
43
- ),
44
- );
45
- }
46
-
47
- #[test]
48
- fn redefine_default_ns_serialize() {
49
- assert_eq_serialization(
50
- "<x xmlns=\"http://www.foo.org\"><y xmlns=\"\"><z/></y</x>",
51
- driver::parse_document(RcDom::default(), Default::default())
52
- .from_utf8()
53
- .one("<x xmlns=\"http://www.foo.org\"><y xmlns=\"\"><z/></y</x>".as_bytes()),
54
- );
55
- }
56
-
57
- #[test]
58
- fn attr_serialize() {
59
- assert_serialization(
60
- "<title value=\"test\">Test</title>",
61
- driver::parse_document(RcDom::default(), Default::default())
62
- .from_utf8()
63
- .one("<title value='test'>Test".as_bytes()),
64
- );
65
- }
66
-
67
- #[test]
68
- fn from_utf8() {
69
- assert_serialization(
70
- "<title>Test</title>",
71
- driver::parse_document(RcDom::default(), Default::default())
72
- .from_utf8()
73
- .one("<title>Test".as_bytes()),
74
- );
75
- }
76
-
77
- fn assert_eq_serialization(text: &'static str, dom: RcDom) {
78
- let mut serialized = Vec::new();
79
- let document: SerializableHandle = dom.document.clone().into();
80
- serialize::serialize(&mut serialized, &document, Default::default()).unwrap();
81
-
82
- let dom_from_text = driver::parse_document(RcDom::default(), Default::default())
83
- .from_utf8()
84
- .one(text.as_bytes());
85
-
86
- let mut reserialized = Vec::new();
87
- let document: SerializableHandle = dom_from_text.document.clone().into();
88
- serialize::serialize(&mut reserialized, &document, Default::default()).unwrap();
89
-
90
- assert_eq!(
91
- String::from_utf8(serialized).unwrap(),
92
- String::from_utf8(reserialized).unwrap()
93
- );
94
- }
95
-
96
- fn assert_serialization(text: &'static str, dom: RcDom) {
97
- let mut serialized = Vec::new();
98
- let document: SerializableHandle = dom.document.clone().into();
99
- serialize::serialize(&mut serialized, &document, Default::default()).unwrap();
100
- assert_eq!(String::from_utf8(serialized).unwrap(), text);
101
- }