html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,67 +0,0 @@
1
- #!/usr/bin/env run-cargo-script
2
- //! This is a regular crate doc comment, but it also contains a partial
3
- //! Cargo manifest. Note the use of a *fenced* code block, and the
4
- //! `cargo` "language".
5
- //!
6
- //! ```cargo
7
- //! [dependencies]
8
- //! xml5ever = "0.2.0"
9
- //! tendril = "0.1.3"
10
- //! ```
11
- extern crate markup5ever_rcdom as rcdom;
12
- extern crate xml5ever;
13
-
14
- use std::io;
15
-
16
- use rcdom::{Handle, NodeData, RcDom};
17
- use xml5ever::driver::parse_document;
18
- use xml5ever::tendril::TendrilSink;
19
-
20
- fn walk(prefix: &str, handle: &Handle) {
21
- let node = handle;
22
-
23
- print!("{prefix}");
24
- match node.data {
25
- NodeData::Document => println!("#document"),
26
-
27
- NodeData::Text { ref contents } => println!("#text {}", contents.borrow().escape_default()),
28
-
29
- NodeData::Element { ref name, .. } => {
30
- println!("{}", name.local);
31
- },
32
-
33
- _ => {},
34
- }
35
-
36
- let new_indent = {
37
- let mut temp = String::new();
38
- temp.push_str(prefix);
39
- temp.push_str(" ");
40
- temp
41
- };
42
-
43
- for child in node
44
- .children
45
- .borrow()
46
- .iter()
47
- .filter(|child| matches!(child.data, NodeData::Text { .. } | NodeData::Element { .. }))
48
- {
49
- walk(&new_indent, child);
50
- }
51
- }
52
-
53
- fn main() {
54
- env_logger::init();
55
-
56
- let stdin = io::stdin();
57
-
58
- // To parse XML into a tree form, we need a TreeSink
59
- // luckily xml5ever comes with a static RC backed tree represetation.
60
- let dom: RcDom = parse_document(RcDom::default(), Default::default())
61
- .from_utf8()
62
- .read_from(&mut stdin.lock())
63
- .unwrap();
64
-
65
- // Execute our visualizer on RcDom
66
- walk("", &dom.document);
67
- }
@@ -1,2 +0,0 @@
1
- *.dat -text diff
2
- *.test -text diff
@@ -1,76 +0,0 @@
1
- name: downstream
2
-
3
- concurrency:
4
- group: "${{github.workflow}}-${{github.ref}}"
5
- cancel-in-progress: true
6
-
7
- on:
8
- workflow_dispatch:
9
- push:
10
- branches:
11
- - master
12
- pull_request:
13
- types: [opened, synchronize]
14
- branches:
15
- - '*'
16
-
17
- jobs:
18
- skeleton:
19
- runs-on: ubuntu-latest
20
- steps:
21
- - run: echo hello world
22
-
23
- parse5:
24
- runs-on: ubuntu-latest
25
- steps:
26
- - uses: actions/checkout@v2
27
- with:
28
- repository: inikulin/parse5
29
- submodules: recursive
30
- - run: rm -rf test/data/html5lib-tests/
31
- - uses: actions/checkout@v2
32
- with:
33
- path: test/data/html5lib-tests/
34
- - uses: actions/setup-node@v3
35
- with:
36
- node-version: lts/*
37
- cache: npm
38
- - run: npm ci
39
- - run: npm run build --if-present
40
- - run: npm run unit-tests
41
-
42
- html5gum:
43
- runs-on: ubuntu-latest
44
- steps:
45
- - uses: actions/checkout@v2
46
- with:
47
- repository: untitaker/html5gum
48
- - run: rm -rf tests/html5lib-tests/
49
- - uses: actions/checkout@v2
50
- with:
51
- path: tests/html5lib-tests/
52
- - uses: actions-rs/toolchain@v1
53
- with:
54
- profile: minimal
55
- toolchain: stable
56
- override: true
57
- - run: cargo test
58
-
59
- nokogiri:
60
- runs-on: ubuntu-latest
61
- container:
62
- image: ghcr.io/sparklemotion/nokogiri-test:mri-3.2
63
- steps:
64
- - uses: actions/checkout@v3
65
- with:
66
- repository: sparklemotion/nokogiri
67
- path: nokogiri
68
- - uses: actions/checkout@v3
69
- with:
70
- path: nokogiri/test/html5lib-tests
71
- - working-directory: nokogiri
72
- name: "Run the Nokogiri test suite"
73
- run: |
74
- bundle install
75
- bundle exec rake compile -- --enable-system-libraries
76
- bundle exec rake test
@@ -1,25 +0,0 @@
1
- name: lint
2
-
3
- concurrency:
4
- group: "${{github.workflow}}-${{github.ref}}"
5
- cancel-in-progress: true
6
-
7
- on:
8
- workflow_dispatch:
9
- push:
10
- branches:
11
- - master
12
- pull_request:
13
- types: [opened, synchronize]
14
- branches:
15
- - '*'
16
-
17
- jobs:
18
- lint:
19
- runs-on: ubuntu-latest
20
- steps:
21
- - uses: actions/checkout@v3
22
- - uses: actions/setup-python@v4
23
- with:
24
- python-version: '3.11'
25
- - run: ./lint
@@ -1,79 +0,0 @@
1
- # Copyright (c) 2014 GitHub, Inc.
2
- #
3
- # Permission is hereby granted, free of charge, to any person obtaining a
4
- # copy of this software and associated documentation files (the "Software"),
5
- # to deal in the Software without restriction, including without limitation
6
- # the rights to use, copy, modify, merge, publish, distribute, sublicense,
7
- # and/or sell copies of the Software, and to permit persons to whom the
8
- # Software is furnished to do so, subject to the following conditions:
9
- #
10
- # The above copyright notice and this permission notice shall be included in
11
- # all copies or substantial portions of the Software.
12
- #
13
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
- # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19
- # DEALINGS IN THE SOFTWARE.
20
-
21
- # Byte-compiled / optimized / DLL files
22
- __pycache__/
23
- *.py[cod]
24
- *$py.class
25
-
26
- # C extensions
27
- *.so
28
-
29
- # Distribution / packaging
30
- .Python
31
- env/
32
- build/
33
- develop-eggs/
34
- dist/
35
- downloads/
36
- eggs/
37
- .eggs/
38
- lib/
39
- lib64/
40
- parts/
41
- sdist/
42
- var/
43
- *.egg-info/
44
- .installed.cfg
45
- *.egg
46
- MANIFEST
47
-
48
- # PyInstaller
49
- # Usually these files are written by a python script from a template
50
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
51
- *.manifest
52
- *.spec
53
-
54
- # Installer logs
55
- pip-log.txt
56
- pip-delete-this-directory.txt
57
-
58
- # Unit test / coverage reports
59
- htmlcov/
60
- .tox/
61
- .coverage
62
- .coverage.*
63
- .cache
64
- nosetests.xml
65
- coverage.xml
66
- *,cover
67
-
68
- # Translations
69
- *.mo
70
- *.pot
71
-
72
- # Django stuff:
73
- *.log
74
-
75
- # Sphinx documentation
76
- doc/_build/
77
-
78
- # PyBuilder
79
- target/
@@ -1,34 +0,0 @@
1
- Credits
2
- =======
3
-
4
- The ``html5lib`` test data is maintained by:
5
-
6
- - James Graham
7
- - Geoffrey Sneddon
8
-
9
-
10
- Contributors
11
- ------------
12
-
13
- - Adam Barth
14
- - Andi Sidwell
15
- - Anne van Kesteren
16
- - David Flanagan
17
- - Edward Z. Yang
18
- - Geoffrey Sneddon
19
- - Henri Sivonen
20
- - Ian Hickson
21
- - Jacques Distler
22
- - James Graham
23
- - Lachlan Hunt
24
- - lantis63
25
- - Mark Pilgrim
26
- - Mats Palmgren
27
- - Ms2ger
28
- - Nolan Waite
29
- - Philip Taylor
30
- - Rafael Weinstein
31
- - Ryan King
32
- - Sam Ruby
33
- - Simon Pieters
34
- - Thomas Broyer
@@ -1,21 +0,0 @@
1
- Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
2
- other contributors
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining
5
- a copy of this software and associated documentation files (the
6
- "Software"), to deal in the Software without restriction, including
7
- without limitation the rights to use, copy, modify, merge, publish,
8
- distribute, sublicense, and/or sell copies of the Software, and to
9
- permit persons to whom the Software is furnished to do so, subject to
10
- the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be
13
- included in all copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,51 +0,0 @@
1
- �Ѥl�m�D�w�g�n �Ĥ@~�|�Q��
2
-
3
- �Ѥl�D�g
4
-
5
- �Ĥ@��
6
-
7
- �D�i�D�A�D�`�D�C�W�i�W�A�D�`�W�C�L�A�W�Ѧa���l�Q���A�W�U�������C
8
- �G�`�L�A���H�[�䧮�F�`���A���H�[���u�C����̡A�P�X�Ӳ��W�A�P�פ�
9
- �ȡC�Ȥ��S�ȡA���������C
10
-
11
- �ĤG��
12
-
13
- �ѤU�Ҫ����������A���c�o�Q�Ҫ����������A�������o�C�G���L�ۥ͡A��
14
- ���ۦ��A���u�ۧΡA���U�۶ɡA���n�۩M�A�e����H�C�O�H�t�H�B�u�L��
15
- �v���ơA��u�����v���СC�U���@�j�Ӥ���A�ͦӤ����A���Ӥ���A�\��
16
- �ӥ��~�C�Ұߥ��~�A�O�H���h�C
17
-
18
- �ĤT��
19
-
20
- ���|��A�ϥ������Q���Q���o���f�A�ϥ������s�Q�����i���A�ϥ��ߤ���
21
- �C�O�H�u�t�H�v���v�A���ߡA��両�A�z��ӡA�j�䰩�C�`�ϥ��L���L
22
- ���C�ϤҴ��̤������]�C���u�L���v�A�h�L���v�C
23
-
24
- �ĥ|��
25
-
26
- �u�D�v�R�A�ӥΤ��Τ��աC�W���A���U�����v�Q����U�A�Ѩ�ɡA�M���
27
- �A�P��СQ�祿���Φs�C�^�����֤��l�H�H�Ҥ����C
28
-
29
- �Ĥ���
30
-
31
- �Ѧa�����A�H�U�����쪯�Q�t�H�����A�H�ʩm���쪯�C�Ѧa�����A��S��
32
- ���G�H��Ӥ��}�A�ʦӷU�X�C�h���ƽa�A���p�u���C
33
-
34
- �Ĥ���
35
-
36
- ���������A�O�ץȦɡC�Ȧɤ����A�O�פѦa�ڡC�����Y�s�A�Τ����ԡC
37
-
38
- �ĤC��
39
-
40
- �Ѫ��a�[�C�Ѧa�ҥH����B�[�̡A�H�䤣�ۥ͡A�G����[�C�O�H�t�H���
41
- ���Ө����A�~�䨭�Ө��s�C�D�H��L�p���H�G�ন��p�C
42
-
43
- �ĤK��
44
-
45
- �W���Y���C�����Q�U���Ӥ����C�B���H���Ҵc�A�G�X��D�C�~���a�A�ߵ�
46
- �W�A�P�����A�����H�A�F���v�A�Ƶ���A�ʵ��ɡC�Ұߤ����A�G�L�סC
47
-
48
- �ĤE��
49
-
50
- ���Ӭդ��A���p��w�Q���ӾU���A���i���O�C���ɺ���A������u�Q�I�Q
51
- ��ź�A�ۿ��S�C�\�E���h�A�Ѥ��D�C
@@ -1,5 +0,0 @@
1
- #data
2
- <!DOCTYPE HTML>
3
- <script>document.write('<meta charset="ISO-8859-' + '2">')</script>
4
- #encoding
5
- iso-8859-2
@@ -1,10 +0,0 @@
1
- #data
2
- <html>
3
- <head>
4
- <meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
5
- <!--京-->
6
- <title>Yahoo! JAPAN</title>
7
- <meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
8
- <style type="text/css" media="all">
9
- #encoding
10
- euc-jp