html-to-markdown 2.24.6 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/ext/html-to-markdown-rb/native/Cargo.lock +3 -26
  4. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  5. data/lib/html_to_markdown/version.rb +1 -1
  6. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  7. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  8. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  9. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +53 -91
  10. data/rust-vendor/png/.cargo-checksum.json +1 -1
  11. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  12. data/rust-vendor/png/CHANGES.md +44 -0
  13. data/rust-vendor/png/Cargo.lock +124 -171
  14. data/rust-vendor/png/Cargo.toml +1 -1
  15. data/rust-vendor/png/Cargo.toml.orig +1 -1
  16. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  17. data/rust-vendor/png/benches/unfilter.rs +3 -3
  18. data/rust-vendor/png/src/adam7.rs +17 -10
  19. data/rust-vendor/png/src/common.rs +8 -8
  20. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  21. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  22. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  23. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  24. data/rust-vendor/png/src/encoder.rs +4 -2
  25. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  26. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  27. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  28. data/rust-vendor/png/src/filter/simd.rs +308 -0
  29. data/rust-vendor/png/src/lib.rs +1 -0
  30. metadata +7 -177
  31. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  32. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  33. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  34. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  35. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  36. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  37. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  38. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  39. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  40. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  41. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  42. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  43. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  44. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  45. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  46. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  47. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  48. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  49. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  50. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  51. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  52. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  53. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  54. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  55. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  56. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  57. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  58. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  59. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  60. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  61. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  62. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  63. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  64. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  65. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  66. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  67. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  68. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  69. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  70. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  71. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  72. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  153. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  154. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  155. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  156. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  157. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  158. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  159. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  160. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  161. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  162. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  163. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  164. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  165. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  166. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  167. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  168. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  169. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  170. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  171. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  172. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  173. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  174. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  175. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  176. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  177. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  178. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  179. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  180. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  181. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  182. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  183. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  184. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  185. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  186. data/rust-vendor/xml5ever/README.md +0 -72
  187. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  188. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  189. data/rust-vendor/xml5ever/examples/README.md +0 -223
  190. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  191. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  192. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  193. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  194. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  195. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  196. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  197. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  198. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  199. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  200. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  201. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  202. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  203. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,9 +0,0 @@
1
- {"tests": [
2
-
3
- {"description":"<!---- >",
4
- "input":"<!---- >",
5
- "output":[["Comment","-- >"]],
6
- "errors":[
7
- { "code": "eof-in-comment", "line": 1, "col": 9 }
8
- ]}
9
- ]}
@@ -1,353 +0,0 @@
1
- {"tests": [
2
-
3
- {"description":"Correct Doctype lowercase",
4
- "input":"<!DOCTYPE html>",
5
- "output":[["DOCTYPE", "html", null, null, true]]},
6
-
7
-
8
- {"description":"Correct Doctype uppercase",
9
- "input":"<!DOCTYPE HTML>",
10
- "output":[["DOCTYPE", "html", null, null, true]]},
11
-
12
- {"description":"Correct Doctype mixed case",
13
- "input":"<!DOCTYPE HtMl>",
14
- "output":[["DOCTYPE", "html", null, null, true]]},
15
-
16
- {"description":"Correct Doctype case with EOF",
17
- "input":"<!DOCTYPE HtMl",
18
- "output":[["DOCTYPE", "html", null, null, false]],
19
- "errors":[
20
- { "code": "eof-in-doctype", "line": 1, "col": 15 }
21
- ]},
22
-
23
- {"description":"Truncated doctype start",
24
- "input":"<!DOC>",
25
- "output":[["Comment", "DOC"]],
26
- "errors":[
27
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
28
- ]},
29
-
30
- {"description":"Doctype in error",
31
- "input":"<!DOCTYPE foo>",
32
- "output":[["DOCTYPE", "foo", null, null, true]]},
33
-
34
- {"description":"Single Start Tag",
35
- "input":"<h>",
36
- "output":[["StartTag", "h", {}]]},
37
-
38
- {"description":"Empty end tag",
39
- "input":"</>",
40
- "output":[],
41
- "errors":[
42
- { "code": "missing-end-tag-name", "line": 1, "col": 3 }
43
- ]},
44
-
45
- {"description":"Empty start tag",
46
- "input":"<>",
47
- "output":[["Character", "<>"]],
48
- "errors":[
49
- { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
50
- ]},
51
-
52
- {"description":"Start Tag w/attribute",
53
- "input":"<h a='b'>",
54
- "output":[["StartTag", "h", {"a":"b"}]]},
55
-
56
- {"description":"Start Tag w/attribute no quotes",
57
- "input":"<h a=b>",
58
- "output":[["StartTag", "h", {"a":"b"}]]},
59
-
60
- {"description":"Start/End Tag",
61
- "input":"<h></h>",
62
- "output":[["StartTag", "h", {}], ["EndTag", "h"]]},
63
-
64
- {"description":"Two unclosed start tags",
65
- "input":"<p>One<p>Two",
66
- "output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
67
-
68
- {"description":"End Tag w/attribute",
69
- "input":"<h></h a='b'>",
70
- "output":[["StartTag", "h", {}], ["EndTag", "h"]],
71
- "errors":[
72
- { "code": "end-tag-with-attributes", "line": 1, "col": 13 }
73
- ]},
74
-
75
- {"description":"Multiple atts",
76
- "input":"<h a='b' c='d'>",
77
- "output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
78
-
79
- {"description":"Multiple atts no space",
80
- "input":"<h a='b'c='d'>",
81
- "output":[["StartTag", "h", {"a":"b", "c":"d"}]],
82
- "errors":[
83
- { "code": "missing-whitespace-between-attributes", "line": 1, "col": 9 }
84
- ]},
85
-
86
- {"description":"Repeated attr",
87
- "input":"<h a='b' a='d'>",
88
- "output":[["StartTag", "h", {"a":"b"}]],
89
- "errors":[
90
- { "code": "duplicate-attribute", "line": 1, "col": 11 }
91
- ]},
92
-
93
- {"description":"Simple comment",
94
- "input":"<!--comment-->",
95
- "output":[["Comment", "comment"]]},
96
-
97
- {"description":"Comment, Central dash no space",
98
- "input":"<!----->",
99
- "output":[["Comment", "-"]]},
100
-
101
- {"description":"Comment, two central dashes",
102
- "input":"<!-- --comment -->",
103
- "output":[["Comment", " --comment "]]},
104
-
105
- {"description":"Comment, central less-than bang",
106
- "input":"<!--<!-->",
107
- "output":[["Comment", "<!"]]},
108
-
109
- {"description":"Unfinished comment",
110
- "input":"<!--comment",
111
- "output":[["Comment", "comment"]],
112
- "errors":[
113
- { "code": "eof-in-comment", "line": 1, "col": 12 }
114
- ]},
115
-
116
- {"description":"Unfinished comment after start of nested comment",
117
- "input":"<!-- <!--",
118
- "output":[["Comment", " <!"]],
119
- "errors":[
120
- { "code": "eof-in-comment", "line": 1, "col": 10 }
121
- ]},
122
-
123
- {"description":"Start of a comment",
124
- "input":"<!-",
125
- "output":[["Comment", "-"]],
126
- "errors":[
127
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
128
- ]},
129
-
130
- {"description":"Short comment",
131
- "input":"<!-->",
132
- "output":[["Comment", ""]],
133
- "errors":[
134
- { "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 5 }
135
- ]},
136
-
137
- {"description":"Short comment two",
138
- "input":"<!--->",
139
- "output":[["Comment", ""]],
140
- "errors":[
141
- { "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 6 }
142
- ]},
143
-
144
- {"description":"Short comment three",
145
- "input":"<!---->",
146
- "output":[["Comment", ""]]},
147
-
148
- {"description":"< in comment",
149
- "input":"<!-- <test-->",
150
- "output":[["Comment", " <test"]]},
151
-
152
- {"description":"<< in comment",
153
- "input":"<!--<<-->",
154
- "output":[["Comment", "<<"]]},
155
-
156
- {"description":"<! in comment",
157
- "input":"<!-- <!test-->",
158
- "output":[["Comment", " <!test"]]},
159
-
160
- {"description":"<!- in comment",
161
- "input":"<!-- <!-test-->",
162
- "output":[["Comment", " <!-test"]]},
163
-
164
- {"description":"Nested comment",
165
- "input":"<!-- <!--test-->",
166
- "output":[["Comment", " <!--test"]],
167
- "errors":[
168
- { "code": "nested-comment", "line": 1, "col": 10 }
169
- ]},
170
-
171
- {"description":"Nested comment with extra <",
172
- "input":"<!-- <<!--test-->",
173
- "output":[["Comment", " <<!--test"]],
174
- "errors":[
175
- { "code": "nested-comment", "line": 1, "col": 11 }
176
- ]},
177
-
178
- {"description":"< in script data",
179
- "initialStates":["Script data state"],
180
- "input":"<test-->",
181
- "output":[["Character", "<test-->"]]},
182
-
183
- {"description":"<! in script data",
184
- "initialStates":["Script data state"],
185
- "input":"<!test-->",
186
- "output":[["Character", "<!test-->"]]},
187
-
188
- {"description":"<!- in script data",
189
- "initialStates":["Script data state"],
190
- "input":"<!-test-->",
191
- "output":[["Character", "<!-test-->"]]},
192
-
193
- {"description":"Escaped script data",
194
- "initialStates":["Script data state"],
195
- "input":"<!--test-->",
196
- "output":[["Character", "<!--test-->"]]},
197
-
198
- {"description":"< in script HTML comment",
199
- "initialStates":["Script data state"],
200
- "input":"<!-- < test -->",
201
- "output":[["Character", "<!-- < test -->"]]},
202
-
203
- {"description":"</ in script HTML comment",
204
- "initialStates":["Script data state"],
205
- "input":"<!-- </ test -->",
206
- "output":[["Character", "<!-- </ test -->"]]},
207
-
208
- {"description":"Start tag in script HTML comment",
209
- "initialStates":["Script data state"],
210
- "input":"<!-- <test> -->",
211
- "output":[["Character", "<!-- <test> -->"]]},
212
-
213
- {"description":"End tag in script HTML comment",
214
- "initialStates":["Script data state"],
215
- "input":"<!-- </test> -->",
216
- "output":[["Character", "<!-- </test> -->"]]},
217
-
218
- {"description":"- in script HTML comment double escaped",
219
- "initialStates":["Script data state"],
220
- "input":"<!--<script>-</script>-->",
221
- "output":[["Character", "<!--<script>-</script>-->"]]},
222
-
223
- {"description":"-- in script HTML comment double escaped",
224
- "initialStates":["Script data state"],
225
- "input":"<!--<script>--</script>-->",
226
- "output":[["Character", "<!--<script>--</script>-->"]]},
227
-
228
- {"description":"--- in script HTML comment double escaped",
229
- "initialStates":["Script data state"],
230
- "input":"<!--<script>---</script>-->",
231
- "output":[["Character", "<!--<script>---</script>-->"]]},
232
-
233
- {"description":"- spaced in script HTML comment double escaped",
234
- "initialStates":["Script data state"],
235
- "input":"<!--<script> - </script>-->",
236
- "output":[["Character", "<!--<script> - </script>-->"]]},
237
-
238
- {"description":"-- spaced in script HTML comment double escaped",
239
- "initialStates":["Script data state"],
240
- "input":"<!--<script> -- </script>-->",
241
- "output":[["Character", "<!--<script> -- </script>-->"]]},
242
-
243
- {"description":"Ampersand EOF",
244
- "input":"&",
245
- "output":[["Character", "&"]]},
246
-
247
- {"description":"Ampersand ampersand EOF",
248
- "input":"&&",
249
- "output":[["Character", "&&"]]},
250
-
251
- {"description":"Ampersand space EOF",
252
- "input":"& ",
253
- "output":[["Character", "& "]]},
254
-
255
- {"description":"Unfinished entity",
256
- "input":"&f",
257
- "output":[["Character", "&f"]]},
258
-
259
- {"description":"Ampersand, number sign",
260
- "input":"&#",
261
- "output":[["Character", "&#"]],
262
- "errors":[
263
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
264
- ]},
265
-
266
- {"description":"Unfinished numeric entity",
267
- "input":"&#x",
268
- "output":[["Character", "&#x"]],
269
- "errors":[
270
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
271
- ]},
272
-
273
- {"description":"Entity with trailing semicolon (1)",
274
- "input":"I'm &not;it",
275
- "output":[["Character","I'm \u00ACit"]]},
276
-
277
- {"description":"Entity with trailing semicolon (2)",
278
- "input":"I'm &notin;",
279
- "output":[["Character","I'm \u2209"]]},
280
-
281
- {"description":"Entity without trailing semicolon (1)",
282
- "input":"I'm &notit",
283
- "output":[["Character","I'm \u00ACit"]],
284
- "errors": [
285
- {"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
286
- ]},
287
-
288
- {"description":"Entity without trailing semicolon (2)",
289
- "input":"I'm &notin",
290
- "output":[["Character","I'm \u00ACin"]],
291
- "errors": [
292
- {"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
293
- ]},
294
-
295
- {"description":"Partial entity match at end of file",
296
- "input":"I'm &no",
297
- "output":[["Character","I'm &no"]]},
298
-
299
- {"description":"Non-ASCII character reference name",
300
- "input":"&\u00AC;",
301
- "output":[["Character", "&\u00AC;"]]},
302
-
303
- {"description":"ASCII decimal entity",
304
- "input":"&#0036;",
305
- "output":[["Character","$"]]},
306
-
307
- {"description":"ASCII hexadecimal entity",
308
- "input":"&#x3f;",
309
- "output":[["Character","?"]]},
310
-
311
- {"description":"Hexadecimal entity in attribute",
312
- "input":"<h a='&#x3f;'></h>",
313
- "output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
314
-
315
- {"description":"Entity in attribute without semicolon ending in x",
316
- "input":"<h a='&notx'>",
317
- "output":[["StartTag", "h", {"a":"&notx"}]]},
318
-
319
- {"description":"Entity in attribute without semicolon ending in 1",
320
- "input":"<h a='&not1'>",
321
- "output":[["StartTag", "h", {"a":"&not1"}]]},
322
-
323
- {"description":"Entity in attribute without semicolon ending in i",
324
- "input":"<h a='&noti'>",
325
- "output":[["StartTag", "h", {"a":"&noti"}]]},
326
-
327
- {"description":"Entity in attribute without semicolon",
328
- "input":"<h a='&COPY'>",
329
- "output":[["StartTag", "h", {"a":"\u00A9"}]],
330
- "errors": [
331
- {"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 12 }
332
- ]},
333
-
334
- {"description":"Unquoted attribute ending in ampersand",
335
- "input":"<s o=& t>",
336
- "output":[["StartTag","s",{"o":"&","t":""}]]},
337
-
338
- {"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
339
- "input":"<a a=a&>foo",
340
- "output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
341
-
342
- {"description":"plaintext element",
343
- "input":"<plaintext>foobar",
344
- "output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
345
-
346
- {"description":"Open angled bracket in unquoted attribute value state",
347
- "input":"<a a=f<>",
348
- "output":[["StartTag", "a", {"a":"f<"}]],
349
- "errors":[
350
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
351
- ]}
352
-
353
- ]}
@@ -1,275 +0,0 @@
1
- {"tests": [
2
-
3
- {"description":"DOCTYPE without name",
4
- "input":"<!DOCTYPE>",
5
- "output":[["DOCTYPE", null, null, null, false]],
6
- "errors":[
7
- { "code": "missing-doctype-name", "line": 1, "col": 10 }
8
- ]},
9
-
10
- {"description":"DOCTYPE without space before name",
11
- "input":"<!DOCTYPEhtml>",
12
- "output":[["DOCTYPE", "html", null, null, true]],
13
- "errors":[
14
- { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
15
- ]},
16
-
17
- {"description":"Incorrect DOCTYPE without a space before name",
18
- "input":"<!DOCTYPEfoo>",
19
- "output":[["DOCTYPE", "foo", null, null, true]],
20
- "errors":[
21
- { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
22
- ]},
23
-
24
- {"description":"DOCTYPE with publicId",
25
- "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
26
- "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
27
-
28
- {"description":"DOCTYPE with EOF after PUBLIC",
29
- "input":"<!DOCTYPE html PUBLIC",
30
- "output":[["DOCTYPE", "html", null, null, false]],
31
- "errors": [
32
- { "code": "eof-in-doctype", "col": 22, "line": 1 }
33
- ]},
34
-
35
- {"description":"DOCTYPE with EOF after PUBLIC '",
36
- "input":"<!DOCTYPE html PUBLIC '",
37
- "output":[["DOCTYPE", "html", "", null, false]],
38
- "errors": [
39
- { "code": "eof-in-doctype", "col": 24, "line": 1 }
40
- ]},
41
-
42
- {"description":"DOCTYPE with EOF after PUBLIC 'x",
43
- "input":"<!DOCTYPE html PUBLIC 'x",
44
- "output":[["DOCTYPE", "html", "x", null, false]],
45
- "errors": [
46
- { "code": "eof-in-doctype", "col": 25, "line": 1 }
47
- ]},
48
-
49
- {"description":"DOCTYPE with systemId",
50
- "input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
51
- "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
52
-
53
- {"description":"DOCTYPE with single-quoted systemId",
54
- "input":"<!DOCTYPE html SYSTEM '-//W3C//DTD HTML Transitional 4.01//EN'>",
55
- "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
56
-
57
- {"description":"DOCTYPE with publicId and systemId",
58
- "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
59
- "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
60
-
61
- {"description":"DOCTYPE with > in double-quoted publicId",
62
- "input":"<!DOCTYPE html PUBLIC \">x",
63
- "output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
64
- "errors": [
65
- { "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
66
- ]},
67
-
68
- {"description":"DOCTYPE with > in single-quoted publicId",
69
- "input":"<!DOCTYPE html PUBLIC '>x",
70
- "output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
71
- "errors": [
72
- { "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
73
- ]},
74
-
75
- {"description":"DOCTYPE with > in double-quoted systemId",
76
- "input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
77
- "output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
78
- "errors": [
79
- { "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
80
- ]},
81
-
82
- {"description":"DOCTYPE with > in single-quoted systemId",
83
- "input":"<!DOCTYPE html PUBLIC 'foo' '>x",
84
- "output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
85
- "errors": [
86
- { "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
87
- ]},
88
-
89
- {"description":"Incomplete doctype",
90
- "input":"<!DOCTYPE html ",
91
- "output":[["DOCTYPE", "html", null, null, false]],
92
- "errors":[
93
- { "code": "eof-in-doctype", "line": 1, "col": 16 }
94
- ]},
95
-
96
- {"description":"Numeric entity representing the NUL character",
97
- "input":"&#0000;",
98
- "output":[["Character", "\uFFFD"]],
99
- "errors":[
100
- { "code": "null-character-reference", "line": 1, "col": 8 }
101
- ]},
102
-
103
- {"description":"Hexadecimal entity representing the NUL character",
104
- "input":"&#x0000;",
105
- "output":[["Character", "\uFFFD"]],
106
- "errors":[
107
- { "code": "null-character-reference", "line": 1, "col": 9 }
108
- ]},
109
-
110
- {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
111
- "input":"&#2225222;",
112
- "output":[["Character", "\uFFFD"]],
113
- "errors":[
114
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
115
- ]},
116
-
117
- {"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
118
- "input":"&#x1010FFFF;",
119
- "output":[["Character", "\uFFFD"]],
120
- "errors":[
121
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
122
- ]},
123
-
124
- {"description":"Hexadecimal entity pair representing a surrogate pair",
125
- "input":"&#xD869;&#xDED6;",
126
- "output":[["Character", "\uFFFD\uFFFD"]],
127
- "errors":[
128
- { "code": "surrogate-character-reference", "line": 1, "col": 9 },
129
- { "code": "surrogate-character-reference", "line": 1, "col": 17 }
130
- ]},
131
-
132
- {"description":"Hexadecimal entity with mixed uppercase and lowercase",
133
- "input":"&#xaBcD;",
134
- "output":[["Character", "\uABCD"]]},
135
-
136
- {"description":"Entity without a name",
137
- "input":"&;",
138
- "output":[["Character", "&;"]]},
139
-
140
- {"description":"Unescaped ampersand in attribute value",
141
- "input":"<h a='&'>",
142
- "output":[["StartTag", "h", { "a":"&" }]]},
143
-
144
-
145
- {"description":"StartTag containing <",
146
- "input":"<a<b>",
147
- "output":[["StartTag", "a<b", { }]]},
148
-
149
- {"description":"Non-void element containing trailing /",
150
- "input":"<h/>",
151
- "output":[["StartTag","h",{},true]]},
152
-
153
- {"description":"Void element with permitted slash",
154
- "input":"<br/>",
155
- "output":[["StartTag","br",{},true]]},
156
-
157
- {"description":"Void element with permitted slash (with attribute)",
158
- "input":"<br foo='bar'/>",
159
- "output":[["StartTag","br",{"foo":"bar"},true]]},
160
-
161
- {"description":"StartTag containing /",
162
- "input":"<h/a='b'>",
163
- "output":[["StartTag", "h", { "a":"b" }]],
164
- "errors":[
165
- { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
166
- ]},
167
-
168
- {"description":"Double-quoted attribute value",
169
- "input":"<h a=\"b\">",
170
- "output":[["StartTag", "h", { "a":"b" }]]},
171
-
172
- {"description":"Unescaped </",
173
- "input":"</",
174
- "output":[["Character", "</"]],
175
- "errors":[
176
- { "code": "eof-before-tag-name", "line": 1, "col": 3 }
177
- ]},
178
-
179
- {"description":"Illegal end tag name",
180
- "input":"</1>",
181
- "output":[["Comment", "1"]],
182
- "errors":[
183
- { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
184
- ]},
185
-
186
- {"description":"Simili processing instruction",
187
- "input":"<?namespace>",
188
- "output":[["Comment", "?namespace"]],
189
- "errors":[
190
- { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
191
- ]},
192
-
193
- {"description":"A bogus comment stops at >, even if preceded by two dashes",
194
- "input":"<?foo-->",
195
- "output":[["Comment", "?foo--"]],
196
- "errors":[
197
- { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
198
- ]},
199
-
200
- {"description":"Unescaped <",
201
- "input":"foo < bar",
202
- "output":[["Character", "foo < bar"]],
203
- "errors":[
204
- { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 6 }
205
- ]},
206
-
207
- {"description":"Null Byte Replacement",
208
- "input":"\u0000",
209
- "output":[["Character", "\u0000"]],
210
- "errors":[
211
- { "code": "unexpected-null-character", "line": 1, "col": 1 }
212
- ]},
213
-
214
- {"description":"Comment with dash",
215
- "input":"<!---x",
216
- "output":[["Comment", "-x"]],
217
- "errors":[
218
- { "code": "eof-in-comment", "line": 1, "col": 7 }
219
- ]},
220
-
221
- {"description":"Entity + newline",
222
- "input":"\nx\n&gt;\n",
223
- "output":[["Character","\nx\n>\n"]]},
224
-
225
- {"description":"Start tag with no attributes but space before the greater-than sign",
226
- "input":"<h >",
227
- "output":[["StartTag", "h", {}]]},
228
-
229
- {"description":"Empty attribute followed by uppercase attribute",
230
- "input":"<h a B=''>",
231
- "output":[["StartTag", "h", {"a":"", "b":""}]]},
232
-
233
- {"description":"Double-quote after attribute name",
234
- "input":"<h a \">",
235
- "output":[["StartTag", "h", {"a":"", "\"":""}]],
236
- "errors":[
237
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
238
- ]},
239
-
240
- {"description":"Single-quote after attribute name",
241
- "input":"<h a '>",
242
- "output":[["StartTag", "h", {"a":"", "'":""}]],
243
- "errors":[
244
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
245
- ]},
246
-
247
- {"description":"Empty end tag with following characters",
248
- "input":"a</>bc",
249
- "output":[["Character", "abc"]],
250
- "errors":[
251
- { "code": "missing-end-tag-name", "line": 1, "col": 4 }
252
- ]},
253
-
254
- {"description":"Empty end tag with following tag",
255
- "input":"a</><b>c",
256
- "output":[["Character", "a"], ["StartTag", "b", {}], ["Character", "c"]],
257
- "errors":[
258
- { "code": "missing-end-tag-name", "line": 1, "col": 4 }
259
- ]},
260
-
261
- {"description":"Empty end tag with following comment",
262
- "input":"a</><!--b-->c",
263
- "output":[["Character", "a"], ["Comment", "b"], ["Character", "c"]],
264
- "errors":[
265
- { "code": "missing-end-tag-name", "line": 1, "col": 4 }
266
- ]},
267
-
268
- {"description":"Empty end tag with following end tag",
269
- "input":"a</></b>c",
270
- "output":[["Character", "a"], ["EndTag", "b"], ["Character", "c"]],
271
- "errors":[
272
- { "code": "missing-end-tag-name", "line": 1, "col": 4 }
273
- ]}
274
-
275
- ]}