html-to-markdown 2.24.6 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/ext/html-to-markdown-rb/native/Cargo.lock +3 -26
  4. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  5. data/lib/html_to_markdown/version.rb +1 -1
  6. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  7. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  8. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  9. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +53 -91
  10. data/rust-vendor/png/.cargo-checksum.json +1 -1
  11. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  12. data/rust-vendor/png/CHANGES.md +44 -0
  13. data/rust-vendor/png/Cargo.lock +124 -171
  14. data/rust-vendor/png/Cargo.toml +1 -1
  15. data/rust-vendor/png/Cargo.toml.orig +1 -1
  16. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  17. data/rust-vendor/png/benches/unfilter.rs +3 -3
  18. data/rust-vendor/png/src/adam7.rs +17 -10
  19. data/rust-vendor/png/src/common.rs +8 -8
  20. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  21. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  22. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  23. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  24. data/rust-vendor/png/src/encoder.rs +4 -2
  25. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  26. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  27. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  28. data/rust-vendor/png/src/filter/simd.rs +308 -0
  29. data/rust-vendor/png/src/lib.rs +1 -0
  30. metadata +7 -177
  31. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  32. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  33. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  34. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  35. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  36. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  37. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  38. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  39. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  40. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  41. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  42. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  43. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  44. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  45. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  46. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  47. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  48. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  49. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  50. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  51. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  52. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  53. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  54. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  55. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  56. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  57. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  58. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  59. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  60. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  61. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  62. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  63. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  64. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  65. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  66. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  67. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  68. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  69. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  70. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  71. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  72. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  153. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  154. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  155. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  156. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  157. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  158. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  159. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  160. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  161. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  162. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  163. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  164. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  165. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  166. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  167. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  168. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  169. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  170. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  171. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  172. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  173. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  174. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  175. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  176. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  177. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  178. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  179. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  180. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  181. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  182. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  183. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  184. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  185. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  186. data/rust-vendor/xml5ever/README.md +0 -72
  187. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  188. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  189. data/rust-vendor/xml5ever/examples/README.md +0 -223
  190. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  191. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  192. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  193. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  194. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  195. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  196. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  197. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  198. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  199. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  200. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  201. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  202. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  203. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,532 +0,0 @@
1
- {"tests": [
2
-
3
- {"description":"< in attribute name",
4
- "input":"<z/0 <>",
5
- "output":[["StartTag", "z", {"0": "", "<": ""}]],
6
- "errors":[
7
- { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
8
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
9
- ]},
10
-
11
- {"description":"< in unquoted attribute value",
12
- "input":"<z x=<>",
13
- "output":[["StartTag", "z", {"x": "<"}]],
14
- "errors":[
15
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
16
- ]},
17
-
18
- {"description":"= in unquoted attribute value",
19
- "input":"<z z=z=z>",
20
- "output":[["StartTag", "z", {"z": "z=z"}]],
21
- "errors":[
22
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
23
- ]},
24
-
25
- {"description":"= attribute",
26
- "input":"<z =>",
27
- "output":[["StartTag", "z", {"=": ""}]],
28
- "errors":[
29
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }
30
- ]},
31
-
32
- {"description":"== attribute",
33
- "input":"<z ==>",
34
- "output":[["StartTag", "z", {"=": ""}]],
35
- "errors":[
36
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
37
- { "code": "missing-attribute-value", "line": 1, "col": 6 }
38
- ]},
39
-
40
- {"description":"=== attribute",
41
- "input":"<z ===>",
42
- "output":[["StartTag", "z", {"=": "="}]],
43
- "errors":[
44
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
45
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
46
- ]},
47
-
48
- {"description":"==== attribute",
49
- "input":"<z ====>",
50
- "output":[["StartTag", "z", {"=": "=="}]],
51
- "errors":[
52
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
53
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 },
54
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
55
- ]},
56
-
57
- {"description":"\" after ampersand in double-quoted attribute value",
58
- "input":"<z z=\"&\">",
59
- "output":[["StartTag", "z", {"z": "&"}]]},
60
-
61
- {"description":"' after ampersand in double-quoted attribute value",
62
- "input":"<z z=\"&'\">",
63
- "output":[["StartTag", "z", {"z": "&'"}]]},
64
-
65
- {"description":"' after ampersand in single-quoted attribute value",
66
- "input":"<z z='&'>",
67
- "output":[["StartTag", "z", {"z": "&"}]]},
68
-
69
- {"description":"\" after ampersand in single-quoted attribute value",
70
- "input":"<z z='&\"'>",
71
- "output":[["StartTag", "z", {"z": "&\""}]]},
72
-
73
- {"description":"Text after bogus character reference",
74
- "input":"<z z='&xlink_xmlns;'>bar<z>",
75
- "output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
76
-
77
- {"description":"Text after hex character reference",
78
- "input":"<z z='&#x0020; foo'>bar<z>",
79
- "output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
80
-
81
- {"description":"Attribute name starting with \"",
82
- "input":"<foo \"='bar'>",
83
- "output":[["StartTag", "foo", {"\"": "bar"}]],
84
- "errors":[
85
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
86
- ]},
87
-
88
- {"description":"Attribute name starting with '",
89
- "input":"<foo '='bar'>",
90
- "output":[["StartTag", "foo", {"'": "bar"}]],
91
- "errors":[
92
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
93
- ]},
94
-
95
- {"description":"Attribute name containing \"",
96
- "input":"<foo a\"b='bar'>",
97
- "output":[["StartTag", "foo", {"a\"b": "bar"}]],
98
- "errors":[
99
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
100
- ]},
101
-
102
- {"description":"Attribute name containing '",
103
- "input":"<foo a'b='bar'>",
104
- "output":[["StartTag", "foo", {"a'b": "bar"}]],
105
- "errors":[
106
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
107
- ]},
108
-
109
- {"description":"Unquoted attribute value containing '",
110
- "input":"<foo a=b'c>",
111
- "output":[["StartTag", "foo", {"a": "b'c"}]],
112
- "errors":[
113
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
114
- ]},
115
-
116
-
117
- {"description":"Unquoted attribute value containing \"",
118
- "input":"<foo a=b\"c>",
119
- "output":[["StartTag", "foo", {"a": "b\"c"}]],
120
- "errors":[
121
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
122
- ]},
123
-
124
- {"description":"Double-quoted attribute value not followed by whitespace",
125
- "input":"<foo a=\"b\"c>",
126
- "output":[["StartTag", "foo", {"a": "b", "c": ""}]],
127
- "errors":[
128
- { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
129
- ]},
130
-
131
- {"description":"Single-quoted attribute value not followed by whitespace",
132
- "input":"<foo a='b'c>",
133
- "output":[["StartTag", "foo", {"a": "b", "c": ""}]],
134
- "errors":[
135
- { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
136
- ]},
137
-
138
- {"description":"Quoted attribute followed by permitted /",
139
- "input":"<br a='b'/>",
140
- "output":[["StartTag","br",{"a":"b"},true]]},
141
-
142
- {"description":"Quoted attribute followed by non-permitted /",
143
- "input":"<bar a='b'/>",
144
- "output":[["StartTag","bar",{"a":"b"},true]]},
145
-
146
- {"description":"CR EOF after doctype name",
147
- "input":"<!doctype html \r",
148
- "output":[["DOCTYPE", "html", null, null, false]],
149
- "errors":[
150
- { "code": "eof-in-doctype", "line": 2, "col": 1 }
151
- ]},
152
-
153
- {"description":"CR EOF in tag name",
154
- "input":"<z\r",
155
- "output":[],
156
- "errors":[
157
- { "code": "eof-in-tag", "line": 2, "col": 1 }
158
- ]},
159
-
160
- {"description":"Slash EOF in tag name",
161
- "input":"<z/",
162
- "output":[],
163
- "errors":[
164
- { "code": "eof-in-tag", "line": 1, "col": 4 }
165
- ]},
166
-
167
- {"description":"Zero hex numeric entity",
168
- "input":"&#x0",
169
- "output":[["Character", "\uFFFD"]],
170
- "errors":[
171
- { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 },
172
- { "code": "null-character-reference", "line": 1, "col": 5 }
173
- ]},
174
-
175
- {"description":"Zero decimal numeric entity",
176
- "input":"&#0",
177
- "output":[["Character", "\uFFFD"]],
178
- "errors":[
179
- { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 },
180
- { "code": "null-character-reference", "line": 1, "col": 4 }
181
- ]},
182
-
183
- {"description":"Zero-prefixed hex numeric entity",
184
- "input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
185
- "output":[["Character", "A"]]},
186
-
187
- {"description":"Zero-prefixed decimal numeric entity",
188
- "input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
189
- "output":[["Character", "A"]]},
190
-
191
- {"description":"Empty hex numeric entities",
192
- "input":"&#x &#X ",
193
- "output":[["Character", "&#x &#X "]],
194
- "errors":[
195
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 },
196
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 }
197
- ]},
198
-
199
- {"description":"Invalid digit in hex numeric entity",
200
- "input":"&#xZ",
201
- "output":[["Character", "&#xZ"]],
202
- "errors":[
203
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
204
- ]},
205
-
206
- {"description":"Empty decimal numeric entities",
207
- "input":"&# &#; ",
208
- "output":[["Character", "&# &#; "]],
209
- "errors":[
210
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 },
211
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 }
212
- ]},
213
-
214
- {"description":"Invalid digit in decimal numeric entity",
215
- "input":"&#A",
216
- "output":[["Character", "&#A"]],
217
- "errors":[
218
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
219
- ]},
220
-
221
- {"description":"Non-BMP numeric entity",
222
- "input":"&#x10000;",
223
- "output":[["Character", "\uD800\uDC00"]]},
224
-
225
- {"description":"Maximum non-BMP numeric entity",
226
- "input":"&#X10FFFF;",
227
- "output":[["Character", "\uDBFF\uDFFF"]],
228
- "errors":[
229
- { "code": "noncharacter-character-reference", "line": 1, "col": 11 }
230
- ]},
231
-
232
-
233
- {"description":"Above maximum numeric entity",
234
- "input":"&#x110000;",
235
- "output":[["Character", "\uFFFD"]],
236
- "errors":[
237
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
238
- ]},
239
-
240
- {"description":"32-bit hex numeric entity",
241
- "input":"&#x80000041;",
242
- "output":[["Character", "\uFFFD"]],
243
- "errors":[
244
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
245
- ]},
246
-
247
- {"description":"33-bit hex numeric entity",
248
- "input":"&#x100000041;",
249
- "output":[["Character", "\uFFFD"]],
250
- "errors":[
251
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
252
- ]},
253
-
254
- {"description":"33-bit decimal numeric entity",
255
- "input":"&#4294967361;",
256
- "output":[["Character", "\uFFFD"]],
257
- "errors":[
258
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
259
- ]},
260
-
261
- {"description":"65-bit hex numeric entity",
262
- "input":"&#x10000000000000041;",
263
- "output":[["Character", "\uFFFD"]],
264
- "errors":[
265
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 22 }
266
- ]},
267
-
268
- {"description":"65-bit decimal numeric entity",
269
- "input":"&#18446744073709551681;",
270
- "output":[["Character", "\uFFFD"]],
271
- "errors":[
272
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 24 }
273
- ]},
274
-
275
- {"description":"Surrogate code point edge cases",
276
- "input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
277
- "output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]],
278
- "errors":[
279
- { "code": "surrogate-character-reference", "line": 1, "col": 17 },
280
- { "code": "surrogate-character-reference", "line": 1, "col": 25 },
281
- { "code": "surrogate-character-reference", "line": 1, "col": 33 },
282
- { "code": "surrogate-character-reference", "line": 1, "col": 41 }
283
- ]},
284
-
285
- {"description":"Uppercase start tag name",
286
- "input":"<X>",
287
- "output":[["StartTag", "x", {}]]},
288
-
289
- {"description":"Uppercase end tag name",
290
- "input":"</X>",
291
- "output":[["EndTag", "x"]]},
292
-
293
- {"description":"Uppercase attribute name",
294
- "input":"<x X>",
295
- "output":[["StartTag", "x", { "x":"" }]]},
296
-
297
- {"description":"Tag/attribute name case edge values",
298
- "input":"<x@AZ[`az{ @AZ[`az{>",
299
- "output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
300
-
301
- {"description":"Duplicate different-case attributes",
302
- "input":"<x x=1 x=2 X=3>",
303
- "output":[["StartTag", "x", { "x":"1" }]],
304
- "errors":[
305
- { "code": "duplicate-attribute", "line": 1, "col": 9 },
306
- { "code": "duplicate-attribute", "line": 1, "col": 13 }
307
- ]},
308
-
309
- {"description":"Uppercase close tag attributes",
310
- "input":"</x X>",
311
- "output":[["EndTag", "x"]],
312
- "errors":[
313
- { "code": "end-tag-with-attributes", "line": 1, "col": 6 }
314
- ]},
315
-
316
- {"description":"Duplicate close tag attributes",
317
- "input":"</x x x>",
318
- "output":[["EndTag", "x"]],
319
- "errors":[
320
- { "code": "duplicate-attribute", "line": 1, "col": 8 },
321
- { "code": "end-tag-with-attributes", "line": 1, "col": 8 }
322
- ]},
323
-
324
- {"description":"Permitted slash",
325
- "input":"<br/>",
326
- "output":[["StartTag","br",{},true]]},
327
-
328
- {"description":"Non-permitted slash",
329
- "input":"<xr/>",
330
- "output":[["StartTag","xr",{},true]]},
331
-
332
- {"description":"Permitted slash but in close tag",
333
- "input":"</br/>",
334
- "output":[["EndTag", "br"]],
335
- "errors":[
336
- { "code": "end-tag-with-trailing-solidus", "line": 1, "col": 6 }
337
- ]},
338
-
339
- {"description":"Doctype public case-sensitivity (1)",
340
- "input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
341
- "output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
342
-
343
- {"description":"Doctype public case-sensitivity (2)",
344
- "input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
345
- "output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
346
-
347
- {"description":"Doctype system case-sensitivity (1)",
348
- "input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
349
- "output":[["DOCTYPE", "html", null, "XyZ", true]]},
350
-
351
- {"description":"Doctype system case-sensitivity (2)",
352
- "input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
353
- "output":[["DOCTYPE", "html", null, "xYz", true]]},
354
-
355
- {"description":"U+0000 in lookahead region after non-matching character",
356
- "input":"<!doc>\u0000",
357
- "output":[["Comment", "doc"], ["Character", "\u0000"]],
358
- "errors":[
359
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
360
- { "code": "unexpected-null-character", "line": 1, "col": 7 }
361
- ]},
362
-
363
- {"description":"U+0000 in lookahead region",
364
- "input":"<!doc\u0000",
365
- "output":[["Comment", "doc\uFFFD"]],
366
- "errors":[
367
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
368
- { "code": "unexpected-null-character", "line": 1, "col": 6 }
369
- ]},
370
-
371
- {"description":"U+0080 in lookahead region",
372
- "input":"<!doc\u0080",
373
- "output":[["Comment", "doc\u0080"]],
374
- "errors":[
375
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
376
- { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
377
- ]},
378
-
379
- {"description":"U+FDD1 in lookahead region",
380
- "input":"<!doc\uFDD1",
381
- "output":[["Comment", "doc\uFDD1"]],
382
- "errors":[
383
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
384
- { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
385
- ]},
386
-
387
- {"description":"U+1FFFF in lookahead region",
388
- "input":"<!doc\uD83F\uDFFF",
389
- "output":[["Comment", "doc\uD83F\uDFFF"]],
390
- "errors":[
391
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
392
- { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
393
- ]},
394
-
395
- {"description":"CR followed by non-LF",
396
- "input":"\r?",
397
- "output":[["Character", "\n?"]]},
398
-
399
- {"description":"CR at EOF",
400
- "input":"\r",
401
- "output":[["Character", "\n"]]},
402
-
403
- {"description":"LF at EOF",
404
- "input":"\n",
405
- "output":[["Character", "\n"]]},
406
-
407
- {"description":"CR LF",
408
- "input":"\r\n",
409
- "output":[["Character", "\n"]]},
410
-
411
- {"description":"CR CR",
412
- "input":"\r\r",
413
- "output":[["Character", "\n\n"]]},
414
-
415
- {"description":"LF LF",
416
- "input":"\n\n",
417
- "output":[["Character", "\n\n"]]},
418
-
419
- {"description":"LF CR",
420
- "input":"\n\r",
421
- "output":[["Character", "\n\n"]]},
422
-
423
- {"description":"text CR CR CR text",
424
- "input":"text\r\r\rtext",
425
- "output":[["Character", "text\n\n\ntext"]]},
426
-
427
- {"description":"Doctype publik",
428
- "input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
429
- "output":[["DOCTYPE", "html", null, null, false]],
430
- "errors":[
431
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
432
- ]},
433
-
434
- {"description":"Doctype publi",
435
- "input":"<!DOCTYPE html PUBLI",
436
- "output":[["DOCTYPE", "html", null, null, false]],
437
- "errors":[
438
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
439
- ]},
440
-
441
- {"description":"Doctype sistem",
442
- "input":"<!DOCTYPE html SISTEM \"AbC\">",
443
- "output":[["DOCTYPE", "html", null, null, false]],
444
- "errors":[
445
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
446
- ]},
447
-
448
- {"description":"Doctype sys",
449
- "input":"<!DOCTYPE html SYS",
450
- "output":[["DOCTYPE", "html", null, null, false]],
451
- "errors":[
452
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
453
- ]},
454
-
455
- {"description":"Doctype html x>text",
456
- "input":"<!DOCTYPE html x>text",
457
- "output":[["DOCTYPE", "html", null, null, false], ["Character", "text"]],
458
- "errors":[
459
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
460
- ]},
461
-
462
- {"description":"Grave accent in unquoted attribute",
463
- "input":"<a a=aa`>",
464
- "output":[["StartTag", "a", {"a":"aa`"}]],
465
- "errors":[
466
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 8 }
467
- ]},
468
-
469
- {"description":"EOF in tag name state ",
470
- "input":"<a",
471
- "output":[],
472
- "errors": [
473
- { "code": "eof-in-tag", "line": 1, "col": 3 }
474
- ]},
475
-
476
- {"description":"EOF in before attribute name state",
477
- "input":"<a ",
478
- "output":[],
479
- "errors":[
480
- { "code": "eof-in-tag", "line": 1, "col": 4 }
481
- ]},
482
-
483
- {"description":"EOF in attribute name state",
484
- "input":"<a a",
485
- "output":[],
486
- "errors":[
487
- { "code": "eof-in-tag", "line": 1, "col": 5 }
488
- ]},
489
-
490
- {"description":"EOF in after attribute name state",
491
- "input":"<a a ",
492
- "output":[],
493
- "errors":[
494
- { "code": "eof-in-tag", "line": 1, "col": 6 }
495
- ]},
496
-
497
- {"description":"EOF in before attribute value state",
498
- "input":"<a a =",
499
- "output":[],
500
- "errors":[
501
- { "code": "eof-in-tag", "line": 1, "col": 7 }
502
- ]},
503
-
504
- {"description":"EOF in attribute value (double quoted) state",
505
- "input":"<a a =\"a",
506
- "output":[],
507
- "errors":[
508
- { "code": "eof-in-tag", "line": 1, "col": 9 }
509
- ]},
510
-
511
- {"description":"EOF in attribute value (single quoted) state",
512
- "input":"<a a ='a",
513
- "output":[],
514
- "errors":[
515
- { "code": "eof-in-tag", "line": 1, "col": 9 }
516
- ]},
517
-
518
- {"description":"EOF in attribute value (unquoted) state",
519
- "input":"<a a =a",
520
- "output":[],
521
- "errors":[
522
- { "code": "eof-in-tag", "line": 1, "col": 8 }
523
- ]},
524
-
525
- {"description":"EOF in after attribute value state",
526
- "input":"<a a ='a'",
527
- "output":[],
528
- "errors":[
529
- { "code": "eof-in-tag", "line": 1, "col": 10 }
530
- ]}
531
-
532
- ]}