html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,60 +0,0 @@
1
- {"tests":[
2
-
3
- {"description": "quote_char=\"'\"",
4
- "options": {"quote_char": "'"},
5
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
6
- "expected": ["<span title='test &#39;with&#39; quote_char'>"]
7
- },
8
-
9
- {"description": "quote_attr_values=true",
10
- "options": {"quote_attr_values": true},
11
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
12
- "expected": ["<button disabled>"],
13
- "xhtml": ["<button disabled=\"disabled\">"]
14
- },
15
-
16
- {"description": "quote_attr_values=true with irrelevant",
17
- "options": {"quote_attr_values": true},
18
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
19
- "expected": ["<div irrelevant>"],
20
- "xhtml": ["<div irrelevant=\"irrelevant\">"]
21
- },
22
-
23
- {"description": "use_trailing_solidus=true with void element",
24
- "options": {"use_trailing_solidus": true},
25
- "input": [["EmptyTag", "img", {}]],
26
- "expected": ["<img />"]
27
- },
28
-
29
- {"description": "use_trailing_solidus=true with non-void element",
30
- "options": {"use_trailing_solidus": true},
31
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
32
- "expected": ["<div>"]
33
- },
34
-
35
- {"description": "minimize_boolean_attributes=false",
36
- "options": {"minimize_boolean_attributes": false},
37
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
38
- "expected": ["<div irrelevant=irrelevant>"],
39
- "xhtml": ["<div irrelevant=\"irrelevant\">"]
40
- },
41
-
42
- {"description": "minimize_boolean_attributes=false with empty value",
43
- "options": {"minimize_boolean_attributes": false},
44
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
45
- "expected": ["<div irrelevant=\"\">"]
46
- },
47
-
48
- {"description": "escape less than signs in attribute values",
49
- "options": {"escape_lt_in_attrs": true},
50
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
51
- "expected": ["<a title=\"a&lt;b>c&amp;d\">"]
52
- },
53
-
54
- {"description": "rcdata",
55
- "options": {"escape_rcdata": true},
56
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
57
- "expected": ["<script>a&lt;b&gt;c&amp;d"]
58
- }
59
-
60
- ]}
@@ -1,51 +0,0 @@
1
- {"tests": [
2
-
3
- {"description": "bare text with leading spaces",
4
- "options": {"strip_whitespace": true},
5
- "input": [["Characters", "\t\r\n\u000C foo"]],
6
- "expected": [" foo"]
7
- },
8
-
9
- {"description": "bare text with trailing spaces",
10
- "options": {"strip_whitespace": true},
11
- "input": [["Characters", "foo \t\r\n\u000C"]],
12
- "expected": ["foo "]
13
- },
14
-
15
- {"description": "bare text with inner spaces",
16
- "options": {"strip_whitespace": true},
17
- "input": [["Characters", "foo \t\r\n\u000C bar"]],
18
- "expected": ["foo bar"]
19
- },
20
-
21
- {"description": "text within <pre>",
22
- "options": {"strip_whitespace": true},
23
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
24
- "expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
25
- },
26
-
27
- {"description": "text within <pre>, with inner markup",
28
- "options": {"strip_whitespace": true},
29
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
30
- "expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
31
- },
32
-
33
- {"description": "text within <textarea>",
34
- "options": {"strip_whitespace": true},
35
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
36
- "expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
37
- },
38
-
39
- {"description": "text within <script>",
40
- "options": {"strip_whitespace": true},
41
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
42
- "expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
43
- },
44
-
45
- {"description": "text within <style>",
46
- "options": {"strip_whitespace": true},
47
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
48
- "expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
49
- }
50
-
51
- ]}
@@ -1,107 +0,0 @@
1
- Tokenizer tests
2
- ===============
3
-
4
- The test format is [JSON](http://www.json.org/). This has the advantage
5
- that the syntax allows backward-compatible extensions to the tests and
6
- the disadvantage that it is relatively verbose.
7
-
8
- Basic Structure
9
- ---------------
10
-
11
- {"tests": [
12
-     {"description": "Test description",
13
-     "input": "input_string",
14
-     "output": [expected_output_tokens],
15
-     "initialStates": [initial_states],
16
-     "lastStartTag": last_start_tag,
17
-     "errors": [parse_errors]
18
-     }
19
- ]}
20
-
21
- Multiple tests per file are allowed simply by adding more objects to the
22
- "tests" list.
23
-
24
- Each parse error is an object that contains error `code` and one-based
25
- error location indices: `line` and `col`.
26
-
27
- `description`, `input` and `output` are always present. The other values
28
- are optional.
29
-
30
- ### Test set-up
31
-
32
- `test.input` is a string containing the characters to pass to the
33
- tokenizer. Specifically, it represents the characters of the **input
34
- stream**, and so implementations are expected to perform the processing
35
- described in the spec's **Preprocessing the input stream** section
36
- before feeding the result to the tokenizer.
37
-
38
- If `test.doubleEscaped` is present and `true`, then `test.input` is not
39
- quite as described above. Instead, it must first be subjected to another
40
- round of unescaping (i.e., in addition to any unescaping involved in the
41
- JSON import), and the result of *that* represents the characters of the
42
- input stream. Currently, the only unescaping required by this option is
43
- to convert each sequence of the form \\uHHHH (where H is a hex digit)
44
- into the corresponding Unicode code point. (Note that this option also
45
- affects the interpretation of `test.output`.)
46
-
47
- `test.initialStates` is a list of strings, each being the name of a
48
- tokenizer state which can be one of the following:
49
-
50
- - `Data state`
51
- - `PLAINTEXT state`
52
- - `RCDATA state`
53
- - `RAWTEXT state`
54
- - `Script data state`
55
- - `CDATA section state`
56
-
57
- The test should be run once for each string, using it
58
- to set the tokenizer's initial state for that run. If
59
- `test.initialStates` is omitted, it defaults to `["Data state"]`.
60
-
61
- `test.lastStartTag` is a lowercase string that should be used as "the
62
- tag name of the last start tag to have been emitted from this
63
- tokenizer", referenced in the spec's definition of **appropriate end tag
64
- token**. If it is omitted, it is treated as if "no start tag has been
65
- emitted from this tokenizer".
66
-
67
- ### Test results
68
-
69
- `test.output` is a list of tokens, ordered with the first produced by
70
- the tokenizer the first (leftmost) in the list. The list must match the
71
- **complete** list of tokens that the tokenizer should produce. Valid
72
- tokens are:
73
-
74
- ["DOCTYPE", name, public_id, system_id, correctness]
75
- ["StartTag", name, {attributes}*, true*]
76
- ["StartTag", name, {attributes}]
77
- ["EndTag", name]
78
- ["Comment", data]
79
- ["Character", data]
80
-
81
- `public_id` and `system_id` are either strings or `null`. `correctness`
82
- is either `true` or `false`; `true` corresponds to the force-quirks flag
83
- being false, and vice-versa.
84
-
85
- When the self-closing flag is set, the `StartTag` array has `true` as
86
- its fourth entry. When the flag is not set, the array has only three
87
- entries for backwards compatibility.
88
-
89
- All adjacent character tokens are coalesced into a single
90
- `["Character", data]` token.
91
-
92
- If `test.doubleEscaped` is present and `true`, then every string within
93
- `test.output` must be further unescaped (as described above) before
94
- comparing with the tokenizer's output.
95
-
96
- xmlViolation tests
97
- ------------------
98
-
99
- `tokenizer/xmlViolation.test` differs from the above in a couple of
100
- ways:
101
-
102
- - The name of the single member of the top-level JSON object is
103
- "xmlViolationTests" instead of "tests".
104
- - Each test's expected output assumes that the implementation is applying
105
- the tweaks given in the spec's "Coercing an HTML DOM into an
106
- infoset" section.
107
-
@@ -1,93 +0,0 @@
1
- {"tests": [
2
-
3
- {"description":"PLAINTEXT content model flag",
4
- "initialStates":["PLAINTEXT state"],
5
- "lastStartTag":"plaintext",
6
- "input":"<head>&body;",
7
- "output":[["Character", "<head>&body;"]]},
8
-
9
- {"description":"PLAINTEXT with seeming close tag",
10
- "initialStates":["PLAINTEXT state"],
11
- "lastStartTag":"plaintext",
12
- "input":"</plaintext>&body;",
13
- "output":[["Character", "</plaintext>&body;"]]},
14
-
15
- {"description":"End tag closing RCDATA or RAWTEXT",
16
- "initialStates":["RCDATA state", "RAWTEXT state"],
17
- "lastStartTag":"xmp",
18
- "input":"foo</xmp>",
19
- "output":[["Character", "foo"], ["EndTag", "xmp"]]},
20
-
21
- {"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
22
- "initialStates":["RCDATA state", "RAWTEXT state"],
23
- "lastStartTag":"xmp",
24
- "input":"foo</xMp>",
25
- "output":[["Character", "foo"], ["EndTag", "xmp"]]},
26
-
27
- {"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
28
- "initialStates":["RCDATA state", "RAWTEXT state"],
29
- "lastStartTag":"xmp",
30
- "input":"foo</xmp ",
31
- "output":[["Character", "foo"]],
32
- "errors":[
33
- { "code": "eof-in-tag", "line": 1, "col": 10 }
34
- ]},
35
-
36
- {"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
37
- "initialStates":["RCDATA state", "RAWTEXT state"],
38
- "lastStartTag":"xmp",
39
- "input":"foo</xmp",
40
- "output":[["Character", "foo</xmp"]]},
41
-
42
- {"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
43
- "initialStates":["RCDATA state", "RAWTEXT state"],
44
- "lastStartTag":"xmp",
45
- "input":"foo</xmp/",
46
- "output":[["Character", "foo"]],
47
- "errors":[
48
- { "code": "eof-in-tag", "line": 1, "col": 10 }
49
- ]},
50
-
51
- {"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
52
- "initialStates":["RCDATA state", "RAWTEXT state"],
53
- "lastStartTag":"xmp",
54
- "input":"foo</xmp<",
55
- "output":[["Character", "foo</xmp<"]]},
56
-
57
- {"description":"End tag with incorrect name in RCDATA or RAWTEXT",
58
- "initialStates":["RCDATA state", "RAWTEXT state"],
59
- "lastStartTag":"xmp",
60
- "input":"</foo>bar</xmp>",
61
- "output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
62
-
63
- {"description":"Partial end tags leading straight into partial end tags",
64
- "initialStates":["RCDATA state", "RAWTEXT state"],
65
- "lastStartTag":"xmp",
66
- "input":"</xmp</xmp</xmp>",
67
- "output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
68
-
69
- {"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
70
- "initialStates":["RCDATA state", "RAWTEXT state"],
71
- "lastStartTag":"xmp",
72
- "input":"</foo>bar</xmpaar>",
73
- "output":[["Character", "</foo>bar</xmpaar>"]]},
74
-
75
- {"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
76
- "initialStates":["RCDATA state", "RAWTEXT state"],
77
- "lastStartTag":"xmp",
78
- "input":"foo</xmp></baz>",
79
- "output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
80
-
81
- {"description":"RAWTEXT w/ something looking like an entity",
82
- "initialStates":["RAWTEXT state"],
83
- "lastStartTag":"xmp",
84
- "input":"&foo;",
85
- "output":[["Character", "&foo;"]]},
86
-
87
- {"description":"RCDATA w/ an entity",
88
- "initialStates":["RCDATA state"],
89
- "lastStartTag":"textarea",
90
- "input":"&lt;",
91
- "output":[["Character", "<"]]}
92
-
93
- ]}
@@ -1,335 +0,0 @@
1
- {
2
- "tests": [
3
- {
4
- "description":"CR in bogus comment state",
5
- "input":"<?\u000d",
6
- "output":[["Comment", "?\u000a"]],
7
- "errors":[
8
- { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
9
- ]
10
- },
11
- {
12
- "description":"CRLF in bogus comment state",
13
- "input":"<?\u000d\u000a",
14
- "output":[["Comment", "?\u000a"]],
15
- "errors":[
16
- { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
17
- ]
18
- },
19
- {
20
- "description":"CRLFLF in bogus comment state",
21
- "input":"<?\u000d\u000a\u000a",
22
- "output":[["Comment", "?\u000a\u000a"]],
23
- "errors":[
24
- { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
25
- ]
26
- },
27
- {
28
- "description":"Raw NUL replacement",
29
- "doubleEscaped":true,
30
- "initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"],
31
- "input":"\\u0000",
32
- "output":[["Character", "\\uFFFD"]],
33
- "errors":[
34
- { "code": "unexpected-null-character", "line": 1, "col": 1 }
35
- ]
36
- },
37
- {
38
- "description":"NUL in CDATA section",
39
- "doubleEscaped":true,
40
- "initialStates":["CDATA section state"],
41
- "input":"\\u0000]]>",
42
- "output":[["Character", "\\u0000"]]
43
- },
44
- {
45
- "description":"NUL in script HTML comment",
46
- "doubleEscaped":true,
47
- "initialStates":["Script data state"],
48
- "input":"<!--test\\u0000--><!--test-\\u0000--><!--test--\\u0000-->",
49
- "output":[["Character", "<!--test\\uFFFD--><!--test-\\uFFFD--><!--test--\\uFFFD-->"]],
50
- "errors":[
51
- { "code": "unexpected-null-character", "line": 1, "col": 9 },
52
- { "code": "unexpected-null-character", "line": 1, "col": 22 },
53
- { "code": "unexpected-null-character", "line": 1, "col": 36 }
54
- ]
55
- },
56
- {
57
- "description":"NUL in script HTML comment - double escaped",
58
- "doubleEscaped":true,
59
- "initialStates":["Script data state"],
60
- "input":"<!--<script>\\u0000--><!--<script>-\\u0000--><!--<script>--\\u0000-->",
61
- "output":[["Character", "<!--<script>\\uFFFD--><!--<script>-\\uFFFD--><!--<script>--\\uFFFD-->"]],
62
- "errors":[
63
- { "code": "unexpected-null-character", "line": 1, "col": 13 },
64
- { "code": "unexpected-null-character", "line": 1, "col": 30 },
65
- { "code": "unexpected-null-character", "line": 1, "col": 48 }
66
- ]
67
- },
68
- {
69
- "description":"EOF in script HTML comment",
70
- "initialStates":["Script data state"],
71
- "input":"<!--test",
72
- "output":[["Character", "<!--test"]],
73
- "errors":[
74
- { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 9 }
75
- ]
76
- },
77
- {
78
- "description":"EOF in script HTML comment after dash",
79
- "initialStates":["Script data state"],
80
- "input":"<!--test-",
81
- "output":[["Character", "<!--test-"]],
82
- "errors":[
83
- { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 10 }
84
- ]
85
- },
86
- {
87
- "description":"EOF in script HTML comment after dash dash",
88
- "initialStates":["Script data state"],
89
- "input":"<!--test--",
90
- "output":[["Character", "<!--test--"]],
91
- "errors":[
92
- { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 11 }
93
- ]
94
- },
95
- {
96
- "description":"EOF in script HTML comment double escaped after dash",
97
- "initialStates":["Script data state"],
98
- "input":"<!--<script>-",
99
- "output":[["Character", "<!--<script>-"]],
100
- "errors":[
101
- { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 14 }
102
- ]
103
- },
104
- {
105
- "description":"EOF in script HTML comment double escaped after dash dash",
106
- "initialStates":["Script data state"],
107
- "input":"<!--<script>--",
108
- "output":[["Character", "<!--<script>--"]],
109
- "errors":[
110
- { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 15 }
111
- ]
112
- },
113
- {
114
- "description":"EOF in script HTML comment - double escaped",
115
- "initialStates":["Script data state"],
116
- "input":"<!--<script>",
117
- "output":[["Character", "<!--<script>"]],
118
- "errors":[
119
- { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 }
120
- ]
121
- },
122
- {
123
- "description":"Dash in script HTML comment",
124
- "initialStates":["Script data state"],
125
- "input":"<!-- - -->",
126
- "output":[["Character", "<!-- - -->"]]
127
- },
128
- {
129
- "description":"Dash less-than in script HTML comment",
130
- "initialStates":["Script data state"],
131
- "input":"<!-- -< -->",
132
- "output":[["Character", "<!-- -< -->"]]
133
- },
134
- {
135
- "description":"Dash at end of script HTML comment",
136
- "initialStates":["Script data state"],
137
- "input":"<!--test--->",
138
- "output":[["Character", "<!--test--->"]]
139
- },
140
- {
141
- "description":"</script> in script HTML comment",
142
- "initialStates":["Script data state"],
143
- "lastStartTag":"script",
144
- "input":"<!-- </script> --></script>",
145
- "output":[["Character", "<!-- "], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
146
- },
147
- {
148
- "description":"</script> in script HTML comment - double escaped",
149
- "initialStates":["Script data state"],
150
- "lastStartTag":"script",
151
- "input":"<!-- <script></script> --></script>",
152
- "output":[["Character", "<!-- <script></script> -->"], ["EndTag", "script"]]
153
- },
154
- {
155
- "description":"</script> in script HTML comment - double escaped with nested <script>",
156
- "initialStates":["Script data state"],
157
- "lastStartTag":"script",
158
- "input":"<!-- <script><script></script></script> --></script>",
159
- "output":[["Character", "<!-- <script><script></script>"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
160
- },
161
- {
162
- "description":"</script> in script HTML comment - double escaped with abrupt end",
163
- "initialStates":["Script data state"],
164
- "lastStartTag":"script",
165
- "input":"<!-- <script>--></script> --></script>",
166
- "output":[["Character", "<!-- <script>-->"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
167
- },
168
- {
169
- "description":"Incomplete start tag in script HTML comment double escaped",
170
- "initialStates":["Script data state"],
171
- "lastStartTag":"script",
172
- "input":"<!--<scrip></script>-->",
173
- "output":[["Character", "<!--<scrip>"], ["EndTag", "script"], ["Character", "-->"]]
174
- },
175
- {
176
- "description":"Unclosed start tag in script HTML comment double escaped",
177
- "initialStates":["Script data state"],
178
- "lastStartTag":"script",
179
- "input":"<!--<script</script>-->",
180
- "output":[["Character", "<!--<script"], ["EndTag", "script"], ["Character", "-->"]]
181
- },
182
- {
183
- "description":"Incomplete end tag in script HTML comment double escaped",
184
- "initialStates":["Script data state"],
185
- "lastStartTag":"script",
186
- "input":"<!--<script></scrip>-->",
187
- "output":[["Character", "<!--<script></scrip>-->"]]
188
- },
189
- {
190
- "description":"Unclosed end tag in script HTML comment double escaped",
191
- "initialStates":["Script data state"],
192
- "lastStartTag":"script",
193
- "input":"<!--<script></script-->",
194
- "output":[["Character", "<!--<script></script-->"]]
195
- },
196
- {
197
- "description":"leading U+FEFF must pass through",
198
- "initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"],
199
- "doubleEscaped":true,
200
- "input":"\\uFEFFfoo\\uFEFFbar",
201
- "output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
202
- },
203
- {
204
- "description":"Non BMP-charref in RCDATA",
205
- "initialStates":["RCDATA state"],
206
- "input":"&NotEqualTilde;",
207
- "output":[["Character", "\u2242\u0338"]]
208
- },
209
- {
210
- "description":"Bad charref in RCDATA",
211
- "initialStates":["RCDATA state"],
212
- "input":"&NotEqualTild;",
213
- "output":[["Character", "&NotEqualTild;"]],
214
- "errors":[
215
- { "code": "unknown-named-character-reference", "line": 1, "col": 14 }
216
- ]
217
- },
218
- {
219
- "description":"lowercase endtags",
220
- "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
221
- "lastStartTag":"xmp",
222
- "input":"</XMP>",
223
- "output":[["EndTag","xmp"]]
224
- },
225
- {
226
- "description":"bad endtag (space before name)",
227
- "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
228
- "lastStartTag":"xmp",
229
- "input":"</ XMP>",
230
- "output":[["Character","</ XMP>"]]
231
- },
232
- {
233
- "description":"bad endtag (not matching last start tag)",
234
- "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
235
- "lastStartTag":"xmp",
236
- "input":"</xm>",
237
- "output":[["Character","</xm>"]]
238
- },
239
- {
240
- "description":"bad endtag (without close bracket)",
241
- "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
242
- "lastStartTag":"xmp",
243
- "input":"</xm ",
244
- "output":[["Character","</xm "]]
245
- },
246
- {
247
- "description":"bad endtag (trailing solidus)",
248
- "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
249
- "lastStartTag":"xmp",
250
- "input":"</xm/",
251
- "output":[["Character","</xm/"]]
252
- },
253
- {
254
- "description":"Non BMP-charref in attribute",
255
- "input":"<p id=\"&NotEqualTilde;\">",
256
- "output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
257
- },
258
- {
259
- "description":"--!NUL in comment ",
260
- "doubleEscaped":true,
261
- "input":"<!----!\\u0000-->",
262
- "output":[["Comment", "--!\\uFFFD"]],
263
- "errors":[
264
- { "code": "unexpected-null-character", "line": 1, "col": 8 }
265
- ]
266
- },
267
- {
268
- "description":"space EOF after doctype ",
269
- "input":"<!DOCTYPE html ",
270
- "output":[["DOCTYPE", "html", null, null , false]],
271
- "errors":[
272
- { "code": "eof-in-doctype", "line": 1, "col": 16 }
273
- ]
274
- },
275
- {
276
- "description":"CDATA in HTML content",
277
- "input":"<![CDATA[foo]]>",
278
- "output":[["Comment", "[CDATA[foo]]"]],
279
- "errors":[
280
- { "code": "cdata-in-html-content", "line": 1, "col": 9 }
281
- ]
282
- },
283
- {
284
- "description":"CDATA content",
285
- "input":"foo&#32;]]>",
286
- "initialStates":["CDATA section state"],
287
- "output":[["Character", "foo&#32;"]]
288
- },
289
- {
290
- "description":"CDATA followed by HTML content",
291
- "input":"foo&#32;]]>&#32;",
292
- "initialStates":["CDATA section state"],
293
- "output":[["Character", "foo&#32; "]]
294
- },
295
- {
296
- "description":"CDATA with extra bracket",
297
- "input":"foo]]]>",
298
- "initialStates":["CDATA section state"],
299
- "output":[["Character", "foo]"]]
300
- },
301
- {
302
- "description":"CDATA without end marker",
303
- "input":"foo",
304
- "initialStates":["CDATA section state"],
305
- "output":[["Character", "foo"]],
306
- "errors":[
307
- { "code": "eof-in-cdata", "line": 1, "col": 4 }
308
- ]
309
- },
310
- {
311
- "description":"CDATA with single bracket ending",
312
- "input":"foo]",
313
- "initialStates":["CDATA section state"],
314
- "output":[["Character", "foo]"]],
315
- "errors":[
316
- { "code": "eof-in-cdata", "line": 1, "col": 5 }
317
- ]
318
- },
319
- {
320
- "description":"CDATA with two brackets ending",
321
- "input":"foo]]",
322
- "initialStates":["CDATA section state"],
323
- "output":[["Character", "foo]]"]],
324
- "errors":[
325
- { "code": "eof-in-cdata", "line": 1, "col": 6 }
326
- ]
327
- },
328
- {
329
- "description": "HTML tag in script data",
330
- "input": "<b>hello world</b>",
331
- "initialStates": ["Script data state"],
332
- "output": [["Character", "<b>hello world</b>"]]
333
- }
334
- ]
335
- }