html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,532 +0,0 @@
1
- {"tests": [
2
-
3
- {"description":"< in attribute name",
4
- "input":"<z/0 <>",
5
- "output":[["StartTag", "z", {"0": "", "<": ""}]],
6
- "errors":[
7
- { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
8
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
9
- ]},
10
-
11
- {"description":"< in unquoted attribute value",
12
- "input":"<z x=<>",
13
- "output":[["StartTag", "z", {"x": "<"}]],
14
- "errors":[
15
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
16
- ]},
17
-
18
- {"description":"= in unquoted attribute value",
19
- "input":"<z z=z=z>",
20
- "output":[["StartTag", "z", {"z": "z=z"}]],
21
- "errors":[
22
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
23
- ]},
24
-
25
- {"description":"= attribute",
26
- "input":"<z =>",
27
- "output":[["StartTag", "z", {"=": ""}]],
28
- "errors":[
29
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }
30
- ]},
31
-
32
- {"description":"== attribute",
33
- "input":"<z ==>",
34
- "output":[["StartTag", "z", {"=": ""}]],
35
- "errors":[
36
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
37
- { "code": "missing-attribute-value", "line": 1, "col": 6 }
38
- ]},
39
-
40
- {"description":"=== attribute",
41
- "input":"<z ===>",
42
- "output":[["StartTag", "z", {"=": "="}]],
43
- "errors":[
44
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
45
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
46
- ]},
47
-
48
- {"description":"==== attribute",
49
- "input":"<z ====>",
50
- "output":[["StartTag", "z", {"=": "=="}]],
51
- "errors":[
52
- { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
53
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 },
54
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
55
- ]},
56
-
57
- {"description":"\" after ampersand in double-quoted attribute value",
58
- "input":"<z z=\"&\">",
59
- "output":[["StartTag", "z", {"z": "&"}]]},
60
-
61
- {"description":"' after ampersand in double-quoted attribute value",
62
- "input":"<z z=\"&'\">",
63
- "output":[["StartTag", "z", {"z": "&'"}]]},
64
-
65
- {"description":"' after ampersand in single-quoted attribute value",
66
- "input":"<z z='&'>",
67
- "output":[["StartTag", "z", {"z": "&"}]]},
68
-
69
- {"description":"\" after ampersand in single-quoted attribute value",
70
- "input":"<z z='&\"'>",
71
- "output":[["StartTag", "z", {"z": "&\""}]]},
72
-
73
- {"description":"Text after bogus character reference",
74
- "input":"<z z='&xlink_xmlns;'>bar<z>",
75
- "output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
76
-
77
- {"description":"Text after hex character reference",
78
- "input":"<z z='&#x0020; foo'>bar<z>",
79
- "output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
80
-
81
- {"description":"Attribute name starting with \"",
82
- "input":"<foo \"='bar'>",
83
- "output":[["StartTag", "foo", {"\"": "bar"}]],
84
- "errors":[
85
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
86
- ]},
87
-
88
- {"description":"Attribute name starting with '",
89
- "input":"<foo '='bar'>",
90
- "output":[["StartTag", "foo", {"'": "bar"}]],
91
- "errors":[
92
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
93
- ]},
94
-
95
- {"description":"Attribute name containing \"",
96
- "input":"<foo a\"b='bar'>",
97
- "output":[["StartTag", "foo", {"a\"b": "bar"}]],
98
- "errors":[
99
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
100
- ]},
101
-
102
- {"description":"Attribute name containing '",
103
- "input":"<foo a'b='bar'>",
104
- "output":[["StartTag", "foo", {"a'b": "bar"}]],
105
- "errors":[
106
- { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
107
- ]},
108
-
109
- {"description":"Unquoted attribute value containing '",
110
- "input":"<foo a=b'c>",
111
- "output":[["StartTag", "foo", {"a": "b'c"}]],
112
- "errors":[
113
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
114
- ]},
115
-
116
-
117
- {"description":"Unquoted attribute value containing \"",
118
- "input":"<foo a=b\"c>",
119
- "output":[["StartTag", "foo", {"a": "b\"c"}]],
120
- "errors":[
121
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
122
- ]},
123
-
124
- {"description":"Double-quoted attribute value not followed by whitespace",
125
- "input":"<foo a=\"b\"c>",
126
- "output":[["StartTag", "foo", {"a": "b", "c": ""}]],
127
- "errors":[
128
- { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
129
- ]},
130
-
131
- {"description":"Single-quoted attribute value not followed by whitespace",
132
- "input":"<foo a='b'c>",
133
- "output":[["StartTag", "foo", {"a": "b", "c": ""}]],
134
- "errors":[
135
- { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
136
- ]},
137
-
138
- {"description":"Quoted attribute followed by permitted /",
139
- "input":"<br a='b'/>",
140
- "output":[["StartTag","br",{"a":"b"},true]]},
141
-
142
- {"description":"Quoted attribute followed by non-permitted /",
143
- "input":"<bar a='b'/>",
144
- "output":[["StartTag","bar",{"a":"b"},true]]},
145
-
146
- {"description":"CR EOF after doctype name",
147
- "input":"<!doctype html \r",
148
- "output":[["DOCTYPE", "html", null, null, false]],
149
- "errors":[
150
- { "code": "eof-in-doctype", "line": 2, "col": 1 }
151
- ]},
152
-
153
- {"description":"CR EOF in tag name",
154
- "input":"<z\r",
155
- "output":[],
156
- "errors":[
157
- { "code": "eof-in-tag", "line": 2, "col": 1 }
158
- ]},
159
-
160
- {"description":"Slash EOF in tag name",
161
- "input":"<z/",
162
- "output":[],
163
- "errors":[
164
- { "code": "eof-in-tag", "line": 1, "col": 4 }
165
- ]},
166
-
167
- {"description":"Zero hex numeric entity",
168
- "input":"&#x0",
169
- "output":[["Character", "\uFFFD"]],
170
- "errors":[
171
- { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 },
172
- { "code": "null-character-reference", "line": 1, "col": 5 }
173
- ]},
174
-
175
- {"description":"Zero decimal numeric entity",
176
- "input":"&#0",
177
- "output":[["Character", "\uFFFD"]],
178
- "errors":[
179
- { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 },
180
- { "code": "null-character-reference", "line": 1, "col": 4 }
181
- ]},
182
-
183
- {"description":"Zero-prefixed hex numeric entity",
184
- "input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
185
- "output":[["Character", "A"]]},
186
-
187
- {"description":"Zero-prefixed decimal numeric entity",
188
- "input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
189
- "output":[["Character", "A"]]},
190
-
191
- {"description":"Empty hex numeric entities",
192
- "input":"&#x &#X ",
193
- "output":[["Character", "&#x &#X "]],
194
- "errors":[
195
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 },
196
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 }
197
- ]},
198
-
199
- {"description":"Invalid digit in hex numeric entity",
200
- "input":"&#xZ",
201
- "output":[["Character", "&#xZ"]],
202
- "errors":[
203
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
204
- ]},
205
-
206
- {"description":"Empty decimal numeric entities",
207
- "input":"&# &#; ",
208
- "output":[["Character", "&# &#; "]],
209
- "errors":[
210
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 },
211
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 }
212
- ]},
213
-
214
- {"description":"Invalid digit in decimal numeric entity",
215
- "input":"&#A",
216
- "output":[["Character", "&#A"]],
217
- "errors":[
218
- { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
219
- ]},
220
-
221
- {"description":"Non-BMP numeric entity",
222
- "input":"&#x10000;",
223
- "output":[["Character", "\uD800\uDC00"]]},
224
-
225
- {"description":"Maximum non-BMP numeric entity",
226
- "input":"&#X10FFFF;",
227
- "output":[["Character", "\uDBFF\uDFFF"]],
228
- "errors":[
229
- { "code": "noncharacter-character-reference", "line": 1, "col": 11 }
230
- ]},
231
-
232
-
233
- {"description":"Above maximum numeric entity",
234
- "input":"&#x110000;",
235
- "output":[["Character", "\uFFFD"]],
236
- "errors":[
237
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
238
- ]},
239
-
240
- {"description":"32-bit hex numeric entity",
241
- "input":"&#x80000041;",
242
- "output":[["Character", "\uFFFD"]],
243
- "errors":[
244
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
245
- ]},
246
-
247
- {"description":"33-bit hex numeric entity",
248
- "input":"&#x100000041;",
249
- "output":[["Character", "\uFFFD"]],
250
- "errors":[
251
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
252
- ]},
253
-
254
- {"description":"33-bit decimal numeric entity",
255
- "input":"&#4294967361;",
256
- "output":[["Character", "\uFFFD"]],
257
- "errors":[
258
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
259
- ]},
260
-
261
- {"description":"65-bit hex numeric entity",
262
- "input":"&#x10000000000000041;",
263
- "output":[["Character", "\uFFFD"]],
264
- "errors":[
265
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 22 }
266
- ]},
267
-
268
- {"description":"65-bit decimal numeric entity",
269
- "input":"&#18446744073709551681;",
270
- "output":[["Character", "\uFFFD"]],
271
- "errors":[
272
- { "code": "character-reference-outside-unicode-range", "line": 1, "col": 24 }
273
- ]},
274
-
275
- {"description":"Surrogate code point edge cases",
276
- "input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
277
- "output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]],
278
- "errors":[
279
- { "code": "surrogate-character-reference", "line": 1, "col": 17 },
280
- { "code": "surrogate-character-reference", "line": 1, "col": 25 },
281
- { "code": "surrogate-character-reference", "line": 1, "col": 33 },
282
- { "code": "surrogate-character-reference", "line": 1, "col": 41 }
283
- ]},
284
-
285
- {"description":"Uppercase start tag name",
286
- "input":"<X>",
287
- "output":[["StartTag", "x", {}]]},
288
-
289
- {"description":"Uppercase end tag name",
290
- "input":"</X>",
291
- "output":[["EndTag", "x"]]},
292
-
293
- {"description":"Uppercase attribute name",
294
- "input":"<x X>",
295
- "output":[["StartTag", "x", { "x":"" }]]},
296
-
297
- {"description":"Tag/attribute name case edge values",
298
- "input":"<x@AZ[`az{ @AZ[`az{>",
299
- "output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
300
-
301
- {"description":"Duplicate different-case attributes",
302
- "input":"<x x=1 x=2 X=3>",
303
- "output":[["StartTag", "x", { "x":"1" }]],
304
- "errors":[
305
- { "code": "duplicate-attribute", "line": 1, "col": 9 },
306
- { "code": "duplicate-attribute", "line": 1, "col": 13 }
307
- ]},
308
-
309
- {"description":"Uppercase close tag attributes",
310
- "input":"</x X>",
311
- "output":[["EndTag", "x"]],
312
- "errors":[
313
- { "code": "end-tag-with-attributes", "line": 1, "col": 6 }
314
- ]},
315
-
316
- {"description":"Duplicate close tag attributes",
317
- "input":"</x x x>",
318
- "output":[["EndTag", "x"]],
319
- "errors":[
320
- { "code": "duplicate-attribute", "line": 1, "col": 8 },
321
- { "code": "end-tag-with-attributes", "line": 1, "col": 8 }
322
- ]},
323
-
324
- {"description":"Permitted slash",
325
- "input":"<br/>",
326
- "output":[["StartTag","br",{},true]]},
327
-
328
- {"description":"Non-permitted slash",
329
- "input":"<xr/>",
330
- "output":[["StartTag","xr",{},true]]},
331
-
332
- {"description":"Permitted slash but in close tag",
333
- "input":"</br/>",
334
- "output":[["EndTag", "br"]],
335
- "errors":[
336
- { "code": "end-tag-with-trailing-solidus", "line": 1, "col": 6 }
337
- ]},
338
-
339
- {"description":"Doctype public case-sensitivity (1)",
340
- "input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
341
- "output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
342
-
343
- {"description":"Doctype public case-sensitivity (2)",
344
- "input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
345
- "output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
346
-
347
- {"description":"Doctype system case-sensitivity (1)",
348
- "input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
349
- "output":[["DOCTYPE", "html", null, "XyZ", true]]},
350
-
351
- {"description":"Doctype system case-sensitivity (2)",
352
- "input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
353
- "output":[["DOCTYPE", "html", null, "xYz", true]]},
354
-
355
- {"description":"U+0000 in lookahead region after non-matching character",
356
- "input":"<!doc>\u0000",
357
- "output":[["Comment", "doc"], ["Character", "\u0000"]],
358
- "errors":[
359
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
360
- { "code": "unexpected-null-character", "line": 1, "col": 7 }
361
- ]},
362
-
363
- {"description":"U+0000 in lookahead region",
364
- "input":"<!doc\u0000",
365
- "output":[["Comment", "doc\uFFFD"]],
366
- "errors":[
367
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
368
- { "code": "unexpected-null-character", "line": 1, "col": 6 }
369
- ]},
370
-
371
- {"description":"U+0080 in lookahead region",
372
- "input":"<!doc\u0080",
373
- "output":[["Comment", "doc\u0080"]],
374
- "errors":[
375
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
376
- { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
377
- ]},
378
-
379
- {"description":"U+FDD1 in lookahead region",
380
- "input":"<!doc\uFDD1",
381
- "output":[["Comment", "doc\uFDD1"]],
382
- "errors":[
383
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
384
- { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
385
- ]},
386
-
387
- {"description":"U+1FFFF in lookahead region",
388
- "input":"<!doc\uD83F\uDFFF",
389
- "output":[["Comment", "doc\uD83F\uDFFF"]],
390
- "errors":[
391
- { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
392
- { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
393
- ]},
394
-
395
- {"description":"CR followed by non-LF",
396
- "input":"\r?",
397
- "output":[["Character", "\n?"]]},
398
-
399
- {"description":"CR at EOF",
400
- "input":"\r",
401
- "output":[["Character", "\n"]]},
402
-
403
- {"description":"LF at EOF",
404
- "input":"\n",
405
- "output":[["Character", "\n"]]},
406
-
407
- {"description":"CR LF",
408
- "input":"\r\n",
409
- "output":[["Character", "\n"]]},
410
-
411
- {"description":"CR CR",
412
- "input":"\r\r",
413
- "output":[["Character", "\n\n"]]},
414
-
415
- {"description":"LF LF",
416
- "input":"\n\n",
417
- "output":[["Character", "\n\n"]]},
418
-
419
- {"description":"LF CR",
420
- "input":"\n\r",
421
- "output":[["Character", "\n\n"]]},
422
-
423
- {"description":"text CR CR CR text",
424
- "input":"text\r\r\rtext",
425
- "output":[["Character", "text\n\n\ntext"]]},
426
-
427
- {"description":"Doctype publik",
428
- "input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
429
- "output":[["DOCTYPE", "html", null, null, false]],
430
- "errors":[
431
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
432
- ]},
433
-
434
- {"description":"Doctype publi",
435
- "input":"<!DOCTYPE html PUBLI",
436
- "output":[["DOCTYPE", "html", null, null, false]],
437
- "errors":[
438
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
439
- ]},
440
-
441
- {"description":"Doctype sistem",
442
- "input":"<!DOCTYPE html SISTEM \"AbC\">",
443
- "output":[["DOCTYPE", "html", null, null, false]],
444
- "errors":[
445
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
446
- ]},
447
-
448
- {"description":"Doctype sys",
449
- "input":"<!DOCTYPE html SYS",
450
- "output":[["DOCTYPE", "html", null, null, false]],
451
- "errors":[
452
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
453
- ]},
454
-
455
- {"description":"Doctype html x>text",
456
- "input":"<!DOCTYPE html x>text",
457
- "output":[["DOCTYPE", "html", null, null, false], ["Character", "text"]],
458
- "errors":[
459
- { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
460
- ]},
461
-
462
- {"description":"Grave accent in unquoted attribute",
463
- "input":"<a a=aa`>",
464
- "output":[["StartTag", "a", {"a":"aa`"}]],
465
- "errors":[
466
- { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 8 }
467
- ]},
468
-
469
- {"description":"EOF in tag name state ",
470
- "input":"<a",
471
- "output":[],
472
- "errors": [
473
- { "code": "eof-in-tag", "line": 1, "col": 3 }
474
- ]},
475
-
476
- {"description":"EOF in before attribute name state",
477
- "input":"<a ",
478
- "output":[],
479
- "errors":[
480
- { "code": "eof-in-tag", "line": 1, "col": 4 }
481
- ]},
482
-
483
- {"description":"EOF in attribute name state",
484
- "input":"<a a",
485
- "output":[],
486
- "errors":[
487
- { "code": "eof-in-tag", "line": 1, "col": 5 }
488
- ]},
489
-
490
- {"description":"EOF in after attribute name state",
491
- "input":"<a a ",
492
- "output":[],
493
- "errors":[
494
- { "code": "eof-in-tag", "line": 1, "col": 6 }
495
- ]},
496
-
497
- {"description":"EOF in before attribute value state",
498
- "input":"<a a =",
499
- "output":[],
500
- "errors":[
501
- { "code": "eof-in-tag", "line": 1, "col": 7 }
502
- ]},
503
-
504
- {"description":"EOF in attribute value (double quoted) state",
505
- "input":"<a a =\"a",
506
- "output":[],
507
- "errors":[
508
- { "code": "eof-in-tag", "line": 1, "col": 9 }
509
- ]},
510
-
511
- {"description":"EOF in attribute value (single quoted) state",
512
- "input":"<a a ='a",
513
- "output":[],
514
- "errors":[
515
- { "code": "eof-in-tag", "line": 1, "col": 9 }
516
- ]},
517
-
518
- {"description":"EOF in attribute value (unquoted) state",
519
- "input":"<a a =a",
520
- "output":[],
521
- "errors":[
522
- { "code": "eof-in-tag", "line": 1, "col": 8 }
523
- ]},
524
-
525
- {"description":"EOF in after attribute value state",
526
- "input":"<a a ='a'",
527
- "output":[],
528
- "errors":[
529
- { "code": "eof-in-tag", "line": 1, "col": 10 }
530
- ]}
531
-
532
- ]}