html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,41 +0,0 @@
1
- {"tests" : [
2
- {"description": "Invalid Unicode character U+DFFF",
3
- "doubleEscaped":true,
4
- "input": "\\uDFFF",
5
- "output":[["Character", "\\uDFFF"]],
6
- "errors":[
7
- { "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
8
- ]},
9
-
10
- {"description": "Invalid Unicode character U+D800",
11
- "doubleEscaped":true,
12
- "input": "\\uD800",
13
- "output":[["Character", "\\uD800"]],
14
- "errors":[
15
- { "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
16
- ]},
17
-
18
- {"description": "Invalid Unicode character U+DFFF with valid preceding character",
19
- "doubleEscaped":true,
20
- "input": "a\\uDFFF",
21
- "output":[["Character", "a\\uDFFF"]],
22
- "errors":[
23
- { "code": "surrogate-in-input-stream", "line": 1, "col": 2 }
24
- ]},
25
-
26
- {"description": "Invalid Unicode character U+D800 with valid following character",
27
- "doubleEscaped":true,
28
- "input": "\\uD800a",
29
- "output":[["Character", "\\uD800a"]],
30
- "errors":[
31
- { "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
32
- ]},
33
-
34
- {"description":"CR followed by U+0000",
35
- "input":"\r\u0000",
36
- "output":[["Character", "\n\u0000"]],
37
- "errors":[
38
- { "code": "unexpected-null-character", "line": 2, "col": 1 }
39
- ]}
40
- ]
41
- }
@@ -1,20 +0,0 @@
1
- {"xmlViolationTests": [
2
-
3
- {"description":"Non-XML character",
4
- "input":"a\uFFFFb",
5
- "output":[["Character","a\uFFFDb"]]},
6
-
7
- {"description":"Non-XML space",
8
- "input":"a\u000Cb",
9
- "output":[["Character","a b"]]},
10
-
11
- {"description":"Double hyphen in comment",
12
- "input":"<!-- foo -- bar -->",
13
- "output":[["Comment"," foo - - bar "]]},
14
-
15
- {"description":"FF between attributes",
16
- "input":"<a b=''\u000Cc=''>",
17
- "output":[["StartTag","a",{"b":"","c":""}]]}
18
- ]}
19
-
20
-
@@ -1,108 +0,0 @@
1
- Tree Construction Tests
2
- =======================
3
-
4
- Each file containing tree construction tests consists of any number of
5
- tests separated by two newlines (LF) and a single newline before the end
6
- of the file. For instance:
7
-
8
- [TEST]LF
9
- LF
10
- [TEST]LF
11
- LF
12
- [TEST]LF
13
-
14
- Where [TEST] is the following format:
15
-
16
- Each test must begin with a string "\#data" followed by a newline (LF).
17
- All subsequent lines until a line that says "\#errors" are the test data
18
- and must be passed to the system being tested unchanged, except with the
19
- final newline (on the last line) removed.
20
-
21
- Then there must be a line that says "\#errors". It must be followed by
22
- one line per parse error that a conformant checker would return. It
23
- doesn't matter what those lines are, although they can't be
24
- "\#new-errors", "\#document-fragment", "\#document", "\#script-off",
25
- "\#script-on", or empty; the only thing that matters is that there be
26
- the right number of parse errors.
27
-
28
- Then there \*may\* be a line that says "\#new-errors", which works like
29
- the "\#errors" section adding more errors to the expected number of
30
- errors.
31
-
32
- Then there \*may\* be a line that says "\#document-fragment", which must
33
- be followed by a newline (LF), followed by a string of characters that
34
- indicates the context element, followed by a newline (LF). If the string
35
- of characters starts with "svg ", the context element is in the SVG
36
- namespace and the substring after "svg " is the local name. If the
37
- string of characters starts with "math ", the context element is in the
38
- MathML namespace and the substring after "math " is the local name.
39
- Otherwise, the context element is in the HTML namespace and the string
40
- is the local name. If this line is present the "\#data" must be parsed
41
- using the HTML fragment parsing algorithm with the context element as
42
- context.
43
-
44
- Then there \*may\* be a line that says "\#script-off" or
45
- "\#script-on". If a line that says "\#script-off" is present, the
46
- parser must set the scripting flag to disabled. If a line that says
47
- "\#script-on" is present, it must set it to enabled. Otherwise, the
48
- test should be run in both modes.
49
-
50
- Then there must be a line that says "\#document", which must be followed
51
- by a dump of the tree of the parsed DOM. Each node must be represented
52
- by a single line. Each line must start with "| ", followed by two spaces
53
- per parent node that the node has before the root document node.
54
-
55
- - Element nodes must be represented by a "`<`" then the *tag name
56
- string* "`>`", and all the attributes must be given, sorted
57
- lexicographically by UTF-16 code unit according to their *attribute
58
- name string*, on subsequent lines, as if they were children of the
59
- element node.
60
- - Attribute nodes must have the *attribute name string*, then an "="
61
- sign, then the attribute value in double quotes (").
62
- - Text nodes must be the string, in double quotes. Newlines aren't
63
- escaped.
64
- - Comments must be "`<`" then "`!-- `" then the data then "` -->`".
65
- - DOCTYPEs must be "`<!DOCTYPE `" then the name then if either of the
66
- system id or public id is non-empty a space, public id in
67
- double-quotes, another space and the system id in double-quotes, and
68
- then in any case "`>`".
69
- - Processing instructions must be "`<?`", then the target, then a
70
- space, then the data and then "`>`". (The HTML parser cannot emit
71
- processing instructions, but scripts can, and the WebVTT to DOM
72
- rules can emit them.)
73
- - Template contents are represented by the string "content" with the
74
- children below it.
75
-
76
- The *tag name string* is the local name prefixed by a namespace
77
- designator. For the HTML namespace, the namespace designator is the
78
- empty string, i.e. there's no prefix. For the SVG namespace, the
79
- namespace designator is "svg ". For the MathML namespace, the namespace
80
- designator is "math ".
81
-
82
- The *attribute name string* is the local name prefixed by a namespace
83
- designator. For no namespace, the namespace designator is the empty
84
- string, i.e. there's no prefix. For the XLink namespace, the namespace
85
- designator is "xlink ". For the XML namespace, the namespace designator
86
- is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
87
- ". Note the difference between "xlink:href" which is an attribute in no
88
- namespace with the local name "xlink:href" and "xlink href" which is an
89
- attribute in the xlink namespace with the local name "href".
90
-
91
- If there is also a "\#document-fragment" the bit following "\#document"
92
- must be a representation of the HTML fragment serialization for the
93
- context element given by "\#document-fragment".
94
-
95
- For example:
96
-
97
- #data
98
- <p>One<p>Two
99
- #errors
100
- 3: Missing document type declaration
101
- #document
102
- | <html>
103
- | <head>
104
- | <body>
105
- | <p>
106
- | "One"
107
- | <p>
108
- | "Two"
@@ -1,354 +0,0 @@
1
- #data
2
- <a><p></a></p>
3
- #errors
4
- (1,3): expected-doctype-but-got-start-tag
5
- (1,10): adoption-agency-1.3
6
- #document
7
- | <html>
8
- | <head>
9
- | <body>
10
- | <a>
11
- | <p>
12
- | <a>
13
-
14
- #data
15
- <a>1<p>2</a>3</p>
16
- #errors
17
- (1,3): expected-doctype-but-got-start-tag
18
- (1,12): adoption-agency-1.3
19
- #document
20
- | <html>
21
- | <head>
22
- | <body>
23
- | <a>
24
- | "1"
25
- | <p>
26
- | <a>
27
- | "2"
28
- | "3"
29
-
30
- #data
31
- <a>1<button>2</a>3</button>
32
- #errors
33
- (1,3): expected-doctype-but-got-start-tag
34
- (1,17): adoption-agency-1.3
35
- #document
36
- | <html>
37
- | <head>
38
- | <body>
39
- | <a>
40
- | "1"
41
- | <button>
42
- | <a>
43
- | "2"
44
- | "3"
45
-
46
- #data
47
- <a>1<b>2</a>3</b>
48
- #errors
49
- (1,3): expected-doctype-but-got-start-tag
50
- (1,12): adoption-agency-1.3
51
- #document
52
- | <html>
53
- | <head>
54
- | <body>
55
- | <a>
56
- | "1"
57
- | <b>
58
- | "2"
59
- | <b>
60
- | "3"
61
-
62
- #data
63
- <a>1<div>2<div>3</a>4</div>5</div>
64
- #errors
65
- (1,3): expected-doctype-but-got-start-tag
66
- (1,20): adoption-agency-1.3
67
- (1,20): adoption-agency-1.3
68
- #document
69
- | <html>
70
- | <head>
71
- | <body>
72
- | <a>
73
- | "1"
74
- | <div>
75
- | <a>
76
- | "2"
77
- | <div>
78
- | <a>
79
- | "3"
80
- | "4"
81
- | "5"
82
-
83
- #data
84
- <table><a>1<p>2</a>3</p>
85
- #errors
86
- (1,7): expected-doctype-but-got-start-tag
87
- (1,10): unexpected-start-tag-implies-table-voodoo
88
- (1,11): unexpected-character-implies-table-voodoo
89
- (1,14): unexpected-start-tag-implies-table-voodoo
90
- (1,15): unexpected-character-implies-table-voodoo
91
- (1,19): unexpected-end-tag-implies-table-voodoo
92
- (1,19): adoption-agency-1.3
93
- (1,20): unexpected-character-implies-table-voodoo
94
- (1,24): unexpected-end-tag-implies-table-voodoo
95
- (1,24): eof-in-table
96
- #document
97
- | <html>
98
- | <head>
99
- | <body>
100
- | <a>
101
- | "1"
102
- | <p>
103
- | <a>
104
- | "2"
105
- | "3"
106
- | <table>
107
-
108
- #data
109
- <b><b><a><p></a>
110
- #errors
111
- (1,3): expected-doctype-but-got-start-tag
112
- (1,16): adoption-agency-1.3
113
- (1,16): expected-closing-tag-but-got-eof
114
- #document
115
- | <html>
116
- | <head>
117
- | <body>
118
- | <b>
119
- | <b>
120
- | <a>
121
- | <p>
122
- | <a>
123
-
124
- #data
125
- <b><a><b><p></a>
126
- #errors
127
- (1,3): expected-doctype-but-got-start-tag
128
- (1,16): adoption-agency-1.3
129
- (1,16): expected-closing-tag-but-got-eof
130
- #document
131
- | <html>
132
- | <head>
133
- | <body>
134
- | <b>
135
- | <a>
136
- | <b>
137
- | <b>
138
- | <p>
139
- | <a>
140
-
141
- #data
142
- <a><b><b><p></a>
143
- #errors
144
- (1,3): expected-doctype-but-got-start-tag
145
- (1,16): adoption-agency-1.3
146
- (1,16): expected-closing-tag-but-got-eof
147
- #document
148
- | <html>
149
- | <head>
150
- | <body>
151
- | <a>
152
- | <b>
153
- | <b>
154
- | <b>
155
- | <b>
156
- | <p>
157
- | <a>
158
-
159
- #data
160
- <p>1<s id="A">2<b id="B">3</p>4</s>5</b>
161
- #errors
162
- (1,3): expected-doctype-but-got-start-tag
163
- (1,30): unexpected-end-tag
164
- (1,35): adoption-agency-1.3
165
- #document
166
- | <html>
167
- | <head>
168
- | <body>
169
- | <p>
170
- | "1"
171
- | <s>
172
- | id="A"
173
- | "2"
174
- | <b>
175
- | id="B"
176
- | "3"
177
- | <s>
178
- | id="A"
179
- | <b>
180
- | id="B"
181
- | "4"
182
- | <b>
183
- | id="B"
184
- | "5"
185
-
186
- #data
187
- <table><a>1<td>2</td>3</table>
188
- #errors
189
- (1,7): expected-doctype-but-got-start-tag
190
- (1,10): unexpected-start-tag-implies-table-voodoo
191
- (1,11): unexpected-character-implies-table-voodoo
192
- (1,15): unexpected-cell-in-table-body
193
- (1,30): unexpected-implied-end-tag-in-table-view
194
- #document
195
- | <html>
196
- | <head>
197
- | <body>
198
- | <a>
199
- | "1"
200
- | <a>
201
- | "3"
202
- | <table>
203
- | <tbody>
204
- | <tr>
205
- | <td>
206
- | "2"
207
-
208
- #data
209
- <table>A<td>B</td>C</table>
210
- #errors
211
- (1,7): expected-doctype-but-got-start-tag
212
- (1,8): unexpected-character-implies-table-voodoo
213
- (1,12): unexpected-cell-in-table-body
214
- (1,22): unexpected-character-implies-table-voodoo
215
- #document
216
- | <html>
217
- | <head>
218
- | <body>
219
- | "AC"
220
- | <table>
221
- | <tbody>
222
- | <tr>
223
- | <td>
224
- | "B"
225
-
226
- #data
227
- <a><svg><tr><input></a>
228
- #errors
229
- (1,3): expected-doctype-but-got-start-tag
230
- (1,23): unexpected-end-tag
231
- (1,23): adoption-agency-1.3
232
- #document
233
- | <html>
234
- | <head>
235
- | <body>
236
- | <a>
237
- | <svg svg>
238
- | <svg tr>
239
- | <svg input>
240
-
241
- #data
242
- <div><a><b><div><div><div><div><div><div><div><div><div><div></a>
243
- #errors
244
- (1,5): expected-doctype-but-got-start-tag
245
- (1,65): adoption-agency-1.3
246
- (1,65): adoption-agency-1.3
247
- (1,65): adoption-agency-1.3
248
- (1,65): adoption-agency-1.3
249
- (1,65): adoption-agency-1.3
250
- (1,65): adoption-agency-1.3
251
- (1,65): adoption-agency-1.3
252
- (1,65): adoption-agency-1.3
253
- (1,65): expected-closing-tag-but-got-eof
254
- #document
255
- | <html>
256
- | <head>
257
- | <body>
258
- | <div>
259
- | <a>
260
- | <b>
261
- | <b>
262
- | <div>
263
- | <a>
264
- | <div>
265
- | <a>
266
- | <div>
267
- | <a>
268
- | <div>
269
- | <a>
270
- | <div>
271
- | <a>
272
- | <div>
273
- | <a>
274
- | <div>
275
- | <a>
276
- | <div>
277
- | <a>
278
- | <div>
279
- | <div>
280
-
281
- #data
282
- <div><a><b><u><i><code><div></a>
283
- #errors
284
- (1,5): expected-doctype-but-got-start-tag
285
- (1,32): adoption-agency-1.3
286
- (1,32): expected-closing-tag-but-got-eof
287
- #document
288
- | <html>
289
- | <head>
290
- | <body>
291
- | <div>
292
- | <a>
293
- | <b>
294
- | <u>
295
- | <i>
296
- | <code>
297
- | <u>
298
- | <i>
299
- | <code>
300
- | <div>
301
- | <a>
302
-
303
- #data
304
- <b><b><b><b>x</b></b></b></b>y
305
- #errors
306
- (1,3): expected-doctype-but-got-start-tag
307
- #document
308
- | <html>
309
- | <head>
310
- | <body>
311
- | <b>
312
- | <b>
313
- | <b>
314
- | <b>
315
- | "x"
316
- | "y"
317
-
318
- #data
319
- <p><b><b><b><b><p>x
320
- #errors
321
- (1,3): expected-doctype-but-got-start-tag
322
- (1,18): unexpected-end-tag
323
- (1,19): expected-closing-tag-but-got-eof
324
- #document
325
- | <html>
326
- | <head>
327
- | <body>
328
- | <p>
329
- | <b>
330
- | <b>
331
- | <b>
332
- | <b>
333
- | <p>
334
- | <b>
335
- | <b>
336
- | <b>
337
- | "x"
338
-
339
- #data
340
- <b><em><foo><foob><fooc><aside></b></em>
341
- #errors
342
- (1,35): adoption-agency-1.3
343
- (1,40): adoption-agency-1.3
344
- (1,40): expected-closing-tag-but-got-eof
345
- #document-fragment
346
- div
347
- #document
348
- | <b>
349
- | <em>
350
- | <foo>
351
- | <foob>
352
- | <fooc>
353
- | <aside>
354
- | <b>
@@ -1,39 +0,0 @@
1
- #data
2
- <b>1<i>2<p>3</b>4
3
- #errors
4
- (1,3): expected-doctype-but-got-start-tag
5
- (1,16): adoption-agency-1.3
6
- (1,17): expected-closing-tag-but-got-eof
7
- #document
8
- | <html>
9
- | <head>
10
- | <body>
11
- | <b>
12
- | "1"
13
- | <i>
14
- | "2"
15
- | <i>
16
- | <p>
17
- | <b>
18
- | "3"
19
- | "4"
20
-
21
- #data
22
- <a><div><style></style><address><a>
23
- #errors
24
- (1,3): expected-doctype-but-got-start-tag
25
- (1,35): unexpected-start-tag-implies-end-tag
26
- (1,35): adoption-agency-1.3
27
- (1,35): adoption-agency-1.3
28
- (1,35): expected-closing-tag-but-got-eof
29
- #document
30
- | <html>
31
- | <head>
32
- | <body>
33
- | <a>
34
- | <div>
35
- | <a>
36
- | <style>
37
- | <address>
38
- | <a>
39
- | <a>