html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,92 +0,0 @@
1
- Tokenizer tests
2
- ===============
3
-
4
- The test format is [JSON](http://www.json.org/). This has the advantage
5
- that the syntax allows backward-compatible extensions to the tests and
6
- the disadvantage that it is relatively verbose.
7
-
8
- Basic Structure
9
- ---------------
10
-
11
- {"tests": [
12
-     {"description": "Test description",
13
-     "input": "input_string",
14
-     "output": [expected_output_tokens],
15
-     "initialStates": [initial_states],
16
-     "lastStartTag": last_start_tag,
17
-     "ignoreErrorOrder": ignore_error_order
18
-     }
19
- ]}
20
-
21
- Multiple tests per file are allowed simply by adding more objects to the
22
- "tests" list.
23
-
24
- `description`, `input` and `output` are always present. The other values
25
- are optional.
26
-
27
- ### Test set-up
28
-
29
- `test.input` is a string containing the characters to pass to the
30
- tokenizer. Specifically, it represents the characters of the **input
31
- stream**, and so implementations are expected to perform the processing
32
- described in the spec's **Preprocessing the input stream** section
33
- before feeding the result to the tokenizer.
34
-
35
- If `test.doubleEscaped` is present and `true`, then `test.input` is not
36
- quite as described above. Instead, it must first be subjected to another
37
- round of unescaping (i.e., in addition to any unescaping involved in the
38
- JSON import), and the result of *that* represents the characters of the
39
- input stream. Currently, the only unescaping required by this option is
40
- to convert each sequence of the form \\uHHHH (where H is a hex digit)
41
- into the corresponding Unicode code point. (Note that this option also
42
- affects the interpretation of `test.output`.)
43
-
44
- `test.initialStates` is a list of strings, each being the name of a
45
- tokenizer state. The test should be run once for each string, using it
46
- to set the tokenizer's initial state for that run. If
47
- `test.initialStates` is omitted, it defaults to `["data state"]`.
48
-
49
- `test.lastStartTag` is a lowercase string that should be used as "the
50
- tag name of the last start tag to have been emitted from this
51
- tokenizer", referenced in the spec's definition of **appropriate end tag
52
- token**. If it is omitted, it is treated as if "no start tag has been
53
- emitted from this tokenizer".
54
-
55
- ### Test results
56
-
57
- `test.output` is a list of tokens, ordered with the first produced by
58
- the tokenizer the first (leftmost) in the list. The list must match the
59
- **complete** list of tokens that the tokenizer should produce. Valid
60
- tokens are:
61
-
62
- ["DOCTYPE", name, public_id, system_id, correctness]
63
- ["StartTag", name, {attributes}*, true*]
64
- ["StartTag", name, {attributes}]
65
- ["EndTag", name]
66
- ["Comment", data]
67
- ["Character", data]
68
- "ParseError"
69
-
70
- `public_id` and `system_id` are either strings or `null`. `correctness`
71
- is either `true` or `false`; `true` corresponds to the force-quirks flag
72
- being false, and vice-versa.
73
-
74
- When the self-closing flag is set, the `StartTag` array has `true` as
75
- its fourth entry. When the flag is not set, the array has only three
76
- entries for backwards compatibility.
77
-
78
- All adjacent character tokens are coalesced into a single
79
- `["Character", data]` token.
80
-
81
- If `test.doubleEscaped` is present and `true`, then every string within
82
- `test.output` must be further unescaped (as described above) before
83
- comparing with the tokenizer's output.
84
-
85
- `test.ignoreErrorOrder` is a boolean value indicating that the order of
86
- `ParseError` tokens relative to other tokens in the output stream is
87
- unimportant, and implementations should ignore such differences between
88
- their output and `expected_output_tokens`. (This is used for errors
89
- emitted by the input stream preprocessing stage, since it is useful to
90
- test that code but it is undefined when the errors occur). If it is
91
- omitted, it defaults to `false`.
92
-
@@ -1,274 +0,0 @@
1
- {"tests": [
2
-
3
-
4
- {"description":"Comment",
5
- "input":"<!--comment-->",
6
- "output":[["Comment", "comment"]]
7
- },
8
-
9
- {"description":"--Comment",
10
- "input":"<!----comment -->",
11
- "output":[["Comment", "--comment "]]
12
- },
13
-
14
- {"description":"--Comment-",
15
- "input":"<!----comment--->",
16
- "output":[["Comment", "--comment-"]]
17
- },
18
-
19
- {"description":"Error comment --!>",
20
- "input":"<!----!>",
21
- "output":["ParseError", ["Comment", ""]]
22
- },
23
-
24
- {"description":"EOF inside comment",
25
- "input":"<!----!",
26
- "output":["ParseError", ["Comment", ""]]
27
- },
28
-
29
- {"description":"EOF inside comment 2",
30
- "input":"<!----",
31
- "output":["ParseError", ["Comment", ""]]
32
- },
33
-
34
- {"description":"EOF inside comment 3",
35
- "input":"<!--->",
36
- "output":["ParseError", ["Comment", ""]]
37
- },
38
-
39
- {"description":"EOF inside comment 4",
40
- "input":"<!-----",
41
- "output":["ParseError", ["Comment", "-"]]
42
- },
43
-
44
- {"description":"EOF inside comment 5",
45
- "input":"<!-->",
46
- "output":["ParseError", ["Comment", ""]]
47
- },
48
-
49
- {"description":"EOF inside comment 6",
50
- "input":"<!--",
51
- "output":["ParseError", ["Comment", ""]]
52
- },
53
-
54
- {"description":"EOF inside comment 7",
55
- "input":"<!--x",
56
- "output":["ParseError", ["Comment", "x"]]
57
- },
58
-
59
- {"description":"EOF inside comment 8",
60
- "input":"<!--<",
61
- "output":["ParseError", ["Comment", "<"]]
62
- },
63
-
64
- {"description":"EOF inside comment 9",
65
- "input":"<!--<!",
66
- "output":["ParseError", ["Comment", "<!"]]
67
- },
68
-
69
- {"description":"EOF inside comment 10",
70
- "input":"<!--<!-",
71
- "output":["ParseError", ["Comment", "<!"]]
72
- },
73
-
74
- {"description":"EOF inside comment 11",
75
- "input":"<!--<!--",
76
- "output":["ParseError", ["Comment", "<!"]]
77
- },
78
-
79
- {"description":"EOF inside comment 12",
80
- "input":"<!--<!--!",
81
- "output":["ParseError", "ParseError", ["Comment", "<!"]]
82
- },
83
-
84
- {"description":"<!-- inside comment",
85
- "input":"<!--<!--!>",
86
- "output":["ParseError", "ParseError", ["Comment", "<!"]]
87
- },
88
-
89
- {"description":"<!-- inside comment 2",
90
- "input":"<!--<!---",
91
- "output":["ParseError", "ParseError", ["Comment", "<!-"]]
92
- },
93
-
94
- {"description":"<!-- inside comment 3",
95
- "input":"<!--<!--->",
96
- "output":["ParseError", ["Comment", "<!-"]]
97
- },
98
-
99
- {"description":"<!-- inside comment 4",
100
- "input":"<!--<!--x",
101
- "output":["ParseError", "ParseError", ["Comment", "<!--x"]]
102
- },
103
-
104
- {"description":"<!-- inside comment 5",
105
- "input":"<!--<!--x-",
106
- "output":["ParseError", "ParseError", ["Comment", "<!--x"]]
107
- },
108
-
109
- {"description":"<!-- inside comment 6",
110
- "input":"<!--<!--x--",
111
- "output":["ParseError", "ParseError", ["Comment", "<!--x"]]
112
- },
113
-
114
- {"description":"<!-- inside comment 7",
115
- "input":"<!--<!--x-->",
116
- "output":["ParseError", ["Comment", "<!--x"]]
117
- },
118
-
119
- {"description":"<!-- inside comment 8",
120
- "input":"<!--<!-x",
121
- "output":["ParseError", ["Comment", "<!-x"]]
122
- },
123
-
124
- {"description":"<!-- inside comment 9",
125
- "input":"<!--<!-x-",
126
- "output":["ParseError", ["Comment", "<!-x"]]
127
- },
128
-
129
- {"description":"<!-- inside comment 10",
130
- "input":"<!--<!-x--",
131
- "output":["ParseError", ["Comment", "<!-x"]]
132
- },
133
-
134
- {"description":"<!-- inside comment 11",
135
- "input":"<!--<!x",
136
- "output":["ParseError", ["Comment", "<!x"]]
137
- },
138
-
139
- {"description":"<!-- inside comment 12",
140
- "input":"<!--<!x-",
141
- "output":["ParseError", ["Comment", "<!x"]]
142
- },
143
-
144
- {"description":"<!-- inside comment 13",
145
- "input":"<!--<!x--",
146
- "output":["ParseError", ["Comment", "<!x"]]
147
- },
148
-
149
- {"description":"<!-- inside comment 14",
150
- "input":"<!--<<!--x-->",
151
- "output":["ParseError", ["Comment", "<<!--x"]]
152
- },
153
-
154
- {"description":"<!-- inside comment 15",
155
- "input":"<!--<!<!--x-->",
156
- "output":["ParseError", ["Comment", "<!<!--x"]]
157
- },
158
-
159
- {"description":"<!-- inside comment 16",
160
- "input":"<!--<!-<!--x-->",
161
- "output":["ParseError", ["Comment", "<!-<!--x"]]
162
- },
163
-
164
- {"description":"EOF inside comment 13",
165
- "input":"<!----!->",
166
- "output":["ParseError", ["Comment", "--!->"]]
167
- },
168
-
169
- {"description":"EOF inside comment 14",
170
- "input":"<!----!x>",
171
- "output":["ParseError", ["Comment", "--!x>"]]
172
- },
173
-
174
- {"description":"EOF inside comment 15",
175
- "input":"<!-----x>",
176
- "output":["ParseError", ["Comment", "---x>"]]
177
- },
178
-
179
- {"description":"Tiny Bogus Comment",
180
- "input":"<!>",
181
- "output":["ParseError", ["Comment", ""]]
182
- },
183
-
184
- {"description":"<head> in Comment",
185
- "input":"<!--<head>-->",
186
- "output":[["Comment", "<head>"]]
187
- },
188
-
189
- {"description":"Short Bogus Comment",
190
- "input":"<!-->",
191
- "output":["ParseError", ["Comment", ""]]
192
- },
193
-
194
- {"description":"Short Bogus Comment2",
195
- "input":"<!-->test",
196
- "output":["ParseError", ["Comment", ""], ["Character", "test"]]
197
- },
198
-
199
- {"description":"Comments 1",
200
- "input":"<!----!-->",
201
- "output":[["Comment", "--!"]]
202
- },
203
-
204
- {"description":"Comments 2",
205
- "input":"<!----!x-->",
206
- "output":[["Comment", "--!x"]]
207
- },
208
-
209
- {"description":"Comments 3",
210
- "input":"<!----->",
211
- "output":[["Comment", "-"]]
212
- },
213
-
214
- {"description":"Comments 4",
215
- "input":"<!-----x-->",
216
- "output":[["Comment", "---x"]]
217
- },
218
-
219
- {"description":"Comments 5",
220
- "input":"<!--x-->",
221
- "output":[["Comment", "x"]]
222
- },
223
-
224
- {"description":"Comments 6",
225
- "input":"<!--<!-x-->",
226
- "output":[["Comment", "<!-x"]]
227
- },
228
-
229
- {"description":"Comments 7",
230
- "input":"<!--<!x-->",
231
- "output":[["Comment", "<!x"]]
232
- },
233
-
234
- {"description":"Comments 8",
235
- "input":"<!--<<!x-->",
236
- "output":[["Comment", "<<!x"]]
237
- },
238
-
239
- {"description":"Comments 9",
240
- "input":"<!--<<!-x-->",
241
- "output":[["Comment", "<<!-x"]]
242
- },
243
-
244
- {"description":"Comments 10",
245
- "input":"<!--<x-->",
246
- "output":[["Comment", "<x"]]
247
- },
248
-
249
- {"description":"Comments 11",
250
- "input":"<!--<>-->",
251
- "output":[["Comment", "<>"]]
252
- },
253
-
254
- {"description":"Comments 12",
255
- "input":"<!--<-->",
256
- "output":[["Comment", "<"]]
257
- },
258
-
259
- {"description":"Comments 13",
260
- "input":"<!--<--->",
261
- "output":[["Comment", "<-"]]
262
- },
263
-
264
- {"description":"Comments 13",
265
- "input":"<!--<!-->",
266
- "output":[["Comment", "<!"]]
267
- },
268
-
269
- {"description":"Comments long",
270
- "input":"<!---->",
271
- "output":[["Comment", ""]]
272
- }
273
-
274
- ]}