html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,283 +0,0 @@
1
- {"tests": [
2
-
3
- {"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
4
- "input":"<h a='&noti;'>",
5
- "output": [["StartTag", "h", {"a": "&noti;"}]]},
6
-
7
- {"description": "Entity name followed by the equals sign in an attribute value.",
8
- "input":"<h a='&lang='>",
9
- "output": [["StartTag", "h", {"a": "&lang="}]]},
10
-
11
- {"description": "CR as numeric entity",
12
- "input":"&#013;",
13
- "output": ["ParseError", ["Character", "\r"]]},
14
-
15
- {"description": "CR as hexadecimal numeric entity",
16
- "input":"&#x00D;",
17
- "output": ["ParseError", ["Character", "\r"]]},
18
-
19
- {"description": "Windows-1252 EURO SIGN numeric entity.",
20
- "input":"&#0128;",
21
- "output": ["ParseError", ["Character", "\u20AC"]]},
22
-
23
- {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
24
- "input":"&#0129;",
25
- "output": ["ParseError", ["Character", "\u0081"]]},
26
-
27
- {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
28
- "input":"&#0130;",
29
- "output": ["ParseError", ["Character", "\u201A"]]},
30
-
31
- {"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
32
- "input":"&#0131;",
33
- "output": ["ParseError", ["Character", "\u0192"]]},
34
-
35
- {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
36
- "input":"&#0132;",
37
- "output": ["ParseError", ["Character", "\u201E"]]},
38
-
39
- {"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
40
- "input":"&#0133;",
41
- "output": ["ParseError", ["Character", "\u2026"]]},
42
-
43
- {"description": "Windows-1252 DAGGER numeric entity.",
44
- "input":"&#0134;",
45
- "output": ["ParseError", ["Character", "\u2020"]]},
46
-
47
- {"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
48
- "input":"&#0135;",
49
- "output": ["ParseError", ["Character", "\u2021"]]},
50
-
51
- {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
52
- "input":"&#0136;",
53
- "output": ["ParseError", ["Character", "\u02C6"]]},
54
-
55
- {"description": "Windows-1252 PER MILLE SIGN numeric entity.",
56
- "input":"&#0137;",
57
- "output": ["ParseError", ["Character", "\u2030"]]},
58
-
59
- {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
60
- "input":"&#0138;",
61
- "output": ["ParseError", ["Character", "\u0160"]]},
62
-
63
- {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
64
- "input":"&#0139;",
65
- "output": ["ParseError", ["Character", "\u2039"]]},
66
-
67
- {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
68
- "input":"&#0140;",
69
- "output": ["ParseError", ["Character", "\u0152"]]},
70
-
71
- {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
72
- "input":"&#0141;",
73
- "output": ["ParseError", ["Character", "\u008D"]]},
74
-
75
- {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
76
- "input":"&#0142;",
77
- "output": ["ParseError", ["Character", "\u017D"]]},
78
-
79
- {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
80
- "input":"&#0143;",
81
- "output": ["ParseError", ["Character", "\u008F"]]},
82
-
83
- {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
84
- "input":"&#0144;",
85
- "output": ["ParseError", ["Character", "\u0090"]]},
86
-
87
- {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
88
- "input":"&#0145;",
89
- "output": ["ParseError", ["Character", "\u2018"]]},
90
-
91
- {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
92
- "input":"&#0146;",
93
- "output": ["ParseError", ["Character", "\u2019"]]},
94
-
95
- {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
96
- "input":"&#0147;",
97
- "output": ["ParseError", ["Character", "\u201C"]]},
98
-
99
- {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
100
- "input":"&#0148;",
101
- "output": ["ParseError", ["Character", "\u201D"]]},
102
-
103
- {"description": "Windows-1252 BULLET numeric entity.",
104
- "input":"&#0149;",
105
- "output": ["ParseError", ["Character", "\u2022"]]},
106
-
107
- {"description": "Windows-1252 EN DASH numeric entity.",
108
- "input":"&#0150;",
109
- "output": ["ParseError", ["Character", "\u2013"]]},
110
-
111
- {"description": "Windows-1252 EM DASH numeric entity.",
112
- "input":"&#0151;",
113
- "output": ["ParseError", ["Character", "\u2014"]]},
114
-
115
- {"description": "Windows-1252 SMALL TILDE numeric entity.",
116
- "input":"&#0152;",
117
- "output": ["ParseError", ["Character", "\u02DC"]]},
118
-
119
- {"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
120
- "input":"&#0153;",
121
- "output": ["ParseError", ["Character", "\u2122"]]},
122
-
123
- {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
124
- "input":"&#0154;",
125
- "output": ["ParseError", ["Character", "\u0161"]]},
126
-
127
- {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
128
- "input":"&#0155;",
129
- "output": ["ParseError", ["Character", "\u203A"]]},
130
-
131
- {"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
132
- "input":"&#0156;",
133
- "output": ["ParseError", ["Character", "\u0153"]]},
134
-
135
- {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
136
- "input":"&#0157;",
137
- "output": ["ParseError", ["Character", "\u009D"]]},
138
-
139
- {"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
140
- "input":"&#x080;",
141
- "output": ["ParseError", ["Character", "\u20AC"]]},
142
-
143
- {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
144
- "input":"&#x081;",
145
- "output": ["ParseError", ["Character", "\u0081"]]},
146
-
147
- {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
148
- "input":"&#x082;",
149
- "output": ["ParseError", ["Character", "\u201A"]]},
150
-
151
- {"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
152
- "input":"&#x083;",
153
- "output": ["ParseError", ["Character", "\u0192"]]},
154
-
155
- {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
156
- "input":"&#x084;",
157
- "output": ["ParseError", ["Character", "\u201E"]]},
158
-
159
- {"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
160
- "input":"&#x085;",
161
- "output": ["ParseError", ["Character", "\u2026"]]},
162
-
163
- {"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
164
- "input":"&#x086;",
165
- "output": ["ParseError", ["Character", "\u2020"]]},
166
-
167
- {"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
168
- "input":"&#x087;",
169
- "output": ["ParseError", ["Character", "\u2021"]]},
170
-
171
- {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
172
- "input":"&#x088;",
173
- "output": ["ParseError", ["Character", "\u02C6"]]},
174
-
175
- {"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
176
- "input":"&#x089;",
177
- "output": ["ParseError", ["Character", "\u2030"]]},
178
-
179
- {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
180
- "input":"&#x08A;",
181
- "output": ["ParseError", ["Character", "\u0160"]]},
182
-
183
- {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
184
- "input":"&#x08B;",
185
- "output": ["ParseError", ["Character", "\u2039"]]},
186
-
187
- {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
188
- "input":"&#x08C;",
189
- "output": ["ParseError", ["Character", "\u0152"]]},
190
-
191
- {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
192
- "input":"&#x08D;",
193
- "output": ["ParseError", ["Character", "\u008D"]]},
194
-
195
- {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
196
- "input":"&#x08E;",
197
- "output": ["ParseError", ["Character", "\u017D"]]},
198
-
199
- {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
200
- "input":"&#x08F;",
201
- "output": ["ParseError", ["Character", "\u008F"]]},
202
-
203
- {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
204
- "input":"&#x090;",
205
- "output": ["ParseError", ["Character", "\u0090"]]},
206
-
207
- {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
208
- "input":"&#x091;",
209
- "output": ["ParseError", ["Character", "\u2018"]]},
210
-
211
- {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
212
- "input":"&#x092;",
213
- "output": ["ParseError", ["Character", "\u2019"]]},
214
-
215
- {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
216
- "input":"&#x093;",
217
- "output": ["ParseError", ["Character", "\u201C"]]},
218
-
219
- {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
220
- "input":"&#x094;",
221
- "output": ["ParseError", ["Character", "\u201D"]]},
222
-
223
- {"description": "Windows-1252 BULLET hexadecimal numeric entity.",
224
- "input":"&#x095;",
225
- "output": ["ParseError", ["Character", "\u2022"]]},
226
-
227
- {"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
228
- "input":"&#x096;",
229
- "output": ["ParseError", ["Character", "\u2013"]]},
230
-
231
- {"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
232
- "input":"&#x097;",
233
- "output": ["ParseError", ["Character", "\u2014"]]},
234
-
235
- {"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
236
- "input":"&#x098;",
237
- "output": ["ParseError", ["Character", "\u02DC"]]},
238
-
239
- {"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
240
- "input":"&#x099;",
241
- "output": ["ParseError", ["Character", "\u2122"]]},
242
-
243
- {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
244
- "input":"&#x09A;",
245
- "output": ["ParseError", ["Character", "\u0161"]]},
246
-
247
- {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
248
- "input":"&#x09B;",
249
- "output": ["ParseError", ["Character", "\u203A"]]},
250
-
251
- {"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
252
- "input":"&#x09C;",
253
- "output": ["ParseError", ["Character", "\u0153"]]},
254
-
255
- {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
256
- "input":"&#x09D;",
257
- "output": ["ParseError", ["Character", "\u009D"]]},
258
-
259
- {"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
260
- "input":"&#x09E;",
261
- "output": ["ParseError", ["Character", "\u017E"]]},
262
-
263
- {"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
264
- "input":"&#x09F;",
265
- "output": ["ParseError", ["Character", "\u0178"]]},
266
-
267
- {"description": "Decimal numeric entity followed by hex character a.",
268
- "input":"&#97a",
269
- "output": ["ParseError", ["Character", "aa"]]},
270
-
271
- {"description": "Decimal numeric entity followed by hex character A.",
272
- "input":"&#97A",
273
- "output": ["ParseError", ["Character", "aA"]]},
274
-
275
- {"description": "Decimal numeric entity followed by hex character f.",
276
- "input":"&#97f",
277
- "output": ["ParseError", ["Character", "af"]]},
278
-
279
- {"description": "Decimal numeric entity followed by hex character A.",
280
- "input":"&#97F",
281
- "output": ["ParseError", ["Character", "aF"]]}
282
-
283
- ]}
@@ -1,113 +0,0 @@
1
- {"tests": [
2
-
3
- {"description":"Data state EOF",
4
- "input":"",
5
- "output":[]
6
- },
7
-
8
- {"description":"Tag state EOF",
9
- "input":"<",
10
- "output":["ParseError", ["Character", "<"]]
11
- },
12
-
13
- {"description":"End tag state premature EOF",
14
- "input":"</",
15
- "output":["ParseError", ["Character", "</"]]
16
- },
17
-
18
- {"description":"End tag name state premature EOF",
19
- "input":"</a",
20
- "output":["ParseError", ["EndTag", "a"]]
21
- },
22
-
23
- {"description":"Pi state EOF",
24
- "input":"<?",
25
- "output":["ParseError", ["Comment", ""]]
26
- },
27
-
28
- {"description":"PI state Target EOF",
29
- "input":"<?ab",
30
- "output":["ParseError", ["PI", "ab", ""]]
31
- },
32
-
33
- {"description":"PI state Target after EOF",
34
- "input":"<?ab ",
35
- "output":["ParseError", ["PI", "ab", ""]]
36
- },
37
-
38
- {"description":"PI state Target after EOF with some text",
39
- "input":"<?ab az",
40
- "output":["ParseError", ["PI", "ab", "az"]]
41
- },
42
-
43
- {"description":"End tag with attributes premature EOF",
44
- "input":"<a x=test /",
45
- "output":["ParseError", "ParseError", ["EmptyTag", "a", {"x":"test"}]]
46
- },
47
-
48
- {"description":"Comment EOF",
49
- "input":"<!",
50
- "output":["ParseError", ["Comment", ""]]
51
- },
52
-
53
- {"description":"Comment dash state EOF",
54
- "input":"<!-",
55
- "output":["ParseError", ["Comment", "-"]]
56
- },
57
-
58
- {"description":"Comment dash state EOF",
59
- "input":"<!--",
60
- "output":["ParseError", ["Comment", ""]]
61
- },
62
-
63
- {"description":"CDATA state EOF",
64
- "input":"<![CDATA[",
65
- "output":["ParseError"]
66
- },
67
-
68
- {"description":"CDATA bracket state EOF",
69
- "input":"<![CDATA[]",
70
- "output":["ParseError"]
71
- },
72
-
73
- {"description":"CDATA bracket state EOF with chars",
74
- "input":"<![CDATA[ax]",
75
- "output":[["Character", "ax"], "ParseError"]
76
- },
77
-
78
- {"description":"Tag name state EOF",
79
- "input":"<ab",
80
- "output":["ParseError", ["StartTag", "ab", {}]]
81
- },
82
-
83
- {"description":"Tag name state EOF",
84
- "input":"<ab ",
85
- "output":["ParseError", ["StartTag", "ab", {}]]
86
- },
87
-
88
- {"description":"Tag attribute name state EOF",
89
- "input":"<ab xa",
90
- "output":["ParseError", ["StartTag", "ab", {"xa":""}]]
91
- },
92
-
93
- {"description":"Tag attribute name after state EOF",
94
- "input":"<ab xa=",
95
- "output":["ParseError", ["StartTag", "ab", {"xa":""}]]
96
- },
97
-
98
- {"description":"Tag attribute name state EOF single-quoted",
99
- "input":"<ab foo='bar'",
100
- "output":["ParseError", ["StartTag", "ab", {"foo":"bar"}]]
101
- },
102
-
103
- {"description":"Tag attribute name state EOF unquoted",
104
- "input":"<ab foo=bar",
105
- "output":["ParseError", ["StartTag", "ab", {"foo":"bar"}]]
106
- },
107
-
108
- {"description":"Tag attribute name state EOF double-quoted",
109
- "input":"<ab foo=\"bar\"",
110
- "output":["ParseError", ["StartTag", "ab", {"foo":"bar"}]]
111
- }
112
-
113
- ]}