html-to-markdown 2.24.6 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
- data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
- data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/png/.cargo-checksum.json +1 -1
- data/rust-vendor/png/.cargo_vcs_info.json +1 -1
- data/rust-vendor/png/CHANGES.md +44 -0
- data/rust-vendor/png/Cargo.lock +124 -171
- data/rust-vendor/png/Cargo.toml +1 -1
- data/rust-vendor/png/Cargo.toml.orig +1 -1
- data/rust-vendor/png/benches/expand_paletted.rs +5 -5
- data/rust-vendor/png/benches/unfilter.rs +3 -3
- data/rust-vendor/png/src/adam7.rs +17 -10
- data/rust-vendor/png/src/common.rs +8 -8
- data/rust-vendor/png/src/decoder/mod.rs +53 -20
- data/rust-vendor/png/src/decoder/stream.rs +263 -78
- data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
- data/rust-vendor/png/src/decoder/zlib.rs +130 -90
- data/rust-vendor/png/src/encoder.rs +4 -2
- data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
- data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
- data/rust-vendor/png/src/filter/paeth.rs +398 -0
- data/rust-vendor/png/src/filter/simd.rs +308 -0
- data/rust-vendor/png/src/lib.rs +1 -0
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +7 -177
- data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
- data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
- data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
- data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
- data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
- data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
- data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
- data/rust-vendor/markup5ever_rcdom/README.md +0 -7
- data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
- data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
- data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
- data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
- data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
- data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
- data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
- data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
- data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
- data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
- data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
- data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
- data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
- data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
- data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
- data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
- data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
- data/rust-vendor/xml5ever/Cargo.lock +0 -752
- data/rust-vendor/xml5ever/Cargo.toml +0 -69
- data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
- data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
- data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
- data/rust-vendor/xml5ever/README.md +0 -72
- data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
- data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
- data/rust-vendor/xml5ever/examples/README.md +0 -223
- data/rust-vendor/xml5ever/examples/example.xml +0 -3
- data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
- data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
- data/rust-vendor/xml5ever/src/driver.rs +0 -90
- data/rust-vendor/xml5ever/src/lib.rs +0 -47
- data/rust-vendor/xml5ever/src/macros.rs +0 -18
- data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
- data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
- data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
- data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
- data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
- data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
- data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
- data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
|
@@ -1,283 +0,0 @@
|
|
|
1
|
-
{"tests": [
|
|
2
|
-
|
|
3
|
-
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
|
|
4
|
-
"input":"<h a='¬i;'>",
|
|
5
|
-
"output": [["StartTag", "h", {"a": "¬i;"}]]},
|
|
6
|
-
|
|
7
|
-
{"description": "Entity name followed by the equals sign in an attribute value.",
|
|
8
|
-
"input":"<h a='&lang='>",
|
|
9
|
-
"output": [["StartTag", "h", {"a": "&lang="}]]},
|
|
10
|
-
|
|
11
|
-
{"description": "CR as numeric entity",
|
|
12
|
-
"input":"
",
|
|
13
|
-
"output": ["ParseError", ["Character", "\r"]]},
|
|
14
|
-
|
|
15
|
-
{"description": "CR as hexadecimal numeric entity",
|
|
16
|
-
"input":"
",
|
|
17
|
-
"output": ["ParseError", ["Character", "\r"]]},
|
|
18
|
-
|
|
19
|
-
{"description": "Windows-1252 EURO SIGN numeric entity.",
|
|
20
|
-
"input":"€",
|
|
21
|
-
"output": ["ParseError", ["Character", "\u20AC"]]},
|
|
22
|
-
|
|
23
|
-
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
24
|
-
"input":"",
|
|
25
|
-
"output": ["ParseError", ["Character", "\u0081"]]},
|
|
26
|
-
|
|
27
|
-
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
|
|
28
|
-
"input":"‚",
|
|
29
|
-
"output": ["ParseError", ["Character", "\u201A"]]},
|
|
30
|
-
|
|
31
|
-
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
|
|
32
|
-
"input":"ƒ",
|
|
33
|
-
"output": ["ParseError", ["Character", "\u0192"]]},
|
|
34
|
-
|
|
35
|
-
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
|
|
36
|
-
"input":"„",
|
|
37
|
-
"output": ["ParseError", ["Character", "\u201E"]]},
|
|
38
|
-
|
|
39
|
-
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
|
|
40
|
-
"input":"…",
|
|
41
|
-
"output": ["ParseError", ["Character", "\u2026"]]},
|
|
42
|
-
|
|
43
|
-
{"description": "Windows-1252 DAGGER numeric entity.",
|
|
44
|
-
"input":"†",
|
|
45
|
-
"output": ["ParseError", ["Character", "\u2020"]]},
|
|
46
|
-
|
|
47
|
-
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
|
|
48
|
-
"input":"‡",
|
|
49
|
-
"output": ["ParseError", ["Character", "\u2021"]]},
|
|
50
|
-
|
|
51
|
-
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
|
|
52
|
-
"input":"ˆ",
|
|
53
|
-
"output": ["ParseError", ["Character", "\u02C6"]]},
|
|
54
|
-
|
|
55
|
-
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
|
|
56
|
-
"input":"‰",
|
|
57
|
-
"output": ["ParseError", ["Character", "\u2030"]]},
|
|
58
|
-
|
|
59
|
-
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
|
|
60
|
-
"input":"Š",
|
|
61
|
-
"output": ["ParseError", ["Character", "\u0160"]]},
|
|
62
|
-
|
|
63
|
-
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
|
64
|
-
"input":"‹",
|
|
65
|
-
"output": ["ParseError", ["Character", "\u2039"]]},
|
|
66
|
-
|
|
67
|
-
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
|
|
68
|
-
"input":"Œ",
|
|
69
|
-
"output": ["ParseError", ["Character", "\u0152"]]},
|
|
70
|
-
|
|
71
|
-
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
72
|
-
"input":"",
|
|
73
|
-
"output": ["ParseError", ["Character", "\u008D"]]},
|
|
74
|
-
|
|
75
|
-
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
|
|
76
|
-
"input":"Ž",
|
|
77
|
-
"output": ["ParseError", ["Character", "\u017D"]]},
|
|
78
|
-
|
|
79
|
-
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
80
|
-
"input":"",
|
|
81
|
-
"output": ["ParseError", ["Character", "\u008F"]]},
|
|
82
|
-
|
|
83
|
-
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
84
|
-
"input":"",
|
|
85
|
-
"output": ["ParseError", ["Character", "\u0090"]]},
|
|
86
|
-
|
|
87
|
-
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
|
|
88
|
-
"input":"‘",
|
|
89
|
-
"output": ["ParseError", ["Character", "\u2018"]]},
|
|
90
|
-
|
|
91
|
-
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
|
|
92
|
-
"input":"’",
|
|
93
|
-
"output": ["ParseError", ["Character", "\u2019"]]},
|
|
94
|
-
|
|
95
|
-
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
|
|
96
|
-
"input":"“",
|
|
97
|
-
"output": ["ParseError", ["Character", "\u201C"]]},
|
|
98
|
-
|
|
99
|
-
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
|
|
100
|
-
"input":"”",
|
|
101
|
-
"output": ["ParseError", ["Character", "\u201D"]]},
|
|
102
|
-
|
|
103
|
-
{"description": "Windows-1252 BULLET numeric entity.",
|
|
104
|
-
"input":"•",
|
|
105
|
-
"output": ["ParseError", ["Character", "\u2022"]]},
|
|
106
|
-
|
|
107
|
-
{"description": "Windows-1252 EN DASH numeric entity.",
|
|
108
|
-
"input":"–",
|
|
109
|
-
"output": ["ParseError", ["Character", "\u2013"]]},
|
|
110
|
-
|
|
111
|
-
{"description": "Windows-1252 EM DASH numeric entity.",
|
|
112
|
-
"input":"—",
|
|
113
|
-
"output": ["ParseError", ["Character", "\u2014"]]},
|
|
114
|
-
|
|
115
|
-
{"description": "Windows-1252 SMALL TILDE numeric entity.",
|
|
116
|
-
"input":"˜",
|
|
117
|
-
"output": ["ParseError", ["Character", "\u02DC"]]},
|
|
118
|
-
|
|
119
|
-
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
|
|
120
|
-
"input":"™",
|
|
121
|
-
"output": ["ParseError", ["Character", "\u2122"]]},
|
|
122
|
-
|
|
123
|
-
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
|
|
124
|
-
"input":"š",
|
|
125
|
-
"output": ["ParseError", ["Character", "\u0161"]]},
|
|
126
|
-
|
|
127
|
-
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
|
128
|
-
"input":"›",
|
|
129
|
-
"output": ["ParseError", ["Character", "\u203A"]]},
|
|
130
|
-
|
|
131
|
-
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
|
|
132
|
-
"input":"œ",
|
|
133
|
-
"output": ["ParseError", ["Character", "\u0153"]]},
|
|
134
|
-
|
|
135
|
-
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
136
|
-
"input":"",
|
|
137
|
-
"output": ["ParseError", ["Character", "\u009D"]]},
|
|
138
|
-
|
|
139
|
-
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
|
|
140
|
-
"input":"€",
|
|
141
|
-
"output": ["ParseError", ["Character", "\u20AC"]]},
|
|
142
|
-
|
|
143
|
-
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
144
|
-
"input":"",
|
|
145
|
-
"output": ["ParseError", ["Character", "\u0081"]]},
|
|
146
|
-
|
|
147
|
-
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
|
148
|
-
"input":"‚",
|
|
149
|
-
"output": ["ParseError", ["Character", "\u201A"]]},
|
|
150
|
-
|
|
151
|
-
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
|
|
152
|
-
"input":"ƒ",
|
|
153
|
-
"output": ["ParseError", ["Character", "\u0192"]]},
|
|
154
|
-
|
|
155
|
-
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
|
156
|
-
"input":"„",
|
|
157
|
-
"output": ["ParseError", ["Character", "\u201E"]]},
|
|
158
|
-
|
|
159
|
-
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
|
|
160
|
-
"input":"…",
|
|
161
|
-
"output": ["ParseError", ["Character", "\u2026"]]},
|
|
162
|
-
|
|
163
|
-
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
|
|
164
|
-
"input":"†",
|
|
165
|
-
"output": ["ParseError", ["Character", "\u2020"]]},
|
|
166
|
-
|
|
167
|
-
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
|
|
168
|
-
"input":"‡",
|
|
169
|
-
"output": ["ParseError", ["Character", "\u2021"]]},
|
|
170
|
-
|
|
171
|
-
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
|
|
172
|
-
"input":"ˆ",
|
|
173
|
-
"output": ["ParseError", ["Character", "\u02C6"]]},
|
|
174
|
-
|
|
175
|
-
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
|
|
176
|
-
"input":"‰",
|
|
177
|
-
"output": ["ParseError", ["Character", "\u2030"]]},
|
|
178
|
-
|
|
179
|
-
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
|
|
180
|
-
"input":"Š",
|
|
181
|
-
"output": ["ParseError", ["Character", "\u0160"]]},
|
|
182
|
-
|
|
183
|
-
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
184
|
-
"input":"‹",
|
|
185
|
-
"output": ["ParseError", ["Character", "\u2039"]]},
|
|
186
|
-
|
|
187
|
-
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
|
|
188
|
-
"input":"Œ",
|
|
189
|
-
"output": ["ParseError", ["Character", "\u0152"]]},
|
|
190
|
-
|
|
191
|
-
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
192
|
-
"input":"",
|
|
193
|
-
"output": ["ParseError", ["Character", "\u008D"]]},
|
|
194
|
-
|
|
195
|
-
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
|
|
196
|
-
"input":"Ž",
|
|
197
|
-
"output": ["ParseError", ["Character", "\u017D"]]},
|
|
198
|
-
|
|
199
|
-
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
200
|
-
"input":"",
|
|
201
|
-
"output": ["ParseError", ["Character", "\u008F"]]},
|
|
202
|
-
|
|
203
|
-
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
204
|
-
"input":"",
|
|
205
|
-
"output": ["ParseError", ["Character", "\u0090"]]},
|
|
206
|
-
|
|
207
|
-
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
208
|
-
"input":"‘",
|
|
209
|
-
"output": ["ParseError", ["Character", "\u2018"]]},
|
|
210
|
-
|
|
211
|
-
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
212
|
-
"input":"’",
|
|
213
|
-
"output": ["ParseError", ["Character", "\u2019"]]},
|
|
214
|
-
|
|
215
|
-
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
|
216
|
-
"input":"“",
|
|
217
|
-
"output": ["ParseError", ["Character", "\u201C"]]},
|
|
218
|
-
|
|
219
|
-
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
|
220
|
-
"input":"”",
|
|
221
|
-
"output": ["ParseError", ["Character", "\u201D"]]},
|
|
222
|
-
|
|
223
|
-
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
|
|
224
|
-
"input":"•",
|
|
225
|
-
"output": ["ParseError", ["Character", "\u2022"]]},
|
|
226
|
-
|
|
227
|
-
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
|
|
228
|
-
"input":"–",
|
|
229
|
-
"output": ["ParseError", ["Character", "\u2013"]]},
|
|
230
|
-
|
|
231
|
-
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
|
|
232
|
-
"input":"—",
|
|
233
|
-
"output": ["ParseError", ["Character", "\u2014"]]},
|
|
234
|
-
|
|
235
|
-
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
|
|
236
|
-
"input":"˜",
|
|
237
|
-
"output": ["ParseError", ["Character", "\u02DC"]]},
|
|
238
|
-
|
|
239
|
-
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
|
|
240
|
-
"input":"™",
|
|
241
|
-
"output": ["ParseError", ["Character", "\u2122"]]},
|
|
242
|
-
|
|
243
|
-
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
|
|
244
|
-
"input":"š",
|
|
245
|
-
"output": ["ParseError", ["Character", "\u0161"]]},
|
|
246
|
-
|
|
247
|
-
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
248
|
-
"input":"›",
|
|
249
|
-
"output": ["ParseError", ["Character", "\u203A"]]},
|
|
250
|
-
|
|
251
|
-
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
|
|
252
|
-
"input":"œ",
|
|
253
|
-
"output": ["ParseError", ["Character", "\u0153"]]},
|
|
254
|
-
|
|
255
|
-
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
256
|
-
"input":"",
|
|
257
|
-
"output": ["ParseError", ["Character", "\u009D"]]},
|
|
258
|
-
|
|
259
|
-
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
|
|
260
|
-
"input":"ž",
|
|
261
|
-
"output": ["ParseError", ["Character", "\u017E"]]},
|
|
262
|
-
|
|
263
|
-
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
|
|
264
|
-
"input":"Ÿ",
|
|
265
|
-
"output": ["ParseError", ["Character", "\u0178"]]},
|
|
266
|
-
|
|
267
|
-
{"description": "Decimal numeric entity followed by hex character a.",
|
|
268
|
-
"input":"aa",
|
|
269
|
-
"output": ["ParseError", ["Character", "aa"]]},
|
|
270
|
-
|
|
271
|
-
{"description": "Decimal numeric entity followed by hex character A.",
|
|
272
|
-
"input":"aA",
|
|
273
|
-
"output": ["ParseError", ["Character", "aA"]]},
|
|
274
|
-
|
|
275
|
-
{"description": "Decimal numeric entity followed by hex character f.",
|
|
276
|
-
"input":"af",
|
|
277
|
-
"output": ["ParseError", ["Character", "af"]]},
|
|
278
|
-
|
|
279
|
-
{"description": "Decimal numeric entity followed by hex character A.",
|
|
280
|
-
"input":"aF",
|
|
281
|
-
"output": ["ParseError", ["Character", "aF"]]}
|
|
282
|
-
|
|
283
|
-
]}
|
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
{"tests": [
|
|
2
|
-
|
|
3
|
-
{"description":"Data state EOF",
|
|
4
|
-
"input":"",
|
|
5
|
-
"output":[]
|
|
6
|
-
},
|
|
7
|
-
|
|
8
|
-
{"description":"Tag state EOF",
|
|
9
|
-
"input":"<",
|
|
10
|
-
"output":["ParseError", ["Character", "<"]]
|
|
11
|
-
},
|
|
12
|
-
|
|
13
|
-
{"description":"End tag state premature EOF",
|
|
14
|
-
"input":"</",
|
|
15
|
-
"output":["ParseError", ["Character", "</"]]
|
|
16
|
-
},
|
|
17
|
-
|
|
18
|
-
{"description":"End tag name state premature EOF",
|
|
19
|
-
"input":"</a",
|
|
20
|
-
"output":["ParseError", ["EndTag", "a"]]
|
|
21
|
-
},
|
|
22
|
-
|
|
23
|
-
{"description":"Pi state EOF",
|
|
24
|
-
"input":"<?",
|
|
25
|
-
"output":["ParseError", ["Comment", ""]]
|
|
26
|
-
},
|
|
27
|
-
|
|
28
|
-
{"description":"PI state Target EOF",
|
|
29
|
-
"input":"<?ab",
|
|
30
|
-
"output":["ParseError", ["PI", "ab", ""]]
|
|
31
|
-
},
|
|
32
|
-
|
|
33
|
-
{"description":"PI state Target after EOF",
|
|
34
|
-
"input":"<?ab ",
|
|
35
|
-
"output":["ParseError", ["PI", "ab", ""]]
|
|
36
|
-
},
|
|
37
|
-
|
|
38
|
-
{"description":"PI state Target after EOF with some text",
|
|
39
|
-
"input":"<?ab az",
|
|
40
|
-
"output":["ParseError", ["PI", "ab", "az"]]
|
|
41
|
-
},
|
|
42
|
-
|
|
43
|
-
{"description":"End tag with attributes premature EOF",
|
|
44
|
-
"input":"<a x=test /",
|
|
45
|
-
"output":["ParseError", "ParseError", ["EmptyTag", "a", {"x":"test"}]]
|
|
46
|
-
},
|
|
47
|
-
|
|
48
|
-
{"description":"Comment EOF",
|
|
49
|
-
"input":"<!",
|
|
50
|
-
"output":["ParseError", ["Comment", ""]]
|
|
51
|
-
},
|
|
52
|
-
|
|
53
|
-
{"description":"Comment dash state EOF",
|
|
54
|
-
"input":"<!-",
|
|
55
|
-
"output":["ParseError", ["Comment", "-"]]
|
|
56
|
-
},
|
|
57
|
-
|
|
58
|
-
{"description":"Comment dash state EOF",
|
|
59
|
-
"input":"<!--",
|
|
60
|
-
"output":["ParseError", ["Comment", ""]]
|
|
61
|
-
},
|
|
62
|
-
|
|
63
|
-
{"description":"CDATA state EOF",
|
|
64
|
-
"input":"<![CDATA[",
|
|
65
|
-
"output":["ParseError"]
|
|
66
|
-
},
|
|
67
|
-
|
|
68
|
-
{"description":"CDATA bracket state EOF",
|
|
69
|
-
"input":"<![CDATA[]",
|
|
70
|
-
"output":["ParseError"]
|
|
71
|
-
},
|
|
72
|
-
|
|
73
|
-
{"description":"CDATA bracket state EOF with chars",
|
|
74
|
-
"input":"<![CDATA[ax]",
|
|
75
|
-
"output":[["Character", "ax"], "ParseError"]
|
|
76
|
-
},
|
|
77
|
-
|
|
78
|
-
{"description":"Tag name state EOF",
|
|
79
|
-
"input":"<ab",
|
|
80
|
-
"output":["ParseError", ["StartTag", "ab", {}]]
|
|
81
|
-
},
|
|
82
|
-
|
|
83
|
-
{"description":"Tag name state EOF",
|
|
84
|
-
"input":"<ab ",
|
|
85
|
-
"output":["ParseError", ["StartTag", "ab", {}]]
|
|
86
|
-
},
|
|
87
|
-
|
|
88
|
-
{"description":"Tag attribute name state EOF",
|
|
89
|
-
"input":"<ab xa",
|
|
90
|
-
"output":["ParseError", ["StartTag", "ab", {"xa":""}]]
|
|
91
|
-
},
|
|
92
|
-
|
|
93
|
-
{"description":"Tag attribute name after state EOF",
|
|
94
|
-
"input":"<ab xa=",
|
|
95
|
-
"output":["ParseError", ["StartTag", "ab", {"xa":""}]]
|
|
96
|
-
},
|
|
97
|
-
|
|
98
|
-
{"description":"Tag attribute name state EOF single-quoted",
|
|
99
|
-
"input":"<ab foo='bar'",
|
|
100
|
-
"output":["ParseError", ["StartTag", "ab", {"foo":"bar"}]]
|
|
101
|
-
},
|
|
102
|
-
|
|
103
|
-
{"description":"Tag attribute name state EOF unquoted",
|
|
104
|
-
"input":"<ab foo=bar",
|
|
105
|
-
"output":["ParseError", ["StartTag", "ab", {"foo":"bar"}]]
|
|
106
|
-
},
|
|
107
|
-
|
|
108
|
-
{"description":"Tag attribute name state EOF double-quoted",
|
|
109
|
-
"input":"<ab foo=\"bar\"",
|
|
110
|
-
"output":["ParseError", ["StartTag", "ab", {"foo":"bar"}]]
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
]}
|