html-to-markdown 2.24.6 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
- data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
- data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/png/.cargo-checksum.json +1 -1
- data/rust-vendor/png/.cargo_vcs_info.json +1 -1
- data/rust-vendor/png/CHANGES.md +44 -0
- data/rust-vendor/png/Cargo.lock +124 -171
- data/rust-vendor/png/Cargo.toml +1 -1
- data/rust-vendor/png/Cargo.toml.orig +1 -1
- data/rust-vendor/png/benches/expand_paletted.rs +5 -5
- data/rust-vendor/png/benches/unfilter.rs +3 -3
- data/rust-vendor/png/src/adam7.rs +17 -10
- data/rust-vendor/png/src/common.rs +8 -8
- data/rust-vendor/png/src/decoder/mod.rs +53 -20
- data/rust-vendor/png/src/decoder/stream.rs +263 -78
- data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
- data/rust-vendor/png/src/decoder/zlib.rs +130 -90
- data/rust-vendor/png/src/encoder.rs +4 -2
- data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
- data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
- data/rust-vendor/png/src/filter/paeth.rs +398 -0
- data/rust-vendor/png/src/filter/simd.rs +308 -0
- data/rust-vendor/png/src/lib.rs +1 -0
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +7 -177
- data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
- data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
- data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
- data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
- data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
- data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
- data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
- data/rust-vendor/markup5ever_rcdom/README.md +0 -7
- data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
- data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
- data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
- data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
- data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
- data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
- data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
- data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
- data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
- data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
- data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
- data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
- data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
- data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
- data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
- data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
- data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
- data/rust-vendor/xml5ever/Cargo.lock +0 -752
- data/rust-vendor/xml5ever/Cargo.toml +0 -69
- data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
- data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
- data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
- data/rust-vendor/xml5ever/README.md +0 -72
- data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
- data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
- data/rust-vendor/xml5ever/examples/README.md +0 -223
- data/rust-vendor/xml5ever/examples/example.xml +0 -3
- data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
- data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
- data/rust-vendor/xml5ever/src/driver.rs +0 -90
- data/rust-vendor/xml5ever/src/lib.rs +0 -47
- data/rust-vendor/xml5ever/src/macros.rs +0 -18
- data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
- data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
- data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
- data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
- data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
- data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
- data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
- data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
{"tests":[
|
|
2
|
-
|
|
3
|
-
{"description": "quote_char=\"'\"",
|
|
4
|
-
"options": {"quote_char": "'"},
|
|
5
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
|
|
6
|
-
"expected": ["<span title='test &#39;with&#39; quote_char'>"]
|
|
7
|
-
},
|
|
8
|
-
|
|
9
|
-
{"description": "quote_attr_values=true",
|
|
10
|
-
"options": {"quote_attr_values": true},
|
|
11
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
|
|
12
|
-
"expected": ["<button disabled>"],
|
|
13
|
-
"xhtml": ["<button disabled=\"disabled\">"]
|
|
14
|
-
},
|
|
15
|
-
|
|
16
|
-
{"description": "quote_attr_values=true with irrelevant",
|
|
17
|
-
"options": {"quote_attr_values": true},
|
|
18
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
|
|
19
|
-
"expected": ["<div irrelevant>"],
|
|
20
|
-
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
|
21
|
-
},
|
|
22
|
-
|
|
23
|
-
{"description": "use_trailing_solidus=true with void element",
|
|
24
|
-
"options": {"use_trailing_solidus": true},
|
|
25
|
-
"input": [["EmptyTag", "img", {}]],
|
|
26
|
-
"expected": ["<img />"]
|
|
27
|
-
},
|
|
28
|
-
|
|
29
|
-
{"description": "use_trailing_solidus=true with non-void element",
|
|
30
|
-
"options": {"use_trailing_solidus": true},
|
|
31
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
|
|
32
|
-
"expected": ["<div>"]
|
|
33
|
-
},
|
|
34
|
-
|
|
35
|
-
{"description": "minimize_boolean_attributes=false",
|
|
36
|
-
"options": {"minimize_boolean_attributes": false},
|
|
37
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
|
|
38
|
-
"expected": ["<div irrelevant=irrelevant>"],
|
|
39
|
-
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
|
40
|
-
},
|
|
41
|
-
|
|
42
|
-
{"description": "minimize_boolean_attributes=false with empty value",
|
|
43
|
-
"options": {"minimize_boolean_attributes": false},
|
|
44
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
|
|
45
|
-
"expected": ["<div irrelevant=\"\">"]
|
|
46
|
-
},
|
|
47
|
-
|
|
48
|
-
{"description": "escape less than signs in attribute values",
|
|
49
|
-
"options": {"escape_lt_in_attrs": true},
|
|
50
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
|
|
51
|
-
"expected": ["<a title=\"a&lt;b>c&amp;d\">"]
|
|
52
|
-
},
|
|
53
|
-
|
|
54
|
-
{"description": "rcdata",
|
|
55
|
-
"options": {"escape_rcdata": true},
|
|
56
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
|
|
57
|
-
"expected": ["<script>a&lt;b&gt;c&amp;d"]
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
]}
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
{"tests": [
|
|
2
|
-
|
|
3
|
-
{"description": "bare text with leading spaces",
|
|
4
|
-
"options": {"strip_whitespace": true},
|
|
5
|
-
"input": [["Characters", "\t\r\n\u000C foo"]],
|
|
6
|
-
"expected": [" foo"]
|
|
7
|
-
},
|
|
8
|
-
|
|
9
|
-
{"description": "bare text with trailing spaces",
|
|
10
|
-
"options": {"strip_whitespace": true},
|
|
11
|
-
"input": [["Characters", "foo \t\r\n\u000C"]],
|
|
12
|
-
"expected": ["foo "]
|
|
13
|
-
},
|
|
14
|
-
|
|
15
|
-
{"description": "bare text with inner spaces",
|
|
16
|
-
"options": {"strip_whitespace": true},
|
|
17
|
-
"input": [["Characters", "foo \t\r\n\u000C bar"]],
|
|
18
|
-
"expected": ["foo bar"]
|
|
19
|
-
},
|
|
20
|
-
|
|
21
|
-
{"description": "text within <pre>",
|
|
22
|
-
"options": {"strip_whitespace": true},
|
|
23
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
|
|
24
|
-
"expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
|
|
25
|
-
},
|
|
26
|
-
|
|
27
|
-
{"description": "text within <pre>, with inner markup",
|
|
28
|
-
"options": {"strip_whitespace": true},
|
|
29
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
|
|
30
|
-
"expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
|
|
31
|
-
},
|
|
32
|
-
|
|
33
|
-
{"description": "text within <textarea>",
|
|
34
|
-
"options": {"strip_whitespace": true},
|
|
35
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
|
|
36
|
-
"expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
|
|
37
|
-
},
|
|
38
|
-
|
|
39
|
-
{"description": "text within <script>",
|
|
40
|
-
"options": {"strip_whitespace": true},
|
|
41
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
|
|
42
|
-
"expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
|
|
43
|
-
},
|
|
44
|
-
|
|
45
|
-
{"description": "text within <style>",
|
|
46
|
-
"options": {"strip_whitespace": true},
|
|
47
|
-
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
|
|
48
|
-
"expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
]}
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
Tokenizer tests
|
|
2
|
-
===============
|
|
3
|
-
|
|
4
|
-
The test format is [JSON](http://www.json.org/). This has the advantage
|
|
5
|
-
that the syntax allows backward-compatible extensions to the tests and
|
|
6
|
-
the disadvantage that it is relatively verbose.
|
|
7
|
-
|
|
8
|
-
Basic Structure
|
|
9
|
-
---------------
|
|
10
|
-
|
|
11
|
-
{"tests": [
|
|
12
|
-
{"description": "Test description",
|
|
13
|
-
"input": "input_string",
|
|
14
|
-
"output": [expected_output_tokens],
|
|
15
|
-
"initialStates": [initial_states],
|
|
16
|
-
"lastStartTag": last_start_tag,
|
|
17
|
-
"errors": [parse_errors]
|
|
18
|
-
}
|
|
19
|
-
]}
|
|
20
|
-
|
|
21
|
-
Multiple tests per file are allowed simply by adding more objects to the
|
|
22
|
-
"tests" list.
|
|
23
|
-
|
|
24
|
-
Each parse error is an object that contains error `code` and one-based
|
|
25
|
-
error location indices: `line` and `col`.
|
|
26
|
-
|
|
27
|
-
`description`, `input` and `output` are always present. The other values
|
|
28
|
-
are optional.
|
|
29
|
-
|
|
30
|
-
### Test set-up
|
|
31
|
-
|
|
32
|
-
`test.input` is a string containing the characters to pass to the
|
|
33
|
-
tokenizer. Specifically, it represents the characters of the **input
|
|
34
|
-
stream**, and so implementations are expected to perform the processing
|
|
35
|
-
described in the spec's **Preprocessing the input stream** section
|
|
36
|
-
before feeding the result to the tokenizer.
|
|
37
|
-
|
|
38
|
-
If `test.doubleEscaped` is present and `true`, then `test.input` is not
|
|
39
|
-
quite as described above. Instead, it must first be subjected to another
|
|
40
|
-
round of unescaping (i.e., in addition to any unescaping involved in the
|
|
41
|
-
JSON import), and the result of *that* represents the characters of the
|
|
42
|
-
input stream. Currently, the only unescaping required by this option is
|
|
43
|
-
to convert each sequence of the form \\uHHHH (where H is a hex digit)
|
|
44
|
-
into the corresponding Unicode code point. (Note that this option also
|
|
45
|
-
affects the interpretation of `test.output`.)
|
|
46
|
-
|
|
47
|
-
`test.initialStates` is a list of strings, each being the name of a
|
|
48
|
-
tokenizer state which can be one of the following:
|
|
49
|
-
|
|
50
|
-
- `Data state`
|
|
51
|
-
- `PLAINTEXT state`
|
|
52
|
-
- `RCDATA state`
|
|
53
|
-
- `RAWTEXT state`
|
|
54
|
-
- `Script data state`
|
|
55
|
-
- `CDATA section state`
|
|
56
|
-
|
|
57
|
-
The test should be run once for each string, using it
|
|
58
|
-
to set the tokenizer's initial state for that run. If
|
|
59
|
-
`test.initialStates` is omitted, it defaults to `["Data state"]`.
|
|
60
|
-
|
|
61
|
-
`test.lastStartTag` is a lowercase string that should be used as "the
|
|
62
|
-
tag name of the last start tag to have been emitted from this
|
|
63
|
-
tokenizer", referenced in the spec's definition of **appropriate end tag
|
|
64
|
-
token**. If it is omitted, it is treated as if "no start tag has been
|
|
65
|
-
emitted from this tokenizer".
|
|
66
|
-
|
|
67
|
-
### Test results
|
|
68
|
-
|
|
69
|
-
`test.output` is a list of tokens, ordered with the first produced by
|
|
70
|
-
the tokenizer the first (leftmost) in the list. The list must match the
|
|
71
|
-
**complete** list of tokens that the tokenizer should produce. Valid
|
|
72
|
-
tokens are:
|
|
73
|
-
|
|
74
|
-
["DOCTYPE", name, public_id, system_id, correctness]
|
|
75
|
-
["StartTag", name, {attributes}*, true*]
|
|
76
|
-
["StartTag", name, {attributes}]
|
|
77
|
-
["EndTag", name]
|
|
78
|
-
["Comment", data]
|
|
79
|
-
["Character", data]
|
|
80
|
-
|
|
81
|
-
`public_id` and `system_id` are either strings or `null`. `correctness`
|
|
82
|
-
is either `true` or `false`; `true` corresponds to the force-quirks flag
|
|
83
|
-
being false, and vice-versa.
|
|
84
|
-
|
|
85
|
-
When the self-closing flag is set, the `StartTag` array has `true` as
|
|
86
|
-
its fourth entry. When the flag is not set, the array has only three
|
|
87
|
-
entries for backwards compatibility.
|
|
88
|
-
|
|
89
|
-
All adjacent character tokens are coalesced into a single
|
|
90
|
-
`["Character", data]` token.
|
|
91
|
-
|
|
92
|
-
If `test.doubleEscaped` is present and `true`, then every string within
|
|
93
|
-
`test.output` must be further unescaped (as described above) before
|
|
94
|
-
comparing with the tokenizer's output.
|
|
95
|
-
|
|
96
|
-
xmlViolation tests
|
|
97
|
-
------------------
|
|
98
|
-
|
|
99
|
-
`tokenizer/xmlViolation.test` differs from the above in a couple of
|
|
100
|
-
ways:
|
|
101
|
-
|
|
102
|
-
- The name of the single member of the top-level JSON object is
|
|
103
|
-
"xmlViolationTests" instead of "tests".
|
|
104
|
-
- Each test's expected output assumes that the implementation is applying
|
|
105
|
-
the tweaks given in the spec's "Coercing an HTML DOM into an
|
|
106
|
-
infoset" section.
|
|
107
|
-
|
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
{"tests": [
|
|
2
|
-
|
|
3
|
-
{"description":"PLAINTEXT content model flag",
|
|
4
|
-
"initialStates":["PLAINTEXT state"],
|
|
5
|
-
"lastStartTag":"plaintext",
|
|
6
|
-
"input":"<head>&body;",
|
|
7
|
-
"output":[["Character", "<head>&body;"]]},
|
|
8
|
-
|
|
9
|
-
{"description":"PLAINTEXT with seeming close tag",
|
|
10
|
-
"initialStates":["PLAINTEXT state"],
|
|
11
|
-
"lastStartTag":"plaintext",
|
|
12
|
-
"input":"</plaintext>&body;",
|
|
13
|
-
"output":[["Character", "</plaintext>&body;"]]},
|
|
14
|
-
|
|
15
|
-
{"description":"End tag closing RCDATA or RAWTEXT",
|
|
16
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
17
|
-
"lastStartTag":"xmp",
|
|
18
|
-
"input":"foo</xmp>",
|
|
19
|
-
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
|
20
|
-
|
|
21
|
-
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
|
|
22
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
23
|
-
"lastStartTag":"xmp",
|
|
24
|
-
"input":"foo</xMp>",
|
|
25
|
-
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
|
26
|
-
|
|
27
|
-
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
|
|
28
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
29
|
-
"lastStartTag":"xmp",
|
|
30
|
-
"input":"foo</xmp ",
|
|
31
|
-
"output":[["Character", "foo"]],
|
|
32
|
-
"errors":[
|
|
33
|
-
{ "code": "eof-in-tag", "line": 1, "col": 10 }
|
|
34
|
-
]},
|
|
35
|
-
|
|
36
|
-
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
|
|
37
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
38
|
-
"lastStartTag":"xmp",
|
|
39
|
-
"input":"foo</xmp",
|
|
40
|
-
"output":[["Character", "foo</xmp"]]},
|
|
41
|
-
|
|
42
|
-
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
|
|
43
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
44
|
-
"lastStartTag":"xmp",
|
|
45
|
-
"input":"foo</xmp/",
|
|
46
|
-
"output":[["Character", "foo"]],
|
|
47
|
-
"errors":[
|
|
48
|
-
{ "code": "eof-in-tag", "line": 1, "col": 10 }
|
|
49
|
-
]},
|
|
50
|
-
|
|
51
|
-
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
|
|
52
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
53
|
-
"lastStartTag":"xmp",
|
|
54
|
-
"input":"foo</xmp<",
|
|
55
|
-
"output":[["Character", "foo</xmp<"]]},
|
|
56
|
-
|
|
57
|
-
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
|
|
58
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
59
|
-
"lastStartTag":"xmp",
|
|
60
|
-
"input":"</foo>bar</xmp>",
|
|
61
|
-
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
|
|
62
|
-
|
|
63
|
-
{"description":"Partial end tags leading straight into partial end tags",
|
|
64
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
65
|
-
"lastStartTag":"xmp",
|
|
66
|
-
"input":"</xmp</xmp</xmp>",
|
|
67
|
-
"output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
|
|
68
|
-
|
|
69
|
-
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
|
|
70
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
71
|
-
"lastStartTag":"xmp",
|
|
72
|
-
"input":"</foo>bar</xmpaar>",
|
|
73
|
-
"output":[["Character", "</foo>bar</xmpaar>"]]},
|
|
74
|
-
|
|
75
|
-
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
|
|
76
|
-
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
77
|
-
"lastStartTag":"xmp",
|
|
78
|
-
"input":"foo</xmp></baz>",
|
|
79
|
-
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
|
|
80
|
-
|
|
81
|
-
{"description":"RAWTEXT w/ something looking like an entity",
|
|
82
|
-
"initialStates":["RAWTEXT state"],
|
|
83
|
-
"lastStartTag":"xmp",
|
|
84
|
-
"input":"&foo;",
|
|
85
|
-
"output":[["Character", "&foo;"]]},
|
|
86
|
-
|
|
87
|
-
{"description":"RCDATA w/ an entity",
|
|
88
|
-
"initialStates":["RCDATA state"],
|
|
89
|
-
"lastStartTag":"textarea",
|
|
90
|
-
"input":"&lt;",
|
|
91
|
-
"output":[["Character", "<"]]}
|
|
92
|
-
|
|
93
|
-
]}
|
|
@@ -1,335 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"tests": [
|
|
3
|
-
{
|
|
4
|
-
"description":"CR in bogus comment state",
|
|
5
|
-
"input":"<?\u000d",
|
|
6
|
-
"output":[["Comment", "?\u000a"]],
|
|
7
|
-
"errors":[
|
|
8
|
-
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
|
|
9
|
-
]
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"description":"CRLF in bogus comment state",
|
|
13
|
-
"input":"<?\u000d\u000a",
|
|
14
|
-
"output":[["Comment", "?\u000a"]],
|
|
15
|
-
"errors":[
|
|
16
|
-
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
|
|
17
|
-
]
|
|
18
|
-
},
|
|
19
|
-
{
|
|
20
|
-
"description":"CRLFLF in bogus comment state",
|
|
21
|
-
"input":"<?\u000d\u000a\u000a",
|
|
22
|
-
"output":[["Comment", "?\u000a\u000a"]],
|
|
23
|
-
"errors":[
|
|
24
|
-
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
|
|
25
|
-
]
|
|
26
|
-
},
|
|
27
|
-
{
|
|
28
|
-
"description":"Raw NUL replacement",
|
|
29
|
-
"doubleEscaped":true,
|
|
30
|
-
"initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"],
|
|
31
|
-
"input":"\\u0000",
|
|
32
|
-
"output":[["Character", "\\uFFFD"]],
|
|
33
|
-
"errors":[
|
|
34
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 1 }
|
|
35
|
-
]
|
|
36
|
-
},
|
|
37
|
-
{
|
|
38
|
-
"description":"NUL in CDATA section",
|
|
39
|
-
"doubleEscaped":true,
|
|
40
|
-
"initialStates":["CDATA section state"],
|
|
41
|
-
"input":"\\u0000]]>",
|
|
42
|
-
"output":[["Character", "\\u0000"]]
|
|
43
|
-
},
|
|
44
|
-
{
|
|
45
|
-
"description":"NUL in script HTML comment",
|
|
46
|
-
"doubleEscaped":true,
|
|
47
|
-
"initialStates":["Script data state"],
|
|
48
|
-
"input":"<!--test\\u0000--><!--test-\\u0000--><!--test--\\u0000-->",
|
|
49
|
-
"output":[["Character", "<!--test\\uFFFD--><!--test-\\uFFFD--><!--test--\\uFFFD-->"]],
|
|
50
|
-
"errors":[
|
|
51
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 9 },
|
|
52
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 22 },
|
|
53
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 36 }
|
|
54
|
-
]
|
|
55
|
-
},
|
|
56
|
-
{
|
|
57
|
-
"description":"NUL in script HTML comment - double escaped",
|
|
58
|
-
"doubleEscaped":true,
|
|
59
|
-
"initialStates":["Script data state"],
|
|
60
|
-
"input":"<!--<script>\\u0000--><!--<script>-\\u0000--><!--<script>--\\u0000-->",
|
|
61
|
-
"output":[["Character", "<!--<script>\\uFFFD--><!--<script>-\\uFFFD--><!--<script>--\\uFFFD-->"]],
|
|
62
|
-
"errors":[
|
|
63
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 13 },
|
|
64
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 30 },
|
|
65
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 48 }
|
|
66
|
-
]
|
|
67
|
-
},
|
|
68
|
-
{
|
|
69
|
-
"description":"EOF in script HTML comment",
|
|
70
|
-
"initialStates":["Script data state"],
|
|
71
|
-
"input":"<!--test",
|
|
72
|
-
"output":[["Character", "<!--test"]],
|
|
73
|
-
"errors":[
|
|
74
|
-
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 9 }
|
|
75
|
-
]
|
|
76
|
-
},
|
|
77
|
-
{
|
|
78
|
-
"description":"EOF in script HTML comment after dash",
|
|
79
|
-
"initialStates":["Script data state"],
|
|
80
|
-
"input":"<!--test-",
|
|
81
|
-
"output":[["Character", "<!--test-"]],
|
|
82
|
-
"errors":[
|
|
83
|
-
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 10 }
|
|
84
|
-
]
|
|
85
|
-
},
|
|
86
|
-
{
|
|
87
|
-
"description":"EOF in script HTML comment after dash dash",
|
|
88
|
-
"initialStates":["Script data state"],
|
|
89
|
-
"input":"<!--test--",
|
|
90
|
-
"output":[["Character", "<!--test--"]],
|
|
91
|
-
"errors":[
|
|
92
|
-
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 11 }
|
|
93
|
-
]
|
|
94
|
-
},
|
|
95
|
-
{
|
|
96
|
-
"description":"EOF in script HTML comment double escaped after dash",
|
|
97
|
-
"initialStates":["Script data state"],
|
|
98
|
-
"input":"<!--<script>-",
|
|
99
|
-
"output":[["Character", "<!--<script>-"]],
|
|
100
|
-
"errors":[
|
|
101
|
-
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 14 }
|
|
102
|
-
]
|
|
103
|
-
},
|
|
104
|
-
{
|
|
105
|
-
"description":"EOF in script HTML comment double escaped after dash dash",
|
|
106
|
-
"initialStates":["Script data state"],
|
|
107
|
-
"input":"<!--<script>--",
|
|
108
|
-
"output":[["Character", "<!--<script>--"]],
|
|
109
|
-
"errors":[
|
|
110
|
-
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 15 }
|
|
111
|
-
]
|
|
112
|
-
},
|
|
113
|
-
{
|
|
114
|
-
"description":"EOF in script HTML comment - double escaped",
|
|
115
|
-
"initialStates":["Script data state"],
|
|
116
|
-
"input":"<!--<script>",
|
|
117
|
-
"output":[["Character", "<!--<script>"]],
|
|
118
|
-
"errors":[
|
|
119
|
-
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 }
|
|
120
|
-
]
|
|
121
|
-
},
|
|
122
|
-
{
|
|
123
|
-
"description":"Dash in script HTML comment",
|
|
124
|
-
"initialStates":["Script data state"],
|
|
125
|
-
"input":"<!-- - -->",
|
|
126
|
-
"output":[["Character", "<!-- - -->"]]
|
|
127
|
-
},
|
|
128
|
-
{
|
|
129
|
-
"description":"Dash less-than in script HTML comment",
|
|
130
|
-
"initialStates":["Script data state"],
|
|
131
|
-
"input":"<!-- -< -->",
|
|
132
|
-
"output":[["Character", "<!-- -< -->"]]
|
|
133
|
-
},
|
|
134
|
-
{
|
|
135
|
-
"description":"Dash at end of script HTML comment",
|
|
136
|
-
"initialStates":["Script data state"],
|
|
137
|
-
"input":"<!--test--->",
|
|
138
|
-
"output":[["Character", "<!--test--->"]]
|
|
139
|
-
},
|
|
140
|
-
{
|
|
141
|
-
"description":"</script> in script HTML comment",
|
|
142
|
-
"initialStates":["Script data state"],
|
|
143
|
-
"lastStartTag":"script",
|
|
144
|
-
"input":"<!-- </script> --></script>",
|
|
145
|
-
"output":[["Character", "<!-- "], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
|
|
146
|
-
},
|
|
147
|
-
{
|
|
148
|
-
"description":"</script> in script HTML comment - double escaped",
|
|
149
|
-
"initialStates":["Script data state"],
|
|
150
|
-
"lastStartTag":"script",
|
|
151
|
-
"input":"<!-- <script></script> --></script>",
|
|
152
|
-
"output":[["Character", "<!-- <script></script> -->"], ["EndTag", "script"]]
|
|
153
|
-
},
|
|
154
|
-
{
|
|
155
|
-
"description":"</script> in script HTML comment - double escaped with nested <script>",
|
|
156
|
-
"initialStates":["Script data state"],
|
|
157
|
-
"lastStartTag":"script",
|
|
158
|
-
"input":"<!-- <script><script></script></script> --></script>",
|
|
159
|
-
"output":[["Character", "<!-- <script><script></script>"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
|
|
160
|
-
},
|
|
161
|
-
{
|
|
162
|
-
"description":"</script> in script HTML comment - double escaped with abrupt end",
|
|
163
|
-
"initialStates":["Script data state"],
|
|
164
|
-
"lastStartTag":"script",
|
|
165
|
-
"input":"<!-- <script>--></script> --></script>",
|
|
166
|
-
"output":[["Character", "<!-- <script>-->"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
|
|
167
|
-
},
|
|
168
|
-
{
|
|
169
|
-
"description":"Incomplete start tag in script HTML comment double escaped",
|
|
170
|
-
"initialStates":["Script data state"],
|
|
171
|
-
"lastStartTag":"script",
|
|
172
|
-
"input":"<!--<scrip></script>-->",
|
|
173
|
-
"output":[["Character", "<!--<scrip>"], ["EndTag", "script"], ["Character", "-->"]]
|
|
174
|
-
},
|
|
175
|
-
{
|
|
176
|
-
"description":"Unclosed start tag in script HTML comment double escaped",
|
|
177
|
-
"initialStates":["Script data state"],
|
|
178
|
-
"lastStartTag":"script",
|
|
179
|
-
"input":"<!--<script</script>-->",
|
|
180
|
-
"output":[["Character", "<!--<script"], ["EndTag", "script"], ["Character", "-->"]]
|
|
181
|
-
},
|
|
182
|
-
{
|
|
183
|
-
"description":"Incomplete end tag in script HTML comment double escaped",
|
|
184
|
-
"initialStates":["Script data state"],
|
|
185
|
-
"lastStartTag":"script",
|
|
186
|
-
"input":"<!--<script></scrip>-->",
|
|
187
|
-
"output":[["Character", "<!--<script></scrip>-->"]]
|
|
188
|
-
},
|
|
189
|
-
{
|
|
190
|
-
"description":"Unclosed end tag in script HTML comment double escaped",
|
|
191
|
-
"initialStates":["Script data state"],
|
|
192
|
-
"lastStartTag":"script",
|
|
193
|
-
"input":"<!--<script></script-->",
|
|
194
|
-
"output":[["Character", "<!--<script></script-->"]]
|
|
195
|
-
},
|
|
196
|
-
{
|
|
197
|
-
"description":"leading U+FEFF must pass through",
|
|
198
|
-
"initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"],
|
|
199
|
-
"doubleEscaped":true,
|
|
200
|
-
"input":"\\uFEFFfoo\\uFEFFbar",
|
|
201
|
-
"output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
|
|
202
|
-
},
|
|
203
|
-
{
|
|
204
|
-
"description":"Non BMP-charref in RCDATA",
|
|
205
|
-
"initialStates":["RCDATA state"],
|
|
206
|
-
"input":"≂̸",
|
|
207
|
-
"output":[["Character", "\u2242\u0338"]]
|
|
208
|
-
},
|
|
209
|
-
{
|
|
210
|
-
"description":"Bad charref in RCDATA",
|
|
211
|
-
"initialStates":["RCDATA state"],
|
|
212
|
-
"input":"&NotEqualTild;",
|
|
213
|
-
"output":[["Character", "&NotEqualTild;"]],
|
|
214
|
-
"errors":[
|
|
215
|
-
{ "code": "unknown-named-character-reference", "line": 1, "col": 14 }
|
|
216
|
-
]
|
|
217
|
-
},
|
|
218
|
-
{
|
|
219
|
-
"description":"lowercase endtags",
|
|
220
|
-
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
|
|
221
|
-
"lastStartTag":"xmp",
|
|
222
|
-
"input":"</XMP>",
|
|
223
|
-
"output":[["EndTag","xmp"]]
|
|
224
|
-
},
|
|
225
|
-
{
|
|
226
|
-
"description":"bad endtag (space before name)",
|
|
227
|
-
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
|
|
228
|
-
"lastStartTag":"xmp",
|
|
229
|
-
"input":"</ XMP>",
|
|
230
|
-
"output":[["Character","</ XMP>"]]
|
|
231
|
-
},
|
|
232
|
-
{
|
|
233
|
-
"description":"bad endtag (not matching last start tag)",
|
|
234
|
-
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
|
|
235
|
-
"lastStartTag":"xmp",
|
|
236
|
-
"input":"</xm>",
|
|
237
|
-
"output":[["Character","</xm>"]]
|
|
238
|
-
},
|
|
239
|
-
{
|
|
240
|
-
"description":"bad endtag (without close bracket)",
|
|
241
|
-
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
|
|
242
|
-
"lastStartTag":"xmp",
|
|
243
|
-
"input":"</xm ",
|
|
244
|
-
"output":[["Character","</xm "]]
|
|
245
|
-
},
|
|
246
|
-
{
|
|
247
|
-
"description":"bad endtag (trailing solidus)",
|
|
248
|
-
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
|
|
249
|
-
"lastStartTag":"xmp",
|
|
250
|
-
"input":"</xm/",
|
|
251
|
-
"output":[["Character","</xm/"]]
|
|
252
|
-
},
|
|
253
|
-
{
|
|
254
|
-
"description":"Non BMP-charref in attribute",
|
|
255
|
-
"input":"<p id=\"≂̸\">",
|
|
256
|
-
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
|
|
257
|
-
},
|
|
258
|
-
{
|
|
259
|
-
"description":"--!NUL in comment ",
|
|
260
|
-
"doubleEscaped":true,
|
|
261
|
-
"input":"<!----!\\u0000-->",
|
|
262
|
-
"output":[["Comment", "--!\\uFFFD"]],
|
|
263
|
-
"errors":[
|
|
264
|
-
{ "code": "unexpected-null-character", "line": 1, "col": 8 }
|
|
265
|
-
]
|
|
266
|
-
},
|
|
267
|
-
{
|
|
268
|
-
"description":"space EOF after doctype ",
|
|
269
|
-
"input":"<!DOCTYPE html ",
|
|
270
|
-
"output":[["DOCTYPE", "html", null, null , false]],
|
|
271
|
-
"errors":[
|
|
272
|
-
{ "code": "eof-in-doctype", "line": 1, "col": 16 }
|
|
273
|
-
]
|
|
274
|
-
},
|
|
275
|
-
{
|
|
276
|
-
"description":"CDATA in HTML content",
|
|
277
|
-
"input":"<![CDATA[foo]]>",
|
|
278
|
-
"output":[["Comment", "[CDATA[foo]]"]],
|
|
279
|
-
"errors":[
|
|
280
|
-
{ "code": "cdata-in-html-content", "line": 1, "col": 9 }
|
|
281
|
-
]
|
|
282
|
-
},
|
|
283
|
-
{
|
|
284
|
-
"description":"CDATA content",
|
|
285
|
-
"input":"foo ]]>",
|
|
286
|
-
"initialStates":["CDATA section state"],
|
|
287
|
-
"output":[["Character", "foo "]]
|
|
288
|
-
},
|
|
289
|
-
{
|
|
290
|
-
"description":"CDATA followed by HTML content",
|
|
291
|
-
"input":"foo ]]> ",
|
|
292
|
-
"initialStates":["CDATA section state"],
|
|
293
|
-
"output":[["Character", "foo  "]]
|
|
294
|
-
},
|
|
295
|
-
{
|
|
296
|
-
"description":"CDATA with extra bracket",
|
|
297
|
-
"input":"foo]]]>",
|
|
298
|
-
"initialStates":["CDATA section state"],
|
|
299
|
-
"output":[["Character", "foo]"]]
|
|
300
|
-
},
|
|
301
|
-
{
|
|
302
|
-
"description":"CDATA without end marker",
|
|
303
|
-
"input":"foo",
|
|
304
|
-
"initialStates":["CDATA section state"],
|
|
305
|
-
"output":[["Character", "foo"]],
|
|
306
|
-
"errors":[
|
|
307
|
-
{ "code": "eof-in-cdata", "line": 1, "col": 4 }
|
|
308
|
-
]
|
|
309
|
-
},
|
|
310
|
-
{
|
|
311
|
-
"description":"CDATA with single bracket ending",
|
|
312
|
-
"input":"foo]",
|
|
313
|
-
"initialStates":["CDATA section state"],
|
|
314
|
-
"output":[["Character", "foo]"]],
|
|
315
|
-
"errors":[
|
|
316
|
-
{ "code": "eof-in-cdata", "line": 1, "col": 5 }
|
|
317
|
-
]
|
|
318
|
-
},
|
|
319
|
-
{
|
|
320
|
-
"description":"CDATA with two brackets ending",
|
|
321
|
-
"input":"foo]]",
|
|
322
|
-
"initialStates":["CDATA section state"],
|
|
323
|
-
"output":[["Character", "foo]]"]],
|
|
324
|
-
"errors":[
|
|
325
|
-
{ "code": "eof-in-cdata", "line": 1, "col": 6 }
|
|
326
|
-
]
|
|
327
|
-
},
|
|
328
|
-
{
|
|
329
|
-
"description": "HTML tag in script data",
|
|
330
|
-
"input": "<b>hello world</b>",
|
|
331
|
-
"initialStates": ["Script data state"],
|
|
332
|
-
"output": [["Character", "<b>hello world</b>"]]
|
|
333
|
-
}
|
|
334
|
-
]
|
|
335
|
-
}
|