html-to-markdown 2.24.6 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
- data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
- data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/png/.cargo-checksum.json +1 -1
- data/rust-vendor/png/.cargo_vcs_info.json +1 -1
- data/rust-vendor/png/CHANGES.md +44 -0
- data/rust-vendor/png/Cargo.lock +124 -171
- data/rust-vendor/png/Cargo.toml +1 -1
- data/rust-vendor/png/Cargo.toml.orig +1 -1
- data/rust-vendor/png/benches/expand_paletted.rs +5 -5
- data/rust-vendor/png/benches/unfilter.rs +3 -3
- data/rust-vendor/png/src/adam7.rs +17 -10
- data/rust-vendor/png/src/common.rs +8 -8
- data/rust-vendor/png/src/decoder/mod.rs +53 -20
- data/rust-vendor/png/src/decoder/stream.rs +263 -78
- data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
- data/rust-vendor/png/src/decoder/zlib.rs +130 -90
- data/rust-vendor/png/src/encoder.rs +4 -2
- data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
- data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
- data/rust-vendor/png/src/filter/paeth.rs +398 -0
- data/rust-vendor/png/src/filter/simd.rs +308 -0
- data/rust-vendor/png/src/lib.rs +1 -0
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +7 -177
- data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
- data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
- data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
- data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
- data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
- data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
- data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
- data/rust-vendor/markup5ever_rcdom/README.md +0 -7
- data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
- data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
- data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
- data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
- data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
- data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
- data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
- data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
- data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
- data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
- data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
- data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
- data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
- data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
- data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
- data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
- data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
- data/rust-vendor/xml5ever/Cargo.lock +0 -752
- data/rust-vendor/xml5ever/Cargo.toml +0 -69
- data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
- data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
- data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
- data/rust-vendor/xml5ever/README.md +0 -72
- data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
- data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
- data/rust-vendor/xml5ever/examples/README.md +0 -223
- data/rust-vendor/xml5ever/examples/example.xml +0 -3
- data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
- data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
- data/rust-vendor/xml5ever/src/driver.rs +0 -90
- data/rust-vendor/xml5ever/src/lib.rs +0 -47
- data/rust-vendor/xml5ever/src/macros.rs +0 -18
- data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
- data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
- data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
- data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
- data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
- data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
- data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
- data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
Tokenizer tests
|
|
2
|
-
===============
|
|
3
|
-
|
|
4
|
-
The test format is [JSON](http://www.json.org/). This has the advantage
|
|
5
|
-
that the syntax allows backward-compatible extensions to the tests and
|
|
6
|
-
the disadvantage that it is relatively verbose.
|
|
7
|
-
|
|
8
|
-
Basic Structure
|
|
9
|
-
---------------
|
|
10
|
-
|
|
11
|
-
{"tests": [
|
|
12
|
-
{"description": "Test description",
|
|
13
|
-
"input": "input_string",
|
|
14
|
-
"output": [expected_output_tokens],
|
|
15
|
-
"initialStates": [initial_states],
|
|
16
|
-
"lastStartTag": last_start_tag,
|
|
17
|
-
"ignoreErrorOrder": ignore_error_order
|
|
18
|
-
}
|
|
19
|
-
]}
|
|
20
|
-
|
|
21
|
-
Multiple tests per file are allowed simply by adding more objects to the
|
|
22
|
-
"tests" list.
|
|
23
|
-
|
|
24
|
-
`description`, `input` and `output` are always present. The other values
|
|
25
|
-
are optional.
|
|
26
|
-
|
|
27
|
-
### Test set-up
|
|
28
|
-
|
|
29
|
-
`test.input` is a string containing the characters to pass to the
|
|
30
|
-
tokenizer. Specifically, it represents the characters of the **input
|
|
31
|
-
stream**, and so implementations are expected to perform the processing
|
|
32
|
-
described in the spec's **Preprocessing the input stream** section
|
|
33
|
-
before feeding the result to the tokenizer.
|
|
34
|
-
|
|
35
|
-
If `test.doubleEscaped` is present and `true`, then `test.input` is not
|
|
36
|
-
quite as described above. Instead, it must first be subjected to another
|
|
37
|
-
round of unescaping (i.e., in addition to any unescaping involved in the
|
|
38
|
-
JSON import), and the result of *that* represents the characters of the
|
|
39
|
-
input stream. Currently, the only unescaping required by this option is
|
|
40
|
-
to convert each sequence of the form \\uHHHH (where H is a hex digit)
|
|
41
|
-
into the corresponding Unicode code point. (Note that this option also
|
|
42
|
-
affects the interpretation of `test.output`.)
|
|
43
|
-
|
|
44
|
-
`test.initialStates` is a list of strings, each being the name of a
|
|
45
|
-
tokenizer state. The test should be run once for each string, using it
|
|
46
|
-
to set the tokenizer's initial state for that run. If
|
|
47
|
-
`test.initialStates` is omitted, it defaults to `["data state"]`.
|
|
48
|
-
|
|
49
|
-
`test.lastStartTag` is a lowercase string that should be used as "the
|
|
50
|
-
tag name of the last start tag to have been emitted from this
|
|
51
|
-
tokenizer", referenced in the spec's definition of **appropriate end tag
|
|
52
|
-
token**. If it is omitted, it is treated as if "no start tag has been
|
|
53
|
-
emitted from this tokenizer".
|
|
54
|
-
|
|
55
|
-
### Test results
|
|
56
|
-
|
|
57
|
-
`test.output` is a list of tokens, ordered with the first produced by
|
|
58
|
-
the tokenizer the first (leftmost) in the list. The list must match the
|
|
59
|
-
**complete** list of tokens that the tokenizer should produce. Valid
|
|
60
|
-
tokens are:
|
|
61
|
-
|
|
62
|
-
["DOCTYPE", name, public_id, system_id, correctness]
|
|
63
|
-
["StartTag", name, {attributes}*, true*]
|
|
64
|
-
["StartTag", name, {attributes}]
|
|
65
|
-
["EndTag", name]
|
|
66
|
-
["Comment", data]
|
|
67
|
-
["Character", data]
|
|
68
|
-
"ParseError"
|
|
69
|
-
|
|
70
|
-
`public_id` and `system_id` are either strings or `null`. `correctness`
|
|
71
|
-
is either `true` or `false`; `true` corresponds to the force-quirks flag
|
|
72
|
-
being false, and vice-versa.
|
|
73
|
-
|
|
74
|
-
When the self-closing flag is set, the `StartTag` array has `true` as
|
|
75
|
-
its fourth entry. When the flag is not set, the array has only three
|
|
76
|
-
entries for backwards compatibility.
|
|
77
|
-
|
|
78
|
-
All adjacent character tokens are coalesced into a single
|
|
79
|
-
`["Character", data]` token.
|
|
80
|
-
|
|
81
|
-
If `test.doubleEscaped` is present and `true`, then every string within
|
|
82
|
-
`test.output` must be further unescaped (as described above) before
|
|
83
|
-
comparing with the tokenizer's output.
|
|
84
|
-
|
|
85
|
-
`test.ignoreErrorOrder` is a boolean value indicating that the order of
|
|
86
|
-
`ParseError` tokens relative to other tokens in the output stream is
|
|
87
|
-
unimportant, and implementations should ignore such differences between
|
|
88
|
-
their output and `expected_output_tokens`. (This is used for errors
|
|
89
|
-
emitted by the input stream preprocessing stage, since it is useful to
|
|
90
|
-
test that code but it is undefined when the errors occur). If it is
|
|
91
|
-
omitted, it defaults to `false`.
|
|
92
|
-
|
|
@@ -1,274 +0,0 @@
|
|
|
1
|
-
{"tests": [
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
{"description":"Comment",
|
|
5
|
-
"input":"<!--comment-->",
|
|
6
|
-
"output":[["Comment", "comment"]]
|
|
7
|
-
},
|
|
8
|
-
|
|
9
|
-
{"description":"--Comment",
|
|
10
|
-
"input":"<!----comment -->",
|
|
11
|
-
"output":[["Comment", "--comment "]]
|
|
12
|
-
},
|
|
13
|
-
|
|
14
|
-
{"description":"--Comment-",
|
|
15
|
-
"input":"<!----comment--->",
|
|
16
|
-
"output":[["Comment", "--comment-"]]
|
|
17
|
-
},
|
|
18
|
-
|
|
19
|
-
{"description":"Error comment --!>",
|
|
20
|
-
"input":"<!----!>",
|
|
21
|
-
"output":["ParseError", ["Comment", ""]]
|
|
22
|
-
},
|
|
23
|
-
|
|
24
|
-
{"description":"EOF inside comment",
|
|
25
|
-
"input":"<!----!",
|
|
26
|
-
"output":["ParseError", ["Comment", ""]]
|
|
27
|
-
},
|
|
28
|
-
|
|
29
|
-
{"description":"EOF inside comment 2",
|
|
30
|
-
"input":"<!----",
|
|
31
|
-
"output":["ParseError", ["Comment", ""]]
|
|
32
|
-
},
|
|
33
|
-
|
|
34
|
-
{"description":"EOF inside comment 3",
|
|
35
|
-
"input":"<!--->",
|
|
36
|
-
"output":["ParseError", ["Comment", ""]]
|
|
37
|
-
},
|
|
38
|
-
|
|
39
|
-
{"description":"EOF inside comment 4",
|
|
40
|
-
"input":"<!-----",
|
|
41
|
-
"output":["ParseError", ["Comment", "-"]]
|
|
42
|
-
},
|
|
43
|
-
|
|
44
|
-
{"description":"EOF inside comment 5",
|
|
45
|
-
"input":"<!-->",
|
|
46
|
-
"output":["ParseError", ["Comment", ""]]
|
|
47
|
-
},
|
|
48
|
-
|
|
49
|
-
{"description":"EOF inside comment 6",
|
|
50
|
-
"input":"<!--",
|
|
51
|
-
"output":["ParseError", ["Comment", ""]]
|
|
52
|
-
},
|
|
53
|
-
|
|
54
|
-
{"description":"EOF inside comment 7",
|
|
55
|
-
"input":"<!--x",
|
|
56
|
-
"output":["ParseError", ["Comment", "x"]]
|
|
57
|
-
},
|
|
58
|
-
|
|
59
|
-
{"description":"EOF inside comment 8",
|
|
60
|
-
"input":"<!--<",
|
|
61
|
-
"output":["ParseError", ["Comment", "<"]]
|
|
62
|
-
},
|
|
63
|
-
|
|
64
|
-
{"description":"EOF inside comment 9",
|
|
65
|
-
"input":"<!--<!",
|
|
66
|
-
"output":["ParseError", ["Comment", "<!"]]
|
|
67
|
-
},
|
|
68
|
-
|
|
69
|
-
{"description":"EOF inside comment 10",
|
|
70
|
-
"input":"<!--<!-",
|
|
71
|
-
"output":["ParseError", ["Comment", "<!"]]
|
|
72
|
-
},
|
|
73
|
-
|
|
74
|
-
{"description":"EOF inside comment 11",
|
|
75
|
-
"input":"<!--<!--",
|
|
76
|
-
"output":["ParseError", ["Comment", "<!"]]
|
|
77
|
-
},
|
|
78
|
-
|
|
79
|
-
{"description":"EOF inside comment 12",
|
|
80
|
-
"input":"<!--<!--!",
|
|
81
|
-
"output":["ParseError", "ParseError", ["Comment", "<!"]]
|
|
82
|
-
},
|
|
83
|
-
|
|
84
|
-
{"description":"<!-- inside comment",
|
|
85
|
-
"input":"<!--<!--!>",
|
|
86
|
-
"output":["ParseError", "ParseError", ["Comment", "<!"]]
|
|
87
|
-
},
|
|
88
|
-
|
|
89
|
-
{"description":"<!-- inside comment 2",
|
|
90
|
-
"input":"<!--<!---",
|
|
91
|
-
"output":["ParseError", "ParseError", ["Comment", "<!-"]]
|
|
92
|
-
},
|
|
93
|
-
|
|
94
|
-
{"description":"<!-- inside comment 3",
|
|
95
|
-
"input":"<!--<!--->",
|
|
96
|
-
"output":["ParseError", ["Comment", "<!-"]]
|
|
97
|
-
},
|
|
98
|
-
|
|
99
|
-
{"description":"<!-- inside comment 4",
|
|
100
|
-
"input":"<!--<!--x",
|
|
101
|
-
"output":["ParseError", "ParseError", ["Comment", "<!--x"]]
|
|
102
|
-
},
|
|
103
|
-
|
|
104
|
-
{"description":"<!-- inside comment 5",
|
|
105
|
-
"input":"<!--<!--x-",
|
|
106
|
-
"output":["ParseError", "ParseError", ["Comment", "<!--x"]]
|
|
107
|
-
},
|
|
108
|
-
|
|
109
|
-
{"description":"<!-- inside comment 6",
|
|
110
|
-
"input":"<!--<!--x--",
|
|
111
|
-
"output":["ParseError", "ParseError", ["Comment", "<!--x"]]
|
|
112
|
-
},
|
|
113
|
-
|
|
114
|
-
{"description":"<!-- inside comment 7",
|
|
115
|
-
"input":"<!--<!--x-->",
|
|
116
|
-
"output":["ParseError", ["Comment", "<!--x"]]
|
|
117
|
-
},
|
|
118
|
-
|
|
119
|
-
{"description":"<!-- inside comment 8",
|
|
120
|
-
"input":"<!--<!-x",
|
|
121
|
-
"output":["ParseError", ["Comment", "<!-x"]]
|
|
122
|
-
},
|
|
123
|
-
|
|
124
|
-
{"description":"<!-- inside comment 9",
|
|
125
|
-
"input":"<!--<!-x-",
|
|
126
|
-
"output":["ParseError", ["Comment", "<!-x"]]
|
|
127
|
-
},
|
|
128
|
-
|
|
129
|
-
{"description":"<!-- inside comment 10",
|
|
130
|
-
"input":"<!--<!-x--",
|
|
131
|
-
"output":["ParseError", ["Comment", "<!-x"]]
|
|
132
|
-
},
|
|
133
|
-
|
|
134
|
-
{"description":"<!-- inside comment 11",
|
|
135
|
-
"input":"<!--<!x",
|
|
136
|
-
"output":["ParseError", ["Comment", "<!x"]]
|
|
137
|
-
},
|
|
138
|
-
|
|
139
|
-
{"description":"<!-- inside comment 12",
|
|
140
|
-
"input":"<!--<!x-",
|
|
141
|
-
"output":["ParseError", ["Comment", "<!x"]]
|
|
142
|
-
},
|
|
143
|
-
|
|
144
|
-
{"description":"<!-- inside comment 13",
|
|
145
|
-
"input":"<!--<!x--",
|
|
146
|
-
"output":["ParseError", ["Comment", "<!x"]]
|
|
147
|
-
},
|
|
148
|
-
|
|
149
|
-
{"description":"<!-- inside comment 14",
|
|
150
|
-
"input":"<!--<<!--x-->",
|
|
151
|
-
"output":["ParseError", ["Comment", "<<!--x"]]
|
|
152
|
-
},
|
|
153
|
-
|
|
154
|
-
{"description":"<!-- inside comment 15",
|
|
155
|
-
"input":"<!--<!<!--x-->",
|
|
156
|
-
"output":["ParseError", ["Comment", "<!<!--x"]]
|
|
157
|
-
},
|
|
158
|
-
|
|
159
|
-
{"description":"<!-- inside comment 16",
|
|
160
|
-
"input":"<!--<!-<!--x-->",
|
|
161
|
-
"output":["ParseError", ["Comment", "<!-<!--x"]]
|
|
162
|
-
},
|
|
163
|
-
|
|
164
|
-
{"description":"EOF inside comment 13",
|
|
165
|
-
"input":"<!----!->",
|
|
166
|
-
"output":["ParseError", ["Comment", "--!->"]]
|
|
167
|
-
},
|
|
168
|
-
|
|
169
|
-
{"description":"EOF inside comment 14",
|
|
170
|
-
"input":"<!----!x>",
|
|
171
|
-
"output":["ParseError", ["Comment", "--!x>"]]
|
|
172
|
-
},
|
|
173
|
-
|
|
174
|
-
{"description":"EOF inside comment 15",
|
|
175
|
-
"input":"<!-----x>",
|
|
176
|
-
"output":["ParseError", ["Comment", "---x>"]]
|
|
177
|
-
},
|
|
178
|
-
|
|
179
|
-
{"description":"Tiny Bogus Comment",
|
|
180
|
-
"input":"<!>",
|
|
181
|
-
"output":["ParseError", ["Comment", ""]]
|
|
182
|
-
},
|
|
183
|
-
|
|
184
|
-
{"description":"<head> in Comment",
|
|
185
|
-
"input":"<!--<head>-->",
|
|
186
|
-
"output":[["Comment", "<head>"]]
|
|
187
|
-
},
|
|
188
|
-
|
|
189
|
-
{"description":"Short Bogus Comment",
|
|
190
|
-
"input":"<!-->",
|
|
191
|
-
"output":["ParseError", ["Comment", ""]]
|
|
192
|
-
},
|
|
193
|
-
|
|
194
|
-
{"description":"Short Bogus Comment2",
|
|
195
|
-
"input":"<!-->test",
|
|
196
|
-
"output":["ParseError", ["Comment", ""], ["Character", "test"]]
|
|
197
|
-
},
|
|
198
|
-
|
|
199
|
-
{"description":"Comments 1",
|
|
200
|
-
"input":"<!----!-->",
|
|
201
|
-
"output":[["Comment", "--!"]]
|
|
202
|
-
},
|
|
203
|
-
|
|
204
|
-
{"description":"Comments 2",
|
|
205
|
-
"input":"<!----!x-->",
|
|
206
|
-
"output":[["Comment", "--!x"]]
|
|
207
|
-
},
|
|
208
|
-
|
|
209
|
-
{"description":"Comments 3",
|
|
210
|
-
"input":"<!----->",
|
|
211
|
-
"output":[["Comment", "-"]]
|
|
212
|
-
},
|
|
213
|
-
|
|
214
|
-
{"description":"Comments 4",
|
|
215
|
-
"input":"<!-----x-->",
|
|
216
|
-
"output":[["Comment", "---x"]]
|
|
217
|
-
},
|
|
218
|
-
|
|
219
|
-
{"description":"Comments 5",
|
|
220
|
-
"input":"<!--x-->",
|
|
221
|
-
"output":[["Comment", "x"]]
|
|
222
|
-
},
|
|
223
|
-
|
|
224
|
-
{"description":"Comments 6",
|
|
225
|
-
"input":"<!--<!-x-->",
|
|
226
|
-
"output":[["Comment", "<!-x"]]
|
|
227
|
-
},
|
|
228
|
-
|
|
229
|
-
{"description":"Comments 7",
|
|
230
|
-
"input":"<!--<!x-->",
|
|
231
|
-
"output":[["Comment", "<!x"]]
|
|
232
|
-
},
|
|
233
|
-
|
|
234
|
-
{"description":"Comments 8",
|
|
235
|
-
"input":"<!--<<!x-->",
|
|
236
|
-
"output":[["Comment", "<<!x"]]
|
|
237
|
-
},
|
|
238
|
-
|
|
239
|
-
{"description":"Comments 9",
|
|
240
|
-
"input":"<!--<<!-x-->",
|
|
241
|
-
"output":[["Comment", "<<!-x"]]
|
|
242
|
-
},
|
|
243
|
-
|
|
244
|
-
{"description":"Comments 10",
|
|
245
|
-
"input":"<!--<x-->",
|
|
246
|
-
"output":[["Comment", "<x"]]
|
|
247
|
-
},
|
|
248
|
-
|
|
249
|
-
{"description":"Comments 11",
|
|
250
|
-
"input":"<!--<>-->",
|
|
251
|
-
"output":[["Comment", "<>"]]
|
|
252
|
-
},
|
|
253
|
-
|
|
254
|
-
{"description":"Comments 12",
|
|
255
|
-
"input":"<!--<-->",
|
|
256
|
-
"output":[["Comment", "<"]]
|
|
257
|
-
},
|
|
258
|
-
|
|
259
|
-
{"description":"Comments 13",
|
|
260
|
-
"input":"<!--<--->",
|
|
261
|
-
"output":[["Comment", "<-"]]
|
|
262
|
-
},
|
|
263
|
-
|
|
264
|
-
{"description":"Comments 14",
|
|
265
|
-
"input":"<!--<!-->",
|
|
266
|
-
"output":[["Comment", "<!"]]
|
|
267
|
-
},
|
|
268
|
-
|
|
269
|
-
{"description":"Comments long",
|
|
270
|
-
"input":"<!---->",
|
|
271
|
-
"output":[["Comment", ""]]
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
]}
|