html-to-markdown 2.24.6 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
- data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
- data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/png/.cargo-checksum.json +1 -1
- data/rust-vendor/png/.cargo_vcs_info.json +1 -1
- data/rust-vendor/png/CHANGES.md +44 -0
- data/rust-vendor/png/Cargo.lock +124 -171
- data/rust-vendor/png/Cargo.toml +1 -1
- data/rust-vendor/png/Cargo.toml.orig +1 -1
- data/rust-vendor/png/benches/expand_paletted.rs +5 -5
- data/rust-vendor/png/benches/unfilter.rs +3 -3
- data/rust-vendor/png/src/adam7.rs +17 -10
- data/rust-vendor/png/src/common.rs +8 -8
- data/rust-vendor/png/src/decoder/mod.rs +53 -20
- data/rust-vendor/png/src/decoder/stream.rs +263 -78
- data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
- data/rust-vendor/png/src/decoder/zlib.rs +130 -90
- data/rust-vendor/png/src/encoder.rs +4 -2
- data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
- data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
- data/rust-vendor/png/src/filter/paeth.rs +398 -0
- data/rust-vendor/png/src/filter/simd.rs +308 -0
- data/rust-vendor/png/src/lib.rs +1 -0
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +7 -177
- data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
- data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
- data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
- data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
- data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
- data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
- data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
- data/rust-vendor/markup5ever_rcdom/README.md +0 -7
- data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
- data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
- data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
- data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
- data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
- data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
- data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
- data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
- data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
- data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
- data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
- data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
- data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
- data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
- data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
- data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
- data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
- data/rust-vendor/xml5ever/Cargo.lock +0 -752
- data/rust-vendor/xml5ever/Cargo.toml +0 -69
- data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
- data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
- data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
- data/rust-vendor/xml5ever/README.md +0 -72
- data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
- data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
- data/rust-vendor/xml5ever/examples/README.md +0 -223
- data/rust-vendor/xml5ever/examples/example.xml +0 -3
- data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
- data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
- data/rust-vendor/xml5ever/src/driver.rs +0 -90
- data/rust-vendor/xml5ever/src/lib.rs +0 -47
- data/rust-vendor/xml5ever/src/macros.rs +0 -18
- data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
- data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
- data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
- data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
- data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
- data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
- data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
- data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
|
@@ -1,374 +0,0 @@
|
|
|
1
|
-
// Copyright 2014-2017 The html5ever Project Developers. See the
|
|
2
|
-
// COPYRIGHT file at the top-level directory of this distribution.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
5
|
-
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
6
|
-
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
7
|
-
// option. This file may not be copied, modified, or distributed
|
|
8
|
-
// except according to those terms.
|
|
9
|
-
|
|
10
|
-
use serde_json::{Map, Value};
|
|
11
|
-
use std::borrow::Cow::Borrowed;
|
|
12
|
-
use std::cell::RefCell;
|
|
13
|
-
use std::ffi::OsStr;
|
|
14
|
-
use std::io::Read;
|
|
15
|
-
use std::path::Path;
|
|
16
|
-
use xml5ever::tokenizer::ProcessResult;
|
|
17
|
-
|
|
18
|
-
use util::find_tests::foreach_xml5lib_test;
|
|
19
|
-
use util::runner::{run_all, Test};
|
|
20
|
-
|
|
21
|
-
use markup5ever::buffer_queue::BufferQueue;
|
|
22
|
-
use xml5ever::tendril::{SliceExt, StrTendril};
|
|
23
|
-
use xml5ever::tokenizer::{
|
|
24
|
-
Doctype, EmptyTag, EndTag, Pi, ShortTag, StartTag, Tag, Token, TokenSink, XmlTokenizer,
|
|
25
|
-
XmlTokenizerOpts,
|
|
26
|
-
};
|
|
27
|
-
use xml5ever::{ns, Attribute, LocalName, QualName};
|
|
28
|
-
|
|
29
|
-
mod util {
|
|
30
|
-
pub mod find_tests;
|
|
31
|
-
pub mod runner;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// Return all ways of splitting the string into at most n
|
|
35
|
-
// possibly-empty pieces.
|
|
36
|
-
fn splits(s: &str, n: usize) -> Vec<Vec<StrTendril>> {
|
|
37
|
-
if n == 1 {
|
|
38
|
-
return vec![vec![s.to_tendril()]];
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
let mut points: Vec<usize> = s.char_indices().map(|(n, _)| n).collect();
|
|
42
|
-
points.push(s.len());
|
|
43
|
-
|
|
44
|
-
// do this with iterators?
|
|
45
|
-
let mut out = vec![];
|
|
46
|
-
for p in points.into_iter() {
|
|
47
|
-
let y = &s[p..];
|
|
48
|
-
for mut x in splits(&s[..p], n - 1).into_iter() {
|
|
49
|
-
x.push(y.to_tendril());
|
|
50
|
-
out.push(x);
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
out.extend(splits(s, n - 1));
|
|
55
|
-
out
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
struct TokenLogger {
|
|
59
|
-
tokens: RefCell<Vec<Token>>,
|
|
60
|
-
current_str: RefCell<StrTendril>,
|
|
61
|
-
exact_errors: bool,
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
impl TokenLogger {
|
|
65
|
-
fn new(exact_errors: bool) -> TokenLogger {
|
|
66
|
-
TokenLogger {
|
|
67
|
-
tokens: RefCell::new(vec![]),
|
|
68
|
-
current_str: RefCell::new(StrTendril::new()),
|
|
69
|
-
exact_errors,
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
// Push anything other than character tokens
|
|
74
|
-
fn push(&self, token: Token) {
|
|
75
|
-
self.finish_str();
|
|
76
|
-
self.tokens.borrow_mut().push(token);
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
fn finish_str(&self) {
|
|
80
|
-
if !self.current_str.borrow().is_empty() {
|
|
81
|
-
let s = self.current_str.take();
|
|
82
|
-
self.tokens.borrow_mut().push(Token::Characters(s));
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
fn get_tokens(self) -> Vec<Token> {
|
|
87
|
-
self.finish_str();
|
|
88
|
-
self.tokens.take()
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
impl TokenSink for TokenLogger {
|
|
93
|
-
type Handle = ();
|
|
94
|
-
|
|
95
|
-
fn process_token(&self, token: Token) -> ProcessResult<()> {
|
|
96
|
-
match token {
|
|
97
|
-
Token::Characters(characters) => {
|
|
98
|
-
self.current_str.borrow_mut().push_slice(&characters);
|
|
99
|
-
},
|
|
100
|
-
|
|
101
|
-
Token::NullCharacter => {
|
|
102
|
-
self.current_str.borrow_mut().push_char('\0');
|
|
103
|
-
},
|
|
104
|
-
|
|
105
|
-
Token::ParseError(_) => {
|
|
106
|
-
if self.exact_errors {
|
|
107
|
-
self.push(Token::ParseError(Borrowed("")));
|
|
108
|
-
}
|
|
109
|
-
},
|
|
110
|
-
Token::Tag(mut t) => {
|
|
111
|
-
// The spec seems to indicate that one can emit
|
|
112
|
-
// erroneous end tags with attrs, but the test
|
|
113
|
-
// cases don't contain them.
|
|
114
|
-
match t.kind {
|
|
115
|
-
EndTag => {
|
|
116
|
-
t.attrs = vec![];
|
|
117
|
-
},
|
|
118
|
-
_ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)),
|
|
119
|
-
}
|
|
120
|
-
self.push(Token::Tag(t));
|
|
121
|
-
},
|
|
122
|
-
Token::EndOfFile => (),
|
|
123
|
-
_ => self.push(token),
|
|
124
|
-
};
|
|
125
|
-
ProcessResult::Continue
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
fn tokenize_xml(input: Vec<StrTendril>, opts: XmlTokenizerOpts) -> Vec<Token> {
|
|
130
|
-
let sink = TokenLogger::new(opts.exact_errors);
|
|
131
|
-
let tok = XmlTokenizer::new(sink, opts);
|
|
132
|
-
let buf = BufferQueue::default();
|
|
133
|
-
|
|
134
|
-
for chunk in input.into_iter() {
|
|
135
|
-
buf.push_back(chunk);
|
|
136
|
-
let _ = tok.feed(&buf);
|
|
137
|
-
}
|
|
138
|
-
let _ = tok.feed(&buf);
|
|
139
|
-
tok.end();
|
|
140
|
-
tok.sink.get_tokens()
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
#[allow(dead_code)]
|
|
144
|
-
trait JsonExt: Sized {
|
|
145
|
-
fn get_str(&self) -> String;
|
|
146
|
-
fn get_tendril(&self) -> StrTendril;
|
|
147
|
-
fn get_nullable_tendril(&self) -> Option<StrTendril>;
|
|
148
|
-
fn get_bool(&self) -> bool;
|
|
149
|
-
fn get_obj(&self) -> &Map<String, Self>;
|
|
150
|
-
fn get_list(&self) -> &Vec<Self>;
|
|
151
|
-
fn find(&self, key: &str) -> &Self;
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
impl JsonExt for Value {
|
|
155
|
-
fn get_str(&self) -> String {
|
|
156
|
-
match *self {
|
|
157
|
-
Value::String(ref s) => s.to_string(),
|
|
158
|
-
_ => panic!("Value::get_str: not a String"),
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
fn get_tendril(&self) -> StrTendril {
|
|
163
|
-
match *self {
|
|
164
|
-
Value::String(ref s) => s.to_tendril(),
|
|
165
|
-
_ => panic!("Value::get_tendril: not a String"),
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
fn get_nullable_tendril(&self) -> Option<StrTendril> {
|
|
170
|
-
match *self {
|
|
171
|
-
Value::Null => None,
|
|
172
|
-
Value::String(ref s) => Some(s.to_tendril()),
|
|
173
|
-
_ => panic!("Value::get_nullable_tendril: not a String"),
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
fn get_bool(&self) -> bool {
|
|
178
|
-
match *self {
|
|
179
|
-
Value::Bool(b) => b,
|
|
180
|
-
_ => panic!("Value::get_bool: not a Boolean"),
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
fn get_obj(&self) -> &Map<String, Value> {
|
|
185
|
-
match self {
|
|
186
|
-
Value::Object(m) => m,
|
|
187
|
-
_ => panic!("Value::get_obj: not an Object"),
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
fn get_list(&self) -> &Vec<Value> {
|
|
192
|
-
match self {
|
|
193
|
-
Value::Array(m) => m,
|
|
194
|
-
_ => panic!("Value::get_list: not an Array"),
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
fn find(&self, key: &str) -> &Value {
|
|
199
|
-
self.get_obj().get(&key.to_string()).unwrap()
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
// Parse a JSON object (other than "ParseError") to a token.
|
|
204
|
-
fn json_to_token(js: &Value) -> Token {
|
|
205
|
-
let parts = js.as_array().unwrap();
|
|
206
|
-
// Collect refs here so we don't have to use "ref" in all the patterns below.
|
|
207
|
-
let args: Vec<&Value> = parts[1..].iter().collect();
|
|
208
|
-
match &*parts[0].get_str() {
|
|
209
|
-
"StartTag" => Token::Tag(Tag {
|
|
210
|
-
kind: StartTag,
|
|
211
|
-
name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
|
|
212
|
-
attrs: args[1]
|
|
213
|
-
.get_obj()
|
|
214
|
-
.iter()
|
|
215
|
-
.map(|(k, v)| Attribute {
|
|
216
|
-
name: QualName::new(None, ns!(), LocalName::from(&**k)),
|
|
217
|
-
value: v.get_tendril(),
|
|
218
|
-
})
|
|
219
|
-
.collect(),
|
|
220
|
-
}),
|
|
221
|
-
|
|
222
|
-
"EndTag" => Token::Tag(Tag {
|
|
223
|
-
kind: EndTag,
|
|
224
|
-
name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
|
|
225
|
-
attrs: vec![],
|
|
226
|
-
}),
|
|
227
|
-
|
|
228
|
-
"ShortTag" => Token::Tag(Tag {
|
|
229
|
-
kind: ShortTag,
|
|
230
|
-
name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
|
|
231
|
-
attrs: vec![],
|
|
232
|
-
}),
|
|
233
|
-
|
|
234
|
-
"EmptyTag" => Token::Tag(Tag {
|
|
235
|
-
kind: EmptyTag,
|
|
236
|
-
name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
|
|
237
|
-
attrs: args[1]
|
|
238
|
-
.get_obj()
|
|
239
|
-
.iter()
|
|
240
|
-
.map(|(k, v)| Attribute {
|
|
241
|
-
name: QualName::new(None, ns!(), LocalName::from(&**k)),
|
|
242
|
-
value: v.get_tendril(),
|
|
243
|
-
})
|
|
244
|
-
.collect(),
|
|
245
|
-
}),
|
|
246
|
-
|
|
247
|
-
"Comment" => Token::Comment(args[0].get_tendril()),
|
|
248
|
-
|
|
249
|
-
"Character" => Token::Characters(args[0].get_tendril()),
|
|
250
|
-
|
|
251
|
-
"PI" => Token::ProcessingInstruction(Pi {
|
|
252
|
-
target: args[0].get_tendril(),
|
|
253
|
-
data: args[1].get_tendril(),
|
|
254
|
-
}),
|
|
255
|
-
|
|
256
|
-
"DOCTYPE" => Token::Doctype(Doctype {
|
|
257
|
-
name: args[0].get_nullable_tendril(),
|
|
258
|
-
public_id: args[1].get_nullable_tendril(),
|
|
259
|
-
system_id: args[2].get_nullable_tendril(),
|
|
260
|
-
}),
|
|
261
|
-
|
|
262
|
-
// We don't need to produce NullCharacterToken because
|
|
263
|
-
// the TokenLogger will convert them to CharacterTokens.
|
|
264
|
-
_ => panic!("don't understand token {parts:?}"),
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
// Parse the "output" field of the test case into a vector of tokens.
|
|
269
|
-
fn json_to_tokens(js: &Value, exact_errors: bool) -> Vec<Token> {
|
|
270
|
-
// Use a TokenLogger so that we combine character tokens separated
|
|
271
|
-
// by an ignored error.
|
|
272
|
-
let sink = TokenLogger::new(exact_errors);
|
|
273
|
-
for tok in js.as_array().unwrap().iter() {
|
|
274
|
-
match *tok {
|
|
275
|
-
Value::String(ref s) if &s[..] == "ParseError" => {
|
|
276
|
-
let _ = sink.process_token(Token::ParseError(Borrowed("")));
|
|
277
|
-
},
|
|
278
|
-
_ => {
|
|
279
|
-
let _ = sink.process_token(json_to_token(tok));
|
|
280
|
-
},
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
sink.get_tokens()
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
fn mk_xml_test(name: String, input: String, expect: Value, opts: XmlTokenizerOpts) -> Test {
|
|
287
|
-
Test {
|
|
288
|
-
name,
|
|
289
|
-
skip: false,
|
|
290
|
-
test: Box::new(move || {
|
|
291
|
-
// Split up the input at different points to test incremental tokenization.
|
|
292
|
-
let insplits = splits(&input, 3);
|
|
293
|
-
for input in insplits.into_iter() {
|
|
294
|
-
// Clone 'input' so we have it for the failure message.
|
|
295
|
-
// Also clone opts. If we don't, we get the wrong
|
|
296
|
-
// result but the compiler doesn't catch it!
|
|
297
|
-
// Possibly mozilla/rust#12223.
|
|
298
|
-
let output = tokenize_xml(input.clone(), opts);
|
|
299
|
-
let expect = json_to_tokens(&expect, opts.exact_errors);
|
|
300
|
-
if output != expect {
|
|
301
|
-
panic!("\ninput: {input:?}\ngot: {output:?}\nexpected: {expect:?}");
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
}),
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
fn mk_xml_tests(tests: &mut Vec<Test>, filename: &str, js: &Value) {
|
|
309
|
-
let input: &str = &js.find("input").get_str();
|
|
310
|
-
let expect = js.find("output");
|
|
311
|
-
let desc = format!("tok: {}: {}", filename, js.find("description").get_str());
|
|
312
|
-
|
|
313
|
-
// Some tests want to start in a state other than Data.
|
|
314
|
-
let state_overrides = vec![None];
|
|
315
|
-
|
|
316
|
-
// Build the tests.
|
|
317
|
-
for state in state_overrides.into_iter() {
|
|
318
|
-
for &exact_errors in [false, true].iter() {
|
|
319
|
-
let mut newdesc = desc.clone();
|
|
320
|
-
if let Some(s) = state {
|
|
321
|
-
newdesc = format!("{newdesc} (in state {s:?})")
|
|
322
|
-
};
|
|
323
|
-
if exact_errors {
|
|
324
|
-
newdesc = format!("{newdesc} (exact errors)");
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
tests.push(mk_xml_test(
|
|
328
|
-
newdesc,
|
|
329
|
-
String::from(input),
|
|
330
|
-
expect.clone(),
|
|
331
|
-
XmlTokenizerOpts {
|
|
332
|
-
exact_errors,
|
|
333
|
-
initial_state: state,
|
|
334
|
-
|
|
335
|
-
// Not discarding a BOM is what the test suite expects; see
|
|
336
|
-
// https://github.com/html5lib/html5lib-tests/issues/2
|
|
337
|
-
discard_bom: false,
|
|
338
|
-
|
|
339
|
-
..Default::default()
|
|
340
|
-
},
|
|
341
|
-
));
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
fn tests(src_dir: &Path) -> Vec<Test> {
|
|
347
|
-
let mut tests = vec![];
|
|
348
|
-
foreach_xml5lib_test(
|
|
349
|
-
src_dir,
|
|
350
|
-
"tokenizer",
|
|
351
|
-
OsStr::new("test"),
|
|
352
|
-
|path, mut file| {
|
|
353
|
-
let mut s = String::new();
|
|
354
|
-
file.read_to_string(&mut s).expect("file reading error");
|
|
355
|
-
let js: Value = serde_json::from_str(&s).expect("json parse error");
|
|
356
|
-
|
|
357
|
-
if let Value::Array(ref lst) = js["tests"] {
|
|
358
|
-
for test in lst.iter() {
|
|
359
|
-
mk_xml_tests(
|
|
360
|
-
&mut tests,
|
|
361
|
-
path.file_name().unwrap().to_str().unwrap(),
|
|
362
|
-
test,
|
|
363
|
-
);
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
},
|
|
367
|
-
);
|
|
368
|
-
|
|
369
|
-
tests
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
fn main() {
|
|
373
|
-
run_all(tests(Path::new("./")));
|
|
374
|
-
}
|
|
@@ -1,237 +0,0 @@
|
|
|
1
|
-
// Copyright 2014-2017 The html5ever Project Developers. See the
|
|
2
|
-
// COPYRIGHT file at the top-level directory of this distribution.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
5
|
-
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
6
|
-
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
7
|
-
// option. This file may not be copied, modified, or distributed
|
|
8
|
-
// except according to those terms.
|
|
9
|
-
|
|
10
|
-
use markup5ever::ns;
|
|
11
|
-
use markup5ever_rcdom::*;
|
|
12
|
-
use std::collections::{HashMap, HashSet};
|
|
13
|
-
use std::ffi::OsStr;
|
|
14
|
-
use std::io::BufRead;
|
|
15
|
-
use std::path::Path;
|
|
16
|
-
use std::{fs, io, iter, mem};
|
|
17
|
-
use util::find_tests::foreach_xml5lib_test;
|
|
18
|
-
use util::runner::{run_all, Test};
|
|
19
|
-
use xml5ever::driver::parse_document;
|
|
20
|
-
use xml5ever::tendril::TendrilSink;
|
|
21
|
-
|
|
22
|
-
mod util {
|
|
23
|
-
pub mod find_tests;
|
|
24
|
-
pub mod runner;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
|
|
28
|
-
let mut tests = vec![];
|
|
29
|
-
let mut test = HashMap::new();
|
|
30
|
-
let mut key: Option<String> = None;
|
|
31
|
-
let mut val = String::new();
|
|
32
|
-
|
|
33
|
-
macro_rules! finish_val ( () => (
|
|
34
|
-
match key.take() {
|
|
35
|
-
None => (),
|
|
36
|
-
Some(key) => {
|
|
37
|
-
assert!(test.insert(key, mem::take(&mut val)).is_none());
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
));
|
|
41
|
-
|
|
42
|
-
macro_rules! finish_test ( () => (
|
|
43
|
-
if !test.is_empty() {
|
|
44
|
-
tests.push(mem::take(&mut test));
|
|
45
|
-
}
|
|
46
|
-
));
|
|
47
|
-
|
|
48
|
-
loop {
|
|
49
|
-
match lines.next() {
|
|
50
|
-
None => break,
|
|
51
|
-
Some(line) => {
|
|
52
|
-
if let Some(rest) = line.strip_prefix('#') {
|
|
53
|
-
finish_val!();
|
|
54
|
-
if line == "#data" {
|
|
55
|
-
finish_test!();
|
|
56
|
-
}
|
|
57
|
-
key = Some(rest.to_string());
|
|
58
|
-
} else {
|
|
59
|
-
val.push_str(&line);
|
|
60
|
-
val.push('\n');
|
|
61
|
-
}
|
|
62
|
-
},
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
finish_val!();
|
|
67
|
-
finish_test!();
|
|
68
|
-
tests
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
fn serialize(buf: &mut String, indent: usize, handle: Handle) {
|
|
72
|
-
buf.push('|');
|
|
73
|
-
buf.extend(iter::repeat_n(" ", indent));
|
|
74
|
-
|
|
75
|
-
let node = handle;
|
|
76
|
-
match &node.data {
|
|
77
|
-
NodeData::Document => panic!("should not reach Document"),
|
|
78
|
-
|
|
79
|
-
NodeData::Doctype {
|
|
80
|
-
name,
|
|
81
|
-
public_id,
|
|
82
|
-
system_id,
|
|
83
|
-
} => {
|
|
84
|
-
buf.push_str("<!DOCTYPE ");
|
|
85
|
-
buf.push_str(name);
|
|
86
|
-
if !public_id.is_empty() || !system_id.is_empty() {
|
|
87
|
-
buf.push_str(&format!(" \"{public_id}\" \"{system_id}\""));
|
|
88
|
-
}
|
|
89
|
-
buf.push_str(">\n");
|
|
90
|
-
},
|
|
91
|
-
|
|
92
|
-
NodeData::Text { contents } => {
|
|
93
|
-
buf.push('"');
|
|
94
|
-
buf.push_str(&contents.borrow());
|
|
95
|
-
buf.push_str("\"\n");
|
|
96
|
-
},
|
|
97
|
-
|
|
98
|
-
NodeData::ProcessingInstruction { target, contents } => {
|
|
99
|
-
buf.push_str("<?");
|
|
100
|
-
buf.push_str(target);
|
|
101
|
-
buf.push(' ');
|
|
102
|
-
buf.push_str(contents);
|
|
103
|
-
buf.push_str("?>\n");
|
|
104
|
-
},
|
|
105
|
-
|
|
106
|
-
NodeData::Comment { contents } => {
|
|
107
|
-
buf.push_str("<!-- ");
|
|
108
|
-
buf.push_str(contents);
|
|
109
|
-
buf.push_str(" -->\n");
|
|
110
|
-
},
|
|
111
|
-
|
|
112
|
-
NodeData::Element { name, attrs, .. } => {
|
|
113
|
-
buf.push('<');
|
|
114
|
-
|
|
115
|
-
if name.ns != ns!() {
|
|
116
|
-
buf.push('{');
|
|
117
|
-
buf.push_str(&name.ns);
|
|
118
|
-
buf.push('}');
|
|
119
|
-
};
|
|
120
|
-
|
|
121
|
-
if let Some(prefix) = &name.prefix {
|
|
122
|
-
buf.push_str(prefix);
|
|
123
|
-
buf.push(':');
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
buf.push_str(&name.local);
|
|
127
|
-
buf.push_str(">\n");
|
|
128
|
-
|
|
129
|
-
let mut attrs = attrs.borrow().clone();
|
|
130
|
-
attrs.sort_by(|x, y| x.name.local.cmp(&y.name.local));
|
|
131
|
-
// FIXME: sort by UTF-16 code unit
|
|
132
|
-
|
|
133
|
-
for attr in attrs.into_iter() {
|
|
134
|
-
buf.push('|');
|
|
135
|
-
buf.extend(iter::repeat_n(" ", indent + 2));
|
|
136
|
-
|
|
137
|
-
if !attr.name.ns.is_empty() {
|
|
138
|
-
buf.push('{');
|
|
139
|
-
buf.push_str(&attr.name.ns);
|
|
140
|
-
buf.push('}');
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
if let Some(attr_prefix) = attr.name.prefix {
|
|
144
|
-
buf.push_str(&attr_prefix);
|
|
145
|
-
buf.push(':');
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
buf.push_str(&format!("{}=\"{}\"\n", attr.name.local, attr.value));
|
|
149
|
-
}
|
|
150
|
-
},
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
for child in node.children.borrow().iter() {
|
|
154
|
-
serialize(buf, indent + 2, child.clone());
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// Ignore tests containing these strings; we don't support these features yet.
|
|
159
|
-
static IGNORE_SUBSTRS: &[&str] = &["<template"];
|
|
160
|
-
|
|
161
|
-
fn make_xml_test(
|
|
162
|
-
tests: &mut Vec<Test>,
|
|
163
|
-
ignores: &HashSet<String>,
|
|
164
|
-
filename: &str,
|
|
165
|
-
idx: usize,
|
|
166
|
-
fields: HashMap<String, String>,
|
|
167
|
-
) {
|
|
168
|
-
let get_field = |key| {
|
|
169
|
-
let field = fields.get(key).expect("missing field");
|
|
170
|
-
field.trim_end_matches('\n').to_string()
|
|
171
|
-
};
|
|
172
|
-
|
|
173
|
-
let data = get_field("data");
|
|
174
|
-
let expected = get_field("document");
|
|
175
|
-
let name = format!("tb: {filename}-{idx}");
|
|
176
|
-
let skip = ignores.contains(&name) || IGNORE_SUBSTRS.iter().any(|&ig| data.contains(ig));
|
|
177
|
-
|
|
178
|
-
tests.push(Test {
|
|
179
|
-
name,
|
|
180
|
-
skip,
|
|
181
|
-
test: Box::new(move || {
|
|
182
|
-
let mut result = String::new();
|
|
183
|
-
|
|
184
|
-
let dom = parse_document(RcDom::default(), Default::default()).one(data.clone());
|
|
185
|
-
for child in dom.document.children.borrow().iter() {
|
|
186
|
-
serialize(&mut result, 1, child.clone());
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
let len = result.len();
|
|
190
|
-
result.truncate(len - 1); // drop the trailing newline
|
|
191
|
-
|
|
192
|
-
if result != expected {
|
|
193
|
-
panic!("\ninput: {data}\ngot:\n{result}\nexpected:\n{expected}\n");
|
|
194
|
-
}
|
|
195
|
-
}),
|
|
196
|
-
});
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
fn tests(src_dir: &Path, ignores: &HashSet<String>) -> Vec<Test> {
|
|
200
|
-
let mut tests = vec![];
|
|
201
|
-
|
|
202
|
-
foreach_xml5lib_test(
|
|
203
|
-
src_dir,
|
|
204
|
-
"tree-construction",
|
|
205
|
-
OsStr::new("dat"),
|
|
206
|
-
|path, file| {
|
|
207
|
-
let buf = io::BufReader::new(file);
|
|
208
|
-
let lines = buf.lines().map(|res| res.expect("couldn't read"));
|
|
209
|
-
let data = parse_tests(lines);
|
|
210
|
-
|
|
211
|
-
for (i, test) in data.into_iter().enumerate() {
|
|
212
|
-
make_xml_test(
|
|
213
|
-
&mut tests,
|
|
214
|
-
ignores,
|
|
215
|
-
path.file_name().unwrap().to_str().unwrap(),
|
|
216
|
-
i,
|
|
217
|
-
test,
|
|
218
|
-
);
|
|
219
|
-
}
|
|
220
|
-
},
|
|
221
|
-
);
|
|
222
|
-
|
|
223
|
-
tests
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
fn main() {
|
|
227
|
-
let src_dir = Path::new("./");
|
|
228
|
-
let mut ignores = HashSet::new();
|
|
229
|
-
if let Ok(f) = fs::File::open(src_dir.join("data/test/ignore")) {
|
|
230
|
-
let r = io::BufReader::new(f);
|
|
231
|
-
for ln in r.lines() {
|
|
232
|
-
ignores.insert(ln.unwrap().trim_end().to_string());
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
run_all(tests(src_dir, &ignores));
|
|
237
|
-
}
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon and
|
|
2
|
-
other contributors
|
|
3
|
-
|
|
4
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
-
a copy of this software and associated documentation files (the
|
|
6
|
-
"Software"), to deal in the Software without restriction, including
|
|
7
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
-
permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
-
the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be
|
|
13
|
-
included in all copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
18
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
19
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
20
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
21
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|