html-to-markdown 2.24.6 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
- data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
- data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/png/.cargo-checksum.json +1 -1
- data/rust-vendor/png/.cargo_vcs_info.json +1 -1
- data/rust-vendor/png/CHANGES.md +44 -0
- data/rust-vendor/png/Cargo.lock +124 -171
- data/rust-vendor/png/Cargo.toml +1 -1
- data/rust-vendor/png/Cargo.toml.orig +1 -1
- data/rust-vendor/png/benches/expand_paletted.rs +5 -5
- data/rust-vendor/png/benches/unfilter.rs +3 -3
- data/rust-vendor/png/src/adam7.rs +17 -10
- data/rust-vendor/png/src/common.rs +8 -8
- data/rust-vendor/png/src/decoder/mod.rs +53 -20
- data/rust-vendor/png/src/decoder/stream.rs +263 -78
- data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
- data/rust-vendor/png/src/decoder/zlib.rs +130 -90
- data/rust-vendor/png/src/encoder.rs +4 -2
- data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
- data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
- data/rust-vendor/png/src/filter/paeth.rs +398 -0
- data/rust-vendor/png/src/filter/simd.rs +308 -0
- data/rust-vendor/png/src/lib.rs +1 -0
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +7 -177
- data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
- data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
- data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
- data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
- data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
- data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
- data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
- data/rust-vendor/markup5ever_rcdom/README.md +0 -7
- data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
- data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
- data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
- data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
- data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
- data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
- data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
- data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
- data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
- data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
- data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
- data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
- data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
- data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
- data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
- data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
- data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
- data/rust-vendor/xml5ever/Cargo.lock +0 -752
- data/rust-vendor/xml5ever/Cargo.toml +0 -69
- data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
- data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
- data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
- data/rust-vendor/xml5ever/README.md +0 -72
- data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
- data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
- data/rust-vendor/xml5ever/examples/README.md +0 -223
- data/rust-vendor/xml5ever/examples/example.xml +0 -3
- data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
- data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
- data/rust-vendor/xml5ever/src/driver.rs +0 -90
- data/rust-vendor/xml5ever/src/lib.rs +0 -47
- data/rust-vendor/xml5ever/src/macros.rs +0 -18
- data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
- data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
- data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
- data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
- data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
- data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
- data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
- data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
|
@@ -1,1344 +0,0 @@
|
|
|
1
|
-
// Copyright 2014-2017 The html5ever Project Developers. See the
|
|
2
|
-
// COPYRIGHT file at the top-level directory of this distribution.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
5
|
-
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
6
|
-
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
7
|
-
// option. This file may not be copied, modified, or distributed
|
|
8
|
-
// except according to those terms.
|
|
9
|
-
|
|
10
|
-
mod char_ref;
|
|
11
|
-
mod interface;
|
|
12
|
-
mod qname;
|
|
13
|
-
pub mod states;
|
|
14
|
-
|
|
15
|
-
pub use self::interface::{
|
|
16
|
-
Doctype, EmptyTag, EndTag, Pi, ShortTag, StartTag, Tag, TagKind, Token, TokenSink,
|
|
17
|
-
};
|
|
18
|
-
pub use crate::{LocalName, Namespace, Prefix};
|
|
19
|
-
|
|
20
|
-
use crate::macros::time;
|
|
21
|
-
use crate::tendril::StrTendril;
|
|
22
|
-
use crate::{buffer_queue, Attribute, QualName, SmallCharSet};
|
|
23
|
-
use log::debug;
|
|
24
|
-
use markup5ever::{local_name, namespace_prefix, ns, small_char_set, TokenizerResult};
|
|
25
|
-
use std::borrow::Cow::{self, Borrowed};
|
|
26
|
-
use std::cell::{Cell, RefCell, RefMut};
|
|
27
|
-
use std::collections::BTreeMap;
|
|
28
|
-
use std::mem::replace;
|
|
29
|
-
|
|
30
|
-
use buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult};
|
|
31
|
-
use char_ref::{CharRef, CharRefTokenizer};
|
|
32
|
-
use qname::QualNameTokenizer;
|
|
33
|
-
use states::{AttrValueKind::*, DoctypeKind, DoctypeKind::*, XmlState};
|
|
34
|
-
|
|
35
|
-
/// Copy of Tokenizer options, with an impl for `Default`.
|
|
36
|
-
#[derive(Copy, Clone)]
|
|
37
|
-
pub struct XmlTokenizerOpts {
|
|
38
|
-
/// Report all parse errors described in the spec, at some
|
|
39
|
-
/// performance penalty? Default: false
|
|
40
|
-
pub exact_errors: bool,
|
|
41
|
-
|
|
42
|
-
/// Discard a `U+FEFF BYTE ORDER MARK` if we see one at the beginning
|
|
43
|
-
/// of the stream? Default: true
|
|
44
|
-
pub discard_bom: bool,
|
|
45
|
-
|
|
46
|
-
/// Keep a record of how long we spent in each state? Printed
|
|
47
|
-
/// when `end()` is called. Default: false
|
|
48
|
-
pub profile: bool,
|
|
49
|
-
|
|
50
|
-
/// Initial state override. Only the test runner should use
|
|
51
|
-
/// a non-`None` value!
|
|
52
|
-
pub initial_state: Option<XmlState>,
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
fn process_qname(tag_name: StrTendril) -> QualName {
|
|
56
|
-
// If tag name can't possibly contain full namespace, skip qualified name
|
|
57
|
-
// parsing altogether. For a tag to have namespace it must look like:
|
|
58
|
-
// a:b
|
|
59
|
-
// Since StrTendril are UTF-8, we know that minimal size in bytes must be
|
|
60
|
-
// three bytes minimum.
|
|
61
|
-
let split = if (*tag_name).len() < 3 {
|
|
62
|
-
None
|
|
63
|
-
} else {
|
|
64
|
-
QualNameTokenizer::new((*tag_name).as_bytes()).run()
|
|
65
|
-
};
|
|
66
|
-
|
|
67
|
-
match split {
|
|
68
|
-
None => QualName::new(None, ns!(), LocalName::from(&*tag_name)),
|
|
69
|
-
Some(col) => {
|
|
70
|
-
let len = (*tag_name).len() as u32;
|
|
71
|
-
let prefix = tag_name.subtendril(0, col);
|
|
72
|
-
let local = tag_name.subtendril(col + 1, len - col - 1);
|
|
73
|
-
let ns = ns!(); // Actual namespace URL set in XmlTreeBuilder::bind_qname
|
|
74
|
-
QualName::new(Some(Prefix::from(&*prefix)), ns, LocalName::from(&*local))
|
|
75
|
-
},
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
fn option_push(opt_str: &mut Option<StrTendril>, c: char) {
|
|
80
|
-
match *opt_str {
|
|
81
|
-
Some(ref mut s) => s.push_char(c),
|
|
82
|
-
None => *opt_str = Some(StrTendril::from_char(c)),
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
impl Default for XmlTokenizerOpts {
|
|
87
|
-
fn default() -> XmlTokenizerOpts {
|
|
88
|
-
XmlTokenizerOpts {
|
|
89
|
-
exact_errors: false,
|
|
90
|
-
discard_bom: true,
|
|
91
|
-
profile: false,
|
|
92
|
-
initial_state: None,
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
/// The Xml tokenizer.
|
|
97
|
-
pub struct XmlTokenizer<Sink> {
|
|
98
|
-
/// Options controlling the behavior of the tokenizer.
|
|
99
|
-
opts: XmlTokenizerOpts,
|
|
100
|
-
|
|
101
|
-
/// Destination for tokens we emit.
|
|
102
|
-
pub sink: Sink,
|
|
103
|
-
|
|
104
|
-
/// The abstract machine state as described in the spec.
|
|
105
|
-
state: Cell<XmlState>,
|
|
106
|
-
|
|
107
|
-
/// Are we at the end of the file, once buffers have been processed
|
|
108
|
-
/// completely? This affects whether we will wait for lookahead or not.
|
|
109
|
-
at_eof: Cell<bool>,
|
|
110
|
-
|
|
111
|
-
/// Tokenizer for character references, if we're tokenizing
|
|
112
|
-
/// one at the moment.
|
|
113
|
-
char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>,
|
|
114
|
-
|
|
115
|
-
/// Current input character. Just consumed, may reconsume.
|
|
116
|
-
current_char: Cell<char>,
|
|
117
|
-
|
|
118
|
-
/// Should we reconsume the current input character?
|
|
119
|
-
reconsume: Cell<bool>,
|
|
120
|
-
|
|
121
|
-
/// Did we just consume \r, translating it to \n? In that case we need
|
|
122
|
-
/// to ignore the next character if it's \n.
|
|
123
|
-
ignore_lf: Cell<bool>,
|
|
124
|
-
|
|
125
|
-
/// Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the
|
|
126
|
-
/// beginning of the stream.
|
|
127
|
-
discard_bom: Cell<bool>,
|
|
128
|
-
|
|
129
|
-
/// Temporary buffer
|
|
130
|
-
temp_buf: RefCell<StrTendril>,
|
|
131
|
-
|
|
132
|
-
/// Current tag kind.
|
|
133
|
-
current_tag_kind: Cell<TagKind>,
|
|
134
|
-
|
|
135
|
-
/// Current tag name.
|
|
136
|
-
current_tag_name: RefCell<StrTendril>,
|
|
137
|
-
|
|
138
|
-
/// Current tag attributes.
|
|
139
|
-
current_tag_attrs: RefCell<Vec<Attribute>>,
|
|
140
|
-
|
|
141
|
-
/// Current attribute name.
|
|
142
|
-
current_attr_name: RefCell<StrTendril>,
|
|
143
|
-
|
|
144
|
-
/// Current attribute value.
|
|
145
|
-
current_attr_value: RefCell<StrTendril>,
|
|
146
|
-
|
|
147
|
-
current_doctype: RefCell<Doctype>,
|
|
148
|
-
|
|
149
|
-
/// Current comment.
|
|
150
|
-
current_comment: RefCell<StrTendril>,
|
|
151
|
-
|
|
152
|
-
/// Current processing instruction target.
|
|
153
|
-
current_pi_target: RefCell<StrTendril>,
|
|
154
|
-
|
|
155
|
-
/// Current processing instruction value.
|
|
156
|
-
current_pi_data: RefCell<StrTendril>,
|
|
157
|
-
|
|
158
|
-
/// Record of how many ns we spent in each state, if profiling is enabled.
|
|
159
|
-
state_profile: RefCell<BTreeMap<XmlState, u64>>,
|
|
160
|
-
|
|
161
|
-
/// Record of how many ns we spent in the token sink.
|
|
162
|
-
time_in_sink: Cell<u64>,
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
impl<Sink: TokenSink> XmlTokenizer<Sink> {
|
|
166
|
-
/// Create a new tokenizer which feeds tokens to a particular `TokenSink`.
|
|
167
|
-
pub fn new(sink: Sink, opts: XmlTokenizerOpts) -> XmlTokenizer<Sink> {
|
|
168
|
-
if opts.profile && cfg!(for_c) {
|
|
169
|
-
panic!("Can't profile tokenizer when built as a C library");
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
let state = *opts.initial_state.as_ref().unwrap_or(&XmlState::Data);
|
|
173
|
-
let discard_bom = opts.discard_bom;
|
|
174
|
-
XmlTokenizer {
|
|
175
|
-
opts,
|
|
176
|
-
sink,
|
|
177
|
-
state: Cell::new(state),
|
|
178
|
-
char_ref_tokenizer: RefCell::new(None),
|
|
179
|
-
at_eof: Cell::new(false),
|
|
180
|
-
current_char: Cell::new('\0'),
|
|
181
|
-
reconsume: Cell::new(false),
|
|
182
|
-
ignore_lf: Cell::new(false),
|
|
183
|
-
temp_buf: RefCell::new(StrTendril::new()),
|
|
184
|
-
discard_bom: Cell::new(discard_bom),
|
|
185
|
-
current_tag_kind: Cell::new(StartTag),
|
|
186
|
-
current_tag_name: RefCell::new(StrTendril::new()),
|
|
187
|
-
current_tag_attrs: RefCell::new(vec![]),
|
|
188
|
-
current_attr_name: RefCell::new(StrTendril::new()),
|
|
189
|
-
current_attr_value: RefCell::new(StrTendril::new()),
|
|
190
|
-
current_comment: RefCell::new(StrTendril::new()),
|
|
191
|
-
current_pi_data: RefCell::new(StrTendril::new()),
|
|
192
|
-
current_pi_target: RefCell::new(StrTendril::new()),
|
|
193
|
-
current_doctype: RefCell::new(Doctype::default()),
|
|
194
|
-
state_profile: RefCell::new(BTreeMap::new()),
|
|
195
|
-
time_in_sink: Cell::new(0),
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
/// Feed an input string into the tokenizer.
|
|
200
|
-
pub fn feed(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> {
|
|
201
|
-
if input.is_empty() {
|
|
202
|
-
return TokenizerResult::Done;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
if self.discard_bom.get() {
|
|
206
|
-
if let Some(c) = input.peek() {
|
|
207
|
-
if c == '\u{feff}' {
|
|
208
|
-
input.next();
|
|
209
|
-
}
|
|
210
|
-
} else {
|
|
211
|
-
return TokenizerResult::Done;
|
|
212
|
-
}
|
|
213
|
-
};
|
|
214
|
-
|
|
215
|
-
self.run(input)
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
fn process_token(&self, token: Token) -> ProcessResult<Sink::Handle> {
|
|
219
|
-
if self.opts.profile {
|
|
220
|
-
let (result, dt) = time!(self.sink.process_token(token));
|
|
221
|
-
self.time_in_sink.set(self.time_in_sink.get() + dt);
|
|
222
|
-
result
|
|
223
|
-
} else {
|
|
224
|
-
self.sink.process_token(token)
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
// Get the next input character, which might be the character
|
|
229
|
-
// 'c' that we already consumed from the buffers.
|
|
230
|
-
fn get_preprocessed_char(&self, mut c: char, input: &BufferQueue) -> Option<char> {
|
|
231
|
-
if self.ignore_lf.get() {
|
|
232
|
-
self.ignore_lf.set(false);
|
|
233
|
-
if c == '\n' {
|
|
234
|
-
c = input.next()?;
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
if c == '\r' {
|
|
239
|
-
self.ignore_lf.set(true);
|
|
240
|
-
c = '\n';
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
// Normalize \x00 into \uFFFD
|
|
244
|
-
if c == '\x00' {
|
|
245
|
-
c = '\u{FFFD}'
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
// Exclude forbidden Unicode characters
|
|
249
|
-
if self.opts.exact_errors
|
|
250
|
-
&& match c as u32 {
|
|
251
|
-
0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true,
|
|
252
|
-
n if (n & 0xFFFE) == 0xFFFE => true,
|
|
253
|
-
_ => false,
|
|
254
|
-
}
|
|
255
|
-
{
|
|
256
|
-
let msg = format!("Bad character {c}");
|
|
257
|
-
self.emit_error(Cow::Owned(msg));
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
debug!("got character {c}");
|
|
261
|
-
self.current_char.set(c);
|
|
262
|
-
Some(c)
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
fn bad_eof_error(&self) {
|
|
266
|
-
let msg = if self.opts.exact_errors {
|
|
267
|
-
Cow::from(format!("Saw EOF in state {:?}", self.state))
|
|
268
|
-
} else {
|
|
269
|
-
Cow::from("Unexpected EOF")
|
|
270
|
-
};
|
|
271
|
-
self.emit_error(msg);
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
fn pop_except_from(&self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult> {
|
|
275
|
-
// Bail to the slow path for various corner cases.
|
|
276
|
-
// This means that `FromSet` can contain characters not in the set!
|
|
277
|
-
// It shouldn't matter because the fallback `FromSet` case should
|
|
278
|
-
// always do the same thing as the `NotFromSet` case.
|
|
279
|
-
if self.opts.exact_errors || self.reconsume.get() || self.ignore_lf.get() {
|
|
280
|
-
return self.get_char(input).map(FromSet);
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
let d = input.pop_except_from(set);
|
|
284
|
-
debug!("got characters {d:?}");
|
|
285
|
-
match d {
|
|
286
|
-
Some(FromSet(c)) => self.get_preprocessed_char(c, input).map(FromSet),
|
|
287
|
-
|
|
288
|
-
// NB: We don't set self.current_char for a run of characters not
|
|
289
|
-
// in the set. It shouldn't matter for the codepaths that use
|
|
290
|
-
// this.
|
|
291
|
-
_ => d,
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
// Check if the next characters are an ASCII case-insensitive match. See
|
|
296
|
-
// BufferQueue::eat.
|
|
297
|
-
//
|
|
298
|
-
// NB: this doesn't do input stream preprocessing or set the current input
|
|
299
|
-
// character.
|
|
300
|
-
fn eat(&self, input: &BufferQueue, pat: &str) -> Option<bool> {
|
|
301
|
-
input.push_front(replace(&mut *self.temp_buf.borrow_mut(), StrTendril::new()));
|
|
302
|
-
match input.eat(pat, u8::eq_ignore_ascii_case) {
|
|
303
|
-
None if self.at_eof.get() => Some(false),
|
|
304
|
-
None => {
|
|
305
|
-
let mut temp_buf = self.temp_buf.borrow_mut();
|
|
306
|
-
while let Some(data) = input.next() {
|
|
307
|
-
temp_buf.push_char(data);
|
|
308
|
-
}
|
|
309
|
-
None
|
|
310
|
-
},
|
|
311
|
-
Some(matched) => Some(matched),
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
/// Run the state machine for as long as we can.
|
|
316
|
-
pub fn run(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> {
|
|
317
|
-
if self.opts.profile {
|
|
318
|
-
loop {
|
|
319
|
-
let state = self.state.get();
|
|
320
|
-
let old_sink = self.time_in_sink.get();
|
|
321
|
-
let (run, mut dt) = time!(self.step(input));
|
|
322
|
-
dt -= self.time_in_sink.get() - old_sink;
|
|
323
|
-
let new = match self.state_profile.borrow_mut().get_mut(&state) {
|
|
324
|
-
Some(x) => {
|
|
325
|
-
*x += dt;
|
|
326
|
-
false
|
|
327
|
-
},
|
|
328
|
-
None => true,
|
|
329
|
-
};
|
|
330
|
-
if new {
|
|
331
|
-
// do this here because of borrow shenanigans
|
|
332
|
-
self.state_profile.borrow_mut().insert(state, dt);
|
|
333
|
-
}
|
|
334
|
-
match run {
|
|
335
|
-
ProcessResult::Continue => continue,
|
|
336
|
-
ProcessResult::Done => return TokenizerResult::Done,
|
|
337
|
-
ProcessResult::Script(handle) => return TokenizerResult::Script(handle),
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
} else {
|
|
341
|
-
loop {
|
|
342
|
-
match self.step(input) {
|
|
343
|
-
ProcessResult::Continue => continue,
|
|
344
|
-
ProcessResult::Done => return TokenizerResult::Done,
|
|
345
|
-
ProcessResult::Script(handle) => return TokenizerResult::Script(handle),
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
//§ tokenization
|
|
352
|
-
// Get the next input character, if one is available.
|
|
353
|
-
fn get_char(&self, input: &BufferQueue) -> Option<char> {
|
|
354
|
-
if self.reconsume.get() {
|
|
355
|
-
self.reconsume.set(false);
|
|
356
|
-
Some(self.current_char.get())
|
|
357
|
-
} else {
|
|
358
|
-
input
|
|
359
|
-
.next()
|
|
360
|
-
.and_then(|c| self.get_preprocessed_char(c, input))
|
|
361
|
-
}
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
fn bad_char_error(&self) {
|
|
365
|
-
let msg = if self.opts.exact_errors {
|
|
366
|
-
let c = self.current_char.get();
|
|
367
|
-
let state = self.state.get();
|
|
368
|
-
Cow::from(format!("Saw {c} in state {state:?}"))
|
|
369
|
-
} else {
|
|
370
|
-
Cow::from("Bad character")
|
|
371
|
-
};
|
|
372
|
-
self.emit_error(msg);
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
fn discard_tag(&self) {
|
|
376
|
-
*self.current_tag_name.borrow_mut() = StrTendril::new();
|
|
377
|
-
*self.current_tag_attrs.borrow_mut() = Vec::new();
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
// Begin a new tag of the given `kind` whose name starts with `c`, discarding
// whatever tag was being built before.
fn create_tag(&self, kind: TagKind, c: char) {
    self.discard_tag();
    self.current_tag_kind.set(kind);
    self.current_tag_name.borrow_mut().push_char(c);
}
|
|
385
|
-
|
|
386
|
-
// This method creates a PI token and
|
|
387
|
-
// sets its target to given char
|
|
388
|
-
fn create_pi(&self, c: char) {
|
|
389
|
-
*self.current_pi_target.borrow_mut() = StrTendril::new();
|
|
390
|
-
*self.current_pi_data.borrow_mut() = StrTendril::new();
|
|
391
|
-
self.current_pi_target.borrow_mut().push_char(c);
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
// Emit a single character token. A NUL character is replaced by U+FFFD
// before it is emitted; every other character is passed through unchanged.
fn emit_char(&self, c: char) {
    let emitted = if c == '\0' { '\u{FFFD}' } else { c };
    self.process_token(Token::Characters(StrTendril::from_char(emitted)));
}
|
|
400
|
-
|
|
401
|
-
fn emit_short_tag(&self) -> ProcessResult<Sink::Handle> {
|
|
402
|
-
self.current_tag_kind.set(ShortTag);
|
|
403
|
-
*self.current_tag_name.borrow_mut() = StrTendril::new();
|
|
404
|
-
self.emit_current_tag()
|
|
405
|
-
}
|
|
406
|
-
|
|
407
|
-
// Mark the current tag as an empty tag and emit it.
fn emit_empty_tag(&self) -> ProcessResult<Sink::Handle> {
    self.set_empty_tag();
    self.emit_current_tag()
}
|
|
411
|
-
|
|
412
|
-
// Mark the tag under construction as an empty tag without emitting it yet.
fn set_empty_tag(&self) {
    let kind = EmptyTag;
    self.current_tag_kind.set(kind);
}
|
|
415
|
-
|
|
416
|
-
// Mark the current tag as a start tag and emit it.
fn emit_start_tag(&self) -> ProcessResult<Sink::Handle> {
    let kind = StartTag;
    self.current_tag_kind.set(kind);
    self.emit_current_tag()
}
|
|
420
|
-
|
|
421
|
-
fn emit_current_tag(&self) -> ProcessResult<Sink::Handle> {
|
|
422
|
-
self.finish_attribute();
|
|
423
|
-
|
|
424
|
-
let qname = process_qname(replace(
|
|
425
|
-
&mut *self.current_tag_name.borrow_mut(),
|
|
426
|
-
StrTendril::new(),
|
|
427
|
-
));
|
|
428
|
-
|
|
429
|
-
match self.current_tag_kind.get() {
|
|
430
|
-
StartTag | EmptyTag => {},
|
|
431
|
-
EndTag => {
|
|
432
|
-
if !self.current_tag_attrs.borrow().is_empty() {
|
|
433
|
-
self.emit_error(Borrowed("Attributes on an end tag"));
|
|
434
|
-
}
|
|
435
|
-
},
|
|
436
|
-
ShortTag => {
|
|
437
|
-
if !self.current_tag_attrs.borrow().is_empty() {
|
|
438
|
-
self.emit_error(Borrowed("Attributes on a short tag"));
|
|
439
|
-
}
|
|
440
|
-
},
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
let token = Token::Tag(Tag {
|
|
444
|
-
kind: self.current_tag_kind.get(),
|
|
445
|
-
name: qname,
|
|
446
|
-
attrs: self.current_tag_attrs.take(),
|
|
447
|
-
});
|
|
448
|
-
|
|
449
|
-
self.process_token(token)
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
// The string must not contain '\0'!
|
|
453
|
-
fn emit_chars(&self, b: StrTendril) {
|
|
454
|
-
self.process_token(Token::Characters(b));
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
// Emits the current Processing Instruction
|
|
458
|
-
fn emit_pi(&self) -> ProcessResult<<Sink as TokenSink>::Handle> {
|
|
459
|
-
let token = Token::ProcessingInstruction(Pi {
|
|
460
|
-
target: replace(&mut *self.current_pi_target.borrow_mut(), StrTendril::new()),
|
|
461
|
-
data: replace(&mut *self.current_pi_data.borrow_mut(), StrTendril::new()),
|
|
462
|
-
});
|
|
463
|
-
self.process_token(token)
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
fn consume_char_ref(&self, addnl_allowed: Option<char>) {
|
|
467
|
-
// NB: The char ref tokenizer assumes we have an additional allowed
|
|
468
|
-
// character iff we're tokenizing in an attribute value.
|
|
469
|
-
*self.char_ref_tokenizer.borrow_mut() =
|
|
470
|
-
Some(Box::new(CharRefTokenizer::new(addnl_allowed)));
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
// Emit the end-of-file token.
fn emit_eof(&self) {
    let eof = Token::EndOfFile;
    self.process_token(eof);
}
|
|
476
|
-
|
|
477
|
-
fn emit_error(&self, error: Cow<'static, str>) {
|
|
478
|
-
self.process_token(Token::ParseError(error));
|
|
479
|
-
}
|
|
480
|
-
|
|
481
|
-
fn emit_current_comment(&self) {
|
|
482
|
-
let comment = self.current_comment.take();
|
|
483
|
-
self.process_token(Token::Comment(comment));
|
|
484
|
-
}
|
|
485
|
-
|
|
486
|
-
fn emit_current_doctype(&self) {
|
|
487
|
-
let doctype = self.current_doctype.take();
|
|
488
|
-
self.process_token(Token::Doctype(doctype));
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
fn doctype_id(&self, kind: DoctypeKind) -> RefMut<'_, Option<StrTendril>> {
|
|
492
|
-
let current_doctype = self.current_doctype.borrow_mut();
|
|
493
|
-
match kind {
|
|
494
|
-
DoctypeKind::Public => RefMut::map(current_doctype, |d| &mut d.public_id),
|
|
495
|
-
DoctypeKind::System => RefMut::map(current_doctype, |d| &mut d.system_id),
|
|
496
|
-
}
|
|
497
|
-
}
|
|
498
|
-
|
|
499
|
-
fn clear_doctype_id(&self, kind: DoctypeKind) {
|
|
500
|
-
let mut id = self.doctype_id(kind);
|
|
501
|
-
match *id {
|
|
502
|
-
Some(ref mut s) => s.clear(),
|
|
503
|
-
None => *id = Some(StrTendril::new()),
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
// Look at the next character without consuming it. If a reconsume is
// pending, that is the current character; otherwise it is whatever `input`
// reports as its next character.
fn peek(&self, input: &BufferQueue) -> Option<char> {
    if !self.reconsume.get() {
        return input.peek();
    }
    Some(self.current_char.get())
}
|
|
514
|
-
|
|
515
|
-
fn discard_char(&self, input: &BufferQueue) {
|
|
516
|
-
let c = self.get_char(input);
|
|
517
|
-
assert!(c.is_some());
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
// Give `buf` back to the input by pushing it onto the front of the queue.
fn unconsume(&self, input: &BufferQueue, buf: StrTendril) {
    input.push_front(buf);
}
|
|
523
|
-
}
|
|
524
|
-
|
|
525
|
-
// Shorthand for common state machine behaviors.
//
// Each arm maps one command, written as `me: command args...`, onto a single
// method call or field update on the tokenizer `$me`. Arms are only
// type-checked when they are actually expanded.
//
// The `discard_tag` and `discard_char` arms are written to match the methods
// they forward to: `discard_tag` takes no argument, and `discard_char` needs
// the input queue.
macro_rules! shorthand (
    ( $me:ident : emit $c:expr                     ) => ( $me.emit_char($c) );
    ( $me:ident : create_tag $kind:ident $c:expr   ) => ( $me.create_tag($kind, $c) );
    ( $me:ident : push_tag $c:expr                 ) => ( $me.current_tag_name.borrow_mut().push_char($c) );
    ( $me:ident : discard_tag                      ) => ( $me.discard_tag() );
    ( $me:ident : discard_char $input:expr         ) => ( $me.discard_char($input) );
    ( $me:ident : push_temp $c:expr                ) => ( $me.temp_buf.borrow_mut().push_char($c) );
    ( $me:ident : emit_temp                        ) => ( $me.emit_temp_buf() );
    ( $me:ident : clear_temp                       ) => ( $me.clear_temp_buf() );
    ( $me:ident : create_attr $c:expr              ) => ( $me.create_attribute($c) );
    ( $me:ident : push_name $c:expr                ) => ( $me.current_attr_name.borrow_mut().push_char($c) );
    ( $me:ident : push_value $c:expr               ) => ( $me.current_attr_value.borrow_mut().push_char($c) );
    ( $me:ident : append_value $c:expr             ) => ( $me.current_attr_value.borrow_mut().push_tendril($c));
    ( $me:ident : push_comment $c:expr             ) => ( $me.current_comment.borrow_mut().push_char($c) );
    ( $me:ident : append_comment $c:expr           ) => ( $me.current_comment.borrow_mut().push_slice($c) );
    ( $me:ident : emit_comment                     ) => ( $me.emit_current_comment() );
    ( $me:ident : clear_comment                    ) => ( $me.current_comment.borrow_mut().clear() );
    ( $me:ident : create_doctype                   ) => ( *$me.current_doctype.borrow_mut() = Doctype::default() );
    ( $me:ident : push_doctype_name $c:expr        ) => ( option_push(&mut $me.current_doctype.borrow_mut().name, $c) );
    ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push(&mut $me.doctype_id($k), $c) );
    ( $me:ident : clear_doctype_id $k:ident        ) => ( $me.clear_doctype_id($k) );
    ( $me:ident : emit_doctype                     ) => ( $me.emit_current_doctype() );
    ( $me:ident : error                            ) => ( $me.bad_char_error() );
    ( $me:ident : error_eof                        ) => ( $me.bad_eof_error() );
    ( $me:ident : create_pi $c:expr                ) => ( $me.create_pi($c) );
    ( $me:ident : push_pi_target $c:expr           ) => ( $me.current_pi_target.borrow_mut().push_char($c) );
    ( $me:ident : push_pi_data $c:expr             ) => ( $me.current_pi_data.borrow_mut().push_char($c) );
    ( $me:ident : set_empty_tag                    ) => ( $me.set_empty_tag() );
);
|
|
555
|
-
|
|
556
|
-
// Optional tracing of tokenizer actions. Tracing adds significant bloat and
// compile time, so the logging variant only exists when the
// `trace_tokenizer` feature is enabled; otherwise `sh_trace!` simply forwards
// to `shorthand!`.
#[cfg(feature = "trace_tokenizer")]
macro_rules! sh_trace {
    ( $me:ident : $($cmds:tt)* ) => {{
        debug!(" {:?}", stringify!($($cmds)*));
        shorthand!($me : $($cmds)*);
    }};
}

#[cfg(not(feature = "trace_tokenizer"))]
macro_rules! sh_trace {
    ( $me:ident : $($cmds:tt)* ) => {
        shorthand!($me: $($cmds)*)
    };
}
|
|
566
|
-
|
|
567
|
-
// A small DSL for sequencing shorthand actions, e.g.
// `go!(self: error; emit '<'; reconsume Data)`.
//
// Commands are separated by `;`. Every command except the last is forwarded
// to `sh_trace!`. The last command may be one of the terminal forms below
// (`to`, `reconsume`, `consume_char_ref`, `emit_*`, `eof`), all of which
// `return` from the enclosing function.
macro_rules! go {
    // A pattern like $($cmd:tt)* ; $($rest:tt)* causes parse ambiguity.
    // We have to tell the parser how much lookahead we need, so commands of
    // one to four token trees are spelled out explicitly.
    ( $me:ident : $a:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a);
        go!($me: $($rest)*);
    }};
    ( $me:ident : $a:tt $b:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a $b);
        go!($me: $($rest)*);
    }};
    ( $me:ident : $a:tt $b:tt $c:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a $b $c);
        go!($me: $($rest)*);
    }};
    ( $me:ident : $a:tt $b:tt $c:tt $d:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a $b $c $d);
        go!($me: $($rest)*);
    }};

    // These can only come at the end.

    // Switch state and ask to be stepped again.
    ( $me:ident : to $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return ProcessResult::Continue;
    }};
    ( $me:ident : to $s:ident $k1:expr ) => {{
        $me.state.set(XmlState::$s($k1));
        return ProcessResult::Continue;
    }};
    ( $me:ident : to $s:ident $k1:ident $k2:expr ) => {{
        $me.state.set(XmlState::$s($k1($k2)));
        return ProcessResult::Continue;
    }};

    // Same as `to`, but the current character is flagged for reconsumption.
    ( $me:ident : reconsume $s:ident ) => {{
        $me.reconsume.set(true);
        go!($me: to $s);
    }};
    ( $me:ident : reconsume $s:ident $k1:expr ) => {{
        $me.reconsume.set(true);
        go!($me: to $s $k1);
    }};
    ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => {{
        $me.reconsume.set(true);
        go!($me: to $s $k1 $k2);
    }};

    // Start a character reference, optionally with an additional allowed char.
    ( $me:ident : consume_char_ref ) => {{
        $me.consume_char_ref(None);
        return ProcessResult::Continue;
    }};
    ( $me:ident : consume_char_ref $addnl:expr ) => {{
        $me.consume_char_ref(Some($addnl));
        return ProcessResult::Continue;
    }};

    // We have a default next state after emitting a tag, but the sink can override.
    ( $me:ident : emit_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_current_tag();
    }};

    // Empty and short tags get dedicated emit commands in XML.
    ( $me:ident : emit_short_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_short_tag();
    }};

    ( $me:ident : emit_empty_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_empty_tag();
    }};

    ( $me:ident : emit_start_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_start_tag();
    }};

    ( $me:ident : emit_pi $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_pi();
    }};

    ( $me:ident : eof ) => {{
        $me.emit_eof();
        return ProcessResult::Done;
    }};

    // If nothing else matched, it's a single command
    ( $me:ident : $($cmd:tt)+ ) => {
        sh_trace!($me: $($cmd)+)
    };

    // or nothing.
    ( $me:ident : ) => {
        ()
    };
}
|
|
625
|
-
|
|
626
|
-
// Evaluates to the next character from `$me.get_char($input)`.
// This has to be a macro rather than a function: when no character is
// available it returns `ProcessResult::Done` from the *calling* function.
macro_rules! get_char {
    ($me:expr, $input:expr) => {
        match $me.get_char($input) {
            Some(character) => character,
            None => return ProcessResult::Done,
        }
    };
}
|
|
634
|
-
|
|
635
|
-
// Evaluates to the value yielded by `$me.pop_except_from($input, $set)`.
// When that call yields `None`, `ProcessResult::Done` is returned from the
// calling function instead.
macro_rules! pop_except_from {
    ($me:expr, $input:expr, $set:expr) => {
        match $me.pop_except_from($input, $set) {
            Some(popped_element) => popped_element,
            None => return ProcessResult::Done,
        }
    };
}
|
|
641
|
-
|
|
642
|
-
// Evaluates to the value yielded by `$me.eat($input, $pat)`.
// When that call yields `None`, `ProcessResult::Done` is returned from the
// calling function instead.
macro_rules! eat {
    ($me:expr, $input:expr, $pat:expr) => {
        match $me.eat($input, $pat) {
            Some(value) => value,
            None => return ProcessResult::Done,
        }
    };
}
|
|
648
|
-
|
|
649
|
-
/// The result of a single tokenization operation.
///
/// `Handle` is the sink-defined handle type carried by [`ProcessResult::Script`].
#[derive(Debug)]
pub enum ProcessResult<Handle> {
    /// The tokenizer needs more input before it can continue
    Done,
    /// The tokenizer can be invoked again immediately
    Continue,
    /// The tokenizer encountered a script element that must be executed
    /// before tokenization can continue
    Script(Handle),
}
|
|
659
|
-
|
|
660
|
-
impl<Sink: TokenSink> XmlTokenizer<Sink> {
|
|
661
|
-
// Run the state machine for a while.
|
|
662
|
-
#[allow(clippy::never_loop)]
|
|
663
|
-
fn step(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle> {
|
|
664
|
-
if self.char_ref_tokenizer.borrow().is_some() {
|
|
665
|
-
return self.step_char_ref_tokenizer(input);
|
|
666
|
-
}
|
|
667
|
-
|
|
668
|
-
debug!("processing in state {:?}", self.state);
|
|
669
|
-
match self.state.get() {
|
|
670
|
-
//§ data-state
|
|
671
|
-
XmlState::Data => loop {
|
|
672
|
-
match pop_except_from!(self, input, small_char_set!('\r' '&' '<')) {
|
|
673
|
-
FromSet('&') => go!(self: consume_char_ref),
|
|
674
|
-
FromSet('<') => go!(self: to TagState),
|
|
675
|
-
FromSet(c) => go!(self: emit c),
|
|
676
|
-
NotFromSet(b) => self.emit_chars(b),
|
|
677
|
-
}
|
|
678
|
-
},
|
|
679
|
-
//§ tag-state
|
|
680
|
-
XmlState::TagState => loop {
|
|
681
|
-
match get_char!(self, input) {
|
|
682
|
-
'!' => go!(self: to MarkupDecl),
|
|
683
|
-
'/' => go!(self: to EndTagState),
|
|
684
|
-
'?' => go!(self: to Pi),
|
|
685
|
-
'\t' | '\n' | ' ' | ':' | '<' | '>' => {
|
|
686
|
-
go!(self: error; emit '<'; reconsume Data)
|
|
687
|
-
},
|
|
688
|
-
cl => go!(self: create_tag StartTag cl; to TagName),
|
|
689
|
-
}
|
|
690
|
-
},
|
|
691
|
-
//§ end-tag-state
|
|
692
|
-
XmlState::EndTagState => loop {
|
|
693
|
-
match get_char!(self, input) {
|
|
694
|
-
'>' => go!(self: emit_short_tag Data),
|
|
695
|
-
'\t' | '\n' | ' ' | '<' | ':' => {
|
|
696
|
-
go!(self: error; emit '<'; emit '/'; reconsume Data)
|
|
697
|
-
},
|
|
698
|
-
cl => go!(self: create_tag EndTag cl; to EndTagName),
|
|
699
|
-
}
|
|
700
|
-
},
|
|
701
|
-
//§ end-tag-name-state
|
|
702
|
-
XmlState::EndTagName => loop {
|
|
703
|
-
match get_char!(self, input) {
|
|
704
|
-
'\t' | '\n' | ' ' => go!(self: to EndTagNameAfter),
|
|
705
|
-
'/' => go!(self: error; to EndTagNameAfter),
|
|
706
|
-
'>' => go!(self: emit_tag Data),
|
|
707
|
-
cl => go!(self: push_tag cl),
|
|
708
|
-
}
|
|
709
|
-
},
|
|
710
|
-
//§ end-tag-name-after-state
|
|
711
|
-
XmlState::EndTagNameAfter => loop {
|
|
712
|
-
match get_char!(self, input) {
|
|
713
|
-
'>' => go!(self: emit_tag Data),
|
|
714
|
-
'\t' | '\n' | ' ' => (),
|
|
715
|
-
_ => self.emit_error(Borrowed("Unexpected element in tag name")),
|
|
716
|
-
}
|
|
717
|
-
},
|
|
718
|
-
//§ pi-state
|
|
719
|
-
XmlState::Pi => loop {
|
|
720
|
-
match get_char!(self, input) {
|
|
721
|
-
'\t' | '\n' | ' ' => go!(self: error; reconsume BogusComment),
|
|
722
|
-
cl => go!(self: create_pi cl; to PiTarget),
|
|
723
|
-
}
|
|
724
|
-
},
|
|
725
|
-
//§ pi-target-state
|
|
726
|
-
XmlState::PiTarget => loop {
|
|
727
|
-
match get_char!(self, input) {
|
|
728
|
-
'\t' | '\n' | ' ' => go!(self: to PiTargetAfter),
|
|
729
|
-
'?' => go!(self: to PiAfter),
|
|
730
|
-
cl => go!(self: push_pi_target cl),
|
|
731
|
-
}
|
|
732
|
-
},
|
|
733
|
-
//§ pi-target-after-state
|
|
734
|
-
XmlState::PiTargetAfter => loop {
|
|
735
|
-
match get_char!(self, input) {
|
|
736
|
-
'\t' | '\n' | ' ' => (),
|
|
737
|
-
_ => go!(self: reconsume PiData),
|
|
738
|
-
}
|
|
739
|
-
},
|
|
740
|
-
//§ pi-data-state
|
|
741
|
-
XmlState::PiData => loop {
|
|
742
|
-
match get_char!(self, input) {
|
|
743
|
-
'?' => go!(self: to PiAfter),
|
|
744
|
-
cl => go!(self: push_pi_data cl),
|
|
745
|
-
}
|
|
746
|
-
},
|
|
747
|
-
//§ pi-after-state
|
|
748
|
-
XmlState::PiAfter => loop {
|
|
749
|
-
match get_char!(self, input) {
|
|
750
|
-
'>' => go!(self: emit_pi Data),
|
|
751
|
-
'?' => go!(self: to PiAfter),
|
|
752
|
-
cl => go!(self: push_pi_data cl),
|
|
753
|
-
}
|
|
754
|
-
},
|
|
755
|
-
//§ markup-declaration-state
|
|
756
|
-
XmlState::MarkupDecl => loop {
|
|
757
|
-
if eat!(self, input, "--") {
|
|
758
|
-
go!(self: clear_comment; to CommentStart);
|
|
759
|
-
} else if eat!(self, input, "[CDATA[") {
|
|
760
|
-
go!(self: to Cdata);
|
|
761
|
-
} else if eat!(self, input, "DOCTYPE") {
|
|
762
|
-
go!(self: to Doctype);
|
|
763
|
-
} else {
|
|
764
|
-
// FIXME: 'error' gives wrong message
|
|
765
|
-
go!(self: error; to BogusComment);
|
|
766
|
-
}
|
|
767
|
-
},
|
|
768
|
-
//§ comment-start-state
|
|
769
|
-
XmlState::CommentStart => loop {
|
|
770
|
-
match get_char!(self, input) {
|
|
771
|
-
'-' => go!(self: to CommentStartDash),
|
|
772
|
-
'>' => go!(self: error; emit_comment; to Data),
|
|
773
|
-
_ => go!(self: reconsume Comment),
|
|
774
|
-
}
|
|
775
|
-
},
|
|
776
|
-
//§ comment-start-dash-state
|
|
777
|
-
XmlState::CommentStartDash => loop {
|
|
778
|
-
match get_char!(self, input) {
|
|
779
|
-
'-' => go!(self: to CommentEnd),
|
|
780
|
-
'>' => go!(self: error; emit_comment; to Data),
|
|
781
|
-
_ => go!(self: push_comment '-'; reconsume Comment),
|
|
782
|
-
}
|
|
783
|
-
},
|
|
784
|
-
//§ comment-state
|
|
785
|
-
XmlState::Comment => loop {
|
|
786
|
-
match get_char!(self, input) {
|
|
787
|
-
'<' => go!(self: push_comment '<'; to CommentLessThan),
|
|
788
|
-
'-' => go!(self: to CommentEndDash),
|
|
789
|
-
c => go!(self: push_comment c),
|
|
790
|
-
}
|
|
791
|
-
},
|
|
792
|
-
//§ comment-less-than-sign-state
|
|
793
|
-
XmlState::CommentLessThan => loop {
|
|
794
|
-
match get_char!(self, input) {
|
|
795
|
-
'!' => go!(self: push_comment '!';to CommentLessThanBang),
|
|
796
|
-
'<' => go!(self: push_comment '<'),
|
|
797
|
-
_ => go!(self: reconsume Comment),
|
|
798
|
-
}
|
|
799
|
-
},
|
|
800
|
-
//§ comment-less-than-sign-bang-state
|
|
801
|
-
XmlState::CommentLessThanBang => loop {
|
|
802
|
-
match get_char!(self, input) {
|
|
803
|
-
'-' => go!(self: to CommentLessThanBangDash),
|
|
804
|
-
_ => go!(self: reconsume Comment),
|
|
805
|
-
}
|
|
806
|
-
},
|
|
807
|
-
//§ comment-less-than-sign-bang-dash-state
|
|
808
|
-
XmlState::CommentLessThanBangDash => loop {
|
|
809
|
-
match get_char!(self, input) {
|
|
810
|
-
'-' => go!(self: to CommentLessThanBangDashDash),
|
|
811
|
-
_ => go!(self: reconsume CommentEndDash),
|
|
812
|
-
}
|
|
813
|
-
},
|
|
814
|
-
//§ comment-less-than-sign-bang-dash-dash-state
|
|
815
|
-
XmlState::CommentLessThanBangDashDash => loop {
|
|
816
|
-
match get_char!(self, input) {
|
|
817
|
-
'>' => go!(self: reconsume CommentEnd),
|
|
818
|
-
_ => go!(self: error; reconsume CommentEnd),
|
|
819
|
-
}
|
|
820
|
-
},
|
|
821
|
-
//§ comment-end-dash-state
|
|
822
|
-
XmlState::CommentEndDash => loop {
|
|
823
|
-
match get_char!(self, input) {
|
|
824
|
-
'-' => go!(self: to CommentEnd),
|
|
825
|
-
_ => go!(self: push_comment '-'; reconsume Comment),
|
|
826
|
-
}
|
|
827
|
-
},
|
|
828
|
-
//§ comment-end-state
|
|
829
|
-
XmlState::CommentEnd => loop {
|
|
830
|
-
match get_char!(self, input) {
|
|
831
|
-
'>' => go!(self: emit_comment; to Data),
|
|
832
|
-
'!' => go!(self: to CommentEndBang),
|
|
833
|
-
'-' => go!(self: push_comment '-'),
|
|
834
|
-
_ => go!(self: append_comment "--"; reconsume Comment),
|
|
835
|
-
}
|
|
836
|
-
},
|
|
837
|
-
//§ comment-end-bang-state
|
|
838
|
-
XmlState::CommentEndBang => loop {
|
|
839
|
-
match get_char!(self, input) {
|
|
840
|
-
'-' => go!(self: append_comment "--!"; to CommentEndDash),
|
|
841
|
-
'>' => go!(self: error; emit_comment; to Data),
|
|
842
|
-
_ => go!(self: append_comment "--!"; reconsume Comment),
|
|
843
|
-
}
|
|
844
|
-
},
|
|
845
|
-
//§ bogus-comment-state
|
|
846
|
-
XmlState::BogusComment => loop {
|
|
847
|
-
match get_char!(self, input) {
|
|
848
|
-
'>' => go!(self: emit_comment; to Data),
|
|
849
|
-
c => go!(self: push_comment c),
|
|
850
|
-
}
|
|
851
|
-
},
|
|
852
|
-
//§ cdata-state
|
|
853
|
-
XmlState::Cdata => loop {
|
|
854
|
-
match get_char!(self, input) {
|
|
855
|
-
']' => go!(self: to CdataBracket),
|
|
856
|
-
cl => go!(self: emit cl),
|
|
857
|
-
}
|
|
858
|
-
},
|
|
859
|
-
//§ cdata-bracket-state
|
|
860
|
-
XmlState::CdataBracket => loop {
|
|
861
|
-
match get_char!(self, input) {
|
|
862
|
-
']' => go!(self: to CdataEnd),
|
|
863
|
-
cl => go!(self: emit ']'; emit cl; to Cdata),
|
|
864
|
-
}
|
|
865
|
-
},
|
|
866
|
-
//§ cdata-end-state
|
|
867
|
-
XmlState::CdataEnd => loop {
|
|
868
|
-
match get_char!(self, input) {
|
|
869
|
-
'>' => go!(self: to Data),
|
|
870
|
-
']' => go!(self: emit ']'),
|
|
871
|
-
cl => go!(self: emit ']'; emit ']'; emit cl; to Cdata),
|
|
872
|
-
}
|
|
873
|
-
},
|
|
874
|
-
//§ tag-name-state
|
|
875
|
-
XmlState::TagName => loop {
|
|
876
|
-
match get_char!(self, input) {
|
|
877
|
-
'\t' | '\n' | ' ' => go!(self: to TagAttrNameBefore),
|
|
878
|
-
'>' => go!(self: emit_tag Data),
|
|
879
|
-
'/' => go!(self: set_empty_tag; to TagEmpty),
|
|
880
|
-
cl => go!(self: push_tag cl),
|
|
881
|
-
}
|
|
882
|
-
},
|
|
883
|
-
//§ empty-tag-state
|
|
884
|
-
XmlState::TagEmpty => loop {
|
|
885
|
-
match get_char!(self, input) {
|
|
886
|
-
'>' => go!(self: emit_empty_tag Data),
|
|
887
|
-
_ => go!(self: reconsume TagAttrValueBefore),
|
|
888
|
-
}
|
|
889
|
-
},
|
|
890
|
-
//§ tag-attribute-name-before-state
|
|
891
|
-
XmlState::TagAttrNameBefore => loop {
|
|
892
|
-
match get_char!(self, input) {
|
|
893
|
-
'\t' | '\n' | ' ' => (),
|
|
894
|
-
'>' => go!(self: emit_tag Data),
|
|
895
|
-
'/' => go!(self: set_empty_tag; to TagEmpty),
|
|
896
|
-
':' => go!(self: error),
|
|
897
|
-
cl => go!(self: create_attr cl; to TagAttrName),
|
|
898
|
-
}
|
|
899
|
-
},
|
|
900
|
-
//§ tag-attribute-name-state
|
|
901
|
-
XmlState::TagAttrName => loop {
|
|
902
|
-
match get_char!(self, input) {
|
|
903
|
-
'=' => go!(self: to TagAttrValueBefore),
|
|
904
|
-
'>' => go!(self: emit_tag Data),
|
|
905
|
-
'\t' | '\n' | ' ' => go!(self: to TagAttrNameAfter),
|
|
906
|
-
'/' => go!(self: set_empty_tag; to TagEmpty),
|
|
907
|
-
cl => go!(self: push_name cl),
|
|
908
|
-
}
|
|
909
|
-
},
|
|
910
|
-
//§ tag-attribute-name-after-state
|
|
911
|
-
XmlState::TagAttrNameAfter => loop {
|
|
912
|
-
match get_char!(self, input) {
|
|
913
|
-
'\t' | '\n' | ' ' => (),
|
|
914
|
-
'=' => go!(self: to TagAttrValueBefore),
|
|
915
|
-
'>' => go!(self: emit_tag Data),
|
|
916
|
-
'/' => go!(self: set_empty_tag; to TagEmpty),
|
|
917
|
-
cl => go!(self: create_attr cl; to TagAttrName),
|
|
918
|
-
}
|
|
919
|
-
},
|
|
920
|
-
//§ tag-attribute-value-before-state
|
|
921
|
-
XmlState::TagAttrValueBefore => loop {
|
|
922
|
-
match get_char!(self, input) {
|
|
923
|
-
'\t' | '\n' | ' ' => (),
|
|
924
|
-
'"' => go!(self: to TagAttrValue DoubleQuoted),
|
|
925
|
-
'\'' => go!(self: to TagAttrValue SingleQuoted),
|
|
926
|
-
'&' => go!(self: reconsume TagAttrValue(Unquoted)),
|
|
927
|
-
'>' => go!(self: emit_tag Data),
|
|
928
|
-
cl => go!(self: push_value cl; to TagAttrValue(Unquoted)),
|
|
929
|
-
}
|
|
930
|
-
},
|
|
931
|
-
//§ tag-attribute-value-double-quoted-state
|
|
932
|
-
XmlState::TagAttrValue(DoubleQuoted) => loop {
|
|
933
|
-
match pop_except_from!(self, input, small_char_set!('\n' '"' '&')) {
|
|
934
|
-
FromSet('"') => go!(self: to TagAttrNameBefore),
|
|
935
|
-
FromSet('&') => go!(self: consume_char_ref '"' ),
|
|
936
|
-
FromSet(c) => go!(self: push_value c),
|
|
937
|
-
NotFromSet(ref b) => go!(self: append_value b),
|
|
938
|
-
}
|
|
939
|
-
},
|
|
940
|
-
//§ tag-attribute-value-single-quoted-state
|
|
941
|
-
XmlState::TagAttrValue(SingleQuoted) => loop {
|
|
942
|
-
match pop_except_from!(self, input, small_char_set!('\n' '\'' '&')) {
|
|
943
|
-
FromSet('\'') => go!(self: to TagAttrNameBefore),
|
|
944
|
-
FromSet('&') => go!(self: consume_char_ref '\''),
|
|
945
|
-
FromSet(c) => go!(self: push_value c),
|
|
946
|
-
NotFromSet(ref b) => go!(self: append_value b),
|
|
947
|
-
}
|
|
948
|
-
},
|
|
949
|
-
//§ tag-attribute-value-double-quoted-state
|
|
950
|
-
XmlState::TagAttrValue(Unquoted) => loop {
|
|
951
|
-
match pop_except_from!(self, input, small_char_set!('\n' '\t' ' ' '&' '>')) {
|
|
952
|
-
FromSet('\t') | FromSet('\n') | FromSet(' ') => go!(self: to TagAttrNameBefore),
|
|
953
|
-
FromSet('&') => go!(self: consume_char_ref),
|
|
954
|
-
FromSet('>') => go!(self: emit_tag Data),
|
|
955
|
-
FromSet(c) => go!(self: push_value c),
|
|
956
|
-
NotFromSet(ref b) => go!(self: append_value b),
|
|
957
|
-
}
|
|
958
|
-
},
|
|
959
|
-
|
|
960
|
-
//§ doctype-state
|
|
961
|
-
XmlState::Doctype => loop {
|
|
962
|
-
match get_char!(self, input) {
|
|
963
|
-
'\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName),
|
|
964
|
-
_ => go!(self: error; reconsume BeforeDoctypeName),
|
|
965
|
-
}
|
|
966
|
-
},
|
|
967
|
-
//§ before-doctype-name-state
|
|
968
|
-
XmlState::BeforeDoctypeName => loop {
|
|
969
|
-
match get_char!(self, input) {
|
|
970
|
-
'\t' | '\n' | '\x0C' | ' ' => (),
|
|
971
|
-
'>' => go!(self: error; emit_doctype; to Data),
|
|
972
|
-
c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase());
|
|
973
|
-
to DoctypeName),
|
|
974
|
-
}
|
|
975
|
-
},
|
|
976
|
-
//§ doctype-name-state
|
|
977
|
-
XmlState::DoctypeName => loop {
|
|
978
|
-
match get_char!(self, input) {
|
|
979
|
-
'\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterDoctypeName),
|
|
980
|
-
'>' => go!(self: emit_doctype; to Data),
|
|
981
|
-
c => go!(self: push_doctype_name (c.to_ascii_lowercase());
|
|
982
|
-
to DoctypeName),
|
|
983
|
-
}
|
|
984
|
-
},
|
|
985
|
-
//§ after-doctype-name-state
|
|
986
|
-
XmlState::AfterDoctypeName => loop {
|
|
987
|
-
if eat!(self, input, "public") {
|
|
988
|
-
go!(self: to AfterDoctypeKeyword Public);
|
|
989
|
-
} else if eat!(self, input, "system") {
|
|
990
|
-
go!(self: to AfterDoctypeKeyword System);
|
|
991
|
-
} else {
|
|
992
|
-
match get_char!(self, input) {
|
|
993
|
-
'\t' | '\n' | '\x0C' | ' ' => (),
|
|
994
|
-
'>' => go!(self: emit_doctype; to Data),
|
|
995
|
-
_ => go!(self: error; to BogusDoctype),
|
|
996
|
-
}
|
|
997
|
-
}
|
|
998
|
-
},
|
|
999
|
-
//§ after-doctype-public-keyword-state
|
|
1000
|
-
XmlState::AfterDoctypeKeyword(Public) => loop {
|
|
1001
|
-
match get_char!(self, input) {
|
|
1002
|
-
'\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier Public),
|
|
1003
|
-
'"' => {
|
|
1004
|
-
go!(self: error; clear_doctype_id Public; to DoctypeIdentifierDoubleQuoted Public)
|
|
1005
|
-
},
|
|
1006
|
-
'\'' => {
|
|
1007
|
-
go!(self: error; clear_doctype_id Public; to DoctypeIdentifierSingleQuoted Public)
|
|
1008
|
-
},
|
|
1009
|
-
'>' => go!(self: error; emit_doctype; to Data),
|
|
1010
|
-
_ => go!(self: error; to BogusDoctype),
|
|
1011
|
-
}
|
|
1012
|
-
},
|
|
1013
|
-
//§ after-doctype-system-keyword-state
|
|
1014
|
-
XmlState::AfterDoctypeKeyword(System) => loop {
|
|
1015
|
-
match get_char!(self, input) {
|
|
1016
|
-
'\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier System),
|
|
1017
|
-
'"' => {
|
|
1018
|
-
go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
|
|
1019
|
-
},
|
|
1020
|
-
'\'' => {
|
|
1021
|
-
go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
|
|
1022
|
-
},
|
|
1023
|
-
'>' => go!(self: error; emit_doctype; to Data),
|
|
1024
|
-
_ => go!(self: error; to BogusDoctype),
|
|
1025
|
-
}
|
|
1026
|
-
},
|
|
1027
|
-
//§ before_doctype_public_identifier_state before_doctype_system_identifier_state
|
|
1028
|
-
XmlState::BeforeDoctypeIdentifier(kind) => loop {
|
|
1029
|
-
match get_char!(self, input) {
|
|
1030
|
-
'\t' | '\n' | '\x0C' | ' ' => (),
|
|
1031
|
-
'"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind),
|
|
1032
|
-
'\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind),
|
|
1033
|
-
'>' => go!(self: error; emit_doctype; to Data),
|
|
1034
|
-
_ => go!(self: error; to BogusDoctype),
|
|
1035
|
-
}
|
|
1036
|
-
},
|
|
1037
|
-
//§ doctype_public_identifier_double_quoted_state doctype_system_identifier_double_quoted_state
|
|
1038
|
-
XmlState::DoctypeIdentifierDoubleQuoted(kind) => loop {
|
|
1039
|
-
match get_char!(self, input) {
|
|
1040
|
-
'"' => go!(self: to AfterDoctypeIdentifier kind),
|
|
1041
|
-
'>' => go!(self: error; emit_doctype; to Data),
|
|
1042
|
-
c => go!(self: push_doctype_id kind c),
|
|
1043
|
-
}
|
|
1044
|
-
},
|
|
1045
|
-
//§ doctype_public_identifier_single_quoted_state doctype_system_identifier_single_quoted_state
|
|
1046
|
-
XmlState::DoctypeIdentifierSingleQuoted(kind) => loop {
|
|
1047
|
-
match get_char!(self, input) {
|
|
1048
|
-
'\'' => go!(self: to AfterDoctypeIdentifier kind),
|
|
1049
|
-
'>' => go!(self: error; emit_doctype; to Data),
|
|
1050
|
-
c => go!(self: push_doctype_id kind c),
|
|
1051
|
-
}
|
|
1052
|
-
},
|
|
1053
|
-
//§ doctype_public_identifier_single_quoted_state
|
|
1054
|
-
XmlState::AfterDoctypeIdentifier(Public) => loop {
|
|
1055
|
-
match get_char!(self, input) {
|
|
1056
|
-
'\t' | '\n' | '\x0C' | ' ' => {
|
|
1057
|
-
go!(self: to BetweenDoctypePublicAndSystemIdentifiers)
|
|
1058
|
-
},
|
|
1059
|
-
'\'' => {
|
|
1060
|
-
go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted(System))
|
|
1061
|
-
},
|
|
1062
|
-
'"' => {
|
|
1063
|
-
go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted(System))
|
|
1064
|
-
},
|
|
1065
|
-
'>' => go!(self: emit_doctype; to Data),
|
|
1066
|
-
_ => go!(self: error; to BogusDoctype),
|
|
1067
|
-
}
|
|
1068
|
-
},
|
|
1069
|
-
//§ doctype_system_identifier_single_quoted_state
|
|
1070
|
-
XmlState::AfterDoctypeIdentifier(System) => loop {
|
|
1071
|
-
match get_char!(self, input) {
|
|
1072
|
-
'\t' | '\n' | '\x0C' | ' ' => (),
|
|
1073
|
-
'>' => go!(self: emit_doctype; to Data),
|
|
1074
|
-
_ => go!(self: error; to BogusDoctype),
|
|
1075
|
-
}
|
|
1076
|
-
},
|
|
1077
|
-
//§ between_doctype_public_and_system_identifier_state
|
|
1078
|
-
XmlState::BetweenDoctypePublicAndSystemIdentifiers => loop {
|
|
1079
|
-
match get_char!(self, input) {
|
|
1080
|
-
'\t' | '\n' | '\x0C' | ' ' => (),
|
|
1081
|
-
'>' => go!(self: emit_doctype; to Data),
|
|
1082
|
-
'\'' => go!(self: to DoctypeIdentifierSingleQuoted System),
|
|
1083
|
-
'"' => go!(self: to DoctypeIdentifierDoubleQuoted System),
|
|
1084
|
-
_ => go!(self: error; to BogusDoctype),
|
|
1085
|
-
}
|
|
1086
|
-
},
|
|
1087
|
-
//§ bogus_doctype_state
|
|
1088
|
-
XmlState::BogusDoctype => loop {
|
|
1089
|
-
if get_char!(self, input) == '>' {
|
|
1090
|
-
go!(self: emit_doctype; to Data);
|
|
1091
|
-
}
|
|
1092
|
-
},
|
|
1093
|
-
}
|
|
1094
|
-
}
|
|
1095
|
-
|
|
1096
|
-
/// Indicate that we have reached the end of the input.
|
|
1097
|
-
pub fn end(&self) {
|
|
1098
|
-
// Handle EOF in the char ref sub-tokenizer, if there is one.
|
|
1099
|
-
// Do this first because it might un-consume stuff.
|
|
1100
|
-
let input = BufferQueue::default();
|
|
1101
|
-
match self.char_ref_tokenizer.take() {
|
|
1102
|
-
None => (),
|
|
1103
|
-
Some(mut tok) => {
|
|
1104
|
-
tok.end_of_file(self, &input);
|
|
1105
|
-
self.process_char_ref(tok.get_result());
|
|
1106
|
-
},
|
|
1107
|
-
}
|
|
1108
|
-
|
|
1109
|
-
// Process all remaining buffered input.
|
|
1110
|
-
// If we're waiting for lookahead, we're not gonna get it.
|
|
1111
|
-
self.at_eof.set(true);
|
|
1112
|
-
let _ = self.run(&input);
|
|
1113
|
-
|
|
1114
|
-
loop {
|
|
1115
|
-
if !matches!(self.eof_step(), ProcessResult::Continue) {
|
|
1116
|
-
break;
|
|
1117
|
-
}
|
|
1118
|
-
}
|
|
1119
|
-
|
|
1120
|
-
self.sink.end();
|
|
1121
|
-
|
|
1122
|
-
if self.opts.profile {
|
|
1123
|
-
self.dump_profile();
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
#[cfg(for_c)]
|
|
1128
|
-
fn dump_profile(&self) {
|
|
1129
|
-
unreachable!();
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
#[cfg(not(for_c))]
|
|
1133
|
-
fn dump_profile(&self) {
|
|
1134
|
-
let mut results: Vec<(XmlState, u64)> = self
|
|
1135
|
-
.state_profile
|
|
1136
|
-
.borrow()
|
|
1137
|
-
.iter()
|
|
1138
|
-
.map(|(s, t)| (*s, *t))
|
|
1139
|
-
.collect();
|
|
1140
|
-
results.sort_by(|&(_, x), &(_, y)| y.cmp(&x));
|
|
1141
|
-
|
|
1142
|
-
let total: u64 = results
|
|
1143
|
-
.iter()
|
|
1144
|
-
.map(|&(_, t)| t)
|
|
1145
|
-
.fold(0, ::std::ops::Add::add);
|
|
1146
|
-
debug!("\nTokenizer profile, in nanoseconds");
|
|
1147
|
-
debug!(
|
|
1148
|
-
"\n{:12} total in token sink",
|
|
1149
|
-
self.time_in_sink.get()
|
|
1150
|
-
);
|
|
1151
|
-
debug!("\n{total:12} total in tokenizer");
|
|
1152
|
-
|
|
1153
|
-
for (k, v) in results.into_iter() {
|
|
1154
|
-
let pct = 100.0 * (v as f64) / (total as f64);
|
|
1155
|
-
debug!("{v:12} {pct:4.1}% {k:?}");
|
|
1156
|
-
}
|
|
1157
|
-
}
|
|
1158
|
-
|
|
1159
|
-
fn eof_step(&self) -> ProcessResult<Sink::Handle> {
|
|
1160
|
-
debug!("processing EOF in state {:?}", self.state.get());
|
|
1161
|
-
match self.state.get() {
|
|
1162
|
-
XmlState::Data => go!(self: eof),
|
|
1163
|
-
XmlState::CommentStart | XmlState::CommentLessThan | XmlState::CommentLessThanBang => {
|
|
1164
|
-
go!(self: reconsume Comment)
|
|
1165
|
-
},
|
|
1166
|
-
XmlState::CommentLessThanBangDash => go!(self: reconsume CommentEndDash),
|
|
1167
|
-
XmlState::CommentLessThanBangDashDash => go!(self: reconsume CommentEnd),
|
|
1168
|
-
XmlState::CommentStartDash
|
|
1169
|
-
| XmlState::Comment
|
|
1170
|
-
| XmlState::CommentEndDash
|
|
1171
|
-
| XmlState::CommentEnd
|
|
1172
|
-
| XmlState::CommentEndBang => go!(self: error_eof; emit_comment; eof),
|
|
1173
|
-
XmlState::TagState => go!(self: error_eof; emit '<'; to Data),
|
|
1174
|
-
XmlState::EndTagState => go!(self: error_eof; emit '<'; emit '/'; to Data),
|
|
1175
|
-
XmlState::TagEmpty => go!(self: error_eof; to TagAttrNameBefore),
|
|
1176
|
-
XmlState::Cdata | XmlState::CdataBracket | XmlState::CdataEnd => {
|
|
1177
|
-
go!(self: error_eof; to Data)
|
|
1178
|
-
},
|
|
1179
|
-
XmlState::Pi => go!(self: error_eof; to BogusComment),
|
|
1180
|
-
XmlState::PiTargetAfter | XmlState::PiAfter => go!(self: reconsume PiData),
|
|
1181
|
-
XmlState::MarkupDecl => go!(self: error_eof; to BogusComment),
|
|
1182
|
-
XmlState::TagName
|
|
1183
|
-
| XmlState::TagAttrNameBefore
|
|
1184
|
-
| XmlState::EndTagName
|
|
1185
|
-
| XmlState::TagAttrNameAfter
|
|
1186
|
-
| XmlState::EndTagNameAfter
|
|
1187
|
-
| XmlState::TagAttrValueBefore
|
|
1188
|
-
| XmlState::TagAttrValue(_) => go!(self: error_eof; emit_tag Data),
|
|
1189
|
-
XmlState::PiData | XmlState::PiTarget => go!(self: error_eof; emit_pi Data),
|
|
1190
|
-
XmlState::TagAttrName => go!(self: error_eof; emit_start_tag Data),
|
|
1191
|
-
XmlState::BeforeDoctypeName
|
|
1192
|
-
| XmlState::Doctype
|
|
1193
|
-
| XmlState::DoctypeName
|
|
1194
|
-
| XmlState::AfterDoctypeName
|
|
1195
|
-
| XmlState::AfterDoctypeKeyword(_)
|
|
1196
|
-
| XmlState::BeforeDoctypeIdentifier(_)
|
|
1197
|
-
| XmlState::AfterDoctypeIdentifier(_)
|
|
1198
|
-
| XmlState::DoctypeIdentifierSingleQuoted(_)
|
|
1199
|
-
| XmlState::DoctypeIdentifierDoubleQuoted(_)
|
|
1200
|
-
| XmlState::BetweenDoctypePublicAndSystemIdentifiers => {
|
|
1201
|
-
go!(self: error_eof; emit_doctype; to Data)
|
|
1202
|
-
},
|
|
1203
|
-
XmlState::BogusDoctype => go!(self: emit_doctype; to Data),
|
|
1204
|
-
XmlState::BogusComment => go!(self: emit_comment; to Data),
|
|
1205
|
-
}
|
|
1206
|
-
}
|
|
1207
|
-
|
|
1208
|
-
fn process_char_ref(&self, char_ref: CharRef) {
|
|
1209
|
-
let CharRef {
|
|
1210
|
-
mut chars,
|
|
1211
|
-
mut num_chars,
|
|
1212
|
-
} = char_ref;
|
|
1213
|
-
|
|
1214
|
-
if num_chars == 0 {
|
|
1215
|
-
chars[0] = '&';
|
|
1216
|
-
num_chars = 1;
|
|
1217
|
-
}
|
|
1218
|
-
|
|
1219
|
-
for i in 0..num_chars {
|
|
1220
|
-
let c = chars[i as usize];
|
|
1221
|
-
match self.state.get() {
|
|
1222
|
-
XmlState::Data | XmlState::Cdata => go!(self: emit c),
|
|
1223
|
-
|
|
1224
|
-
XmlState::TagAttrValue(_) => go!(self: push_value c),
|
|
1225
|
-
|
|
1226
|
-
_ => panic!(
|
|
1227
|
-
"state {:?} should not be reachable in process_char_ref",
|
|
1228
|
-
self.state.get()
|
|
1229
|
-
),
|
|
1230
|
-
}
|
|
1231
|
-
}
|
|
1232
|
-
}
|
|
1233
|
-
|
|
1234
|
-
fn step_char_ref_tokenizer(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle> {
|
|
1235
|
-
let mut tok = self.char_ref_tokenizer.take().unwrap();
|
|
1236
|
-
let outcome = tok.step(self, input);
|
|
1237
|
-
|
|
1238
|
-
let progress = match outcome {
|
|
1239
|
-
char_ref::Done => {
|
|
1240
|
-
self.process_char_ref(tok.get_result());
|
|
1241
|
-
return ProcessResult::Continue;
|
|
1242
|
-
},
|
|
1243
|
-
|
|
1244
|
-
char_ref::Stuck => ProcessResult::Done,
|
|
1245
|
-
char_ref::Progress => ProcessResult::Continue,
|
|
1246
|
-
};
|
|
1247
|
-
|
|
1248
|
-
*self.char_ref_tokenizer.borrow_mut() = Some(tok);
|
|
1249
|
-
progress
|
|
1250
|
-
}
|
|
1251
|
-
|
|
1252
|
-
fn finish_attribute(&self) {
|
|
1253
|
-
if self.current_attr_name.borrow().is_empty() {
|
|
1254
|
-
return;
|
|
1255
|
-
}
|
|
1256
|
-
|
|
1257
|
-
// Check for a duplicate attribute.
|
|
1258
|
-
// FIXME: the spec says we should error as soon as the name is finished.
|
|
1259
|
-
// FIXME: linear time search, do we care?
|
|
1260
|
-
let dup = {
|
|
1261
|
-
let current_attr_name = self.current_attr_name.borrow();
|
|
1262
|
-
let name = ¤t_attr_name[..];
|
|
1263
|
-
self.current_tag_attrs
|
|
1264
|
-
.borrow()
|
|
1265
|
-
.iter()
|
|
1266
|
-
.any(|a| &*a.name.local == name)
|
|
1267
|
-
};
|
|
1268
|
-
|
|
1269
|
-
if dup {
|
|
1270
|
-
self.emit_error(Borrowed("Duplicate attribute"));
|
|
1271
|
-
self.current_attr_name.borrow_mut().clear();
|
|
1272
|
-
self.current_attr_value.borrow_mut().clear();
|
|
1273
|
-
} else {
|
|
1274
|
-
let qname = process_qname(replace(
|
|
1275
|
-
&mut self.current_attr_name.borrow_mut(),
|
|
1276
|
-
StrTendril::new(),
|
|
1277
|
-
));
|
|
1278
|
-
let attr = Attribute {
|
|
1279
|
-
name: qname.clone(),
|
|
1280
|
-
value: replace(&mut self.current_attr_value.borrow_mut(), StrTendril::new()),
|
|
1281
|
-
};
|
|
1282
|
-
|
|
1283
|
-
if qname.local == local_name!("xmlns")
|
|
1284
|
-
|| qname.prefix == Some(namespace_prefix!("xmlns"))
|
|
1285
|
-
{
|
|
1286
|
-
self.current_tag_attrs.borrow_mut().insert(0, attr);
|
|
1287
|
-
} else {
|
|
1288
|
-
self.current_tag_attrs.borrow_mut().push(attr);
|
|
1289
|
-
}
|
|
1290
|
-
}
|
|
1291
|
-
}
|
|
1292
|
-
|
|
1293
|
-
fn create_attribute(&self, c: char) {
|
|
1294
|
-
self.finish_attribute();
|
|
1295
|
-
|
|
1296
|
-
self.current_attr_name.borrow_mut().push_char(c);
|
|
1297
|
-
}
|
|
1298
|
-
}
|
|
1299
|
-
|
|
1300
|
-
#[cfg(test)]
mod test {

    use super::process_qname;
    use crate::tendril::SliceExt;
    use crate::{LocalName, Prefix};

    /// Names with exactly one interior colon split into prefix and local part.
    #[test]
    fn simple_namespace() {
        let cases = [("prefix:local", "prefix", "local"), ("a:b", "a", "b")];

        for &(input, prefix, local) in cases.iter() {
            let qname = process_qname(input.to_tendril());
            assert_eq!(qname.prefix, Some(Prefix::from(prefix)));
            assert_eq!(qname.local, LocalName::from(local));
        }
    }

    /// Malformed names get no prefix and keep the whole input as local name.
    #[test]
    fn wrong_namespaces() {
        let malformed = [":local", "::local", "a::local", "fake::", ":::", ":a:b:"];

        for &input in malformed.iter() {
            let qname = process_qname(input.to_tendril());
            assert_eq!(qname.prefix, None);
            assert_eq!(qname.local, LocalName::from(input));
        }
    }
}
|