html-to-markdown 2.24.6 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
- data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
- data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/png/.cargo-checksum.json +1 -1
- data/rust-vendor/png/.cargo_vcs_info.json +1 -1
- data/rust-vendor/png/CHANGES.md +44 -0
- data/rust-vendor/png/Cargo.lock +124 -171
- data/rust-vendor/png/Cargo.toml +1 -1
- data/rust-vendor/png/Cargo.toml.orig +1 -1
- data/rust-vendor/png/benches/expand_paletted.rs +5 -5
- data/rust-vendor/png/benches/unfilter.rs +3 -3
- data/rust-vendor/png/src/adam7.rs +17 -10
- data/rust-vendor/png/src/common.rs +8 -8
- data/rust-vendor/png/src/decoder/mod.rs +53 -20
- data/rust-vendor/png/src/decoder/stream.rs +263 -78
- data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
- data/rust-vendor/png/src/decoder/zlib.rs +130 -90
- data/rust-vendor/png/src/encoder.rs +4 -2
- data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
- data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
- data/rust-vendor/png/src/filter/paeth.rs +398 -0
- data/rust-vendor/png/src/filter/simd.rs +308 -0
- data/rust-vendor/png/src/lib.rs +1 -0
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +7 -177
- data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
- data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
- data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
- data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
- data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
- data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
- data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
- data/rust-vendor/markup5ever_rcdom/README.md +0 -7
- data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
- data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
- data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
- data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
- data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
- data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
- data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
- data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
- data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
- data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
- data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
- data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
- data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
- data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
- data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
- data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
- data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
- data/rust-vendor/xml5ever/Cargo.lock +0 -752
- data/rust-vendor/xml5ever/Cargo.toml +0 -69
- data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
- data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
- data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
- data/rust-vendor/xml5ever/README.md +0 -72
- data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
- data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
- data/rust-vendor/xml5ever/examples/README.md +0 -223
- data/rust-vendor/xml5ever/examples/example.xml +0 -3
- data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
- data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
- data/rust-vendor/xml5ever/src/driver.rs +0 -90
- data/rust-vendor/xml5ever/src/lib.rs +0 -47
- data/rust-vendor/xml5ever/src/macros.rs +0 -18
- data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
- data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
- data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
- data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
- data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
- data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
- data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
- data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
// Copyright 2014-2017 The html5ever Project Developers. See the
|
|
2
|
-
// COPYRIGHT file at the top-level directory of this distribution.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
5
|
-
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
6
|
-
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
7
|
-
// option. This file may not be copied, modified, or distributed
|
|
8
|
-
// except according to those terms.
|
|
9
|
-
|
|
10
|
-
enum QualNameState {
|
|
11
|
-
BeforeName,
|
|
12
|
-
InName,
|
|
13
|
-
AfterColon,
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
pub struct QualNameTokenizer<'a> {
|
|
17
|
-
state: QualNameState,
|
|
18
|
-
slice: &'a [u8],
|
|
19
|
-
valid_index: Option<u32>,
|
|
20
|
-
curr_ind: usize,
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
impl QualNameTokenizer<'_> {
|
|
24
|
-
pub fn new(tag: &[u8]) -> QualNameTokenizer<'_> {
|
|
25
|
-
QualNameTokenizer {
|
|
26
|
-
state: QualNameState::BeforeName,
|
|
27
|
-
slice: tag,
|
|
28
|
-
valid_index: None,
|
|
29
|
-
curr_ind: 0,
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
pub fn run(&mut self) -> Option<u32> {
|
|
34
|
-
if !self.slice.is_empty() {
|
|
35
|
-
loop {
|
|
36
|
-
if !self.step() {
|
|
37
|
-
break;
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
self.valid_index
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
fn incr(&mut self) -> bool {
|
|
45
|
-
if self.curr_ind + 1 < self.slice.len() {
|
|
46
|
-
self.curr_ind += 1;
|
|
47
|
-
return true;
|
|
48
|
-
}
|
|
49
|
-
false
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
fn step(&mut self) -> bool {
|
|
53
|
-
match self.state {
|
|
54
|
-
QualNameState::BeforeName => self.do_before_name(),
|
|
55
|
-
QualNameState::InName => self.do_in_name(),
|
|
56
|
-
QualNameState::AfterColon => self.do_after_colon(),
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
fn do_before_name(&mut self) -> bool {
|
|
61
|
-
if self.slice[self.curr_ind] == b':' {
|
|
62
|
-
false
|
|
63
|
-
} else {
|
|
64
|
-
self.state = QualNameState::InName;
|
|
65
|
-
self.incr()
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
fn do_in_name(&mut self) -> bool {
|
|
70
|
-
if self.slice[self.curr_ind] == b':' && self.curr_ind + 1 < self.slice.len() {
|
|
71
|
-
self.valid_index = Some(self.curr_ind as u32);
|
|
72
|
-
self.state = QualNameState::AfterColon;
|
|
73
|
-
}
|
|
74
|
-
self.incr()
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
fn do_after_colon(&mut self) -> bool {
|
|
78
|
-
if self.slice[self.curr_ind] == b':' {
|
|
79
|
-
self.valid_index = None;
|
|
80
|
-
return false;
|
|
81
|
-
}
|
|
82
|
-
self.incr()
|
|
83
|
-
}
|
|
84
|
-
}
|
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
// Copyright 2014-2017 The html5ever Project Developers. See the
|
|
2
|
-
// COPYRIGHT file at the top-level directory of this distribution.
|
|
3
|
-
//
|
|
4
|
-
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
5
|
-
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
6
|
-
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
7
|
-
// option. This file may not be copied, modified, or distributed
|
|
8
|
-
// except according to those terms.
|
|
9
|
-
|
|
10
|
-
//! Tokenizer states.
|
|
11
|
-
|
|
12
|
-
/// Specifies either the public or system identifier from a [Document Type Declaration] (DTD).
|
|
13
|
-
///
|
|
14
|
-
/// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration
|
|
15
|
-
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
|
|
16
|
-
pub enum DoctypeKind {
|
|
17
|
-
/// The public identifier.
|
|
18
|
-
Public,
|
|
19
|
-
/// The system identifier.
|
|
20
|
-
System,
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
/// Specifies the different states an XML tokenizer will assume during parsing.
|
|
24
|
-
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
|
|
25
|
-
pub enum XmlState {
|
|
26
|
-
/// The initial state of the parser.
|
|
27
|
-
///
|
|
28
|
-
/// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the html parser,
|
|
29
|
-
/// except null codepoints do not cause errors.
|
|
30
|
-
Data,
|
|
31
|
-
/// Indicates that the parser has found a `<` character and will try to parse a tag.
|
|
32
|
-
TagState,
|
|
33
|
-
/// Indicates that the parser has consumed the `/` of a closing tag, like `</foo>`.
|
|
34
|
-
EndTagState,
|
|
35
|
-
/// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of `</foo>`.
|
|
36
|
-
EndTagName,
|
|
37
|
-
/// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow.
|
|
38
|
-
EndTagNameAfter,
|
|
39
|
-
/// Indicates that the parser has started parsing a [processing instruction] (PI).
|
|
40
|
-
///
|
|
41
|
-
/// This state is reached after the initial `?` character has been consumed.
|
|
42
|
-
///
|
|
43
|
-
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
|
|
44
|
-
Pi,
|
|
45
|
-
/// Indicates that the parser is currently parsing the target of a [processing instruction].
|
|
46
|
-
///
|
|
47
|
-
/// For example, the target of `<?xml-stylesheet type="text/xsl" href="style.xsl"?>` is `xml-stylesheet`.
|
|
48
|
-
///
|
|
49
|
-
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
|
|
50
|
-
PiTarget,
|
|
51
|
-
/// Indicates that the parser has finished parsing the target of a [processing instruction].
|
|
52
|
-
///
|
|
53
|
-
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
|
|
54
|
-
PiTargetAfter,
|
|
55
|
-
/// Indicates that the parser is currently parsing the data of a [processing instruction].
|
|
56
|
-
///
|
|
57
|
-
/// The "data" refers to everything between the target and the closing `?` character.
|
|
58
|
-
///
|
|
59
|
-
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
|
|
60
|
-
PiData,
|
|
61
|
-
/// Indicates that the parser has parsed the closing `?` of a [processing instruction].
|
|
62
|
-
///
|
|
63
|
-
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
|
|
64
|
-
PiAfter,
|
|
65
|
-
/// Indicates that the parser has parsed the initial `!` of a markup declaration.
|
|
66
|
-
///
|
|
67
|
-
/// Examples of such declarations include `<!ENTITY chap1 SYSTEM "chap1.xml">` or `<!-- Comment -->`.
|
|
68
|
-
MarkupDecl,
|
|
69
|
-
/// Indicates that the parser has parsed the start of a comment (`<!--`).
|
|
70
|
-
CommentStart,
|
|
71
|
-
/// Indicates that the parser has parsed the start of a comment and a `-` directly after it.
|
|
72
|
-
CommentStartDash,
|
|
73
|
-
/// Indicates that the parser is currently parsing the data within a comment.
|
|
74
|
-
Comment,
|
|
75
|
-
/// Indicates that the parser has parsed a `<` character within a comment.
|
|
76
|
-
CommentLessThan,
|
|
77
|
-
/// Indicates that the parser has parsed `<!` within a comment.
|
|
78
|
-
CommentLessThanBang,
|
|
79
|
-
/// Indicates that the parser has parsed `<!-` within a comment.
|
|
80
|
-
CommentLessThanBangDash,
|
|
81
|
-
/// Indicates that the parser has parsed `<!--` within a comment.
|
|
82
|
-
CommentLessThanBangDashDash,
|
|
83
|
-
/// Indicates that the parser has parsed two `-` characters within a comment which may or may not
|
|
84
|
-
/// be the beginning of the comment end (`-->`).
|
|
85
|
-
CommentEnd,
|
|
86
|
-
/// Indicates that the parser has parsed a `-` character within a comment which may or may not
|
|
87
|
-
/// be the beginning of the comment end (`-->`).
|
|
88
|
-
CommentEndDash,
|
|
89
|
-
/// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the
|
|
90
|
-
/// end of the comment. Comments in XML can be closed with `--!>`.
|
|
91
|
-
CommentEndBang,
|
|
92
|
-
/// Indicates that the parser has parsed the beginning of a CDATA section (`<![CDATA[`).
|
|
93
|
-
Cdata,
|
|
94
|
-
/// Indicates that the parser has parsed a `]` character within a CDATA section, which may be part of
|
|
95
|
-
/// the end of the section (`]]>`).
|
|
96
|
-
CdataBracket,
|
|
97
|
-
/// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of
|
|
98
|
-
/// the end of the section (`]]>`).
|
|
99
|
-
CdataEnd,
|
|
100
|
-
/// Indicates that the parser is currently parsing the name of a tag, such as `foo` in `<foo>`.
|
|
101
|
-
TagName,
|
|
102
|
-
/// Indicates that the parser has parsed the `/` of a self-closing tag, such as `<foo/>`.
|
|
103
|
-
TagEmpty,
|
|
104
|
-
/// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or
|
|
105
|
-
/// a `>`.
|
|
106
|
-
TagAttrNameBefore,
|
|
107
|
-
/// Indicates that the parser is currently parsing the name of an attribute within a tag, such as
|
|
108
|
-
/// `bar` in `<foo bar=baz>`.
|
|
109
|
-
TagAttrName,
|
|
110
|
-
/// Indicates that the parser has finished parsing the name of an attribute.
|
|
111
|
-
TagAttrNameAfter,
|
|
112
|
-
/// Indicates that the parser is about to parse the value of an attribute.
|
|
113
|
-
TagAttrValueBefore,
|
|
114
|
-
/// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in
|
|
115
|
-
/// `<foo bar=baz>`.
|
|
116
|
-
///
|
|
117
|
-
/// Includes information about how the value is quoted, because the quotes before and after the attribute
|
|
118
|
-
/// value need to match.
|
|
119
|
-
TagAttrValue(AttrValueKind),
|
|
120
|
-
/// Indicates that the parser has parsed the beginning of a document type definition (`<!DOCTYPE`).
|
|
121
|
-
Doctype,
|
|
122
|
-
/// Indicates that the parser expects to parse the name of the document type definition next.
|
|
123
|
-
BeforeDoctypeName,
|
|
124
|
-
/// Indicates that the parser is currently parsing the name of a document type definition, such as
|
|
125
|
-
/// `html` in `<!DOCTYPE html>`.
|
|
126
|
-
DoctypeName,
|
|
127
|
-
/// Indicates that the parser has finished parsing the name of the document type definition and now optionally
|
|
128
|
-
/// expects either a public or a system identifier.
|
|
129
|
-
AfterDoctypeName,
|
|
130
|
-
/// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`).
|
|
131
|
-
AfterDoctypeKeyword(DoctypeKind),
|
|
132
|
-
/// Indicates that the parser is about to parse the value of a public or system identifier within
|
|
133
|
-
/// a document type definition, such as `foo` in
|
|
134
|
-
/// `<!DOCTYPE html PUBLIC "foo" "bar">`.
|
|
135
|
-
BeforeDoctypeIdentifier(DoctypeKind),
|
|
136
|
-
/// Indicates that the parser is currently parsing the value of a public or system identifier
|
|
137
|
-
/// that is surrounded by double quotes, such as `foo` in
|
|
138
|
-
/// `<!DOCTYPE html PUBLIC "foo" "bar">`.
|
|
139
|
-
DoctypeIdentifierDoubleQuoted(DoctypeKind),
|
|
140
|
-
/// Indicates that the parser is currently parsing the value of a public or system identifier
|
|
141
|
-
/// that is surrounded by single quotes, such as `foo` in
|
|
142
|
-
/// `<!DOCTYPE html PUBLIC 'foo' 'bar'>`.
|
|
143
|
-
DoctypeIdentifierSingleQuoted(DoctypeKind),
|
|
144
|
-
/// Indicates that the parser has finished parsing either a public or system identifier within a
|
|
145
|
-
/// document type definition.
|
|
146
|
-
AfterDoctypeIdentifier(DoctypeKind),
|
|
147
|
-
/// Indicates that the parser has finished parsing a public identifier and now expects
|
|
148
|
-
/// a system identifier.
|
|
149
|
-
BetweenDoctypePublicAndSystemIdentifiers,
|
|
150
|
-
/// Indicates that the parser is currently parsing an ill-formed document type definition, such as
|
|
151
|
-
/// `<!DOCTYPE html what-is-this>`.
|
|
152
|
-
BogusDoctype,
|
|
153
|
-
/// Indicates that the parser is currently parsing an ill-formed comment, such as
|
|
154
|
-
/// `<? this is not what a comment should look like! >`.
|
|
155
|
-
BogusComment,
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
/// Specifies how an attribute value is quoted, if at all.
|
|
159
|
-
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
|
|
160
|
-
pub enum AttrValueKind {
|
|
161
|
-
/// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`.
|
|
162
|
-
Unquoted,
|
|
163
|
-
/// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`.
|
|
164
|
-
SingleQuoted,
|
|
165
|
-
/// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`.
|
|
166
|
-
DoubleQuoted,
|
|
167
|
-
}
|