html-to-markdown 2.24.6 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
- data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
- data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/png/.cargo-checksum.json +1 -1
- data/rust-vendor/png/.cargo_vcs_info.json +1 -1
- data/rust-vendor/png/CHANGES.md +44 -0
- data/rust-vendor/png/Cargo.lock +124 -171
- data/rust-vendor/png/Cargo.toml +1 -1
- data/rust-vendor/png/Cargo.toml.orig +1 -1
- data/rust-vendor/png/benches/expand_paletted.rs +5 -5
- data/rust-vendor/png/benches/unfilter.rs +3 -3
- data/rust-vendor/png/src/adam7.rs +17 -10
- data/rust-vendor/png/src/common.rs +8 -8
- data/rust-vendor/png/src/decoder/mod.rs +53 -20
- data/rust-vendor/png/src/decoder/stream.rs +263 -78
- data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
- data/rust-vendor/png/src/decoder/zlib.rs +130 -90
- data/rust-vendor/png/src/encoder.rs +4 -2
- data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
- data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
- data/rust-vendor/png/src/filter/paeth.rs +398 -0
- data/rust-vendor/png/src/filter/simd.rs +308 -0
- data/rust-vendor/png/src/lib.rs +1 -0
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +7 -177
- data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
- data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
- data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
- data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
- data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
- data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
- data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
- data/rust-vendor/markup5ever_rcdom/README.md +0 -7
- data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
- data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
- data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
- data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
- data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
- data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
- data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
- data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
- data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
- data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
- data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
- data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
- data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
- data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
- data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
- data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
- data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
- data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
- data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
- data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
- data/rust-vendor/xml5ever/Cargo.lock +0 -752
- data/rust-vendor/xml5ever/Cargo.toml +0 -69
- data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
- data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
- data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
- data/rust-vendor/xml5ever/README.md +0 -72
- data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
- data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
- data/rust-vendor/xml5ever/examples/README.md +0 -223
- data/rust-vendor/xml5ever/examples/example.xml +0 -3
- data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
- data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
- data/rust-vendor/xml5ever/src/driver.rs +0 -90
- data/rust-vendor/xml5ever/src/lib.rs +0 -47
- data/rust-vendor/xml5ever/src/macros.rs +0 -18
- data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
- data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
- data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
- data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
- data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
- data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
- data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
- data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
|
@@ -297,7 +297,7 @@ impl<'a, W: Write> Encoder<'a, W> {
|
|
|
297
297
|
self.info.bit_depth = depth;
|
|
298
298
|
}
|
|
299
299
|
|
|
300
|
-
/// Set compression parameters, see [Compression] for the available options.
|
|
300
|
+
/// Set compression parameters, see [`Compression`] for the available options.
|
|
301
301
|
pub fn set_compression(&mut self, compression: Compression) {
|
|
302
302
|
self.set_deflate_compression(DeflateCompression::from_simple(compression));
|
|
303
303
|
self.set_filter(Filter::from_simple(compression));
|
|
@@ -305,7 +305,9 @@ impl<'a, W: Write> Encoder<'a, W> {
|
|
|
305
305
|
|
|
306
306
|
/// Provides in-depth customization of DEFLATE compression options.
|
|
307
307
|
///
|
|
308
|
-
/// For a simpler selection of compression options see [
|
|
308
|
+
/// For a simpler selection of compression options see [`set_compression`].
|
|
309
|
+
///
|
|
310
|
+
/// [`set_compression`]: Self::set_compression
|
|
309
311
|
pub fn set_deflate_compression(&mut self, compression: DeflateCompression) {
|
|
310
312
|
self.options.compression = compression;
|
|
311
313
|
}
|
|
@@ -2,6 +2,11 @@ use core::convert::TryInto;
|
|
|
2
2
|
|
|
3
3
|
use crate::{common::BytesPerPixel, Compression};
|
|
4
4
|
|
|
5
|
+
mod paeth;
|
|
6
|
+
|
|
7
|
+
#[cfg(feature = "unstable")]
|
|
8
|
+
mod simd;
|
|
9
|
+
|
|
5
10
|
/// The byte level filter applied to scanlines to prepare them for compression.
|
|
6
11
|
///
|
|
7
12
|
/// Compression in general benefits from repetitive data. The filter is a content-aware method of
|
|
@@ -20,6 +25,7 @@ pub enum Filter {
|
|
|
20
25
|
Avg,
|
|
21
26
|
Paeth,
|
|
22
27
|
Adaptive,
|
|
28
|
+
MinEntropy,
|
|
23
29
|
}
|
|
24
30
|
|
|
25
31
|
impl Default for Filter {
|
|
@@ -88,91 +94,11 @@ impl RowFilter {
|
|
|
88
94
|
Filter::Up => Some(Self::Up),
|
|
89
95
|
Filter::Avg => Some(Self::Avg),
|
|
90
96
|
Filter::Paeth => Some(Self::Paeth),
|
|
91
|
-
Filter::Adaptive => None,
|
|
97
|
+
Filter::Adaptive | Filter::MinEntropy => None,
|
|
92
98
|
}
|
|
93
99
|
}
|
|
94
100
|
}
|
|
95
101
|
|
|
96
|
-
fn filter_paeth(a: u8, b: u8, c: u8) -> u8 {
|
|
97
|
-
// On ARM this algorithm performs much better than the one above adapted from stb,
|
|
98
|
-
// and this is the better-studied algorithm we've always used here,
|
|
99
|
-
// so we default to it on all non-x86 platforms.
|
|
100
|
-
let pa = (i16::from(b) - i16::from(c)).abs();
|
|
101
|
-
let pb = (i16::from(a) - i16::from(c)).abs();
|
|
102
|
-
let pc = ((i16::from(a) - i16::from(c)) + (i16::from(b) - i16::from(c))).abs();
|
|
103
|
-
|
|
104
|
-
let mut out = a;
|
|
105
|
-
let mut min = pa;
|
|
106
|
-
|
|
107
|
-
if pb < min {
|
|
108
|
-
min = pb;
|
|
109
|
-
out = b;
|
|
110
|
-
}
|
|
111
|
-
if pc < min {
|
|
112
|
-
out = c;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
out
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
fn filter_paeth_stbi(a: u8, b: u8, c: u8) -> u8 {
|
|
119
|
-
// Decoding optimizes better with this algorithm than with `filter_paeth`
|
|
120
|
-
//
|
|
121
|
-
// This formulation looks very different from the reference in the PNG spec, but is
|
|
122
|
-
// actually equivalent and has favorable data dependencies and admits straightforward
|
|
123
|
-
// generation of branch-free code, which helps performance significantly.
|
|
124
|
-
//
|
|
125
|
-
// Adapted from public domain PNG implementation:
|
|
126
|
-
// https://github.com/nothings/stb/blob/5c205738c191bcb0abc65c4febfa9bd25ff35234/stb_image.h#L4657-L4668
|
|
127
|
-
let thresh = i16::from(c) * 3 - (i16::from(a) + i16::from(b));
|
|
128
|
-
let lo = a.min(b);
|
|
129
|
-
let hi = a.max(b);
|
|
130
|
-
let t0 = if hi as i16 <= thresh { lo } else { c };
|
|
131
|
-
let t1 = if thresh <= lo as i16 { hi } else { t0 };
|
|
132
|
-
t1
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
fn filter_paeth_fpnge(a: u8, b: u8, c: u8) -> u8 {
|
|
136
|
-
// This is an optimized version of the paeth filter from the PNG specification, proposed by
|
|
137
|
-
// Luca Versari for [FPNGE](https://www.lucaversari.it/FJXL_and_FPNGE.pdf). It operates
|
|
138
|
-
// entirely on unsigned 8-bit quantities, making it more conducive to vectorization.
|
|
139
|
-
//
|
|
140
|
-
// p = a + b - c
|
|
141
|
-
// pa = |p - a| = |a + b - c - a| = |b - c| = max(b, c) - min(b, c)
|
|
142
|
-
// pb = |p - b| = |a + b - c - b| = |a - c| = max(a, c) - min(a, c)
|
|
143
|
-
// pc = |p - c| = |a + b - c - c| = |(b - c) + (a - c)| = ...
|
|
144
|
-
//
|
|
145
|
-
// Further optimizing the calculation of `pc` is a bit trickier. However, notice that:
|
|
146
|
-
//
|
|
147
|
-
// a > c && b > c
|
|
148
|
-
// ==> (a - c) > 0 && (b - c) > 0
|
|
149
|
-
// ==> pc > (a - c) && pc > (b - c)
|
|
150
|
-
// ==> pc > |a - c| && pc > |b - c|
|
|
151
|
-
// ==> pc > pb && pc > pa
|
|
152
|
-
//
|
|
153
|
-
// Meaning that if `c` is smaller than `a` and `b`, the value of `pc` is irrelevant. Similar
|
|
154
|
-
// reasoning applies if `c` is larger than the other two inputs. Assuming that `c >= a` and
|
|
155
|
-
// `c <= b` or vice versa:
|
|
156
|
-
//
|
|
157
|
-
// pc = ||b - c| - |a - c|| = |pa - pb| = max(pa, pb) - min(pa, pb)
|
|
158
|
-
//
|
|
159
|
-
let pa = b.max(c) - c.min(b);
|
|
160
|
-
let pb = a.max(c) - c.min(a);
|
|
161
|
-
let pc = if (a < c) == (c < b) {
|
|
162
|
-
pa.max(pb) - pa.min(pb)
|
|
163
|
-
} else {
|
|
164
|
-
255
|
|
165
|
-
};
|
|
166
|
-
|
|
167
|
-
if pa <= pb && pa <= pc {
|
|
168
|
-
a
|
|
169
|
-
} else if pb <= pc {
|
|
170
|
-
b
|
|
171
|
-
} else {
|
|
172
|
-
c
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
|
|
176
102
|
pub(crate) fn unfilter(
|
|
177
103
|
mut filter: RowFilter,
|
|
178
104
|
tbpp: BytesPerPixel,
|
|
@@ -190,110 +116,6 @@ pub(crate) fn unfilter(
|
|
|
190
116
|
}
|
|
191
117
|
}
|
|
192
118
|
|
|
193
|
-
// Auto-vectorization notes
|
|
194
|
-
// ========================
|
|
195
|
-
//
|
|
196
|
-
// [2023/01 @okaneco] - Notes on optimizing decoding filters
|
|
197
|
-
//
|
|
198
|
-
// Links:
|
|
199
|
-
// [PR]: https://github.com/image-rs/image-png/pull/382
|
|
200
|
-
// [SWAR]: http://aggregate.org/SWAR/over.html
|
|
201
|
-
// [AVG]: http://aggregate.org/MAGIC/#Average%20of%20Integers
|
|
202
|
-
//
|
|
203
|
-
// #382 heavily refactored and optimized the following filters making the
|
|
204
|
-
// implementation nonobvious. These comments function as a summary of that
|
|
205
|
-
// PR with an explanation of the choices made below.
|
|
206
|
-
//
|
|
207
|
-
// #382 originally started with trying to optimize using a technique called
|
|
208
|
-
// SWAR, SIMD Within a Register. SWAR uses regular integer types like `u32`
|
|
209
|
-
// and `u64` as SIMD registers to perform vertical operations in parallel,
|
|
210
|
-
// usually involving bit-twiddling. This allowed each `BytesPerPixel` (bpp)
|
|
211
|
-
// pixel to be decoded in parallel: 3bpp and 4bpp in a `u32`, 6bpp and 8bpp
|
|
212
|
-
// in a `u64`. The `Sub` filter looked like the following code block, `Avg`
|
|
213
|
-
// was similar but used a bitwise average method from [AVG]:
|
|
214
|
-
// ```
|
|
215
|
-
// // See "Unpartitioned Operations With Correction Code" from [SWAR]
|
|
216
|
-
// fn swar_add_u32(x: u32, y: u32) -> u32 {
|
|
217
|
-
// // 7-bit addition so there's no carry over the most significant bit
|
|
218
|
-
// let n = (x & 0x7f7f7f7f) + (y & 0x7f7f7f7f); // 0x7F = 0b_0111_1111
|
|
219
|
-
// // 1-bit parity/XOR addition to fill in the missing MSB
|
|
220
|
-
// n ^ (x ^ y) & 0x80808080 // 0x80 = 0b_1000_0000
|
|
221
|
-
// }
|
|
222
|
-
//
|
|
223
|
-
// let mut prev =
|
|
224
|
-
// u32::from_ne_bytes([current[0], current[1], current[2], current[3]]);
|
|
225
|
-
// for chunk in current[4..].chunks_exact_mut(4) {
|
|
226
|
-
// let cur = u32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
|
|
227
|
-
// let new_chunk = swar_add_u32(cur, prev);
|
|
228
|
-
// chunk.copy_from_slice(&new_chunk.to_ne_bytes());
|
|
229
|
-
// prev = new_chunk;
|
|
230
|
-
// }
|
|
231
|
-
// ```
|
|
232
|
-
// While this provided a measurable increase, @fintelia found that this idea
|
|
233
|
-
// could be taken even further by unrolling the chunks component-wise and
|
|
234
|
-
// avoiding unnecessary byte-shuffling by using byte arrays instead of
|
|
235
|
-
// `u32::from|to_ne_bytes`. The bitwise operations were no longer necessary
|
|
236
|
-
// so they were reverted to their obvious arithmetic equivalent. Lastly,
|
|
237
|
-
// `TryInto` was used instead of `copy_from_slice`. The `Sub` code now
|
|
238
|
-
// looked like this (with asserts to remove `0..bpp` bounds checks):
|
|
239
|
-
// ```
|
|
240
|
-
// assert!(len > 3);
|
|
241
|
-
// let mut prev = [current[0], current[1], current[2], current[3]];
|
|
242
|
-
// for chunk in current[4..].chunks_exact_mut(4) {
|
|
243
|
-
// let new_chunk = [
|
|
244
|
-
// chunk[0].wrapping_add(prev[0]),
|
|
245
|
-
// chunk[1].wrapping_add(prev[1]),
|
|
246
|
-
// chunk[2].wrapping_add(prev[2]),
|
|
247
|
-
// chunk[3].wrapping_add(prev[3]),
|
|
248
|
-
// ];
|
|
249
|
-
// *TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
|
|
250
|
-
// prev = new_chunk;
|
|
251
|
-
// }
|
|
252
|
-
// ```
|
|
253
|
-
// The compiler was able to optimize the code to be even faster and this
|
|
254
|
-
// method even sped up Paeth filtering! Assertions were experimentally
|
|
255
|
-
// added within loop bodies which produced better instructions but no
|
|
256
|
-
// difference in speed. Finally, the code was refactored to remove manual
|
|
257
|
-
// slicing and start the previous pixel chunks with arrays of `[0; N]`.
|
|
258
|
-
// ```
|
|
259
|
-
// let mut prev = [0; 4];
|
|
260
|
-
// for chunk in current.chunks_exact_mut(4) {
|
|
261
|
-
// let new_chunk = [
|
|
262
|
-
// chunk[0].wrapping_add(prev[0]),
|
|
263
|
-
// chunk[1].wrapping_add(prev[1]),
|
|
264
|
-
// chunk[2].wrapping_add(prev[2]),
|
|
265
|
-
// chunk[3].wrapping_add(prev[3]),
|
|
266
|
-
// ];
|
|
267
|
-
// *TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
|
|
268
|
-
// prev = new_chunk;
|
|
269
|
-
// }
|
|
270
|
-
// ```
|
|
271
|
-
// While we're not manually bit-twiddling anymore, a possible takeaway from
|
|
272
|
-
// this is to "think in SWAR" when dealing with small byte arrays. Unrolling
|
|
273
|
-
// array operations and performing them component-wise may unlock previously
|
|
274
|
-
// unavailable optimizations from the compiler, even when using the
|
|
275
|
-
// `chunks_exact` methods for their potential auto-vectorization benefits.
|
|
276
|
-
//
|
|
277
|
-
// `std::simd` notes
|
|
278
|
-
// =================
|
|
279
|
-
//
|
|
280
|
-
// In the past we have experimented with `std::simd` for unfiltering. This
|
|
281
|
-
// experiment was removed in https://github.com/image-rs/image-png/pull/585
|
|
282
|
-
// because:
|
|
283
|
-
//
|
|
284
|
-
// * The crate's microbenchmarks showed that `std::simd` didn't have a
|
|
285
|
-
// significant advantage over auto-vectorization for most filters, except
|
|
286
|
-
// for Paeth unfiltering - see
|
|
287
|
-
// https://github.com/image-rs/image-png/pull/414#issuecomment-1736655668
|
|
288
|
-
// * In the crate's microbenchmarks `std::simd` seemed to help with Paeth
|
|
289
|
-
// unfiltering only on x86/x64, with mixed results on ARM - see
|
|
290
|
-
// https://github.com/image-rs/image-png/pull/539#issuecomment-2512748043
|
|
291
|
-
// * In Chromium end-to-end microbenchmarks `std::simd` either didn't help
|
|
292
|
-
// or resulted in a small regression (as measured on x64). See
|
|
293
|
-
// https://crrev.com/c/6090592.
|
|
294
|
-
// * Field trial data from some "real world" scenarios shows that
|
|
295
|
-
// performance can be quite good without relying on `std::simd` - see
|
|
296
|
-
// https://github.com/image-rs/image-png/discussions/562#discussioncomment-13303307
|
|
297
119
|
match filter {
|
|
298
120
|
NoFilter => {}
|
|
299
121
|
Sub => match tbpp {
|
|
@@ -532,150 +354,7 @@ pub(crate) fn unfilter(
|
|
|
532
354
|
}
|
|
533
355
|
}
|
|
534
356
|
},
|
|
535
|
-
|
|
536
|
-
Paeth => {
|
|
537
|
-
// Select the fastest Paeth filter implementation based on the target architecture.
|
|
538
|
-
let filter_paeth_decode = if cfg!(target_arch = "x86_64") {
|
|
539
|
-
filter_paeth_stbi
|
|
540
|
-
} else {
|
|
541
|
-
filter_paeth
|
|
542
|
-
};
|
|
543
|
-
|
|
544
|
-
// Paeth filter pixels:
|
|
545
|
-
// C B D
|
|
546
|
-
// A X
|
|
547
|
-
match tbpp {
|
|
548
|
-
BytesPerPixel::One => {
|
|
549
|
-
let mut a_bpp = [0; 1];
|
|
550
|
-
let mut c_bpp = [0; 1];
|
|
551
|
-
for (chunk, b_bpp) in current.chunks_exact_mut(1).zip(previous.chunks_exact(1))
|
|
552
|
-
{
|
|
553
|
-
let new_chunk = [chunk[0]
|
|
554
|
-
.wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0]))];
|
|
555
|
-
*TryInto::<&mut [u8; 1]>::try_into(chunk).unwrap() = new_chunk;
|
|
556
|
-
a_bpp = new_chunk;
|
|
557
|
-
c_bpp = b_bpp.try_into().unwrap();
|
|
558
|
-
}
|
|
559
|
-
}
|
|
560
|
-
BytesPerPixel::Two => {
|
|
561
|
-
let mut a_bpp = [0; 2];
|
|
562
|
-
let mut c_bpp = [0; 2];
|
|
563
|
-
for (chunk, b_bpp) in current.chunks_exact_mut(2).zip(previous.chunks_exact(2))
|
|
564
|
-
{
|
|
565
|
-
let new_chunk = [
|
|
566
|
-
chunk[0]
|
|
567
|
-
.wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
|
|
568
|
-
chunk[1]
|
|
569
|
-
.wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
|
|
570
|
-
];
|
|
571
|
-
*TryInto::<&mut [u8; 2]>::try_into(chunk).unwrap() = new_chunk;
|
|
572
|
-
a_bpp = new_chunk;
|
|
573
|
-
c_bpp = b_bpp.try_into().unwrap();
|
|
574
|
-
}
|
|
575
|
-
}
|
|
576
|
-
BytesPerPixel::Three => {
|
|
577
|
-
let mut a_bpp = [0; 3];
|
|
578
|
-
let mut c_bpp = [0; 3];
|
|
579
|
-
|
|
580
|
-
let mut previous = &previous[..previous.len() / 3 * 3];
|
|
581
|
-
let current_len = current.len();
|
|
582
|
-
let mut current = &mut current[..current_len / 3 * 3];
|
|
583
|
-
|
|
584
|
-
while let ([c0, c1, c2, c_rest @ ..], [p0, p1, p2, p_rest @ ..]) =
|
|
585
|
-
(current, previous)
|
|
586
|
-
{
|
|
587
|
-
current = c_rest;
|
|
588
|
-
previous = p_rest;
|
|
589
|
-
|
|
590
|
-
*c0 = c0.wrapping_add(filter_paeth_decode(a_bpp[0], *p0, c_bpp[0]));
|
|
591
|
-
*c1 = c1.wrapping_add(filter_paeth_decode(a_bpp[1], *p1, c_bpp[1]));
|
|
592
|
-
*c2 = c2.wrapping_add(filter_paeth_decode(a_bpp[2], *p2, c_bpp[2]));
|
|
593
|
-
|
|
594
|
-
a_bpp = [*c0, *c1, *c2];
|
|
595
|
-
c_bpp = [*p0, *p1, *p2];
|
|
596
|
-
}
|
|
597
|
-
}
|
|
598
|
-
BytesPerPixel::Four => {
|
|
599
|
-
// Using the `simd` module here has no effect on Linux
|
|
600
|
-
// and appears to regress performance on Windows, so we don't use it here.
|
|
601
|
-
// See https://github.com/image-rs/image-png/issues/567
|
|
602
|
-
|
|
603
|
-
let mut a_bpp = [0; 4];
|
|
604
|
-
let mut c_bpp = [0; 4];
|
|
605
|
-
|
|
606
|
-
let mut previous = &previous[..previous.len() & !3];
|
|
607
|
-
let current_len = current.len();
|
|
608
|
-
let mut current = &mut current[..current_len & !3];
|
|
609
|
-
|
|
610
|
-
while let ([c0, c1, c2, c3, c_rest @ ..], [p0, p1, p2, p3, p_rest @ ..]) =
|
|
611
|
-
(current, previous)
|
|
612
|
-
{
|
|
613
|
-
current = c_rest;
|
|
614
|
-
previous = p_rest;
|
|
615
|
-
|
|
616
|
-
*c0 = c0.wrapping_add(filter_paeth_decode(a_bpp[0], *p0, c_bpp[0]));
|
|
617
|
-
*c1 = c1.wrapping_add(filter_paeth_decode(a_bpp[1], *p1, c_bpp[1]));
|
|
618
|
-
*c2 = c2.wrapping_add(filter_paeth_decode(a_bpp[2], *p2, c_bpp[2]));
|
|
619
|
-
*c3 = c3.wrapping_add(filter_paeth_decode(a_bpp[3], *p3, c_bpp[3]));
|
|
620
|
-
|
|
621
|
-
a_bpp = [*c0, *c1, *c2, *c3];
|
|
622
|
-
c_bpp = [*p0, *p1, *p2, *p3];
|
|
623
|
-
}
|
|
624
|
-
}
|
|
625
|
-
BytesPerPixel::Six => {
|
|
626
|
-
let mut a_bpp = [0; 6];
|
|
627
|
-
let mut c_bpp = [0; 6];
|
|
628
|
-
for (chunk, b_bpp) in current.chunks_exact_mut(6).zip(previous.chunks_exact(6))
|
|
629
|
-
{
|
|
630
|
-
let new_chunk = [
|
|
631
|
-
chunk[0]
|
|
632
|
-
.wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
|
|
633
|
-
chunk[1]
|
|
634
|
-
.wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
|
|
635
|
-
chunk[2]
|
|
636
|
-
.wrapping_add(filter_paeth_decode(a_bpp[2], b_bpp[2], c_bpp[2])),
|
|
637
|
-
chunk[3]
|
|
638
|
-
.wrapping_add(filter_paeth_decode(a_bpp[3], b_bpp[3], c_bpp[3])),
|
|
639
|
-
chunk[4]
|
|
640
|
-
.wrapping_add(filter_paeth_decode(a_bpp[4], b_bpp[4], c_bpp[4])),
|
|
641
|
-
chunk[5]
|
|
642
|
-
.wrapping_add(filter_paeth_decode(a_bpp[5], b_bpp[5], c_bpp[5])),
|
|
643
|
-
];
|
|
644
|
-
*TryInto::<&mut [u8; 6]>::try_into(chunk).unwrap() = new_chunk;
|
|
645
|
-
a_bpp = new_chunk;
|
|
646
|
-
c_bpp = b_bpp.try_into().unwrap();
|
|
647
|
-
}
|
|
648
|
-
}
|
|
649
|
-
BytesPerPixel::Eight => {
|
|
650
|
-
let mut a_bpp = [0; 8];
|
|
651
|
-
let mut c_bpp = [0; 8];
|
|
652
|
-
for (chunk, b_bpp) in current.chunks_exact_mut(8).zip(previous.chunks_exact(8))
|
|
653
|
-
{
|
|
654
|
-
let new_chunk = [
|
|
655
|
-
chunk[0]
|
|
656
|
-
.wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
|
|
657
|
-
chunk[1]
|
|
658
|
-
.wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
|
|
659
|
-
chunk[2]
|
|
660
|
-
.wrapping_add(filter_paeth_decode(a_bpp[2], b_bpp[2], c_bpp[2])),
|
|
661
|
-
chunk[3]
|
|
662
|
-
.wrapping_add(filter_paeth_decode(a_bpp[3], b_bpp[3], c_bpp[3])),
|
|
663
|
-
chunk[4]
|
|
664
|
-
.wrapping_add(filter_paeth_decode(a_bpp[4], b_bpp[4], c_bpp[4])),
|
|
665
|
-
chunk[5]
|
|
666
|
-
.wrapping_add(filter_paeth_decode(a_bpp[5], b_bpp[5], c_bpp[5])),
|
|
667
|
-
chunk[6]
|
|
668
|
-
.wrapping_add(filter_paeth_decode(a_bpp[6], b_bpp[6], c_bpp[6])),
|
|
669
|
-
chunk[7]
|
|
670
|
-
.wrapping_add(filter_paeth_decode(a_bpp[7], b_bpp[7], c_bpp[7])),
|
|
671
|
-
];
|
|
672
|
-
*TryInto::<&mut [u8; 8]>::try_into(chunk).unwrap() = new_chunk;
|
|
673
|
-
a_bpp = new_chunk;
|
|
674
|
-
c_bpp = b_bpp.try_into().unwrap();
|
|
675
|
-
}
|
|
676
|
-
}
|
|
677
|
-
}
|
|
678
|
-
}
|
|
357
|
+
Paeth => paeth::unfilter(tbpp, previous, current),
|
|
679
358
|
}
|
|
680
359
|
}
|
|
681
360
|
|
|
@@ -795,7 +474,7 @@ fn filter_internal(
|
|
|
795
474
|
.zip(&mut c_chunks)
|
|
796
475
|
{
|
|
797
476
|
for i in 0..CHUNK_SIZE {
|
|
798
|
-
out[i] = cur[i].wrapping_sub(filter_paeth_fpnge(a[i], b[i], c[i]));
|
|
477
|
+
out[i] = cur[i].wrapping_sub(paeth::filter_paeth_fpnge(a[i], b[i], c[i]));
|
|
799
478
|
}
|
|
800
479
|
}
|
|
801
480
|
|
|
@@ -807,17 +486,47 @@ fn filter_internal(
|
|
|
807
486
|
.zip(b_chunks.remainder())
|
|
808
487
|
.zip(c_chunks.remainder())
|
|
809
488
|
{
|
|
810
|
-
*out = cur.wrapping_sub(filter_paeth_fpnge(a, b, c));
|
|
489
|
+
*out = cur.wrapping_sub(paeth::filter_paeth_fpnge(a, b, c));
|
|
811
490
|
}
|
|
812
491
|
|
|
813
492
|
for i in 0..bpp {
|
|
814
|
-
output[i] = current[i].wrapping_sub(filter_paeth_fpnge(0, previous[i], 0));
|
|
493
|
+
output[i] = current[i].wrapping_sub(paeth::filter_paeth_fpnge(0, previous[i], 0));
|
|
815
494
|
}
|
|
816
495
|
Paeth
|
|
817
496
|
}
|
|
818
497
|
}
|
|
819
498
|
}
|
|
820
499
|
|
|
500
|
+
fn adaptive_filter(
|
|
501
|
+
f: impl Fn(&[u8]) -> u64,
|
|
502
|
+
bpp: usize,
|
|
503
|
+
len: usize,
|
|
504
|
+
previous: &[u8],
|
|
505
|
+
current: &[u8],
|
|
506
|
+
output: &mut [u8],
|
|
507
|
+
) -> RowFilter {
|
|
508
|
+
use RowFilter::*;
|
|
509
|
+
|
|
510
|
+
let mut min_cost: u64 = u64::MAX;
|
|
511
|
+
let mut filter_choice = RowFilter::NoFilter;
|
|
512
|
+
for &filter in [Up, Sub, Avg, Paeth].iter() {
|
|
513
|
+
filter_internal(filter, bpp, len, previous, current, output);
|
|
514
|
+
let cost = f(output);
|
|
515
|
+
if cost <= min_cost {
|
|
516
|
+
min_cost = cost;
|
|
517
|
+
filter_choice = filter;
|
|
518
|
+
|
|
519
|
+
if cost == 0 {
|
|
520
|
+
return filter_choice;
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
if filter_choice != Paeth {
|
|
525
|
+
filter_internal(filter_choice, bpp, len, previous, current, output);
|
|
526
|
+
}
|
|
527
|
+
filter_choice
|
|
528
|
+
}
|
|
529
|
+
|
|
821
530
|
pub(crate) fn filter(
|
|
822
531
|
method: Filter,
|
|
823
532
|
bpp: BytesPerPixel,
|
|
@@ -825,28 +534,12 @@ pub(crate) fn filter(
|
|
|
825
534
|
current: &[u8],
|
|
826
535
|
output: &mut [u8],
|
|
827
536
|
) -> RowFilter {
|
|
828
|
-
use RowFilter::*;
|
|
829
537
|
let bpp = bpp.into_usize();
|
|
830
538
|
let len = current.len();
|
|
831
539
|
|
|
832
540
|
match method {
|
|
833
|
-
Filter::Adaptive =>
|
|
834
|
-
|
|
835
|
-
let mut filter_choice = RowFilter::NoFilter;
|
|
836
|
-
for &filter in [Sub, Up, Avg, Paeth].iter() {
|
|
837
|
-
filter_internal(filter, bpp, len, previous, current, output);
|
|
838
|
-
let sum = sum_buffer(output);
|
|
839
|
-
if sum <= min_sum {
|
|
840
|
-
min_sum = sum;
|
|
841
|
-
filter_choice = filter;
|
|
842
|
-
}
|
|
843
|
-
}
|
|
844
|
-
|
|
845
|
-
if filter_choice != Paeth {
|
|
846
|
-
filter_internal(filter_choice, bpp, len, previous, current, output);
|
|
847
|
-
}
|
|
848
|
-
filter_choice
|
|
849
|
-
}
|
|
541
|
+
Filter::Adaptive => adaptive_filter(sum_buffer, bpp, len, previous, current, output),
|
|
542
|
+
Filter::MinEntropy => adaptive_filter(entropy, bpp, len, previous, current, output),
|
|
850
543
|
_ => {
|
|
851
544
|
let filter = RowFilter::from_method(method).unwrap();
|
|
852
545
|
filter_internal(filter, bpp, len, previous, current, output)
|
|
@@ -854,6 +547,63 @@ pub(crate) fn filter(
|
|
|
854
547
|
}
|
|
855
548
|
}
|
|
856
549
|
|
|
550
|
+
/// Estimate the value of `i * log2(i)` without using floating point
/// operations; implementation originally from oxipng.
///
/// The estimate is `i * floor(log2(i))` plus twice the distance of `i` from
/// the largest power of two not exceeding it.
///
/// Returns 0 for `i == 0` (where `log2` is undefined) instead of underflowing,
/// and saturates at `u32::MAX` rather than overflowing for very large `i`.
fn ilog2i(i: u32) -> u32 {
    if i == 0 {
        return 0;
    }
    // Index of the highest set bit, i.e. floor(log2(i)); safe because i > 0.
    let log = 31 - i.leading_zeros();
    // `i - 2^log` is below `2^log`, so doubling it always fits in a u32.
    let correction = (i - (1 << log)) << 1;
    i.saturating_mul(log).saturating_add(correction)
}
|
|
556
|
+
|
|
557
|
+
fn entropy(buf: &[u8]) -> u64 {
|
|
558
|
+
let mut counts = [[0_u32; 256]; 4];
|
|
559
|
+
let mut total = 0;
|
|
560
|
+
|
|
561
|
+
// Count the number of occurrences of each byte value.
|
|
562
|
+
let mut chunks = buf.chunks_exact(8);
|
|
563
|
+
for chunk in &mut chunks {
|
|
564
|
+
// Runs of zeros are common and very compressible, so treat them as free.
|
|
565
|
+
if chunk == [0; 8] {
|
|
566
|
+
continue;
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
// Scatter the counts into 4 separate arrays to reduce contention.
|
|
570
|
+
for j in 0..2 {
|
|
571
|
+
counts[0][chunk[j * 4] as usize] += 1;
|
|
572
|
+
counts[1][chunk[1 + j * 4] as usize] += 1;
|
|
573
|
+
counts[2][chunk[2 + j * 4] as usize] += 1;
|
|
574
|
+
counts[3][chunk[3 + j * 4] as usize] += 1;
|
|
575
|
+
}
|
|
576
|
+
total += 8;
|
|
577
|
+
}
|
|
578
|
+
for &lit in chunks.remainder() {
|
|
579
|
+
counts[0][lit as usize] += 1;
|
|
580
|
+
total += 1;
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
// If the input is entirely zeros, short-circuit the entropy calculation.
|
|
584
|
+
if counts[0][0] == total {
|
|
585
|
+
return 0;
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
// Consolidate the counts.
|
|
589
|
+
//
|
|
590
|
+
// Upstream bug: <https://github.com/rust-lang/rust-clippy/issues/11529>
|
|
591
|
+
#[allow(clippy::needless_range_loop)]
|
|
592
|
+
for i in 0..256 {
|
|
593
|
+
counts[0][i] += counts[1][i] + counts[2][i] + counts[3][i];
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
// Compute the entropy.
|
|
597
|
+
let mut entropy = ilog2i(total);
|
|
598
|
+
for &count in &counts[0] {
|
|
599
|
+
if count > 0 {
|
|
600
|
+
entropy = entropy.saturating_sub(ilog2i(count));
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
entropy as u64
|
|
605
|
+
}
|
|
606
|
+
|
|
857
607
|
// Helper function for Adaptive filter buffer summation
|
|
858
608
|
fn sum_buffer(buf: &[u8]) -> u64 {
|
|
859
609
|
const CHUNK_SIZE: usize = 32;
|
|
@@ -926,23 +676,6 @@ mod test {
|
|
|
926
676
|
}
|
|
927
677
|
}
|
|
928
678
|
|
|
929
|
-
#[test]
|
|
930
|
-
#[ignore] // takes ~20s without optimizations
|
|
931
|
-
fn paeth_impls_are_equivalent() {
|
|
932
|
-
for a in 0..=255 {
|
|
933
|
-
for b in 0..=255 {
|
|
934
|
-
for c in 0..=255 {
|
|
935
|
-
let baseline = filter_paeth(a, b, c);
|
|
936
|
-
let fpnge = filter_paeth_fpnge(a, b, c);
|
|
937
|
-
let stbi = filter_paeth_stbi(a, b, c);
|
|
938
|
-
|
|
939
|
-
assert_eq!(baseline, fpnge);
|
|
940
|
-
assert_eq!(baseline, stbi);
|
|
941
|
-
}
|
|
942
|
-
}
|
|
943
|
-
}
|
|
944
|
-
}
|
|
945
|
-
|
|
946
679
|
#[test]
|
|
947
680
|
fn roundtrip_ascending_previous_line() {
|
|
948
681
|
// A multiple of 8, 6, 4, 3, 2, 1
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
Auto-vectorization notes
|
|
2
|
+
========================
|
|
3
|
+
|
|
4
|
+
[2023/01 @okaneco] - Notes on optimizing decoding filters
|
|
5
|
+
|
|
6
|
+
Links:
|
|
7
|
+
[PR]: https://github.com/image-rs/image-png/pull/382
|
|
8
|
+
[SWAR]: http://aggregate.org/SWAR/over.html
|
|
9
|
+
[AVG]: http://aggregate.org/MAGIC/#Average%20of%20Integers
|
|
10
|
+
|
|
11
|
+
#382 heavily refactored and optimized the following filters making the
|
|
12
|
+
implementation nonobvious. These comments function as a summary of that
|
|
13
|
+
PR with an explanation of the choices made below.
|
|
14
|
+
|
|
15
|
+
#382 originally started with trying to optimize using a technique called
|
|
16
|
+
SWAR, SIMD Within a Register. SWAR uses regular integer types like `u32`
|
|
17
|
+
and `u64` as SIMD registers to perform vertical operations in parallel,
|
|
18
|
+
usually involving bit-twiddling. This allowed each `BytesPerPixel` (bpp)
|
|
19
|
+
pixel to be decoded in parallel: 3bpp and 4bpp in a `u32`, 6bpp and 8bpp
|
|
20
|
+
in a `u64`. The `Sub` filter looked like the following code block, `Avg`
|
|
21
|
+
was similar but used a bitwise average method from [AVG]:
|
|
22
|
+
```
|
|
23
|
+
// See "Unpartitioned Operations With Correction Code" from [SWAR]
|
|
24
|
+
fn swar_add_u32(x: u32, y: u32) -> u32 {
|
|
25
|
+
// 7-bit addition so there's no carry over the most significant bit
|
|
26
|
+
let n = (x & 0x7f7f7f7f) + (y & 0x7f7f7f7f); // 0x7F = 0b_0111_1111
|
|
27
|
+
// 1-bit parity/XOR addition to fill in the missing MSB
|
|
28
|
+
n ^ (x ^ y) & 0x80808080 // 0x80 = 0b_1000_0000
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
let mut prev =
|
|
32
|
+
u32::from_ne_bytes([current[0], current[1], current[2], current[3]]);
|
|
33
|
+
for chunk in current[4..].chunks_exact_mut(4) {
|
|
34
|
+
let cur = u32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
|
|
35
|
+
let new_chunk = swar_add_u32(cur, prev);
|
|
36
|
+
chunk.copy_from_slice(&new_chunk.to_ne_bytes());
|
|
37
|
+
prev = new_chunk;
|
|
38
|
+
}
|
|
39
|
+
```
|
|
40
|
+
While this provided a measurable increase, @fintelia found that this idea
|
|
41
|
+
could be taken even further by unrolling the chunks component-wise and
|
|
42
|
+
avoiding unnecessary byte-shuffling by using byte arrays instead of
|
|
43
|
+
`u32::from|to_ne_bytes`. The bitwise operations were no longer necessary
|
|
44
|
+
so they were reverted to their obvious arithmetic equivalent. Lastly,
|
|
45
|
+
`TryInto` was used instead of `copy_from_slice`. The `Sub` code now
|
|
46
|
+
looked like this (with asserts to remove `0..bpp` bounds checks):
|
|
47
|
+
```
|
|
48
|
+
assert!(len > 3);
|
|
49
|
+
let mut prev = [current[0], current[1], current[2], current[3]];
|
|
50
|
+
for chunk in current[4..].chunks_exact_mut(4) {
|
|
51
|
+
let new_chunk = [
|
|
52
|
+
chunk[0].wrapping_add(prev[0]),
|
|
53
|
+
chunk[1].wrapping_add(prev[1]),
|
|
54
|
+
chunk[2].wrapping_add(prev[2]),
|
|
55
|
+
chunk[3].wrapping_add(prev[3]),
|
|
56
|
+
];
|
|
57
|
+
*TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
|
|
58
|
+
prev = new_chunk;
|
|
59
|
+
}
|
|
60
|
+
```
|
|
61
|
+
The compiler was able to optimize the code to be even faster and this
|
|
62
|
+
method even sped up Paeth filtering! Assertions were experimentally
|
|
63
|
+
added within loop bodies which produced better instructions but no
|
|
64
|
+
difference in speed. Finally, the code was refactored to remove manual
|
|
65
|
+
slicing and start the previous pixel chunks with arrays of `[0; N]`.
|
|
66
|
+
```
|
|
67
|
+
let mut prev = [0; 4];
|
|
68
|
+
for chunk in current.chunks_exact_mut(4) {
|
|
69
|
+
let new_chunk = [
|
|
70
|
+
chunk[0].wrapping_add(prev[0]),
|
|
71
|
+
chunk[1].wrapping_add(prev[1]),
|
|
72
|
+
chunk[2].wrapping_add(prev[2]),
|
|
73
|
+
chunk[3].wrapping_add(prev[3]),
|
|
74
|
+
];
|
|
75
|
+
*TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
|
|
76
|
+
prev = new_chunk;
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
While we're not manually bit-twiddling anymore, a possible takeaway from
|
|
80
|
+
this is to "think in SWAR" when dealing with small byte arrays. Unrolling
|
|
81
|
+
array operations and performing them component-wise may unlock previously
|
|
82
|
+
unavailable optimizations from the compiler, even when using the
|
|
83
|
+
`chunks_exact` methods for their potential auto-vectorization benefits.
|
|
84
|
+
|
|
85
|
+
`std::simd` notes
|
|
86
|
+
=================
|
|
87
|
+
|
|
88
|
+
In the past we have experimented with `std::simd` for unfiltering. This
|
|
89
|
+
experiment was removed in https://github.com/image-rs/image-png/pull/585
|
|
90
|
+
because:
|
|
91
|
+
|
|
92
|
+
* The crate's microbenchmarks showed that `std::simd` didn't have a
|
|
93
|
+
significant advantage over auto-vectorization for most filters, except
|
|
94
|
+
for Paeth unfiltering - see
|
|
95
|
+
https://github.com/image-rs/image-png/pull/414#issuecomment-1736655668
|
|
96
|
+
* In the crate's microbenchmarks `std::simd` seemed to help with Paeth
|
|
97
|
+
unfiltering only on x86/x64, with mixed results on ARM - see
|
|
98
|
+
https://github.com/image-rs/image-png/pull/539#issuecomment-2512748043
|
|
99
|
+
* In Chromium end-to-end microbenchmarks `std::simd` either didn't help
|
|
100
|
+
or resulted in a small regression (as measured on x64). See
|
|
101
|
+
https://crrev.com/c/6090592.
|
|
102
|
+
* Field trial data from some "real world" scenarios shows that
|
|
103
|
+
performance can be quite good without relying on `std::simd` - see
|
|
104
|
+
https://github.com/image-rs/image-png/discussions/562#discussioncomment-13303307
|