libdeflate 0.1.0
Gem release listing for libdeflate 0.1.0 (Ruby bindings for the libdeflate compression library); the files below are bundled in the gem.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +9 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +52 -0
- data/Rakefile +15 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/libdeflate/extconf.rb +14 -0
- data/ext/libdeflate/libdeflate/.gitignore +19 -0
- data/ext/libdeflate/libdeflate/COPYING +21 -0
- data/ext/libdeflate/libdeflate/Makefile +231 -0
- data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
- data/ext/libdeflate/libdeflate/NEWS +57 -0
- data/ext/libdeflate/libdeflate/README.md +170 -0
- data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
- data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
- data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
- data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
- data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
- data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
- data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
- data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
- data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
- data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
- data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
- data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
- data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
- data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
- data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
- data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
- data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
- data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
- data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
- data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
- data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
- data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
- data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
- data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
- data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
- data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
- data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
- data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
- data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
- data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
- data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
- data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
- data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
- data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
- data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
- data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
- data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
- data/ext/libdeflate/libdeflate_ext.c +389 -0
- data/ext/libdeflate/libdeflate_ext.h +8 -0
- data/lib/libdeflate.rb +2 -0
- data/lib/libdeflate/version.rb +3 -0
- data/libdeflate.gemspec +33 -0
- metadata +230 -0
@@ -0,0 +1,95 @@
|
|
1
|
+
/*
|
2
|
+
* gzip_compress.c - compress with a gzip wrapper
|
3
|
+
*
|
4
|
+
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
5
|
+
*
|
6
|
+
* Copyright 2016 Eric Biggers
|
7
|
+
*
|
8
|
+
* Permission is hereby granted, free of charge, to any person
|
9
|
+
* obtaining a copy of this software and associated documentation
|
10
|
+
* files (the "Software"), to deal in the Software without
|
11
|
+
* restriction, including without limitation the rights to use,
|
12
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
13
|
+
* copies of the Software, and to permit persons to whom the
|
14
|
+
* Software is furnished to do so, subject to the following
|
15
|
+
* conditions:
|
16
|
+
*
|
17
|
+
* The above copyright notice and this permission notice shall be
|
18
|
+
* included in all copies or substantial portions of the Software.
|
19
|
+
*
|
20
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
22
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
24
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
25
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
26
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
27
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
28
|
+
*/
|
29
|
+
|
30
|
+
#include "deflate_compress.h"
|
31
|
+
#include "gzip_constants.h"
|
32
|
+
#include "unaligned.h"
|
33
|
+
|
34
|
+
#include "libdeflate.h"
|
35
|
+
|
36
|
+
LIBDEFLATEAPI size_t
|
37
|
+
libdeflate_gzip_compress(struct libdeflate_compressor *c,
|
38
|
+
const void *in, size_t in_size,
|
39
|
+
void *out, size_t out_nbytes_avail)
|
40
|
+
{
|
41
|
+
u8 *out_next = out;
|
42
|
+
unsigned compression_level;
|
43
|
+
u8 xfl;
|
44
|
+
size_t deflate_size;
|
45
|
+
|
46
|
+
if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
|
47
|
+
return 0;
|
48
|
+
|
49
|
+
/* ID1 */
|
50
|
+
*out_next++ = GZIP_ID1;
|
51
|
+
/* ID2 */
|
52
|
+
*out_next++ = GZIP_ID2;
|
53
|
+
/* CM */
|
54
|
+
*out_next++ = GZIP_CM_DEFLATE;
|
55
|
+
/* FLG */
|
56
|
+
*out_next++ = 0;
|
57
|
+
/* MTIME */
|
58
|
+
put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next);
|
59
|
+
out_next += 4;
|
60
|
+
/* XFL */
|
61
|
+
xfl = 0;
|
62
|
+
compression_level = deflate_get_compression_level(c);
|
63
|
+
if (compression_level < 2)
|
64
|
+
xfl |= GZIP_XFL_FASTEST_COMRESSION;
|
65
|
+
else if (compression_level >= 8)
|
66
|
+
xfl |= GZIP_XFL_SLOWEST_COMRESSION;
|
67
|
+
*out_next++ = xfl;
|
68
|
+
/* OS */
|
69
|
+
*out_next++ = GZIP_OS_UNKNOWN; /* OS */
|
70
|
+
|
71
|
+
/* Compressed data */
|
72
|
+
deflate_size = libdeflate_deflate_compress(c, in, in_size, out_next,
|
73
|
+
out_nbytes_avail - GZIP_MIN_OVERHEAD);
|
74
|
+
if (deflate_size == 0)
|
75
|
+
return 0;
|
76
|
+
out_next += deflate_size;
|
77
|
+
|
78
|
+
/* CRC32 */
|
79
|
+
put_unaligned_le32(libdeflate_crc32(0, in, in_size), out_next);
|
80
|
+
out_next += 4;
|
81
|
+
|
82
|
+
/* ISIZE */
|
83
|
+
put_unaligned_le32((u32)in_size, out_next);
|
84
|
+
out_next += 4;
|
85
|
+
|
86
|
+
return out_next - (u8 *)out;
|
87
|
+
}
|
88
|
+
|
89
|
+
LIBDEFLATEAPI size_t
|
90
|
+
libdeflate_gzip_compress_bound(struct libdeflate_compressor *c,
|
91
|
+
size_t in_nbytes)
|
92
|
+
{
|
93
|
+
return GZIP_MIN_OVERHEAD +
|
94
|
+
libdeflate_deflate_compress_bound(c, in_nbytes);
|
95
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
/*
 * gzip_constants.h - constants for the gzip wrapper format
 *
 * See RFC 1952 for the definition of the gzip container format.
 */

#ifndef LIB_GZIP_CONSTANTS_H
#define LIB_GZIP_CONSTANTS_H

/* Fixed 10-byte header plus 8-byte footer (CRC32 + ISIZE) */
#define GZIP_MIN_HEADER_SIZE	10
#define GZIP_FOOTER_SIZE	8
#define GZIP_MIN_OVERHEAD	(GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)

/* Magic identification bytes */
#define GZIP_ID1		0x1F
#define GZIP_ID2		0x8B

/* Compression method (CM field); DEFLATE is the only defined method */
#define GZIP_CM_DEFLATE		8

/* FLG field bits; the reserved bits must be zero */
#define GZIP_FTEXT		0x01
#define GZIP_FHCRC		0x02
#define GZIP_FEXTRA		0x04
#define GZIP_FNAME		0x08
#define GZIP_FCOMMENT		0x10
#define GZIP_FRESERVED		0xE0

/* MTIME value meaning "no timestamp available" */
#define GZIP_MTIME_UNAVAILABLE	0

/*
 * XFL field values defined by RFC 1952: 2 means maximum compression
 * (slowest algorithm), 4 means fastest algorithm.
 *
 * The two names below were originally misspelled ("COMRESSION").  They
 * are kept so that existing users continue to compile; correctly
 * spelled aliases are provided alongside and should be preferred.
 */
#define GZIP_XFL_SLOWEST_COMRESSION	0x02
#define GZIP_XFL_FASTEST_COMRESSION	0x04
#define GZIP_XFL_SLOWEST_COMPRESSION	GZIP_XFL_SLOWEST_COMRESSION
#define GZIP_XFL_FASTEST_COMPRESSION	GZIP_XFL_FASTEST_COMRESSION

/* OS field values */
#define GZIP_OS_FAT		0
#define GZIP_OS_AMIGA		1
#define GZIP_OS_VMS		2
#define GZIP_OS_UNIX		3
#define GZIP_OS_VM_CMS		4
#define GZIP_OS_ATARI_TOS	5
#define GZIP_OS_HPFS		6
#define GZIP_OS_MACINTOSH	7
#define GZIP_OS_Z_SYSTEM	8
#define GZIP_OS_CP_M		9
#define GZIP_OS_TOPS_20		10
#define GZIP_OS_NTFS		11
#define GZIP_OS_QDOS		12
#define GZIP_OS_RISCOS		13
#define GZIP_OS_UNKNOWN		255

#endif /* LIB_GZIP_CONSTANTS_H */
|
@@ -0,0 +1,130 @@
|
|
1
|
+
/*
|
2
|
+
* gzip_decompress.c - decompress with a gzip wrapper
|
3
|
+
*
|
4
|
+
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
5
|
+
*
|
6
|
+
* Copyright 2016 Eric Biggers
|
7
|
+
*
|
8
|
+
* Permission is hereby granted, free of charge, to any person
|
9
|
+
* obtaining a copy of this software and associated documentation
|
10
|
+
* files (the "Software"), to deal in the Software without
|
11
|
+
* restriction, including without limitation the rights to use,
|
12
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
13
|
+
* copies of the Software, and to permit persons to whom the
|
14
|
+
* Software is furnished to do so, subject to the following
|
15
|
+
* conditions:
|
16
|
+
*
|
17
|
+
* The above copyright notice and this permission notice shall be
|
18
|
+
* included in all copies or substantial portions of the Software.
|
19
|
+
*
|
20
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
22
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
24
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
25
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
26
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
27
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
28
|
+
*/
|
29
|
+
|
30
|
+
#include "gzip_constants.h"
|
31
|
+
#include "unaligned.h"
|
32
|
+
|
33
|
+
#include "libdeflate.h"
|
34
|
+
|
35
|
+
/*
 * Decompress a gzip-wrapped DEFLATE stream: validate the header, skip any
 * optional header fields, decompress the payload, then verify the CRC32
 * and ISIZE footer fields against the decompressed output.
 */
LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
			   const void *in, size_t in_nbytes,
			   void *out, size_t out_nbytes_avail,
			   size_t *actual_out_nbytes_ret)
{
	const u8 *in_next = in;
	const u8 * const in_end = in_next + in_nbytes;
	u8 flg;
	size_t actual_out_nbytes;
	enum libdeflate_result result;

	/* Too short to even contain an empty member (header + footer). */
	if (in_nbytes < GZIP_MIN_OVERHEAD)
		return LIBDEFLATE_BAD_DATA;

	/* ID1 */
	if (*in_next++ != GZIP_ID1)
		return LIBDEFLATE_BAD_DATA;
	/* ID2 */
	if (*in_next++ != GZIP_ID2)
		return LIBDEFLATE_BAD_DATA;
	/* CM: only DEFLATE is supported */
	if (*in_next++ != GZIP_CM_DEFLATE)
		return LIBDEFLATE_BAD_DATA;
	flg = *in_next++;
	/* MTIME (ignored) */
	in_next += 4;
	/* XFL (ignored) */
	in_next += 1;
	/* OS (ignored) */
	in_next += 1;

	/* Reserved FLG bits must be zero. */
	if (flg & GZIP_FRESERVED)
		return LIBDEFLATE_BAD_DATA;

	/* Extra field */
	if (flg & GZIP_FEXTRA) {
		/* 2-byte little-endian length prefix, then XLEN data bytes.
		 * Make sure the field plus the footer still fit. */
		u16 xlen = get_unaligned_le16(in_next);
		in_next += 2;

		if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
			return LIBDEFLATE_BAD_DATA;

		in_next += xlen;
	}

	/* Original file name (zero terminated) */
	if (flg & GZIP_FNAME) {
		/* Scan past the name, stopping at the buffer end so the
		 * dereference never goes out of bounds. */
		while (*in_next++ != 0 && in_next != in_end)
			;
		if (in_end - in_next < GZIP_FOOTER_SIZE)
			return LIBDEFLATE_BAD_DATA;
	}

	/* File comment (zero terminated) */
	if (flg & GZIP_FCOMMENT) {
		while (*in_next++ != 0 && in_next != in_end)
			;
		if (in_end - in_next < GZIP_FOOTER_SIZE)
			return LIBDEFLATE_BAD_DATA;
	}

	/* CRC16 for gzip header (skipped, not verified) */
	if (flg & GZIP_FHCRC) {
		in_next += 2;
		if (in_end - in_next < GZIP_FOOTER_SIZE)
			return LIBDEFLATE_BAD_DATA;
	}

	/* Compressed data: everything between the header and the footer. */
	result = libdeflate_deflate_decompress(d, in_next,
					in_end - GZIP_FOOTER_SIZE - in_next,
					out, out_nbytes_avail,
					actual_out_nbytes_ret);
	if (result != LIBDEFLATE_SUCCESS)
		return result;

	/* If the caller didn't ask for the actual size, the output buffer
	 * must have been filled exactly. */
	if (actual_out_nbytes_ret)
		actual_out_nbytes = *actual_out_nbytes_ret;
	else
		actual_out_nbytes = out_nbytes_avail;

	in_next = in_end - GZIP_FOOTER_SIZE;

	/* CRC32 of the uncompressed data */
	if (libdeflate_crc32(0, out, actual_out_nbytes) !=
	    get_unaligned_le32(in_next))
		return LIBDEFLATE_BAD_DATA;
	in_next += 4;

	/* ISIZE: uncompressed size modulo 2^32 */
	if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
		return LIBDEFLATE_BAD_DATA;

	return LIBDEFLATE_SUCCESS;
}
|
@@ -0,0 +1,405 @@
|
|
1
|
+
/*
|
2
|
+
* hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
|
3
|
+
*
|
4
|
+
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
5
|
+
*
|
6
|
+
* Copyright 2016 Eric Biggers
|
7
|
+
*
|
8
|
+
* Permission is hereby granted, free of charge, to any person
|
9
|
+
* obtaining a copy of this software and associated documentation
|
10
|
+
* files (the "Software"), to deal in the Software without
|
11
|
+
* restriction, including without limitation the rights to use,
|
12
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
13
|
+
* copies of the Software, and to permit persons to whom the
|
14
|
+
* Software is furnished to do so, subject to the following
|
15
|
+
* conditions:
|
16
|
+
*
|
17
|
+
* The above copyright notice and this permission notice shall be
|
18
|
+
* included in all copies or substantial portions of the Software.
|
19
|
+
*
|
20
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
22
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
24
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
25
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
26
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
27
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
28
|
+
*
|
29
|
+
* ---------------------------------------------------------------------------
|
30
|
+
*
|
31
|
+
* Algorithm
|
32
|
+
*
|
33
|
+
* This is a Hash Chains (hc) based matchfinder.
|
34
|
+
*
|
35
|
+
* The main data structure is a hash table where each hash bucket contains a
|
36
|
+
* linked list (or "chain") of sequences whose first 4 bytes share the same hash
|
37
|
+
* code. Each sequence is identified by its starting position in the input
|
38
|
+
* buffer.
|
39
|
+
*
|
40
|
+
* The algorithm processes the input buffer sequentially. At each byte
|
41
|
+
* position, the hash code of the first 4 bytes of the sequence beginning at
|
42
|
+
* that position (the sequence being matched against) is computed. This
|
43
|
+
* identifies the hash bucket to use for that position. Then, this hash
|
44
|
+
* bucket's linked list is searched for matches. Then, a new linked list node
|
45
|
+
* is created to represent the current sequence and is prepended to the list.
|
46
|
+
*
|
47
|
+
* This algorithm has several useful properties:
|
48
|
+
*
|
49
|
+
* - It only finds true Lempel-Ziv matches; i.e., those where the matching
|
50
|
+
* sequence occurs prior to the sequence being matched against.
|
51
|
+
*
|
52
|
+
* - The sequences in each linked list are always sorted by decreasing starting
|
53
|
+
* position. Therefore, the closest (smallest offset) matches are found
|
54
|
+
* first, which in many compression formats tend to be the cheapest to encode.
|
55
|
+
*
|
56
|
+
* - Although fast running time is not guaranteed due to the possibility of the
|
57
|
+
* lists getting very long, the worst degenerate behavior can be easily
|
58
|
+
* prevented by capping the number of nodes searched at each position.
|
59
|
+
*
|
60
|
+
* - If the compressor decides not to search for matches at a certain position,
|
61
|
+
* then that position can be quickly inserted without searching the list.
|
62
|
+
*
|
63
|
+
* - The algorithm is adaptable to sliding windows: just store the positions
|
64
|
+
* relative to a "base" value that is updated from time to time, and stop
|
65
|
+
* searching each list when the sequences get too far away.
|
66
|
+
*
|
67
|
+
* ----------------------------------------------------------------------------
|
68
|
+
*
|
69
|
+
* Optimizations
|
70
|
+
*
|
71
|
+
* The main hash table and chains handle length 4+ matches. Length 3 matches
|
72
|
+
* are handled by a separate hash table with no chains. This works well for
|
73
|
+
* typical "greedy" or "lazy"-style compressors, where length 3 matches are
|
74
|
+
* often only helpful if they have small offsets. Instead of searching a full
|
75
|
+
* chain for length 3+ matches, the algorithm just checks for one close length 3
|
76
|
+
* match, then focuses on finding length 4+ matches.
|
77
|
+
*
|
78
|
+
* The longest_match() and skip_positions() functions are inlined into the
|
79
|
+
* compressors that use them. This isn't just about saving the overhead of a
|
80
|
+
* function call. These functions are intended to be called from the inner
|
81
|
+
* loops of compressors, where giving the compiler more control over register
|
82
|
+
* allocation is very helpful. There is also significant benefit to be gained
|
83
|
+
* from allowing the CPU to predict branches independently at each call site.
|
84
|
+
* For example, "lazy"-style compressors can be written with two calls to
|
85
|
+
* longest_match(), each of which starts with a different 'best_len' and
|
86
|
+
* therefore has significantly different performance characteristics.
|
87
|
+
*
|
88
|
+
* Although any hash function can be used, a multiplicative hash is fast and
|
89
|
+
* works well.
|
90
|
+
*
|
91
|
+
* On some processors, it is significantly faster to extend matches by whole
|
92
|
+
* words (32 or 64 bits) instead of by individual bytes. For this to be the
|
93
|
+
* case, the processor must implement unaligned memory accesses efficiently and
|
94
|
+
* must have either a fast "find first set bit" instruction or a fast "find last
|
95
|
+
* set bit" instruction, depending on the processor's endianness.
|
96
|
+
*
|
97
|
+
* The code uses one loop for finding the first match and one loop for finding a
|
98
|
+
* longer match. Each of these loops is tuned for its respective task and in
|
99
|
+
* combination are faster than a single generalized loop that handles both
|
100
|
+
* tasks.
|
101
|
+
*
|
102
|
+
* The code also uses a tight inner loop that only compares the last and first
|
103
|
+
* bytes of a potential match. It is only when these bytes match that a full
|
104
|
+
* match extension is attempted.
|
105
|
+
*
|
106
|
+
* ----------------------------------------------------------------------------
|
107
|
+
*/
|
108
|
+
|
109
|
+
#include "matchfinder_common.h"
|
110
|
+
|
111
|
+
/* log2 of the number of buckets in each hash table */
#define HC_MATCHFINDER_HASH3_ORDER	15
#define HC_MATCHFINDER_HASH4_ORDER	16

/* Combined entry count of the two hash tables, measured in mf_pos_t
 * units; used by hc_matchfinder_init() to clear just the hash tables. */
#define HC_MATCHFINDER_TOTAL_HASH_LENGTH		\
	((1UL << HC_MATCHFINDER_HASH3_ORDER) +		\
	 (1UL << HC_MATCHFINDER_HASH4_ORDER))

/*
 * NOTE: init and slide_window cast a 'struct hc_matchfinder *' to
 * 'mf_pos_t *' and operate on it as one flat array, so the struct must
 * consist solely of mf_pos_t entries with the hash tables first.
 */
struct hc_matchfinder {

	/* The hash table for finding length 3 matches */
	mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];

	/* The hash table which contains the first nodes of the linked lists for
	 * finding length 4+ matches */
	mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];

	/* The "next node" references for the linked lists.  The "next node" of
	 * the node for the sequence with position 'pos' is 'next_tab[pos]'. */
	mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];

}
#ifdef _aligned_attribute
/* Alignment enables the vectorized init/rebase implementations. */
_aligned_attribute(MATCHFINDER_ALIGNMENT)
#endif
;
|
136
|
+
|
137
|
+
/* Prepare the matchfinder for a new input buffer. */
|
138
|
+
static forceinline void
hc_matchfinder_init(struct hc_matchfinder *mf)
{
	/* Only the leading HC_MATCHFINDER_TOTAL_HASH_LENGTH entries (the
	 * two hash tables) are initialized here; next_tab is not. */
	matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_LENGTH);
}
|
143
|
+
|
144
|
+
/*
 * Slide the matchfinder by one window: rebase every stored position
 * (every mf_pos_t in the struct) so positions stay relative to the
 * advanced 'in_base' pointer.
 */
static forceinline void
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
{
	matchfinder_rebase((mf_pos_t *)mf,
			   sizeof(struct hc_matchfinder) / sizeof(mf_pos_t));
}
|
150
|
+
|
151
|
+
/*
|
152
|
+
* Find the longest match longer than 'best_len' bytes.
|
153
|
+
*
|
154
|
+
* @mf
|
155
|
+
* The matchfinder structure.
|
156
|
+
* @in_base_p
|
157
|
+
* Location of a pointer which points to the place in the input data the
|
158
|
+
* matchfinder currently stores positions relative to. This may be updated
|
159
|
+
* by this function.
|
160
|
+
* @cur_pos
|
161
|
+
* The current position in the input buffer relative to @in_base (the
|
162
|
+
* position of the sequence being matched against).
|
163
|
+
* @best_len
|
164
|
+
* Require a match longer than this length.
|
165
|
+
* @max_len
|
166
|
+
* The maximum permissible match length at this position.
|
167
|
+
* @nice_len
|
168
|
+
* Stop searching if a match of at least this length is found.
|
169
|
+
* Must be <= @max_len.
|
170
|
+
* @max_search_depth
|
171
|
+
* Limit on the number of potential matches to consider. Must be >= 1.
|
172
|
+
* @next_hashes
|
173
|
+
* The precomputed hash codes for the sequence beginning at @in_next.
|
174
|
+
* These will be used and then updated with the precomputed hashcodes for
|
175
|
+
* the sequence beginning at @in_next + 1.
|
176
|
+
* @offset_ret
|
177
|
+
* If a match is found, its offset is returned in this location.
|
178
|
+
*
|
179
|
+
* Return the length of the match found, or 'best_len' if no match longer than
|
180
|
+
* 'best_len' was found.
|
181
|
+
*/
|
182
|
+
static forceinline u32
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
			     const u8 ** const restrict in_base_p,
			     const u8 * const restrict in_next,
			     u32 best_len,
			     const u32 max_len,
			     const u32 nice_len,
			     const u32 max_search_depth,
			     u32 * const restrict next_hashes,
			     u32 * const restrict offset_ret)
{
	u32 depth_remaining = max_search_depth;
	const u8 *best_matchptr = in_next;
	mf_pos_t cur_node3, cur_node4;
	u32 hash3, hash4;
	u32 next_seq3, next_seq4;
	u32 seq4;
	const u8 *matchptr;
	u32 len;
	u32 cur_pos = in_next - *in_base_p;
	const u8 *in_base;
	mf_pos_t cutoff;

	/* Slide the window when the relative position reaches its limit. */
	if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
		hc_matchfinder_slide_window(mf);
		*in_base_p += MATCHFINDER_WINDOW_SIZE;
		cur_pos = 0;
	}

	in_base = *in_base_p;
	/* Nodes with position <= cutoff are outside the current window (or
	 * were never set, given how rebasing marks stale entries). */
	cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;

	if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
		goto out;

	/* Get the precomputed hash codes. */
	hash3 = next_hashes[0];
	hash4 = next_hashes[1];

	/* From the hash buckets, get the first node of each linked list. */
	cur_node3 = mf->hash3_tab[hash3];
	cur_node4 = mf->hash4_tab[hash4];

	/* Update for length 3 matches.  This replaces the singleton node in the
	 * 'hash3' bucket with the node for the current sequence. */
	mf->hash3_tab[hash3] = cur_pos;

	/* Update for length 4 matches.  This prepends the node for the current
	 * sequence to the linked list in the 'hash4' bucket. */
	mf->hash4_tab[hash4] = cur_pos;
	mf->next_tab[cur_pos] = cur_node4;

	/* Compute the next hash codes (for position in_next + 1) and prefetch
	 * their buckets so the next call's table reads are warm. */
	next_seq4 = load_u32_unaligned(in_next + 1);
	next_seq3 = loaded_u32_to_u24(next_seq4);
	next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
	next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
	prefetchw(&mf->hash3_tab[next_hashes[0]]);
	prefetchw(&mf->hash4_tab[next_hashes[1]]);

	if (best_len < 4) {  /* No match of length >= 4 found yet? */

		/* Check for a length 3 match if needed. */

		if (cur_node3 <= cutoff)
			goto out;

		seq4 = load_u32_unaligned(in_next);

		if (best_len < 3) {
			matchptr = &in_base[cur_node3];
			if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
				best_len = 3;
				best_matchptr = matchptr;
			}
		}

		/* Check for a length 4 match. */

		if (cur_node4 <= cutoff)
			goto out;

		for (;;) {
			/* No length 4 match found yet.  Check the first 4 bytes. */
			matchptr = &in_base[cur_node4];

			if (load_u32_unaligned(matchptr) == seq4)
				break;

			/* The first 4 bytes did not match.  Keep trying. */
			cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
			if (cur_node4 <= cutoff || !--depth_remaining)
				goto out;
		}

		/* Found a match of length >= 4.  Extend it to its full length. */
		best_matchptr = matchptr;
		best_len = lz_extend(in_next, best_matchptr, 4, max_len);
		if (best_len >= nice_len)
			goto out;
		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
		if (cur_node4 <= cutoff || !--depth_remaining)
			goto out;
	} else {
		if (cur_node4 <= cutoff || best_len >= nice_len)
			goto out;
	}

	/* Check for matches of length >= 5. */

	for (;;) {
		for (;;) {
			matchptr = &in_base[cur_node4];

			/* Already found a length 4 match.  Try for a longer
			 * match; start by checking either the last 4 bytes and
			 * the first 4 bytes, or the last byte.  (The last byte,
			 * the one which would extend the match length by 1, is
			 * the most important.) */
#if UNALIGNED_ACCESS_IS_FAST
			if ((load_u32_unaligned(matchptr + best_len - 3) ==
			     load_u32_unaligned(in_next + best_len - 3)) &&
			    (load_u32_unaligned(matchptr) ==
			     load_u32_unaligned(in_next)))
#else
			if (matchptr[best_len] == in_next[best_len])
#endif
				break;

			/* Continue to the next node in the list. */
			cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
			if (cur_node4 <= cutoff || !--depth_remaining)
				goto out;
		}

		/* When the fast path already verified bytes 0-3 we can start
		 * extending from length 4; otherwise start from 0. */
#if UNALIGNED_ACCESS_IS_FAST
		len = 4;
#else
		len = 0;
#endif
		len = lz_extend(in_next, matchptr, len, max_len);
		if (len > best_len) {
			/* This is the new longest match. */
			best_len = len;
			best_matchptr = matchptr;
			if (best_len >= nice_len)
				goto out;
		}

		/* Continue to the next node in the list. */
		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
		if (cur_node4 <= cutoff || !--depth_remaining)
			goto out;
	}
out:
	*offset_ret = in_next - best_matchptr;
	return best_len;
}
|
340
|
+
|
341
|
+
/*
|
342
|
+
* Advance the matchfinder, but don't search for matches.
|
343
|
+
*
|
344
|
+
* @mf
|
345
|
+
* The matchfinder structure.
|
346
|
+
* @in_base_p
|
347
|
+
* Location of a pointer which points to the place in the input data the
|
348
|
+
* matchfinder currently stores positions relative to. This may be updated
|
349
|
+
* by this function.
|
350
|
+
* @cur_pos
|
351
|
+
* The current position in the input buffer relative to @in_base.
|
352
|
+
* @end_pos
|
353
|
+
* The end position of the input buffer, relative to @in_base.
|
354
|
+
* @next_hashes
|
355
|
+
* The precomputed hash codes for the sequence beginning at @in_next.
|
356
|
+
* These will be used and then updated with the precomputed hashcodes for
|
357
|
+
* the sequence beginning at @in_next + @count.
|
358
|
+
* @count
|
359
|
+
* The number of bytes to advance. Must be > 0.
|
360
|
+
*
|
361
|
+
* Returns @in_next + @count.
|
362
|
+
*/
|
363
|
+
static forceinline const u8 *
hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf,
			      const u8 ** const restrict in_base_p,
			      const u8 *in_next,
			      const u8 * const in_end,
			      const u32 count,
			      u32 * const restrict next_hashes)
{
	u32 cur_pos;
	u32 hash3, hash4;
	u32 next_seq3, next_seq4;
	u32 remaining = count;

	/* Near the end of the buffer the 4-byte loads below could overrun;
	 * in that case skip ahead without inserting any positions. */
	if (unlikely(count + 5 > in_end - in_next))
		return &in_next[count];

	cur_pos = in_next - *in_base_p;
	hash3 = next_hashes[0];
	hash4 = next_hashes[1];
	do {
		/* Slide the window when the relative position hits its limit,
		 * same as in hc_matchfinder_longest_match(). */
		if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
			hc_matchfinder_slide_window(mf);
			*in_base_p += MATCHFINDER_WINDOW_SIZE;
			cur_pos = 0;
		}
		/* Insert the current position into both tables without
		 * searching: replace the hash3 singleton and prepend to the
		 * hash4 chain. */
		mf->hash3_tab[hash3] = cur_pos;
		mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
		mf->hash4_tab[hash4] = cur_pos;

		/* Compute the hashes for the next position. */
		next_seq4 = load_u32_unaligned(++in_next);
		next_seq3 = loaded_u32_to_u24(next_seq4);
		hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
		hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
		cur_pos++;
	} while (--remaining);

	/* Hand the final hashes back to the caller, prefetching the buckets
	 * they index for the next matchfinder call. */
	prefetchw(&mf->hash3_tab[hash3]);
	prefetchw(&mf->hash4_tab[hash4]);
	next_hashes[0] = hash3;
	next_hashes[1] = hash4;

	return in_next;
}
|