libdeflate 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +1 -0
  6. data/.rubocop_todo.yml +9 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +52 -0
  11. data/Rakefile +15 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/ext/libdeflate/extconf.rb +14 -0
  15. data/ext/libdeflate/libdeflate/.gitignore +19 -0
  16. data/ext/libdeflate/libdeflate/COPYING +21 -0
  17. data/ext/libdeflate/libdeflate/Makefile +231 -0
  18. data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
  19. data/ext/libdeflate/libdeflate/NEWS +57 -0
  20. data/ext/libdeflate/libdeflate/README.md +170 -0
  21. data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
  22. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
  23. data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
  24. data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
  25. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
  26. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
  27. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
  28. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
  29. data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
  30. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
  31. data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
  32. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
  33. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
  34. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
  35. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
  36. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
  37. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
  38. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
  39. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
  40. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
  41. data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
  42. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
  43. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
  44. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
  45. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
  46. data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
  47. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
  48. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
  49. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
  50. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
  51. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
  52. data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
  53. data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
  54. data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
  55. data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
  56. data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
  57. data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
  58. data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
  59. data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
  60. data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
  67. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  68. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
  69. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
  70. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
  71. data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
  72. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
  73. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
  74. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
  75. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
  76. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
  77. data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
  78. data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
  79. data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
  80. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
  81. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
  82. data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
  83. data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
  84. data/ext/libdeflate/libdeflate_ext.c +389 -0
  85. data/ext/libdeflate/libdeflate_ext.h +8 -0
  86. data/lib/libdeflate.rb +2 -0
  87. data/lib/libdeflate/version.rb +3 -0
  88. data/libdeflate.gemspec +33 -0
  89. metadata +230 -0
@@ -0,0 +1,95 @@
1
+ /*
2
+ * gzip_compress.c - compress with a gzip wrapper
3
+ *
4
+ * Originally public domain; changes after 2016-09-07 are copyrighted.
5
+ *
6
+ * Copyright 2016 Eric Biggers
7
+ *
8
+ * Permission is hereby granted, free of charge, to any person
9
+ * obtaining a copy of this software and associated documentation
10
+ * files (the "Software"), to deal in the Software without
11
+ * restriction, including without limitation the rights to use,
12
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ * copies of the Software, and to permit persons to whom the
14
+ * Software is furnished to do so, subject to the following
15
+ * conditions:
16
+ *
17
+ * The above copyright notice and this permission notice shall be
18
+ * included in all copies or substantial portions of the Software.
19
+ *
20
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27
+ * OTHER DEALINGS IN THE SOFTWARE.
28
+ */
29
+
30
+ #include "deflate_compress.h"
31
+ #include "gzip_constants.h"
32
+ #include "unaligned.h"
33
+
34
+ #include "libdeflate.h"
35
+
36
+ LIBDEFLATEAPI size_t
37
+ libdeflate_gzip_compress(struct libdeflate_compressor *c,
38
+ const void *in, size_t in_size,
39
+ void *out, size_t out_nbytes_avail)
40
+ {
41
+ u8 *out_next = out;
42
+ unsigned compression_level;
43
+ u8 xfl;
44
+ size_t deflate_size;
45
+
46
+ if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
47
+ return 0;
48
+
49
+ /* ID1 */
50
+ *out_next++ = GZIP_ID1;
51
+ /* ID2 */
52
+ *out_next++ = GZIP_ID2;
53
+ /* CM */
54
+ *out_next++ = GZIP_CM_DEFLATE;
55
+ /* FLG */
56
+ *out_next++ = 0;
57
+ /* MTIME */
58
+ put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next);
59
+ out_next += 4;
60
+ /* XFL */
61
+ xfl = 0;
62
+ compression_level = deflate_get_compression_level(c);
63
+ if (compression_level < 2)
64
+ xfl |= GZIP_XFL_FASTEST_COMRESSION;
65
+ else if (compression_level >= 8)
66
+ xfl |= GZIP_XFL_SLOWEST_COMRESSION;
67
+ *out_next++ = xfl;
68
+ /* OS */
69
+ *out_next++ = GZIP_OS_UNKNOWN; /* OS */
70
+
71
+ /* Compressed data */
72
+ deflate_size = libdeflate_deflate_compress(c, in, in_size, out_next,
73
+ out_nbytes_avail - GZIP_MIN_OVERHEAD);
74
+ if (deflate_size == 0)
75
+ return 0;
76
+ out_next += deflate_size;
77
+
78
+ /* CRC32 */
79
+ put_unaligned_le32(libdeflate_crc32(0, in, in_size), out_next);
80
+ out_next += 4;
81
+
82
+ /* ISIZE */
83
+ put_unaligned_le32((u32)in_size, out_next);
84
+ out_next += 4;
85
+
86
+ return out_next - (u8 *)out;
87
+ }
88
+
89
+ LIBDEFLATEAPI size_t
90
+ libdeflate_gzip_compress_bound(struct libdeflate_compressor *c,
91
+ size_t in_nbytes)
92
+ {
93
+ return GZIP_MIN_OVERHEAD +
94
+ libdeflate_deflate_compress_bound(c, in_nbytes);
95
+ }
@@ -0,0 +1,45 @@
1
+ /*
2
+ * gzip_constants.h - constants for the gzip wrapper format
3
+ */
4
+
5
+ #ifndef LIB_GZIP_CONSTANTS_H
6
+ #define LIB_GZIP_CONSTANTS_H
7
+
8
+ #define GZIP_MIN_HEADER_SIZE 10 /* ID1, ID2, CM, FLG, MTIME (4 bytes), XFL, OS */
9
+ #define GZIP_FOOTER_SIZE 8 /* CRC32 (4 bytes) followed by ISIZE (4 bytes) */
10
+ #define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
11
+
12
+ #define GZIP_ID1 0x1F /* first header byte */
13
+ #define GZIP_ID2 0x8B /* second header byte */
14
+
15
+ #define GZIP_CM_DEFLATE 8 /* the only CM value the decompressor accepts */
16
+
/* Bits of the FLG header byte. */
17
+ #define GZIP_FTEXT 0x01
18
+ #define GZIP_FHCRC 0x02 /* a 2-byte header CRC16 follows the optional fields */
19
+ #define GZIP_FEXTRA 0x04 /* a 2-byte little-endian XLEN and XLEN bytes of extra data follow */
20
+ #define GZIP_FNAME 0x08 /* a zero-terminated original file name follows */
21
+ #define GZIP_FCOMMENT 0x10 /* a zero-terminated file comment follows */
22
+ #define GZIP_FRESERVED 0xE0 /* every bit not named above; input with any of these set is rejected */
23
+
24
+ #define GZIP_MTIME_UNAVAILABLE 0 /* value the compressor stores in MTIME */
25
+
/* Values for the XFL header byte.  NOTE(review): "COMRESSION" is misspelled,
 * but gzip_compress.c references these exact names, so a rename must update
 * both files together. */
26
+ #define GZIP_XFL_SLOWEST_COMRESSION 0x02
27
+ #define GZIP_XFL_FASTEST_COMRESSION 0x04
28
+
/* Values for the OS header byte; the compressor always writes GZIP_OS_UNKNOWN. */
29
+ #define GZIP_OS_FAT 0
30
+ #define GZIP_OS_AMIGA 1
31
+ #define GZIP_OS_VMS 2
32
+ #define GZIP_OS_UNIX 3
33
+ #define GZIP_OS_VM_CMS 4
34
+ #define GZIP_OS_ATARI_TOS 5
35
+ #define GZIP_OS_HPFS 6
36
+ #define GZIP_OS_MACINTOSH 7
37
+ #define GZIP_OS_Z_SYSTEM 8
38
+ #define GZIP_OS_CP_M 9
39
+ #define GZIP_OS_TOPS_20 10
40
+ #define GZIP_OS_NTFS 11
41
+ #define GZIP_OS_QDOS 12
42
+ #define GZIP_OS_RISCOS 13
43
+ #define GZIP_OS_UNKNOWN 255
44
+
45
+ #endif /* LIB_GZIP_CONSTANTS_H */
@@ -0,0 +1,130 @@
1
+ /*
2
+ * gzip_decompress.c - decompress with a gzip wrapper
3
+ *
4
+ * Originally public domain; changes after 2016-09-07 are copyrighted.
5
+ *
6
+ * Copyright 2016 Eric Biggers
7
+ *
8
+ * Permission is hereby granted, free of charge, to any person
9
+ * obtaining a copy of this software and associated documentation
10
+ * files (the "Software"), to deal in the Software without
11
+ * restriction, including without limitation the rights to use,
12
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ * copies of the Software, and to permit persons to whom the
14
+ * Software is furnished to do so, subject to the following
15
+ * conditions:
16
+ *
17
+ * The above copyright notice and this permission notice shall be
18
+ * included in all copies or substantial portions of the Software.
19
+ *
20
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27
+ * OTHER DEALINGS IN THE SOFTWARE.
28
+ */
29
+
30
+ #include "gzip_constants.h"
31
+ #include "unaligned.h"
32
+
33
+ #include "libdeflate.h"
34
+
35
+ LIBDEFLATEAPI enum libdeflate_result
36
+ libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
37
+ const void *in, size_t in_nbytes,
38
+ void *out, size_t out_nbytes_avail,
39
+ size_t *actual_out_nbytes_ret)
40
+ {
41
+ const u8 *in_next = in;
42
+ const u8 * const in_end = in_next + in_nbytes;
43
+ u8 flg;
44
+ size_t actual_out_nbytes;
45
+ enum libdeflate_result result;
46
+
47
+ if (in_nbytes < GZIP_MIN_OVERHEAD)
48
+ return LIBDEFLATE_BAD_DATA;
49
+
50
+ /* ID1 */
51
+ if (*in_next++ != GZIP_ID1)
52
+ return LIBDEFLATE_BAD_DATA;
53
+ /* ID2 */
54
+ if (*in_next++ != GZIP_ID2)
55
+ return LIBDEFLATE_BAD_DATA;
56
+ /* CM */
57
+ if (*in_next++ != GZIP_CM_DEFLATE)
58
+ return LIBDEFLATE_BAD_DATA;
59
+ flg = *in_next++;
60
+ /* MTIME */
61
+ in_next += 4;
62
+ /* XFL */
63
+ in_next += 1;
64
+ /* OS */
65
+ in_next += 1;
66
+
67
+ if (flg & GZIP_FRESERVED)
68
+ return LIBDEFLATE_BAD_DATA;
69
+
70
+ /* Extra field */
71
+ if (flg & GZIP_FEXTRA) {
72
+ u16 xlen = get_unaligned_le16(in_next);
73
+ in_next += 2;
74
+
75
+ if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
76
+ return LIBDEFLATE_BAD_DATA;
77
+
78
+ in_next += xlen;
79
+ }
80
+
81
+ /* Original file name (zero terminated) */
82
+ if (flg & GZIP_FNAME) {
83
+ while (*in_next++ != 0 && in_next != in_end)
84
+ ;
85
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
86
+ return LIBDEFLATE_BAD_DATA;
87
+ }
88
+
89
+ /* File comment (zero terminated) */
90
+ if (flg & GZIP_FCOMMENT) {
91
+ while (*in_next++ != 0 && in_next != in_end)
92
+ ;
93
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
94
+ return LIBDEFLATE_BAD_DATA;
95
+ }
96
+
97
+ /* CRC16 for gzip header */
98
+ if (flg & GZIP_FHCRC) {
99
+ in_next += 2;
100
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
101
+ return LIBDEFLATE_BAD_DATA;
102
+ }
103
+
104
+ /* Compressed data */
105
+ result = libdeflate_deflate_decompress(d, in_next,
106
+ in_end - GZIP_FOOTER_SIZE - in_next,
107
+ out, out_nbytes_avail,
108
+ actual_out_nbytes_ret);
109
+ if (result != LIBDEFLATE_SUCCESS)
110
+ return result;
111
+
112
+ if (actual_out_nbytes_ret)
113
+ actual_out_nbytes = *actual_out_nbytes_ret;
114
+ else
115
+ actual_out_nbytes = out_nbytes_avail;
116
+
117
+ in_next = in_end - GZIP_FOOTER_SIZE;
118
+
119
+ /* CRC32 */
120
+ if (libdeflate_crc32(0, out, actual_out_nbytes) !=
121
+ get_unaligned_le32(in_next))
122
+ return LIBDEFLATE_BAD_DATA;
123
+ in_next += 4;
124
+
125
+ /* ISIZE */
126
+ if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
127
+ return LIBDEFLATE_BAD_DATA;
128
+
129
+ return LIBDEFLATE_SUCCESS;
130
+ }
@@ -0,0 +1,405 @@
1
+ /*
2
+ * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
3
+ *
4
+ * Originally public domain; changes after 2016-09-07 are copyrighted.
5
+ *
6
+ * Copyright 2016 Eric Biggers
7
+ *
8
+ * Permission is hereby granted, free of charge, to any person
9
+ * obtaining a copy of this software and associated documentation
10
+ * files (the "Software"), to deal in the Software without
11
+ * restriction, including without limitation the rights to use,
12
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ * copies of the Software, and to permit persons to whom the
14
+ * Software is furnished to do so, subject to the following
15
+ * conditions:
16
+ *
17
+ * The above copyright notice and this permission notice shall be
18
+ * included in all copies or substantial portions of the Software.
19
+ *
20
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27
+ * OTHER DEALINGS IN THE SOFTWARE.
28
+ *
29
+ * ---------------------------------------------------------------------------
30
+ *
31
+ * Algorithm
32
+ *
33
+ * This is a Hash Chains (hc) based matchfinder.
34
+ *
35
+ * The main data structure is a hash table where each hash bucket contains a
36
+ * linked list (or "chain") of sequences whose first 4 bytes share the same hash
37
+ * code. Each sequence is identified by its starting position in the input
38
+ * buffer.
39
+ *
40
+ * The algorithm processes the input buffer sequentially. At each byte
41
+ * position, the hash code of the first 4 bytes of the sequence beginning at
42
+ * that position (the sequence being matched against) is computed. This
43
+ * identifies the hash bucket to use for that position. Then, this hash
44
+ * bucket's linked list is searched for matches. Then, a new linked list node
45
+ * is created to represent the current sequence and is prepended to the list.
46
+ *
47
+ * This algorithm has several useful properties:
48
+ *
49
+ * - It only finds true Lempel-Ziv matches; i.e., those where the matching
50
+ * sequence occurs prior to the sequence being matched against.
51
+ *
52
+ * - The sequences in each linked list are always sorted by decreasing starting
53
+ * position. Therefore, the closest (smallest offset) matches are found
54
+ * first, which in many compression formats tend to be the cheapest to encode.
55
+ *
56
+ * - Although fast running time is not guaranteed due to the possibility of the
57
+ * lists getting very long, the worst degenerate behavior can be easily
58
+ * prevented by capping the number of nodes searched at each position.
59
+ *
60
+ * - If the compressor decides not to search for matches at a certain position,
61
+ * then that position can be quickly inserted without searching the list.
62
+ *
63
+ * - The algorithm is adaptable to sliding windows: just store the positions
64
+ * relative to a "base" value that is updated from time to time, and stop
65
+ * searching each list when the sequences get too far away.
66
+ *
67
+ * ----------------------------------------------------------------------------
68
+ *
69
+ * Optimizations
70
+ *
71
+ * The main hash table and chains handle length 4+ matches. Length 3 matches
72
+ * are handled by a separate hash table with no chains. This works well for
73
+ * typical "greedy" or "lazy"-style compressors, where length 3 matches are
74
+ * often only helpful if they have small offsets. Instead of searching a full
75
+ * chain for length 3+ matches, the algorithm just checks for one close length 3
76
+ * match, then focuses on finding length 4+ matches.
77
+ *
78
+ * The longest_match() and skip_positions() functions are inlined into the
79
+ * compressors that use them. This isn't just about saving the overhead of a
80
+ * function call. These functions are intended to be called from the inner
81
+ * loops of compressors, where giving the compiler more control over register
82
+ * allocation is very helpful. There is also significant benefit to be gained
83
+ * from allowing the CPU to predict branches independently at each call site.
84
+ * For example, "lazy"-style compressors can be written with two calls to
85
+ * longest_match(), each of which starts with a different 'best_len' and
86
+ * therefore has significantly different performance characteristics.
87
+ *
88
+ * Although any hash function can be used, a multiplicative hash is fast and
89
+ * works well.
90
+ *
91
+ * On some processors, it is significantly faster to extend matches by whole
92
+ * words (32 or 64 bits) instead of by individual bytes. For this to be the
93
+ * case, the processor must implement unaligned memory accesses efficiently and
94
+ * must have either a fast "find first set bit" instruction or a fast "find last
95
+ * set bit" instruction, depending on the processor's endianness.
96
+ *
97
+ * The code uses one loop for finding the first match and one loop for finding a
98
+ * longer match. Each of these loops is tuned for its respective task and in
99
+ * combination they are faster than a single generalized loop that handles both
100
+ * tasks.
101
+ *
102
+ * The code also uses a tight inner loop that only compares the last and first
103
+ * bytes of a potential match. It is only when these bytes match that a full
104
+ * match extension is attempted.
105
+ *
106
+ * ----------------------------------------------------------------------------
107
+ */
108
+
109
+ #include "matchfinder_common.h"
110
+
111
+ #define HC_MATCHFINDER_HASH3_ORDER 15 /* hash3_tab has 1 << 15 entries */
112
+ #define HC_MATCHFINDER_HASH4_ORDER 16 /* hash4_tab has 1 << 16 entries */
113
+
/* Combined entry count of hash3_tab and hash4_tab; this is the length
 * hc_matchfinder_init() passes to matchfinder_init(). */
114
+ #define HC_MATCHFINDER_TOTAL_HASH_LENGTH \
115
+ ((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
116
+ (1UL << HC_MATCHFINDER_HASH4_ORDER))
117
+
/* NOTE: the field layout is relied upon.  hc_matchfinder_init() and
 * hc_matchfinder_slide_window() both cast this struct to 'mf_pos_t *';
 * the former passes HC_MATCHFINDER_TOTAL_HASH_LENGTH as the length, which
 * covers exactly hash3_tab + hash4_tab only while those two come first. */
118
+ struct hc_matchfinder {
119
+
120
+ /* The hash table for finding length 3 matches */
121
+ mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];
122
+
123
+ /* The hash table which contains the first nodes of the linked lists for
124
+ * finding length 4+ matches */
125
+ mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];
126
+
127
+ /* The "next node" references for the linked lists. The "next node" of
128
+ * the node for the sequence with position 'pos' is 'next_tab[pos]'. */
129
+ mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
130
+
131
+ }
/* The alignment attribute is applied only where the compiler headers define it. */
132
+ #ifdef _aligned_attribute
133
+ _aligned_attribute(MATCHFINDER_ALIGNMENT)
134
+ #endif
135
+ ;
136
+
137
+ /* Prepare the matchfinder for a new input buffer. */
138
+ static forceinline void
139
+ hc_matchfinder_init(struct hc_matchfinder *mf)
140
+ {
141
+ matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_LENGTH);
142
+ }
143
+
144
+ static forceinline void
145
+ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
146
+ {
147
+ matchfinder_rebase((mf_pos_t *)mf,
148
+ sizeof(struct hc_matchfinder) / sizeof(mf_pos_t));
149
+ }
150
+
151
+ /*
152
+ * Find the longest match longer than 'best_len' bytes.
153
+ *
154
+ * @mf
155
+ * The matchfinder structure.
156
+ * @in_base_p
157
+ * Location of a pointer which points to the place in the input data the
158
+ * matchfinder currently stores positions relative to. This may be updated
159
+ * by this function.
160
+ * @in_next
161
+ * Pointer to the current position in the input buffer (the position of
162
+ * the sequence being matched against).
163
+ * @best_len
164
+ * Require a match longer than this length.
165
+ * @max_len
166
+ * The maximum permissible match length at this position.
167
+ * @nice_len
168
+ * Stop searching if a match of at least this length is found.
169
+ * Must be <= @max_len.
170
+ * @max_search_depth
171
+ * Limit on the number of potential matches to consider. Must be >= 1.
172
+ * @next_hashes
173
+ * The precomputed hash codes for the sequence beginning at @in_next.
174
+ * These will be used and then updated with the precomputed hashcodes for
175
+ * the sequence beginning at @in_next + 1.
176
+ * @offset_ret
177
+ * If a match is found, its offset is returned in this location.
178
+ *
179
+ * Return the length of the match found, or 'best_len' if no match longer than
180
+ * 'best_len' was found.
181
+ */
182
+ static forceinline u32
183
+ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
184
+ const u8 ** const restrict in_base_p,
185
+ const u8 * const restrict in_next,
186
+ u32 best_len,
187
+ const u32 max_len,
188
+ const u32 nice_len,
189
+ const u32 max_search_depth,
190
+ u32 * const restrict next_hashes,
191
+ u32 * const restrict offset_ret)
192
+ {
193
+ u32 depth_remaining = max_search_depth;
194
+ const u8 *best_matchptr = in_next; /* stays 'in_next' unless a match is found, making *offset_ret 0 */
195
+ mf_pos_t cur_node3, cur_node4;
196
+ u32 hash3, hash4;
197
+ u32 next_seq3, next_seq4;
198
+ u32 seq4;
199
+ const u8 *matchptr;
200
+ u32 len;
201
+ u32 cur_pos = in_next - *in_base_p; /* position relative to the current base */
202
+ const u8 *in_base;
203
+ mf_pos_t cutoff;
204
+
/* When the position reaches the window size, rebase the stored positions
 * and advance the caller's base pointer by one window. */
205
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
206
+ hc_matchfinder_slide_window(mf);
207
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
208
+ cur_pos = 0;
209
+ }
210
+
211
+ in_base = *in_base_p;
212
+ cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; /* a node <= cutoff ends the search (see the '<= cutoff' checks) */
213
+
/* NOTE: on this early exit nothing is inserted into the tables and
 * next_hashes[] is left untouched. */
214
+ if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
215
+ goto out;
216
+
217
+ /* Get the precomputed hash codes. */
218
+ hash3 = next_hashes[0];
219
+ hash4 = next_hashes[1];
220
+
221
+ /* From the hash buckets, get the first node of each linked list. */
222
+ cur_node3 = mf->hash3_tab[hash3];
223
+ cur_node4 = mf->hash4_tab[hash4];
224
+
225
+ /* Update for length 3 matches. This replaces the singleton node in the
226
+ * 'hash3' bucket with the node for the current sequence. */
227
+ mf->hash3_tab[hash3] = cur_pos;
228
+
229
+ /* Update for length 4 matches. This prepends the node for the current
230
+ * sequence to the linked list in the 'hash4' bucket. */
231
+ mf->hash4_tab[hash4] = cur_pos;
232
+ mf->next_tab[cur_pos] = cur_node4;
233
+
234
+ /* Compute the next hash codes. */
235
+ next_seq4 = load_u32_unaligned(in_next + 1);
236
+ next_seq3 = loaded_u32_to_u24(next_seq4);
237
+ next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
238
+ next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
239
+ prefetchw(&mf->hash3_tab[next_hashes[0]]);
240
+ prefetchw(&mf->hash4_tab[next_hashes[1]]);
241
+
242
+ if (best_len < 4) { /* No match of length >= 4 found yet? */
243
+
244
+ /* Check for a length 3 match if needed. */
245
+
246
+ if (cur_node3 <= cutoff)
247
+ goto out;
248
+
249
+ seq4 = load_u32_unaligned(in_next);
250
+
251
+ if (best_len < 3) {
252
+ matchptr = &in_base[cur_node3];
253
+ if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
254
+ best_len = 3;
255
+ best_matchptr = matchptr;
256
+ }
257
+ }
258
+
259
+ /* Check for a length 4 match. */
260
+
261
+ if (cur_node4 <= cutoff)
262
+ goto out;
263
+
264
+ for (;;) {
265
+ /* No length 4 match found yet. Check the first 4 bytes. */
266
+ matchptr = &in_base[cur_node4];
267
+
268
+ if (load_u32_unaligned(matchptr) == seq4)
269
+ break;
270
+
271
+ /* The first 4 bytes did not match. Keep trying. */
/* NOTE(review): the mask presumably keeps the index inside next_tab;
 * this assumes MATCHFINDER_WINDOW_SIZE is a power of two - TODO confirm. */
272
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
273
+ if (cur_node4 <= cutoff || !--depth_remaining)
274
+ goto out;
275
+ }
276
+
277
+ /* Found a match of length >= 4. Extend it to its full length. */
278
+ best_matchptr = matchptr;
279
+ best_len = lz_extend(in_next, best_matchptr, 4, max_len);
280
+ if (best_len >= nice_len)
281
+ goto out;
282
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
283
+ if (cur_node4 <= cutoff || !--depth_remaining)
284
+ goto out;
285
+ } else {
/* The caller already holds a match of length >= 4: skip the length 3 and
 * length 4 probes and go straight to looking for something longer. */
286
+ if (cur_node4 <= cutoff || best_len >= nice_len)
287
+ goto out;
288
+ }
289
+
290
+ /* Check for matches of length >= 5. */
291
+
292
+ for (;;) {
293
+ for (;;) {
294
+ matchptr = &in_base[cur_node4];
295
+
296
+ /* Already found a length 4 match. Try for a longer
297
+ * match; start by checking either the last 4 bytes and
298
+ * the first 4 bytes, or the last byte. (The last byte,
299
+ * the one which would extend the match length by 1, is
300
+ * the most important.) */
301
+ #if UNALIGNED_ACCESS_IS_FAST
302
+ if ((load_u32_unaligned(matchptr + best_len - 3) ==
303
+ load_u32_unaligned(in_next + best_len - 3)) &&
304
+ (load_u32_unaligned(matchptr) ==
305
+ load_u32_unaligned(in_next)))
306
+ #else
307
+ if (matchptr[best_len] == in_next[best_len])
308
+ #endif
309
+ break;
310
+
311
+ /* Continue to the next node in the list. */
312
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
313
+ if (cur_node4 <= cutoff || !--depth_remaining)
314
+ goto out;
315
+ }
316
+
317
+ #if UNALIGNED_ACCESS_IS_FAST
318
+ len = 4; /* the first 4 bytes were just compared equal above */
319
+ #else
320
+ len = 0; /* only the byte at 'best_len' was compared, so extend from the start */
321
+ #endif
322
+ len = lz_extend(in_next, matchptr, len, max_len);
323
+ if (len > best_len) {
324
+ /* This is the new longest match. */
325
+ best_len = len;
326
+ best_matchptr = matchptr;
327
+ if (best_len >= nice_len)
328
+ goto out;
329
+ }
330
+
331
+ /* Continue to the next node in the list. */
332
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
333
+ if (cur_node4 <= cutoff || !--depth_remaining)
334
+ goto out;
335
+ }
336
+ out:
337
+ *offset_ret = in_next - best_matchptr; /* 0 if best_matchptr was never updated */
338
+ return best_len;
339
+ }
340
+
341
+ /*
342
+ * Advance the matchfinder, but don't search for matches.
343
+ *
344
+ * @mf
345
+ * The matchfinder structure.
346
+ * @in_base_p
347
+ * Location of a pointer which points to the place in the input data the
348
+ * matchfinder currently stores positions relative to. This may be updated
349
+ * by this function.
350
+ * @in_next
351
+ * Pointer to the current position in the input buffer.
352
+ * @in_end
353
+ * Pointer to the end of the input buffer.
354
+ * @next_hashes
355
+ * The precomputed hash codes for the sequence beginning at @in_next.
356
+ * These will be used and then updated with the precomputed hashcodes for
357
+ * the sequence beginning at @in_next + @count.
358
+ * @count
359
+ * The number of bytes to advance. Must be > 0.
360
+ *
361
+ * Returns @in_next + @count.
362
+ */
363
+ static forceinline const u8 *
364
+ hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf,
365
+ const u8 ** const restrict in_base_p,
366
+ const u8 *in_next,
367
+ const u8 * const in_end,
368
+ const u32 count,
369
+ u32 * const restrict next_hashes)
370
+ {
371
+ u32 cur_pos;
372
+ u32 hash3, hash4;
373
+ u32 next_seq3, next_seq4;
374
+ u32 remaining = count;
375
+
376
+ if (unlikely(count + 5 > in_end - in_next))
377
+ return &in_next[count];
378
+
379
+ cur_pos = in_next - *in_base_p;
380
+ hash3 = next_hashes[0];
381
+ hash4 = next_hashes[1];
382
+ do {
383
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
384
+ hc_matchfinder_slide_window(mf);
385
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
386
+ cur_pos = 0;
387
+ }
388
+ mf->hash3_tab[hash3] = cur_pos;
389
+ mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
390
+ mf->hash4_tab[hash4] = cur_pos;
391
+
392
+ next_seq4 = load_u32_unaligned(++in_next);
393
+ next_seq3 = loaded_u32_to_u24(next_seq4);
394
+ hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
395
+ hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
396
+ cur_pos++;
397
+ } while (--remaining);
398
+
399
+ prefetchw(&mf->hash3_tab[hash3]);
400
+ prefetchw(&mf->hash4_tab[hash4]);
401
+ next_hashes[0] = hash3;
402
+ next_hashes[1] = hash4;
403
+
404
+ return in_next;
405
+ }