deflate-ruby 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +95 -92
  3. data/GEM_VERIFICATION_REPORT.md +140 -0
  4. data/LICENSE.txt +6 -6
  5. data/README.md +87 -65
  6. data/Rakefile +23 -0
  7. data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_impl.h → adler32_impl.h} +8 -7
  8. data/ext/deflate_ruby/common_defs.h +748 -0
  9. data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.c → cpu_features.c} +46 -16
  10. data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.h → cpu_features.h} +2 -1
  11. data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_impl.h → crc32_impl.h} +22 -23
  12. data/ext/deflate_ruby/{libdeflate/lib/crc32_multipliers.h → crc32_multipliers.h} +2 -4
  13. data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_pclmul_template.h → crc32_pclmul_template.h} +23 -94
  14. data/ext/deflate_ruby/{libdeflate/lib/crc32_tables.h → crc32_tables.h} +1 -1
  15. data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.c → deflate_compress.c} +59 -60
  16. data/ext/deflate_ruby/deflate_ruby.c +392 -218
  17. data/ext/deflate_ruby/deflate_ruby.h +6 -0
  18. data/ext/deflate_ruby/extconf.rb +35 -25
  19. data/ext/deflate_ruby/libdeflate/adler32.c +162 -0
  20. data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/adler32_impl.h +14 -7
  21. data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/crc32_impl.h +25 -31
  22. data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_helpers.h +156 -0
  23. data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_wide.h +226 -0
  24. data/ext/deflate_ruby/libdeflate/bt_matchfinder.h +342 -0
  25. data/ext/deflate_ruby/libdeflate/common_defs.h +2 -1
  26. data/ext/deflate_ruby/libdeflate/cpu_features_common.h +93 -0
  27. data/ext/deflate_ruby/libdeflate/crc32.c +262 -0
  28. data/ext/deflate_ruby/libdeflate/crc32_multipliers.h +375 -0
  29. data/ext/deflate_ruby/libdeflate/crc32_tables.h +587 -0
  30. data/ext/deflate_ruby/libdeflate/decompress_template.h +777 -0
  31. data/ext/deflate_ruby/libdeflate/deflate_compress.c +4128 -0
  32. data/ext/deflate_ruby/libdeflate/deflate_compress.h +15 -0
  33. data/ext/deflate_ruby/libdeflate/deflate_constants.h +56 -0
  34. data/ext/deflate_ruby/libdeflate/deflate_decompress.c +1208 -0
  35. data/ext/deflate_ruby/libdeflate/gzip_compress.c +90 -0
  36. data/ext/deflate_ruby/libdeflate/gzip_constants.h +45 -0
  37. data/ext/deflate_ruby/libdeflate/gzip_decompress.c +144 -0
  38. data/ext/deflate_ruby/libdeflate/hc_matchfinder.h +401 -0
  39. data/ext/deflate_ruby/libdeflate/ht_matchfinder.h +234 -0
  40. data/ext/deflate_ruby/libdeflate/lib_common.h +106 -0
  41. data/ext/deflate_ruby/libdeflate/libdeflate.h +2 -2
  42. data/ext/deflate_ruby/libdeflate/{lib/matchfinder_common.h → matchfinder_common.h} +3 -3
  43. data/ext/deflate_ruby/libdeflate/x86/adler32_impl.h +135 -0
  44. data/ext/deflate_ruby/libdeflate/x86/adler32_template.h +518 -0
  45. data/ext/deflate_ruby/libdeflate/x86/cpu_features.c +213 -0
  46. data/ext/deflate_ruby/libdeflate/x86/cpu_features.h +170 -0
  47. data/ext/deflate_ruby/libdeflate/x86/crc32_impl.h +159 -0
  48. data/ext/deflate_ruby/libdeflate/x86/crc32_pclmul_template.h +424 -0
  49. data/ext/deflate_ruby/libdeflate/x86/decompress_impl.h +57 -0
  50. data/ext/deflate_ruby/libdeflate.h +411 -0
  51. data/ext/deflate_ruby/matchfinder_common.h +224 -0
  52. data/ext/deflate_ruby/matchfinder_impl.h +122 -0
  53. data/ext/deflate_ruby/utils.c +141 -0
  54. data/ext/deflate_ruby/zlib_compress.c +82 -0
  55. data/ext/deflate_ruby/zlib_constants.h +21 -0
  56. data/ext/deflate_ruby/zlib_decompress.c +104 -0
  57. data/lib/deflate_ruby/version.rb +1 -1
  58. data/lib/deflate_ruby.rb +1 -63
  59. data/sig/deflate_ruby.rbs +4 -0
  60. data/test/test_deflate_ruby.rb +220 -0
  61. data/test/test_helper.rb +6 -0
  62. metadata +90 -144
  63. data/ext/deflate_ruby/libdeflate/CMakeLists.txt +0 -270
  64. data/ext/deflate_ruby/libdeflate/NEWS.md +0 -494
  65. data/ext/deflate_ruby/libdeflate/README.md +0 -228
  66. data/ext/deflate_ruby/libdeflate/libdeflate-config.cmake.in +0 -3
  67. data/ext/deflate_ruby/libdeflate/libdeflate.pc.in +0 -18
  68. data/ext/deflate_ruby/libdeflate/programs/CMakeLists.txt +0 -105
  69. data/ext/deflate_ruby/libdeflate/programs/benchmark.c +0 -696
  70. data/ext/deflate_ruby/libdeflate/programs/checksum.c +0 -218
  71. data/ext/deflate_ruby/libdeflate/programs/config.h.in +0 -19
  72. data/ext/deflate_ruby/libdeflate/programs/gzip.c +0 -688
  73. data/ext/deflate_ruby/libdeflate/programs/prog_util.c +0 -521
  74. data/ext/deflate_ruby/libdeflate/programs/prog_util.h +0 -225
  75. data/ext/deflate_ruby/libdeflate/programs/test_checksums.c +0 -200
  76. data/ext/deflate_ruby/libdeflate/programs/test_custom_malloc.c +0 -155
  77. data/ext/deflate_ruby/libdeflate/programs/test_incomplete_codes.c +0 -385
  78. data/ext/deflate_ruby/libdeflate/programs/test_invalid_streams.c +0 -130
  79. data/ext/deflate_ruby/libdeflate/programs/test_litrunlen_overflow.c +0 -72
  80. data/ext/deflate_ruby/libdeflate/programs/test_overread.c +0 -95
  81. data/ext/deflate_ruby/libdeflate/programs/test_slow_decompression.c +0 -472
  82. data/ext/deflate_ruby/libdeflate/programs/test_trailing_bytes.c +0 -151
  83. data/ext/deflate_ruby/libdeflate/programs/test_util.c +0 -237
  84. data/ext/deflate_ruby/libdeflate/programs/test_util.h +0 -61
  85. data/ext/deflate_ruby/libdeflate/programs/tgetopt.c +0 -118
  86. data/ext/deflate_ruby/libdeflate/scripts/android_build.sh +0 -118
  87. data/ext/deflate_ruby/libdeflate/scripts/android_tests.sh +0 -69
  88. data/ext/deflate_ruby/libdeflate/scripts/benchmark.sh +0 -10
  89. data/ext/deflate_ruby/libdeflate/scripts/checksum.sh +0 -10
  90. data/ext/deflate_ruby/libdeflate/scripts/checksum_benchmarks.sh +0 -253
  91. data/ext/deflate_ruby/libdeflate/scripts/cmake-helper.sh +0 -17
  92. data/ext/deflate_ruby/libdeflate/scripts/deflate_benchmarks.sh +0 -119
  93. data/ext/deflate_ruby/libdeflate/scripts/exec_tests.sh +0 -38
  94. data/ext/deflate_ruby/libdeflate/scripts/gen-release-archives.sh +0 -37
  95. data/ext/deflate_ruby/libdeflate/scripts/gen_bitreverse_tab.py +0 -19
  96. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_multipliers.c +0 -199
  97. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_tables.c +0 -105
  98. data/ext/deflate_ruby/libdeflate/scripts/gen_default_litlen_costs.py +0 -44
  99. data/ext/deflate_ruby/libdeflate/scripts/gen_offset_slot_map.py +0 -29
  100. data/ext/deflate_ruby/libdeflate/scripts/gzip_tests.sh +0 -523
  101. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/corpus/0 +0 -0
  102. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/fuzz.c +0 -95
  103. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/corpus/0 +0 -3
  104. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/fuzz.c +0 -62
  105. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/fuzz.sh +0 -108
  106. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/corpus/0 +0 -0
  107. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/fuzz.c +0 -19
  108. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/corpus/0 +0 -3
  109. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/fuzz.c +0 -19
  110. data/ext/deflate_ruby/libdeflate/scripts/run_tests.sh +0 -416
  111. data/ext/deflate_ruby/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake +0 -8
  112. data/ext/deflate_ruby/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake +0 -8
  113. /data/ext/deflate_ruby/{libdeflate/lib/adler32.c → adler32.c} +0 -0
  114. /data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_template.h → adler32_template.h} +0 -0
  115. /data/ext/deflate_ruby/{libdeflate/lib/bt_matchfinder.h → bt_matchfinder.h} +0 -0
  116. /data/ext/deflate_ruby/{libdeflate/lib/cpu_features_common.h → cpu_features_common.h} +0 -0
  117. /data/ext/deflate_ruby/{libdeflate/lib/crc32.c → crc32.c} +0 -0
  118. /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_helpers.h → crc32_pmull_helpers.h} +0 -0
  119. /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_wide.h → crc32_pmull_wide.h} +0 -0
  120. /data/ext/deflate_ruby/{libdeflate/lib/x86/decompress_impl.h → decompress_impl.h} +0 -0
  121. /data/ext/deflate_ruby/{libdeflate/lib/decompress_template.h → decompress_template.h} +0 -0
  122. /data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.h → deflate_compress.h} +0 -0
  123. /data/ext/deflate_ruby/{libdeflate/lib/deflate_constants.h → deflate_constants.h} +0 -0
  124. /data/ext/deflate_ruby/{libdeflate/lib/deflate_decompress.c → deflate_decompress.c} +0 -0
  125. /data/ext/deflate_ruby/{libdeflate/lib/gzip_compress.c → gzip_compress.c} +0 -0
  126. /data/ext/deflate_ruby/{libdeflate/lib/gzip_constants.h → gzip_constants.h} +0 -0
  127. /data/ext/deflate_ruby/{libdeflate/lib/gzip_decompress.c → gzip_decompress.c} +0 -0
  128. /data/ext/deflate_ruby/{libdeflate/lib/hc_matchfinder.h → hc_matchfinder.h} +0 -0
  129. /data/ext/deflate_ruby/{libdeflate/lib/ht_matchfinder.h → ht_matchfinder.h} +0 -0
  130. /data/ext/deflate_ruby/{libdeflate/lib/lib_common.h → lib_common.h} +0 -0
  131. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.c +0 -0
  132. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.h +0 -0
  133. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/matchfinder_impl.h +0 -0
  134. /data/ext/deflate_ruby/libdeflate/{lib/riscv → riscv}/matchfinder_impl.h +0 -0
  135. /data/ext/deflate_ruby/libdeflate/{lib/utils.c → utils.c} +0 -0
  136. /data/ext/deflate_ruby/libdeflate/{lib/x86 → x86}/matchfinder_impl.h +0 -0
  137. /data/ext/deflate_ruby/libdeflate/{lib/zlib_compress.c → zlib_compress.c} +0 -0
  138. /data/ext/deflate_ruby/libdeflate/{lib/zlib_constants.h → zlib_constants.h} +0 -0
  139. /data/ext/deflate_ruby/libdeflate/{lib/zlib_decompress.c → zlib_decompress.c} +0 -0
@@ -0,0 +1,90 @@
1
+ /*
2
+ * gzip_compress.c - compress with a gzip wrapper
3
+ *
4
+ * Copyright 2016 Eric Biggers
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person
7
+ * obtaining a copy of this software and associated documentation
8
+ * files (the "Software"), to deal in the Software without
9
+ * restriction, including without limitation the rights to use,
10
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ * copies of the Software, and to permit persons to whom the
12
+ * Software is furnished to do so, subject to the following
13
+ * conditions:
14
+ *
15
+ * The above copyright notice and this permission notice shall be
16
+ * included in all copies or substantial portions of the Software.
17
+ *
18
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ * OTHER DEALINGS IN THE SOFTWARE.
26
+ */
27
+
28
+ #include "deflate_compress.h"
29
+ #include "gzip_constants.h"
30
+
31
+ LIBDEFLATEAPI size_t
32
+ libdeflate_gzip_compress(struct libdeflate_compressor *c,
33
+ const void *in, size_t in_nbytes,
34
+ void *out, size_t out_nbytes_avail)
35
+ {
36
+ u8 *out_next = out;
37
+ unsigned compression_level;
38
+ u8 xfl;
39
+ size_t deflate_size;
40
+
41
+ if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
42
+ return 0;
43
+
44
+ /* ID1 */
45
+ *out_next++ = GZIP_ID1;
46
+ /* ID2 */
47
+ *out_next++ = GZIP_ID2;
48
+ /* CM */
49
+ *out_next++ = GZIP_CM_DEFLATE;
50
+ /* FLG */
51
+ *out_next++ = 0;
52
+ /* MTIME */
53
+ put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next);
54
+ out_next += 4;
55
+ /* XFL */
56
+ xfl = 0;
57
+ compression_level = libdeflate_get_compression_level(c);
58
+ if (compression_level < 2)
59
+ xfl |= GZIP_XFL_FASTEST_COMPRESSION;
60
+ else if (compression_level >= 8)
61
+ xfl |= GZIP_XFL_SLOWEST_COMPRESSION;
62
+ *out_next++ = xfl;
63
+ /* OS */
64
+ *out_next++ = GZIP_OS_UNKNOWN; /* OS */
65
+
66
+ /* Compressed data */
67
+ deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
68
+ out_nbytes_avail - GZIP_MIN_OVERHEAD);
69
+ if (deflate_size == 0)
70
+ return 0;
71
+ out_next += deflate_size;
72
+
73
+ /* CRC32 */
74
+ put_unaligned_le32(libdeflate_crc32(0, in, in_nbytes), out_next);
75
+ out_next += 4;
76
+
77
+ /* ISIZE */
78
+ put_unaligned_le32((u32)in_nbytes, out_next);
79
+ out_next += 4;
80
+
81
+ return out_next - (u8 *)out;
82
+ }
83
+
84
+ LIBDEFLATEAPI size_t
85
+ libdeflate_gzip_compress_bound(struct libdeflate_compressor *c,
86
+ size_t in_nbytes)
87
+ {
88
+ return GZIP_MIN_OVERHEAD +
89
+ libdeflate_deflate_compress_bound(c, in_nbytes);
90
+ }
@@ -0,0 +1,45 @@
1
+ /*
2
+ * gzip_constants.h - constants for the gzip wrapper format
3
+ */
4
+
5
+ #ifndef LIB_GZIP_CONSTANTS_H
6
+ #define LIB_GZIP_CONSTANTS_H
7
+
8
/* Sizes, in bytes, of the mandatory parts of a gzip stream. */
#define GZIP_MIN_HEADER_SIZE	10	/* header without optional fields */
#define GZIP_FOOTER_SIZE	8	/* CRC32 + ISIZE */
#define GZIP_MIN_OVERHEAD	(GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)

/* Magic bytes that start every gzip stream. */
#define GZIP_ID1		0x1F
#define GZIP_ID2		0x8B

/* Compression method (CM) value for DEFLATE. */
#define GZIP_CM_DEFLATE		8

/* Bits of the FLG header byte. */
#define GZIP_FTEXT		0x01	/* hint: data is probably text */
#define GZIP_FHCRC		0x02	/* CRC16 of the header is present */
#define GZIP_FEXTRA		0x04	/* extra field is present */
#define GZIP_FNAME		0x08	/* zero-terminated file name is present */
#define GZIP_FCOMMENT		0x10	/* zero-terminated comment is present */
#define GZIP_FRESERVED		0xE0	/* reserved bits */

/* MTIME value meaning that no timestamp is stored. */
#define GZIP_MTIME_UNAVAILABLE	0

/* Values for the XFL (extra flags) header byte. */
#define GZIP_XFL_SLOWEST_COMPRESSION	0x02
#define GZIP_XFL_FASTEST_COMPRESSION	0x04

/* Values for the OS header byte. */
#define GZIP_OS_FAT		0
#define GZIP_OS_AMIGA		1
#define GZIP_OS_VMS		2
#define GZIP_OS_UNIX		3
#define GZIP_OS_VM_CMS		4
#define GZIP_OS_ATARI_TOS	5
#define GZIP_OS_HPFS		6
#define GZIP_OS_MACINTOSH	7
#define GZIP_OS_Z_SYSTEM	8
#define GZIP_OS_CP_M		9
#define GZIP_OS_TOPS_20		10
#define GZIP_OS_NTFS		11
#define GZIP_OS_QDOS		12
#define GZIP_OS_RISCOS		13
#define GZIP_OS_UNKNOWN		255
44
+
45
+ #endif /* LIB_GZIP_CONSTANTS_H */
@@ -0,0 +1,144 @@
1
+ /*
2
+ * gzip_decompress.c - decompress with a gzip wrapper
3
+ *
4
+ * Copyright 2016 Eric Biggers
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person
7
+ * obtaining a copy of this software and associated documentation
8
+ * files (the "Software"), to deal in the Software without
9
+ * restriction, including without limitation the rights to use,
10
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ * copies of the Software, and to permit persons to whom the
12
+ * Software is furnished to do so, subject to the following
13
+ * conditions:
14
+ *
15
+ * The above copyright notice and this permission notice shall be
16
+ * included in all copies or substantial portions of the Software.
17
+ *
18
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ * OTHER DEALINGS IN THE SOFTWARE.
26
+ */
27
+
28
+ #include "lib_common.h"
29
+ #include "gzip_constants.h"
30
+
31
+ LIBDEFLATEAPI enum libdeflate_result
32
+ libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d,
33
+ const void *in, size_t in_nbytes,
34
+ void *out, size_t out_nbytes_avail,
35
+ size_t *actual_in_nbytes_ret,
36
+ size_t *actual_out_nbytes_ret)
37
+ {
38
+ const u8 *in_next = in;
39
+ const u8 * const in_end = in_next + in_nbytes;
40
+ u8 flg;
41
+ size_t actual_in_nbytes;
42
+ size_t actual_out_nbytes;
43
+ enum libdeflate_result result;
44
+
45
+ if (in_nbytes < GZIP_MIN_OVERHEAD)
46
+ return LIBDEFLATE_BAD_DATA;
47
+
48
+ /* ID1 */
49
+ if (*in_next++ != GZIP_ID1)
50
+ return LIBDEFLATE_BAD_DATA;
51
+ /* ID2 */
52
+ if (*in_next++ != GZIP_ID2)
53
+ return LIBDEFLATE_BAD_DATA;
54
+ /* CM */
55
+ if (*in_next++ != GZIP_CM_DEFLATE)
56
+ return LIBDEFLATE_BAD_DATA;
57
+ flg = *in_next++;
58
+ /* MTIME */
59
+ in_next += 4;
60
+ /* XFL */
61
+ in_next += 1;
62
+ /* OS */
63
+ in_next += 1;
64
+
65
+ if (flg & GZIP_FRESERVED)
66
+ return LIBDEFLATE_BAD_DATA;
67
+
68
+ /* Extra field */
69
+ if (flg & GZIP_FEXTRA) {
70
+ u16 xlen = get_unaligned_le16(in_next);
71
+ in_next += 2;
72
+
73
+ if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
74
+ return LIBDEFLATE_BAD_DATA;
75
+
76
+ in_next += xlen;
77
+ }
78
+
79
+ /* Original file name (zero terminated) */
80
+ if (flg & GZIP_FNAME) {
81
+ while (*in_next++ != 0 && in_next != in_end)
82
+ ;
83
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
84
+ return LIBDEFLATE_BAD_DATA;
85
+ }
86
+
87
+ /* File comment (zero terminated) */
88
+ if (flg & GZIP_FCOMMENT) {
89
+ while (*in_next++ != 0 && in_next != in_end)
90
+ ;
91
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
92
+ return LIBDEFLATE_BAD_DATA;
93
+ }
94
+
95
+ /* CRC16 for gzip header */
96
+ if (flg & GZIP_FHCRC) {
97
+ in_next += 2;
98
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
99
+ return LIBDEFLATE_BAD_DATA;
100
+ }
101
+
102
+ /* Compressed data */
103
+ result = libdeflate_deflate_decompress_ex(d, in_next,
104
+ in_end - GZIP_FOOTER_SIZE - in_next,
105
+ out, out_nbytes_avail,
106
+ &actual_in_nbytes,
107
+ actual_out_nbytes_ret);
108
+ if (result != LIBDEFLATE_SUCCESS)
109
+ return result;
110
+
111
+ if (actual_out_nbytes_ret)
112
+ actual_out_nbytes = *actual_out_nbytes_ret;
113
+ else
114
+ actual_out_nbytes = out_nbytes_avail;
115
+
116
+ in_next += actual_in_nbytes;
117
+
118
+ /* CRC32 */
119
+ if (libdeflate_crc32(0, out, actual_out_nbytes) !=
120
+ get_unaligned_le32(in_next))
121
+ return LIBDEFLATE_BAD_DATA;
122
+ in_next += 4;
123
+
124
+ /* ISIZE */
125
+ if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
126
+ return LIBDEFLATE_BAD_DATA;
127
+ in_next += 4;
128
+
129
+ if (actual_in_nbytes_ret)
130
+ *actual_in_nbytes_ret = in_next - (u8 *)in;
131
+
132
+ return LIBDEFLATE_SUCCESS;
133
+ }
134
+
135
+ LIBDEFLATEAPI enum libdeflate_result
136
+ libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
137
+ const void *in, size_t in_nbytes,
138
+ void *out, size_t out_nbytes_avail,
139
+ size_t *actual_out_nbytes_ret)
140
+ {
141
+ return libdeflate_gzip_decompress_ex(d, in, in_nbytes,
142
+ out, out_nbytes_avail,
143
+ NULL, actual_out_nbytes_ret);
144
+ }
@@ -0,0 +1,401 @@
1
+ /*
2
+ * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
3
+ *
4
+ * Copyright 2016 Eric Biggers
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person
7
+ * obtaining a copy of this software and associated documentation
8
+ * files (the "Software"), to deal in the Software without
9
+ * restriction, including without limitation the rights to use,
10
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ * copies of the Software, and to permit persons to whom the
12
+ * Software is furnished to do so, subject to the following
13
+ * conditions:
14
+ *
15
+ * The above copyright notice and this permission notice shall be
16
+ * included in all copies or substantial portions of the Software.
17
+ *
18
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ * OTHER DEALINGS IN THE SOFTWARE.
26
+ *
27
+ * ---------------------------------------------------------------------------
28
+ *
29
+ * Algorithm
30
+ *
31
+ * This is a Hash Chains (hc) based matchfinder.
32
+ *
33
+ * The main data structure is a hash table where each hash bucket contains a
34
+ * linked list (or "chain") of sequences whose first 4 bytes share the same hash
35
+ * code. Each sequence is identified by its starting position in the input
36
+ * buffer.
37
+ *
38
+ * The algorithm processes the input buffer sequentially. At each byte
39
+ * position, the hash code of the first 4 bytes of the sequence beginning at
40
+ * that position (the sequence being matched against) is computed. This
41
+ * identifies the hash bucket to use for that position. Then, this hash
42
+ * bucket's linked list is searched for matches. Then, a new linked list node
43
+ * is created to represent the current sequence and is prepended to the list.
44
+ *
45
+ * This algorithm has several useful properties:
46
+ *
47
+ * - It only finds true Lempel-Ziv matches; i.e., those where the matching
48
+ * sequence occurs prior to the sequence being matched against.
49
+ *
50
+ * - The sequences in each linked list are always sorted by decreasing starting
51
+ * position. Therefore, the closest (smallest offset) matches are found
52
+ * first, which in many compression formats tend to be the cheapest to encode.
53
+ *
54
+ * - Although fast running time is not guaranteed due to the possibility of the
55
+ * lists getting very long, the worst degenerate behavior can be easily
56
+ * prevented by capping the number of nodes searched at each position.
57
+ *
58
+ * - If the compressor decides not to search for matches at a certain position,
59
+ * then that position can be quickly inserted without searching the list.
60
+ *
61
+ * - The algorithm is adaptable to sliding windows: just store the positions
62
+ * relative to a "base" value that is updated from time to time, and stop
63
+ * searching each list when the sequences get too far away.
64
+ *
65
+ * ----------------------------------------------------------------------------
66
+ *
67
+ * Optimizations
68
+ *
69
+ * The main hash table and chains handle length 4+ matches. Length 3 matches
70
+ * are handled by a separate hash table with no chains. This works well for
71
+ * typical "greedy" or "lazy"-style compressors, where length 3 matches are
72
+ * often only helpful if they have small offsets. Instead of searching a full
73
+ * chain for length 3+ matches, the algorithm just checks for one close length 3
74
+ * match, then focuses on finding length 4+ matches.
75
+ *
76
+ * The longest_match() and skip_bytes() functions are inlined into the
77
+ * compressors that use them. This isn't just about saving the overhead of a
78
+ * function call. These functions are intended to be called from the inner
79
+ * loops of compressors, where giving the compiler more control over register
80
+ * allocation is very helpful. There is also significant benefit to be gained
81
+ * from allowing the CPU to predict branches independently at each call site.
82
+ * For example, "lazy"-style compressors can be written with two calls to
83
+ * longest_match(), each of which starts with a different 'best_len' and
84
+ * therefore has significantly different performance characteristics.
85
+ *
86
+ * Although any hash function can be used, a multiplicative hash is fast and
87
+ * works well.
88
+ *
89
+ * On some processors, it is significantly faster to extend matches by whole
90
+ * words (32 or 64 bits) instead of by individual bytes. For this to be the
91
+ * case, the processor must implement unaligned memory accesses efficiently and
92
+ * must have either a fast "find first set bit" instruction or a fast "find last
93
+ * set bit" instruction, depending on the processor's endianness.
94
+ *
95
+ * The code uses one loop for finding the first match and one loop for finding a
96
+ * longer match. Each of these loops is tuned for its respective task and in
97
+ * combination are faster than a single generalized loop that handles both
98
+ * tasks.
99
+ *
100
+ * The code also uses a tight inner loop that only compares the last and first
101
+ * bytes of a potential match. It is only when these bytes match that a full
102
+ * match extension is attempted.
103
+ *
104
+ * ----------------------------------------------------------------------------
105
+ */
106
+
107
+ #ifndef LIB_HC_MATCHFINDER_H
108
+ #define LIB_HC_MATCHFINDER_H
109
+
110
+ #include "matchfinder_common.h"
111
+
112
+ #define HC_MATCHFINDER_HASH3_ORDER 15
113
+ #define HC_MATCHFINDER_HASH4_ORDER 16
114
+
115
+ #define HC_MATCHFINDER_TOTAL_HASH_SIZE \
116
+ (((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
117
+ (1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
118
+
119
+ struct MATCHFINDER_ALIGNED hc_matchfinder {
120
+
121
+ /* The hash table for finding length 3 matches */
122
+ mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];
123
+
124
+ /* The hash table which contains the first nodes of the linked lists for
125
+ * finding length 4+ matches */
126
+ mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];
127
+
128
+ /* The "next node" references for the linked lists. The "next node" of
129
+ * the node for the sequence with position 'pos' is 'next_tab[pos]'. */
130
+ mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
131
+ };
132
+
133
+ /* Prepare the matchfinder for a new input buffer. */
134
+ static forceinline void
135
+ hc_matchfinder_init(struct hc_matchfinder *mf)
136
+ {
137
+ STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
138
+ MATCHFINDER_SIZE_ALIGNMENT == 0);
139
+
140
+ matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
141
+ }
142
+
143
+ static forceinline void
144
+ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
145
+ {
146
+ STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
147
+
148
+ matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
149
+ }
150
+
151
+ /*
152
+ * Find the longest match longer than 'best_len' bytes.
153
+ *
154
+ * @mf
155
+ * The matchfinder structure.
156
+ * @in_base_p
157
+ * Location of a pointer which points to the place in the input data the
158
+ * matchfinder currently stores positions relative to. This may be updated
159
+ * by this function.
160
+ * @in_next
161
+ * Pointer to the next position in the input buffer, i.e. the sequence
162
+ * being matched against.
163
+ * @best_len
164
+ * Require a match longer than this length.
165
+ * @max_len
166
+ * The maximum permissible match length at this position.
167
+ * @nice_len
168
+ * Stop searching if a match of at least this length is found.
169
+ * Must be <= @max_len.
170
+ * @max_search_depth
171
+ * Limit on the number of potential matches to consider. Must be >= 1.
172
+ * @next_hashes
173
+ * The precomputed hash codes for the sequence beginning at @in_next.
174
+ * These will be used and then updated with the precomputed hashcodes for
175
+ * the sequence beginning at @in_next + 1.
176
+ * @offset_ret
177
+ * If a match is found, its offset is returned in this location.
178
+ *
179
+ * Return the length of the match found, or 'best_len' if no match longer than
180
+ * 'best_len' was found.
181
+ */
182
+ static forceinline u32
183
+ hc_matchfinder_longest_match(struct hc_matchfinder * const mf,
184
+ const u8 ** const in_base_p,
185
+ const u8 * const in_next,
186
+ u32 best_len,
187
+ const u32 max_len,
188
+ const u32 nice_len,
189
+ const u32 max_search_depth,
190
+ u32 * const next_hashes,
191
+ u32 * const offset_ret)
192
+ {
193
+ u32 depth_remaining = max_search_depth;
194
+ const u8 *best_matchptr = in_next;
195
+ mf_pos_t cur_node3, cur_node4;
196
+ u32 hash3, hash4;
197
+ u32 next_hashseq;
198
+ u32 seq4;
199
+ const u8 *matchptr;
200
+ u32 len;
201
+ u32 cur_pos = in_next - *in_base_p;
202
+ const u8 *in_base;
203
+ mf_pos_t cutoff;
204
+
205
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
206
+ hc_matchfinder_slide_window(mf);
207
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
208
+ cur_pos = 0;
209
+ }
210
+
211
+ in_base = *in_base_p;
212
+ cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
213
+
214
+ if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
215
+ goto out;
216
+
217
+ /* Get the precomputed hash codes. */
218
+ hash3 = next_hashes[0];
219
+ hash4 = next_hashes[1];
220
+
221
+ /* From the hash buckets, get the first node of each linked list. */
222
+ cur_node3 = mf->hash3_tab[hash3];
223
+ cur_node4 = mf->hash4_tab[hash4];
224
+
225
+ /* Update for length 3 matches. This replaces the singleton node in the
226
+ * 'hash3' bucket with the node for the current sequence. */
227
+ mf->hash3_tab[hash3] = cur_pos;
228
+
229
+ /* Update for length 4 matches. This prepends the node for the current
230
+ * sequence to the linked list in the 'hash4' bucket. */
231
+ mf->hash4_tab[hash4] = cur_pos;
232
+ mf->next_tab[cur_pos] = cur_node4;
233
+
234
+ /* Compute the next hash codes. */
235
+ next_hashseq = get_unaligned_le32(in_next + 1);
236
+ next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
237
+ next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
238
+ prefetchw(&mf->hash3_tab[next_hashes[0]]);
239
+ prefetchw(&mf->hash4_tab[next_hashes[1]]);
240
+
241
+ if (best_len < 4) { /* No match of length >= 4 found yet? */
242
+
243
+ /* Check for a length 3 match if needed. */
244
+
245
+ if (cur_node3 <= cutoff)
246
+ goto out;
247
+
248
+ seq4 = load_u32_unaligned(in_next);
249
+
250
+ if (best_len < 3) {
251
+ matchptr = &in_base[cur_node3];
252
+ if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
253
+ best_len = 3;
254
+ best_matchptr = matchptr;
255
+ }
256
+ }
257
+
258
+ /* Check for a length 4 match. */
259
+
260
+ if (cur_node4 <= cutoff)
261
+ goto out;
262
+
263
+ for (;;) {
264
+ /* No length 4 match found yet. Check the first 4 bytes. */
265
+ matchptr = &in_base[cur_node4];
266
+
267
+ if (load_u32_unaligned(matchptr) == seq4)
268
+ break;
269
+
270
+ /* The first 4 bytes did not match. Keep trying. */
271
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
272
+ if (cur_node4 <= cutoff || !--depth_remaining)
273
+ goto out;
274
+ }
275
+
276
+ /* Found a match of length >= 4. Extend it to its full length. */
277
+ best_matchptr = matchptr;
278
+ best_len = lz_extend(in_next, best_matchptr, 4, max_len);
279
+ if (best_len >= nice_len)
280
+ goto out;
281
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
282
+ if (cur_node4 <= cutoff || !--depth_remaining)
283
+ goto out;
284
+ } else {
285
+ if (cur_node4 <= cutoff || best_len >= nice_len)
286
+ goto out;
287
+ }
288
+
289
+ /* Check for matches of length >= 5. */
290
+
291
+ for (;;) {
292
+ for (;;) {
293
+ matchptr = &in_base[cur_node4];
294
+
295
+ /* Already found a length 4 match. Try for a longer
296
+ * match; start by checking either the last 4 bytes and
297
+ * the first 4 bytes, or the last byte. (The last byte,
298
+ * the one which would extend the match length by 1, is
299
+ * the most important.) */
300
+ #if UNALIGNED_ACCESS_IS_FAST
301
+ if ((load_u32_unaligned(matchptr + best_len - 3) ==
302
+ load_u32_unaligned(in_next + best_len - 3)) &&
303
+ (load_u32_unaligned(matchptr) ==
304
+ load_u32_unaligned(in_next)))
305
+ #else
306
+ if (matchptr[best_len] == in_next[best_len])
307
+ #endif
308
+ break;
309
+
310
+ /* Continue to the next node in the list. */
311
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
312
+ if (cur_node4 <= cutoff || !--depth_remaining)
313
+ goto out;
314
+ }
315
+
316
+ #if UNALIGNED_ACCESS_IS_FAST
317
+ len = 4;
318
+ #else
319
+ len = 0;
320
+ #endif
321
+ len = lz_extend(in_next, matchptr, len, max_len);
322
+ if (len > best_len) {
323
+ /* This is the new longest match. */
324
+ best_len = len;
325
+ best_matchptr = matchptr;
326
+ if (best_len >= nice_len)
327
+ goto out;
328
+ }
329
+
330
+ /* Continue to the next node in the list. */
331
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
332
+ if (cur_node4 <= cutoff || !--depth_remaining)
333
+ goto out;
334
+ }
335
+ out:
336
+ *offset_ret = in_next - best_matchptr;
337
+ return best_len;
338
+ }
339
+
340
+ /*
341
+ * Advance the matchfinder, but don't search for matches.
342
+ *
343
+ * @mf
344
+ * The matchfinder structure.
345
+ * @in_base_p
346
+ * Location of a pointer which points to the place in the input data the
347
+ * matchfinder currently stores positions relative to. This may be updated
348
+ * by this function.
349
+ * @in_next
350
+ * Pointer to the next position in the input buffer.
351
+ * @in_end
352
+ * Pointer to the end of the input buffer.
353
+ * @count
354
+ * The number of bytes to advance. Must be > 0.
355
+ * @next_hashes
356
+ * The precomputed hash codes for the sequence beginning at @in_next.
357
+ * These will be used and then updated with the precomputed hashcodes for
358
+ * the sequence beginning at @in_next + @count.
359
+ */
360
+ static forceinline void
361
+ hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf,
362
+ const u8 ** const in_base_p,
363
+ const u8 *in_next,
364
+ const u8 * const in_end,
365
+ const u32 count,
366
+ u32 * const next_hashes)
367
+ {
368
+ u32 cur_pos;
369
+ u32 hash3, hash4;
370
+ u32 next_hashseq;
371
+ u32 remaining = count;
372
+
373
+ if (unlikely(count + 5 > in_end - in_next))
374
+ return;
375
+
376
+ cur_pos = in_next - *in_base_p;
377
+ hash3 = next_hashes[0];
378
+ hash4 = next_hashes[1];
379
+ do {
380
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
381
+ hc_matchfinder_slide_window(mf);
382
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
383
+ cur_pos = 0;
384
+ }
385
+ mf->hash3_tab[hash3] = cur_pos;
386
+ mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
387
+ mf->hash4_tab[hash4] = cur_pos;
388
+
389
+ next_hashseq = get_unaligned_le32(++in_next);
390
+ hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
391
+ hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
392
+ cur_pos++;
393
+ } while (--remaining);
394
+
395
+ prefetchw(&mf->hash3_tab[hash3]);
396
+ prefetchw(&mf->hash4_tab[hash4]);
397
+ next_hashes[0] = hash3;
398
+ next_hashes[1] = hash4;
399
+ }
400
+
401
+ #endif /* LIB_HC_MATCHFINDER_H */