libdeflate 0.1.0

Files changed (89)
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +1 -0
  6. data/.rubocop_todo.yml +9 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +52 -0
  11. data/Rakefile +15 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/ext/libdeflate/extconf.rb +14 -0
  15. data/ext/libdeflate/libdeflate/.gitignore +19 -0
  16. data/ext/libdeflate/libdeflate/COPYING +21 -0
  17. data/ext/libdeflate/libdeflate/Makefile +231 -0
  18. data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
  19. data/ext/libdeflate/libdeflate/NEWS +57 -0
  20. data/ext/libdeflate/libdeflate/README.md +170 -0
  21. data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
  22. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
  23. data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
  24. data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
  25. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
  26. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
  27. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
  28. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
  29. data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
  30. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
  31. data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
  32. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
  33. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
  34. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
  35. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
  36. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
  37. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
  38. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
  39. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
  40. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
  41. data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
  42. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
  43. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
  44. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
  45. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
  46. data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
  47. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
  48. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
  49. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
  50. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
  51. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
  52. data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
  53. data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
  54. data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
  55. data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
  56. data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
  57. data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
  58. data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
  59. data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
  60. data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
  67. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  68. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
  69. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
  70. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
  71. data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
  72. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
  73. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
  74. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
  75. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
  76. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
  77. data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
  78. data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
  79. data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
  80. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
  81. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
  82. data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
  83. data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
  84. data/ext/libdeflate/libdeflate_ext.c +389 -0
  85. data/ext/libdeflate/libdeflate_ext.h +8 -0
  86. data/lib/libdeflate.rb +2 -0
  87. data/lib/libdeflate/version.rb +3 -0
  88. data/libdeflate.gemspec +33 -0
  89. metadata +230 -0
@@ -0,0 +1,35 @@
+ /*
+  * lib_common.h - internal header included by all library code
+  */
+
+ #ifndef LIB_LIB_COMMON_H
+ #define LIB_LIB_COMMON_H
+
+ #ifdef LIBDEFLATE_H
+ #  error "lib_common.h must always be included before libdeflate.h"
+    /* because BUILDING_LIBDEFLATE must be set first */
+ #endif
+
+ #define BUILDING_LIBDEFLATE
+
+ #include "common_defs.h"
+
+ /*
+  * Prefix with "_libdeflate_" all global symbols which are not part of the API.
+  * This avoids exposing overly generic names when libdeflate is built as a
+  * static library.
+  *
+  * Note that the chosen prefix is not really important and can be changed
+  * without breaking library users. It was just chosen so that the resulting
+  * symbol names are unlikely to conflict with those from any other software.
+  * Also note that this fixup has no useful effect when libdeflate is built as a
+  * shared library, since these symbols are not exported.
+  */
+ #define SYM_FIXUP(sym) _libdeflate_##sym
+ #define aligned_malloc SYM_FIXUP(aligned_malloc)
+ #define aligned_free SYM_FIXUP(aligned_free)
+ #define deflate_get_compression_level SYM_FIXUP(deflate_get_compression_level)
+ #define _x86_cpu_features SYM_FIXUP(_x86_cpu_features)
+ #define x86_setup_cpu_features SYM_FIXUP(x86_setup_cpu_features)
+
+ #endif /* LIB_LIB_COMMON_H */
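
The SYM_FIXUP pattern above relies only on standard token pasting, so its effect can be checked in isolation. The following is a minimal, self-contained sketch (the name aligned_malloc is reused purely for illustration, and alignment handling is omitted); after preprocessing, every reference to aligned_malloc actually names _libdeflate_aligned_malloc:

#include <stdio.h>
#include <stdlib.h>

/* Same prefixing trick as lib_common.h: token-paste an internal prefix. */
#define SYM_FIXUP(sym)  _libdeflate_##sym
#define aligned_malloc  SYM_FIXUP(aligned_malloc)

/* This definition really defines _libdeflate_aligned_malloc(). */
void *aligned_malloc(size_t alignment, size_t size)
{
        (void)alignment;  /* alignment handling omitted in this sketch */
        return malloc(size);
}

int main(void)
{
        void *p = aligned_malloc(16, 64);  /* calls _libdeflate_aligned_malloc() */
        printf("allocated %p\n", p);
        free(p);
        return 0;
}
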
@@ -0,0 +1,53 @@
+ /*
+  * matchfinder_avx2.h - matchfinding routines optimized for Intel AVX2 (Advanced
+  * Vector Extensions)
+  */
+
+ #include <immintrin.h>
+
+ static forceinline bool
+ matchfinder_init_avx2(mf_pos_t *data, size_t size)
+ {
+         __m256i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m256i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
+         p = (__m256i *)data;
+         n = size / (sizeof(__m256i) * 4);
+         do {
+                 p[0] = v;
+                 p[1] = v;
+                 p[2] = v;
+                 p[3] = v;
+                 p += 4;
+         } while (--n);
+         return true;
+ }
+
+ static forceinline bool
+ matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
+ {
+         __m256i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m256i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
+         p = (__m256i *)data;
+         n = size / (sizeof(__m256i) * 4);
+         do {
+                 /* PADDSW: Add Packed Signed Integers With Signed Saturation */
+                 p[0] = _mm256_adds_epi16(p[0], v);
+                 p[1] = _mm256_adds_epi16(p[1], v);
+                 p[2] = _mm256_adds_epi16(p[2], v);
+                 p[3] = _mm256_adds_epi16(p[3], v);
+                 p += 4;
+         } while (--n);
+         return true;
+ }
@@ -0,0 +1,205 @@
+ /*
+  * matchfinder_common.h - common code for Lempel-Ziv matchfinding
+  */
+
+ #ifndef LIB_MATCHFINDER_COMMON_H
+ #define LIB_MATCHFINDER_COMMON_H
+
+ #include "lib_common.h"
+ #include "unaligned.h"
+
+ #ifndef MATCHFINDER_WINDOW_ORDER
+ #  error "MATCHFINDER_WINDOW_ORDER must be defined!"
+ #endif
+
+ #define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
+
+ typedef s16 mf_pos_t;
+
+ #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
+
+ #define MATCHFINDER_ALIGNMENT 8
+
+ #ifdef __AVX2__
+ #  include "matchfinder_avx2.h"
+ #  if MATCHFINDER_ALIGNMENT < 32
+ #    undef MATCHFINDER_ALIGNMENT
+ #    define MATCHFINDER_ALIGNMENT 32
+ #  endif
+ #endif
+
+ #ifdef __SSE2__
+ #  include "matchfinder_sse2.h"
+ #  if MATCHFINDER_ALIGNMENT < 16
+ #    undef MATCHFINDER_ALIGNMENT
+ #    define MATCHFINDER_ALIGNMENT 16
+ #  endif
+ #endif
+
+ #ifdef __ARM_NEON
+ #  include "matchfinder_neon.h"
+ #  if MATCHFINDER_ALIGNMENT < 16
+ #    undef MATCHFINDER_ALIGNMENT
+ #    define MATCHFINDER_ALIGNMENT 16
+ #  endif
+ #endif
+
+ /*
+  * Initialize the hash table portion of the matchfinder.
+  *
+  * Essentially, this is an optimized memset().
+  *
+  * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
+  */
+ static forceinline void
+ matchfinder_init(mf_pos_t *data, size_t num_entries)
+ {
+         size_t i;
+
+ #if defined(__AVX2__) && defined(_aligned_attribute)
+         if (matchfinder_init_avx2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__SSE2__) && defined(_aligned_attribute)
+         if (matchfinder_init_sse2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__ARM_NEON) && defined(_aligned_attribute)
+         if (matchfinder_init_neon(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+         for (i = 0; i < num_entries; i++)
+                 data[i] = MATCHFINDER_INITVAL;
+ }
+
+ /*
+  * Slide the matchfinder by WINDOW_SIZE bytes.
+  *
+  * This must be called just after each WINDOW_SIZE bytes have been run through
+  * the matchfinder.
+  *
+  * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
+  * The effect is that all entries are updated to be relative to the current
+  * position, rather than the position WINDOW_SIZE bytes prior.
+  *
+  * Underflow is detected and replaced with signed saturation. This ensures that
+  * once the sliding window has passed over a position, that position forever
+  * remains out of bounds.
+  *
+  * The array passed in must contain all matchfinder data that is
+  * position-relative. Concretely, this will include the hash table as well as
+  * the table of positions that is used to link together the sequences in each
+  * hash bucket. Note that in the latter table, the links are 1-ary in the case
+  * of "hash chains", and 2-ary in the case of "binary trees". In either case,
+  * the links need to be rebased in the same way.
+  */
+ static forceinline void
+ matchfinder_rebase(mf_pos_t *data, size_t num_entries)
+ {
+         size_t i;
+
+ #if defined(__AVX2__) && defined(_aligned_attribute)
+         if (matchfinder_rebase_avx2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__SSE2__) && defined(_aligned_attribute)
+         if (matchfinder_rebase_sse2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__ARM_NEON) && defined(_aligned_attribute)
+         if (matchfinder_rebase_neon(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+         if (MATCHFINDER_WINDOW_SIZE == 32768) {
+                 /* Branchless version for 32768 byte windows. If the value was
+                  * already negative, clear all bits except the sign bit; this
+                  * changes the value to -32768. Otherwise, set the sign bit;
+                  * this is equivalent to subtracting 32768. */
+                 for (i = 0; i < num_entries; i++) {
+                         u16 v = data[i];
+                         u16 sign_bit = v & 0x8000;
+                         v &= sign_bit - ((sign_bit >> 15) ^ 1);
+                         v |= 0x8000;
+                         data[i] = v;
+                 }
+                 return;
+         }
+
+         for (i = 0; i < num_entries; i++) {
+                 if (data[i] >= 0)
+                         data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+                 else
+                         data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+         }
+ }
+
+ /*
+  * The hash function: given a sequence prefix held in the low-order bits of a
+  * 32-bit value, multiply by a carefully-chosen large constant. Discard any
+  * bits of the product that don't fit in a 32-bit value, but take the
+  * next-highest @num_bits bits of the product as the hash value, as those have
+  * the most randomness.
+  */
+ static forceinline u32
+ lz_hash(u32 seq, unsigned num_bits)
+ {
+         return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
+ }
+
+ /*
+  * Return the number of bytes at @matchptr that match the bytes at @strptr, up
+  * to a maximum of @max_len. Initially, @start_len bytes are matched.
+  */
+ static forceinline unsigned
+ lz_extend(const u8 * const strptr, const u8 * const matchptr,
+           const unsigned start_len, const unsigned max_len)
+ {
+         unsigned len = start_len;
+         machine_word_t v_word;
+
+         if (UNALIGNED_ACCESS_IS_FAST) {
+
+                 if (likely(max_len - len >= 4 * WORDBYTES)) {
+
+ #define COMPARE_WORD_STEP                                        \
+                         v_word = load_word_unaligned(&matchptr[len]) ^  \
+                                  load_word_unaligned(&strptr[len]);     \
+                         if (v_word != 0)                                 \
+                                 goto word_differs;                       \
+                         len += WORDBYTES;                                \
+
+                         COMPARE_WORD_STEP
+                         COMPARE_WORD_STEP
+                         COMPARE_WORD_STEP
+                         COMPARE_WORD_STEP
+ #undef COMPARE_WORD_STEP
+                 }
+
+                 while (len + WORDBYTES <= max_len) {
+                         v_word = load_word_unaligned(&matchptr[len]) ^
+                                  load_word_unaligned(&strptr[len]);
+                         if (v_word != 0)
+                                 goto word_differs;
+                         len += WORDBYTES;
+                 }
+         }
+
+         while (len < max_len && matchptr[len] == strptr[len])
+                 len++;
+         return len;
+
+ word_differs:
+         if (CPU_IS_LITTLE_ENDIAN())
+                 len += (bsfw(v_word) >> 3);
+         else
+                 len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
+         return len;
+ }
+
+ #endif /* LIB_MATCHFINDER_COMMON_H */
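
The branchless 32768-byte-window rebase above is compact but not obvious at a glance. The snippet below is a stand-alone sanity check (not part of the library) that applies the same bit manipulation to a few sample positions and compares it against the straightforward "subtract 32768, saturating at -32768" formulation:

#include <stdio.h>
#include <stdint.h>

/* Same trick as matchfinder_rebase(): for a 32768-byte window, subtracting
 * 32768 with signed saturation can be done with two bit operations. */
static int16_t rebase_branchless(int16_t pos)
{
        uint16_t v = (uint16_t)pos;
        uint16_t sign_bit = v & 0x8000;

        v &= sign_bit - ((sign_bit >> 15) ^ 1);  /* negative -> keep sign bit only */
        v |= 0x8000;                             /* non-negative -> set sign bit   */
        return (int16_t)v;
}

/* Reference version: subtract 32768, saturating at the s16 minimum. */
static int16_t rebase_reference(int16_t pos)
{
        int32_t r = (int32_t)pos - 32768;
        return (r < -32768) ? (int16_t)-32768 : (int16_t)r;
}

int main(void)
{
        const int16_t samples[] = { 0, 1, 12345, 32767, -1, -20000, -32768 };
        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("%6d -> branchless %6d, reference %6d\n",
                       samples[i], rebase_branchless(samples[i]),
                       rebase_reference(samples[i]));
        return 0;
}
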
@@ -0,0 +1,61 @@
+ /*
+  * matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
+  * SIMD) instructions
+  */
+
+ #include <arm_neon.h>
+
+ static forceinline bool
+ matchfinder_init_neon(mf_pos_t *data, size_t size)
+ {
+         int16x8_t v, *p;
+         size_t n;
+
+         if ((size % (sizeof(int16x8_t) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = (int16x8_t) {
+                 MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+                 MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+                 MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+         };
+         p = (int16x8_t *)data;
+         n = size / (sizeof(int16x8_t) * 4);
+         do {
+                 p[0] = v;
+                 p[1] = v;
+                 p[2] = v;
+                 p[3] = v;
+                 p += 4;
+         } while (--n);
+         return true;
+ }
+
+ static forceinline bool
+ matchfinder_rebase_neon(mf_pos_t *data, size_t size)
+ {
+         int16x8_t v, *p;
+         size_t n;
+
+         if ((size % (sizeof(int16x8_t) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = (int16x8_t) {
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+         };
+         p = (int16x8_t *)data;
+         n = size / (sizeof(int16x8_t) * 4);
+         do {
+                 p[0] = vqaddq_s16(p[0], v);
+                 p[1] = vqaddq_s16(p[1], v);
+                 p[2] = vqaddq_s16(p[2], v);
+                 p[3] = vqaddq_s16(p[3], v);
+                 p += 4;
+         } while (--n);
+         return true;
+ }
@@ -0,0 +1,53 @@
+ /*
+  * matchfinder_sse2.h - matchfinding routines optimized for Intel SSE2
+  * (Streaming SIMD Extensions).
+  */
+
+ #include <emmintrin.h>
+
+ static forceinline bool
+ matchfinder_init_sse2(mf_pos_t *data, size_t size)
+ {
+         __m128i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m128i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm_set1_epi16(MATCHFINDER_INITVAL);
+         p = (__m128i *)data;
+         n = size / (sizeof(__m128i) * 4);
+         do {
+                 p[0] = v;
+                 p[1] = v;
+                 p[2] = v;
+                 p[3] = v;
+                 p += 4;
+         } while (--n);
+         return true;
+ }
+
+ static forceinline bool
+ matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
+ {
+         __m128i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m128i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
+         p = (__m128i *)data;
+         n = size / (sizeof(__m128i) * 4);
+         do {
+                 /* PADDSW: Add Packed Signed Integers With Signed Saturation */
+                 p[0] = _mm_adds_epi16(p[0], v);
+                 p[1] = _mm_adds_epi16(p[1], v);
+                 p[2] = _mm_adds_epi16(p[2], v);
+                 p[3] = _mm_adds_epi16(p[3], v);
+                 p += 4;
+         } while (--n);
+         return true;
+ }
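
The SSE2, AVX2, and NEON rebase variants above lean on the saturation the hardware provides directly: adding -MATCHFINDER_WINDOW_SIZE with a saturating add (PADDSW / vqaddq_s16) clamps at -32768 instead of wrapping, so entries that were already out of range stay out of range. As a small illustration, assuming an x86-64 build where SSE2 is always available, _mm_adds_epi16 can be exercised on a few sample positions:

#include <stdio.h>
#include <stdint.h>
#include <emmintrin.h>   /* SSE2 intrinsics */

int main(void)
{
        /* A few matchfinder-style positions: in-range and already-saturated. */
        __m128i pos   = _mm_setr_epi16(0, 1, 12345, 32767, -1, -20000, -32768, 100);
        __m128i delta = _mm_set1_epi16((int16_t)-32768);  /* -MATCHFINDER_WINDOW_SIZE */

        /* PADDSW: signed addition that saturates at INT16_MIN / INT16_MAX. */
        __m128i rebased = _mm_adds_epi16(pos, delta);

        int16_t out[8];
        _mm_storeu_si128((__m128i *)out, rebased);
        for (int i = 0; i < 8; i++)
                printf("lane %d: %6d\n", i, out[i]);
        return 0;
}
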
@@ -0,0 +1,202 @@
+ /*
+  * unaligned.h - inline functions for unaligned memory accesses
+  */
+
+ #ifndef LIB_UNALIGNED_H
+ #define LIB_UNALIGNED_H
+
+ #include "lib_common.h"
+
+ /*
+  * Naming note:
+  *
+  * {load,store}_*_unaligned() deal with raw bytes without endianness conversion.
+  * {get,put}_unaligned_*() deal with a specific endianness.
+  */
+
+ DEFINE_UNALIGNED_TYPE(u16)
+ DEFINE_UNALIGNED_TYPE(u32)
+ DEFINE_UNALIGNED_TYPE(u64)
+ DEFINE_UNALIGNED_TYPE(machine_word_t)
+
+ #define load_word_unaligned load_machine_word_t_unaligned
+ #define store_word_unaligned store_machine_word_t_unaligned
+
+ /***** Unaligned loads *****/
+
+ static forceinline u16
+ get_unaligned_le16(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return le16_bswap(load_u16_unaligned(p));
+         else
+                 return ((u16)p[1] << 8) | p[0];
+ }
+
+ static forceinline u16
+ get_unaligned_be16(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return be16_bswap(load_u16_unaligned(p));
+         else
+                 return ((u16)p[0] << 8) | p[1];
+ }
+
+ static forceinline u32
+ get_unaligned_le32(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return le32_bswap(load_u32_unaligned(p));
+         else
+                 return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
+                        ((u32)p[1] << 8) | p[0];
+ }
+
+ static forceinline u32
+ get_unaligned_be32(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return be32_bswap(load_u32_unaligned(p));
+         else
+                 return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
+                        ((u32)p[2] << 8) | p[3];
+ }
+
+ static forceinline u64
+ get_unaligned_le64(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return le64_bswap(load_u64_unaligned(p));
+         else
+                 return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
+                        ((u64)p[5] << 40) | ((u64)p[4] << 32) |
+                        ((u64)p[3] << 24) | ((u64)p[2] << 16) |
+                        ((u64)p[1] << 8) | p[0];
+ }
+
+ static forceinline machine_word_t
+ get_unaligned_leword(const u8 *p)
+ {
+         STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+         if (WORDBITS == 32)
+                 return get_unaligned_le32(p);
+         else
+                 return get_unaligned_le64(p);
+ }
+
+ /***** Unaligned stores *****/
+
+ static forceinline void
+ put_unaligned_le16(u16 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u16_unaligned(le16_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 0);
+                 p[1] = (u8)(v >> 8);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_be16(u16 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u16_unaligned(be16_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 8);
+                 p[1] = (u8)(v >> 0);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_le32(u32 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u32_unaligned(le32_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 0);
+                 p[1] = (u8)(v >> 8);
+                 p[2] = (u8)(v >> 16);
+                 p[3] = (u8)(v >> 24);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_be32(u32 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u32_unaligned(be32_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 24);
+                 p[1] = (u8)(v >> 16);
+                 p[2] = (u8)(v >> 8);
+                 p[3] = (u8)(v >> 0);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_le64(u64 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u64_unaligned(le64_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 0);
+                 p[1] = (u8)(v >> 8);
+                 p[2] = (u8)(v >> 16);
+                 p[3] = (u8)(v >> 24);
+                 p[4] = (u8)(v >> 32);
+                 p[5] = (u8)(v >> 40);
+                 p[6] = (u8)(v >> 48);
+                 p[7] = (u8)(v >> 56);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_leword(machine_word_t v, u8 *p)
+ {
+         STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+         if (WORDBITS == 32)
+                 put_unaligned_le32(v, p);
+         else
+                 put_unaligned_le64(v, p);
+ }
+
+ /***** 24-bit loads *****/
+
+ /*
+  * Given a 32-bit value that was loaded with the platform's native endianness,
+  * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
+  * bits contain the first 3 bytes, arranged in octets in a platform-dependent
+  * order, at the memory location from which the input 32-bit value was loaded.
+  */
+ static forceinline u32
+ loaded_u32_to_u24(u32 v)
+ {
+         if (CPU_IS_LITTLE_ENDIAN())
+                 return v & 0xFFFFFF;
+         else
+                 return v >> 8;
+ }
+
+ /*
+  * Load the next 3 bytes from the memory location @p into the 24 low-order bits
+  * of a 32-bit value. The order in which the 3 bytes will be arranged as octets
+  * in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES
+  * bytes must be available at @p; note that this may be more than 3.
+  */
+ static forceinline u32
+ load_u24_unaligned(const u8 *p)
+ {
+ #if UNALIGNED_ACCESS_IS_FAST
+ #  define LOAD_U24_REQUIRED_NBYTES 4
+         return loaded_u32_to_u24(load_u32_unaligned(p));
+ #else
+ #  define LOAD_U24_REQUIRED_NBYTES 3
+         if (CPU_IS_LITTLE_ENDIAN())
+                 return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
+         else
+                 return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
+ #endif
+ }
+
+ #endif /* LIB_UNALIGNED_H */
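
The get/put helpers above depend on library-internal macros (UNALIGNED_ACCESS_IS_FAST, the bswap helpers, DEFINE_UNALIGNED_TYPE), so they cannot be compiled stand-alone. The following self-contained sketch shows only the portable byte-by-byte fallback path in isolation; the helper names here are local to the example, not the library's:

#include <stdio.h>
#include <stdint.h>

/* Portable equivalents of the fallback branches above; the real helpers
 * additionally use a single unaligned load/store plus byte swap when
 * UNALIGNED_ACCESS_IS_FAST is set. */
static uint32_t example_get_le32(const uint8_t *p)
{
        return ((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16) |
               ((uint32_t)p[1] << 8) | p[0];
}

static void example_put_le32(uint32_t v, uint8_t *p)
{
        p[0] = (uint8_t)(v >> 0);
        p[1] = (uint8_t)(v >> 8);
        p[2] = (uint8_t)(v >> 16);
        p[3] = (uint8_t)(v >> 24);
}

int main(void)
{
        uint8_t buf[4];

        /* Round-trip a value regardless of host endianness or alignment. */
        example_put_le32(0x12345678u, buf);
        printf("bytes: %02x %02x %02x %02x\n", buf[0], buf[1], buf[2], buf[3]);
        printf("value: 0x%08x\n", example_get_le32(buf));
        return 0;
}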