libdeflate 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +1 -0
  6. data/.rubocop_todo.yml +9 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +52 -0
  11. data/Rakefile +15 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/ext/libdeflate/extconf.rb +14 -0
  15. data/ext/libdeflate/libdeflate/.gitignore +19 -0
  16. data/ext/libdeflate/libdeflate/COPYING +21 -0
  17. data/ext/libdeflate/libdeflate/Makefile +231 -0
  18. data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
  19. data/ext/libdeflate/libdeflate/NEWS +57 -0
  20. data/ext/libdeflate/libdeflate/README.md +170 -0
  21. data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
  22. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
  23. data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
  24. data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
  25. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
  26. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
  27. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
  28. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
  29. data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
  30. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
  31. data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
  32. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
  33. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
  34. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
  35. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
  36. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
  37. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
  38. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
  39. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
  40. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
  41. data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
  42. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
  43. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
  44. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
  45. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
  46. data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
  47. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
  48. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
  49. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
  50. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
  51. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
  52. data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
  53. data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
  54. data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
  55. data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
  56. data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
  57. data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
  58. data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
  59. data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
  60. data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
  67. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  68. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
  69. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
  70. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
  71. data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
  72. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
  73. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
  74. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
  75. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
  76. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
  77. data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
  78. data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
  79. data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
  80. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
  81. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
  82. data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
  83. data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
  84. data/ext/libdeflate/libdeflate_ext.c +389 -0
  85. data/ext/libdeflate/libdeflate_ext.h +8 -0
  86. data/lib/libdeflate.rb +2 -0
  87. data/lib/libdeflate/version.rb +3 -0
  88. data/libdeflate.gemspec +33 -0
  89. metadata +230 -0
data/ext/libdeflate/libdeflate/lib/lib_common.h
@@ -0,0 +1,35 @@
+ /*
+  * lib_common.h - internal header included by all library code
+  */
+
+ #ifndef LIB_LIB_COMMON_H
+ #define LIB_LIB_COMMON_H
+
+ #ifdef LIBDEFLATE_H
+ #  error "lib_common.h must always be included before libdeflate.h"
+    /* because BUILDING_LIBDEFLATE must be set first */
+ #endif
+
+ #define BUILDING_LIBDEFLATE
+
+ #include "common_defs.h"
+
+ /*
+  * Prefix with "_libdeflate_" all global symbols which are not part of the API.
+  * This avoids exposing overly generic names when libdeflate is built as a
+  * static library.
+  *
+  * Note that the chosen prefix is not really important and can be changed
+  * without breaking library users. It was just chosen so that the resulting
+  * symbol names are unlikely to conflict with those from any other software.
+  * Also note that this fixup has no useful effect when libdeflate is built as a
+  * shared library, since these symbols are not exported.
+  */
+ #define SYM_FIXUP(sym) _libdeflate_##sym
+ #define aligned_malloc SYM_FIXUP(aligned_malloc)
+ #define aligned_free SYM_FIXUP(aligned_free)
+ #define deflate_get_compression_level SYM_FIXUP(deflate_get_compression_level)
+ #define _x86_cpu_features SYM_FIXUP(_x86_cpu_features)
+ #define x86_setup_cpu_features SYM_FIXUP(x86_setup_cpu_features)
+
+ #endif /* LIB_LIB_COMMON_H */
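
The SYM_FIXUP renaming above happens entirely in the preprocessor, so every later definition or use of a listed name is silently rewritten to its prefixed form. A minimal standalone sketch of the effect (the test harness below is hypothetical, not part of this package):

    #include <stdio.h>

    #define SYM_FIXUP(sym) _libdeflate_##sym
    #define aligned_malloc SYM_FIXUP(aligned_malloc)

    /* Because of the macros above, this actually defines a function named
     * _libdeflate_aligned_malloc; no generic "aligned_malloc" symbol is
     * ever emitted into the object file. */
    void *aligned_malloc(unsigned long alignment, unsigned long size)
    {
            (void)alignment; (void)size;
            return 0;
    }

    #define STR_(x) #x
    #define STR(x) STR_(x)

    int main(void)
    {
            /* STR() macro-expands its argument first, so this prints the
             * renamed form: _libdeflate_aligned_malloc */
            printf("%s\n", STR(aligned_malloc));
            return 0;
    }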
data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h
@@ -0,0 +1,53 @@
+ /*
+  * matchfinder_avx2.h - matchfinding routines optimized for Intel AVX2 (Advanced
+  * Vector Extensions)
+  */
+
+ #include <immintrin.h>
+
+ static forceinline bool
+ matchfinder_init_avx2(mf_pos_t *data, size_t size)
+ {
+         __m256i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m256i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
+         p = (__m256i *)data;
+         n = size / (sizeof(__m256i) * 4);
+         do {
+                 p[0] = v;
+                 p[1] = v;
+                 p[2] = v;
+                 p[3] = v;
+                 p += 4;
+         } while (--n);
+         return true;
+ }
+
+ static forceinline bool
+ matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
+ {
+         __m256i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m256i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
+         p = (__m256i *)data;
+         n = size / (sizeof(__m256i) * 4);
+         do {
+                 /* PADDSW: Add Packed Signed Integers With Signed Saturation */
+                 p[0] = _mm256_adds_epi16(p[0], v);
+                 p[1] = _mm256_adds_epi16(p[1], v);
+                 p[2] = _mm256_adds_epi16(p[2], v);
+                 p[3] = _mm256_adds_epi16(p[3], v);
+                 p += 4;
+         } while (--n);
+         return true;
+ }
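
The saturating add (_mm256_adds_epi16 here, _mm_adds_epi16 in the SSE2 version and vqaddq_s16 in the NEON version below) is what makes the rebase safe: adding the negative window size subtracts it, but clamps at -32768 instead of wrapping, so a position that has already slid out of range can never wrap back into range. A scalar model of one 16-bit lane (an illustrative sketch, not code from this package):

    #include <assert.h>
    #include <stdint.h>

    /* One lane of a saturating signed 16-bit add, i.e. what PADDSW and
     * VQADD.S16 compute per element. */
    static int16_t adds_s16(int16_t a, int16_t b)
    {
            int32_t sum = (int32_t)a + (int32_t)b;
            if (sum < INT16_MIN) sum = INT16_MIN; /* clamp, don't wrap */
            if (sum > INT16_MAX) sum = INT16_MAX;
            return (int16_t)sum;
    }

    int main(void)
    {
            /* With a 32768-byte window: a still-valid position moves back
             * by the window size... */
            assert(adds_s16(100, -32768) == -32668);
            /* ...while an already-expired position saturates at -32768 and
             * stays out of bounds forever. */
            assert(adds_s16(-32668, -32768) == -32768);
            return 0;
    }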
data/ext/libdeflate/libdeflate/lib/matchfinder_common.h
@@ -0,0 +1,205 @@
+ /*
+  * matchfinder_common.h - common code for Lempel-Ziv matchfinding
+  */
+
+ #ifndef LIB_MATCHFINDER_COMMON_H
+ #define LIB_MATCHFINDER_COMMON_H
+
+ #include "lib_common.h"
+ #include "unaligned.h"
+
+ #ifndef MATCHFINDER_WINDOW_ORDER
+ #  error "MATCHFINDER_WINDOW_ORDER must be defined!"
+ #endif
+
+ #define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
+
+ typedef s16 mf_pos_t;
+
+ #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
+
+ #define MATCHFINDER_ALIGNMENT 8
+
+ #ifdef __AVX2__
+ #  include "matchfinder_avx2.h"
+ #  if MATCHFINDER_ALIGNMENT < 32
+ #    undef MATCHFINDER_ALIGNMENT
+ #    define MATCHFINDER_ALIGNMENT 32
+ #  endif
+ #endif
+
+ #ifdef __SSE2__
+ #  include "matchfinder_sse2.h"
+ #  if MATCHFINDER_ALIGNMENT < 16
+ #    undef MATCHFINDER_ALIGNMENT
+ #    define MATCHFINDER_ALIGNMENT 16
+ #  endif
+ #endif
+
+ #ifdef __ARM_NEON
+ #  include "matchfinder_neon.h"
+ #  if MATCHFINDER_ALIGNMENT < 16
+ #    undef MATCHFINDER_ALIGNMENT
+ #    define MATCHFINDER_ALIGNMENT 16
+ #  endif
+ #endif
+
+ /*
+  * Initialize the hash table portion of the matchfinder.
+  *
+  * Essentially, this is an optimized memset().
+  *
+  * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
+  */
+ static forceinline void
+ matchfinder_init(mf_pos_t *data, size_t num_entries)
+ {
+         size_t i;
+
+ #if defined(__AVX2__) && defined(_aligned_attribute)
+         if (matchfinder_init_avx2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__SSE2__) && defined(_aligned_attribute)
+         if (matchfinder_init_sse2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__ARM_NEON) && defined(_aligned_attribute)
+         if (matchfinder_init_neon(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+         for (i = 0; i < num_entries; i++)
+                 data[i] = MATCHFINDER_INITVAL;
+ }
+
+ /*
+  * Slide the matchfinder by WINDOW_SIZE bytes.
+  *
+  * This must be called just after each WINDOW_SIZE bytes have been run through
+  * the matchfinder.
+  *
+  * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
+  * The effect is that all entries are updated to be relative to the current
+  * position, rather than the position WINDOW_SIZE bytes prior.
+  *
+  * Underflow is detected and replaced with signed saturation. This ensures that
+  * once the sliding window has passed over a position, that position forever
+  * remains out of bounds.
+  *
+  * The array passed in must contain all matchfinder data that is
+  * position-relative. Concretely, this will include the hash table as well as
+  * the table of positions that is used to link together the sequences in each
+  * hash bucket. Note that in the latter table, the links are 1-ary in the case
+  * of "hash chains", and 2-ary in the case of "binary trees". In either case,
+  * the links need to be rebased in the same way.
+  */
+ static forceinline void
+ matchfinder_rebase(mf_pos_t *data, size_t num_entries)
+ {
+         size_t i;
+
+ #if defined(__AVX2__) && defined(_aligned_attribute)
+         if (matchfinder_rebase_avx2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__SSE2__) && defined(_aligned_attribute)
+         if (matchfinder_rebase_sse2(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+ #if defined(__ARM_NEON) && defined(_aligned_attribute)
+         if (matchfinder_rebase_neon(data, num_entries * sizeof(data[0])))
+                 return;
+ #endif
+
+         if (MATCHFINDER_WINDOW_SIZE == 32768) {
+                 /* Branchless version for 32768 byte windows. If the value was
+                  * already negative, clear all bits except the sign bit; this
+                  * changes the value to -32768. Otherwise, set the sign bit;
+                  * this is equivalent to subtracting 32768. */
+                 for (i = 0; i < num_entries; i++) {
+                         u16 v = data[i];
+                         u16 sign_bit = v & 0x8000;
+                         v &= sign_bit - ((sign_bit >> 15) ^ 1);
+                         v |= 0x8000;
+                         data[i] = v;
+                 }
+                 return;
+         }
+
+         for (i = 0; i < num_entries; i++) {
+                 if (data[i] >= 0)
+                         data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+                 else
+                         data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+         }
+ }
+
+ /*
+  * The hash function: given a sequence prefix held in the low-order bits of a
+  * 32-bit value, multiply by a carefully-chosen large constant. Discard any
+  * bits of the product that don't fit in a 32-bit value, but take the
+  * next-highest @num_bits bits of the product as the hash value, as those have
+  * the most randomness.
+  */
+ static forceinline u32
+ lz_hash(u32 seq, unsigned num_bits)
+ {
+         return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
+ }
+
+ /*
+  * Return the number of bytes at @matchptr that match the bytes at @strptr, up
+  * to a maximum of @max_len. Initially, @start_len bytes are matched.
+  */
+ static forceinline unsigned
+ lz_extend(const u8 * const strptr, const u8 * const matchptr,
+           const unsigned start_len, const unsigned max_len)
+ {
+         unsigned len = start_len;
+         machine_word_t v_word;
+
+         if (UNALIGNED_ACCESS_IS_FAST) {
+
+                 if (likely(max_len - len >= 4 * WORDBYTES)) {
+
+ #define COMPARE_WORD_STEP                                       \
+                 v_word = load_word_unaligned(&matchptr[len]) ^  \
+                          load_word_unaligned(&strptr[len]);     \
+                 if (v_word != 0)                                \
+                         goto word_differs;                      \
+                 len += WORDBYTES;                               \
+
+                         COMPARE_WORD_STEP
+                         COMPARE_WORD_STEP
+                         COMPARE_WORD_STEP
+                         COMPARE_WORD_STEP
+ #undef COMPARE_WORD_STEP
+                 }
+
+                 while (len + WORDBYTES <= max_len) {
+                         v_word = load_word_unaligned(&matchptr[len]) ^
+                                  load_word_unaligned(&strptr[len]);
+                         if (v_word != 0)
+                                 goto word_differs;
+                         len += WORDBYTES;
+                 }
+         }
+
+         while (len < max_len && matchptr[len] == strptr[len])
+                 len++;
+         return len;
+
+ word_differs:
+         if (CPU_IS_LITTLE_ENDIAN())
+                 len += (bsfw(v_word) >> 3);
+         else
+                 len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
+         return len;
+ }
+
+ #endif /* LIB_MATCHFINDER_COMMON_H */
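
The branchless 32768-byte-window path in matchfinder_rebase() above is worth unpacking: for a non-negative entry, setting the sign bit of the 16-bit value is the same as subtracting 32768; for a negative entry, the mask expression keeps only the sign bit, which is exactly the saturation value -32768. A quick exhaustive self-check of that equivalence (a standalone sketch, not code from this package):

    #include <assert.h>
    #include <stdint.h>

    /* The bit trick from matchfinder_rebase(), on one entry. */
    static int16_t rebase_branchless(int16_t pos)
    {
            uint16_t v = (uint16_t)pos;
            uint16_t sign_bit = v & 0x8000;
            v &= sign_bit - ((sign_bit >> 15) ^ 1);
            v |= 0x8000;
            return (int16_t)v;
    }

    /* Reference behavior: subtract the window size, saturating at -32768. */
    static int16_t rebase_reference(int16_t pos)
    {
            int32_t r = (int32_t)pos - 32768;
            return (int16_t)(r < -32768 ? -32768 : r);
    }

    int main(void)
    {
            /* Check every possible 16-bit entry value. */
            for (int32_t pos = -32768; pos <= 32767; pos++)
                    assert(rebase_branchless((int16_t)pos) ==
                           rebase_reference((int16_t)pos));
            return 0;
    }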
data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h
@@ -0,0 +1,61 @@
+ /*
+  * matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
+  * SIMD) instructions
+  */
+
+ #include <arm_neon.h>
+
+ static forceinline bool
+ matchfinder_init_neon(mf_pos_t *data, size_t size)
+ {
+         int16x8_t v, *p;
+         size_t n;
+
+         if ((size % (sizeof(int16x8_t) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = (int16x8_t) {
+                 MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+                 MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+                 MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
+         };
+         p = (int16x8_t *)data;
+         n = size / (sizeof(int16x8_t) * 4);
+         do {
+                 p[0] = v;
+                 p[1] = v;
+                 p[2] = v;
+                 p[3] = v;
+                 p += 4;
+         } while (--n);
+         return true;
+ }
+
+ static forceinline bool
+ matchfinder_rebase_neon(mf_pos_t *data, size_t size)
+ {
+         int16x8_t v, *p;
+         size_t n;
+
+         if ((size % (sizeof(int16x8_t) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = (int16x8_t) {
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+                 (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
+         };
+         p = (int16x8_t *)data;
+         n = size / (sizeof(int16x8_t) * 4);
+         do {
+                 p[0] = vqaddq_s16(p[0], v);
+                 p[1] = vqaddq_s16(p[1], v);
+                 p[2] = vqaddq_s16(p[2], v);
+                 p[3] = vqaddq_s16(p[3], v);
+                 p += 4;
+         } while (--n);
+         return true;
+ }
data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h
@@ -0,0 +1,53 @@
+ /*
+  * matchfinder_sse2.h - matchfinding routines optimized for Intel SSE2
+  * (Streaming SIMD Extensions).
+  */
+
+ #include <emmintrin.h>
+
+ static forceinline bool
+ matchfinder_init_sse2(mf_pos_t *data, size_t size)
+ {
+         __m128i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m128i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm_set1_epi16(MATCHFINDER_INITVAL);
+         p = (__m128i *)data;
+         n = size / (sizeof(__m128i) * 4);
+         do {
+                 p[0] = v;
+                 p[1] = v;
+                 p[2] = v;
+                 p[3] = v;
+                 p += 4;
+         } while (--n);
+         return true;
+ }
+
+ static forceinline bool
+ matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
+ {
+         __m128i v, *p;
+         size_t n;
+
+         if ((size % (sizeof(__m128i) * 4)) != 0)
+                 return false;
+
+         STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+         v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
+         p = (__m128i *)data;
+         n = size / (sizeof(__m128i) * 4);
+         do {
+                 /* PADDSW: Add Packed Signed Integers With Signed Saturation */
+                 p[0] = _mm_adds_epi16(p[0], v);
+                 p[1] = _mm_adds_epi16(p[1], v);
+                 p[2] = _mm_adds_epi16(p[2], v);
+                 p[3] = _mm_adds_epi16(p[3], v);
+                 p += 4;
+         } while (--n);
+         return true;
+ }
data/ext/libdeflate/libdeflate/lib/unaligned.h
@@ -0,0 +1,202 @@
+ /*
+  * unaligned.h - inline functions for unaligned memory accesses
+  */
+
+ #ifndef LIB_UNALIGNED_H
+ #define LIB_UNALIGNED_H
+
+ #include "lib_common.h"
+
+ /*
+  * Naming note:
+  *
+  * {load,store}_*_unaligned() deal with raw bytes without endianness conversion.
+  * {get,put}_unaligned_*() deal with a specific endianness.
+  */
+
+ DEFINE_UNALIGNED_TYPE(u16)
+ DEFINE_UNALIGNED_TYPE(u32)
+ DEFINE_UNALIGNED_TYPE(u64)
+ DEFINE_UNALIGNED_TYPE(machine_word_t)
+
+ #define load_word_unaligned load_machine_word_t_unaligned
+ #define store_word_unaligned store_machine_word_t_unaligned
+
+ /***** Unaligned loads *****/
+
+ static forceinline u16
+ get_unaligned_le16(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return le16_bswap(load_u16_unaligned(p));
+         else
+                 return ((u16)p[1] << 8) | p[0];
+ }
+
+ static forceinline u16
+ get_unaligned_be16(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return be16_bswap(load_u16_unaligned(p));
+         else
+                 return ((u16)p[0] << 8) | p[1];
+ }
+
+ static forceinline u32
+ get_unaligned_le32(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return le32_bswap(load_u32_unaligned(p));
+         else
+                 return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
+                        ((u32)p[1] << 8) | p[0];
+ }
+
+ static forceinline u32
+ get_unaligned_be32(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return be32_bswap(load_u32_unaligned(p));
+         else
+                 return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
+                        ((u32)p[2] << 8) | p[3];
+ }
+
+ static forceinline u64
+ get_unaligned_le64(const u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST)
+                 return le64_bswap(load_u64_unaligned(p));
+         else
+                 return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
+                        ((u64)p[5] << 40) | ((u64)p[4] << 32) |
+                        ((u64)p[3] << 24) | ((u64)p[2] << 16) |
+                        ((u64)p[1] << 8) | p[0];
+ }
+
+ static forceinline machine_word_t
+ get_unaligned_leword(const u8 *p)
+ {
+         STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+         if (WORDBITS == 32)
+                 return get_unaligned_le32(p);
+         else
+                 return get_unaligned_le64(p);
+ }
+
+ /***** Unaligned stores *****/
+
+ static forceinline void
+ put_unaligned_le16(u16 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u16_unaligned(le16_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 0);
+                 p[1] = (u8)(v >> 8);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_be16(u16 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u16_unaligned(be16_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 8);
+                 p[1] = (u8)(v >> 0);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_le32(u32 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u32_unaligned(le32_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 0);
+                 p[1] = (u8)(v >> 8);
+                 p[2] = (u8)(v >> 16);
+                 p[3] = (u8)(v >> 24);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_be32(u32 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u32_unaligned(be32_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 24);
+                 p[1] = (u8)(v >> 16);
+                 p[2] = (u8)(v >> 8);
+                 p[3] = (u8)(v >> 0);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_le64(u64 v, u8 *p)
+ {
+         if (UNALIGNED_ACCESS_IS_FAST) {
+                 store_u64_unaligned(le64_bswap(v), p);
+         } else {
+                 p[0] = (u8)(v >> 0);
+                 p[1] = (u8)(v >> 8);
+                 p[2] = (u8)(v >> 16);
+                 p[3] = (u8)(v >> 24);
+                 p[4] = (u8)(v >> 32);
+                 p[5] = (u8)(v >> 40);
+                 p[6] = (u8)(v >> 48);
+                 p[7] = (u8)(v >> 56);
+         }
+ }
+
+ static forceinline void
+ put_unaligned_leword(machine_word_t v, u8 *p)
+ {
+         STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+         if (WORDBITS == 32)
+                 put_unaligned_le32(v, p);
+         else
+                 put_unaligned_le64(v, p);
+ }
+
+ /***** 24-bit loads *****/
+
+ /*
+  * Given a 32-bit value that was loaded with the platform's native endianness,
+  * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
+  * bits contain the first 3 bytes, arranged in octets in a platform-dependent
+  * order, at the memory location from which the input 32-bit value was loaded.
+  */
+ static forceinline u32
+ loaded_u32_to_u24(u32 v)
+ {
+         if (CPU_IS_LITTLE_ENDIAN())
+                 return v & 0xFFFFFF;
+         else
+                 return v >> 8;
+ }
+
+ /*
+  * Load the next 3 bytes from the memory location @p into the 24 low-order bits
+  * of a 32-bit value. The order in which the 3 bytes will be arranged as octets
+  * in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES
+  * bytes must be available at @p; note that this may be more than 3.
+  */
+ static forceinline u32
+ load_u24_unaligned(const u8 *p)
+ {
+ #if UNALIGNED_ACCESS_IS_FAST
+ #  define LOAD_U24_REQUIRED_NBYTES 4
+         return loaded_u32_to_u24(load_u32_unaligned(p));
+ #else
+ #  define LOAD_U24_REQUIRED_NBYTES 3
+         if (CPU_IS_LITTLE_ENDIAN())
+                 return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
+         else
+                 return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
+ #endif
+ }
+
+ #endif /* LIB_UNALIGNED_H */
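
As the naming note in this header says, the get/put helpers pin down a byte order, and the byte-by-byte fallback branches make that order concrete regardless of the host CPU. A small sketch of the portable little-endian 32-bit decode path from get_unaligned_le32() (standalone, not code from this package):

    #include <assert.h>
    #include <stdint.h>

    /* The portable fallback from get_unaligned_le32(): assemble a 32-bit
     * value from 4 bytes, least-significant byte first. Works identically
     * on little- and big-endian hosts, with no alignment requirement. */
    static uint32_t le32_byte_by_byte(const uint8_t *p)
    {
            return ((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16) |
                   ((uint32_t)p[1] << 8) | p[0];
    }

    int main(void)
    {
            const uint8_t buf[] = { 0x78, 0x56, 0x34, 0x12 };

            /* Little-endian byte stream 78 56 34 12 decodes to 0x12345678
             * on any host; the fast path in the header reaches the same
             * result with one raw load plus an optional byte swap. */
            assert(le32_byte_by_byte(buf) == 0x12345678UL);
            return 0;
    }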