ttcrypt 0.0.7 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1976 @@
1
+ /* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
2
+ /**
3
+ * Basic type definitions.
4
+ *
5
+ * This header file defines the generic integer types that will be used
6
+ * for the implementation of hash functions; it also contains helper
7
+ * functions which encode and decode multi-byte integer values, using
8
+ * either little-endian or big-endian conventions.
9
+ *
10
+ * This file contains a compile-time test on the size of a byte
11
+ * (the <code>unsigned char</code> C type). If bytes are not octets,
12
+ * i.e. if they do not have a size of exactly 8 bits, then compilation
13
+ * is aborted. Architectures where bytes are not octets are relatively
14
+ * rare, even in the embedded devices market. We forbid non-octet bytes
15
+ * because there is no clear convention on how octet streams are encoded
16
+ * on such systems.
17
+ *
18
+ * ==========================(LICENSE BEGIN)============================
19
+ *
20
+ * Copyright (c) 2007-2010 Projet RNRT SAPHIR
21
+ *
22
+ * Permission is hereby granted, free of charge, to any person obtaining
23
+ * a copy of this software and associated documentation files (the
24
+ * "Software"), to deal in the Software without restriction, including
25
+ * without limitation the rights to use, copy, modify, merge, publish,
26
+ * distribute, sublicense, and/or sell copies of the Software, and to
27
+ * permit persons to whom the Software is furnished to do so, subject to
28
+ * the following conditions:
29
+ *
30
+ * The above copyright notice and this permission notice shall be
31
+ * included in all copies or substantial portions of the Software.
32
+ *
33
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
34
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
35
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
36
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
37
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
38
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
39
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40
+ *
41
+ * ===========================(LICENSE END)=============================
42
+ *
43
+ * @file sph_types.h
44
+ * @author Thomas Pornin <thomas.pornin@cryptolog.com>
45
+ */
46
+
47
+ #ifndef SPH_TYPES_H__
48
+ #define SPH_TYPES_H__
49
+
50
+ #include <limits.h>
51
+
52
+ /*
53
+ * All our I/O functions are defined over octet streams. We do not know
54
+ * how to handle input data if bytes are not octets.
55
+ */
56
+ #if CHAR_BIT != 8
57
+ #error This code requires 8-bit bytes
58
+ #endif
59
+
60
+ /* ============= BEGIN documentation block for Doxygen ============ */
61
+
62
+ #ifdef DOXYGEN_IGNORE
63
+
64
+ /** @mainpage sphlib C code documentation
65
+ *
66
+ * @section overview Overview
67
+ *
68
+ * <code>sphlib</code> is a library which contains implementations of
69
+ * various cryptographic hash functions. These pages have been generated
70
+ * with <a href="http://www.doxygen.org/index.html">doxygen</a> and
71
+ * document the API for the C implementations.
72
+ *
73
+ * The API is described in appropriate header files, which are available
74
+ * in the "Files" section. Each hash function family has its own header,
75
+ * whose name begins with <code>"sph_"</code> and contains the family
76
+ * name. For instance, the API for the RIPEMD hash functions is available
77
+ * in the header file <code>sph_ripemd.h</code>.
78
+ *
79
+ * @section principles API structure and conventions
80
+ *
81
+ * @subsection io Input/output conventions
82
+ *
83
+ * In all generality, hash functions operate over strings of bits.
84
+ * Individual bits are rarely encountered in C programming or actual
85
+ * communication protocols; most protocols converge on the ubiquitous
86
+ * "octet" which is a group of eight bits. Data is thus expressed as a
87
+ * stream of octets. The C programming language contains the notion of a
88
+ * "byte", which is a data unit managed under the type <code>"unsigned
89
+ * char"</code>. The C standard prescribes that a byte should hold at
90
+ * least eight bits, but possibly more. Most modern architectures, even
91
+ * in the embedded world, feature eight-bit bytes, i.e. map bytes to
92
+ * octets.
93
+ *
94
+ * Nevertheless, for some of the implemented hash functions, an extra
95
+ * API has been added, which allows the input of arbitrary sequences of
96
+ * bits: when the computation is about to be closed, 1 to 7 extra bits
97
+ * can be added. The functions for which this API is implemented include
98
+ * the SHA-2 functions and all SHA-3 candidates.
99
+ *
100
+ * <code>sphlib</code> defines hash functions which may hash octet streams,
101
+ * i.e. streams of bits where the number of bits is a multiple of eight.
102
+ * The data input functions in the <code>sphlib</code> API expect data
103
+ * as anonymous pointers (<code>"const void *"</code>) with a length
104
+ * (of type <code>"size_t"</code>) which gives the input data chunk length
105
+ * in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
106
+ * header contains a compile-time test which prevents compilation on
107
+ * architectures where this property is not met.
108
+ *
109
+ * The hash function output is also converted into bytes. All currently
110
+ * implemented hash functions have an output width which is a multiple of
111
+ * eight, and this is likely to remain true for new designs.
112
+ *
113
+ * Most hash functions internally convert input data into 32-bit or 64-bit
114
+ * words, using either little-endian or big-endian conversion. The hash
115
+ * output also often consists of such words, which are encoded into output
116
+ * bytes with a similar endianness convention. Some hash functions have
117
+ * been only loosely specified on that subject; when necessary,
118
+ * <code>sphlib</code> has been tested against published "reference"
119
+ * implementations in order to use the same conventions.
120
+ *
121
+ * @subsection shortname Function short name
122
+ *
123
+ * Each implemented hash function has a "short name" which is used
124
+ * internally to derive the identifiers for the functions and context
125
+ * structures which the function uses. For instance, MD5 has the short
126
+ * name <code>"md5"</code>. Short names are listed in the next section,
127
+ * for the implemented hash functions. In subsequent sections, the
128
+ * short name will be assumed to be <code>"XXX"</code>: replace with the
129
+ * actual hash function name to get the C identifier.
130
+ *
131
+ * Note: some functions within the same family share the same core
132
+ * elements, such as update function or context structure. Correspondingly,
133
+ * some of the defined types or functions may actually be macros which
134
+ * transparently evaluate to another type or function name.
135
+ *
136
+ * @subsection context Context structure
137
+ *
138
+ * Each implemented hash function has its own context structure, available
139
+ * under the type name <code>"sph_XXX_context"</code> for the hash function
140
+ * with short name <code>"XXX"</code>. This structure holds all needed
141
+ * state for a running hash computation.
142
+ *
143
+ * The contents of these structures are meant to be opaque, and private
144
+ * to the implementation. However, these contents are specified in the
145
+ * header files so that application code which uses <code>sphlib</code>
146
+ * may access the size of those structures.
147
+ *
148
+ * The caller is responsible for allocating the context structure,
149
+ * whether by dynamic allocation (<code>malloc()</code> or equivalent),
150
+ * static allocation (a global permanent variable), as an automatic
151
+ * variable ("on the stack"), or by any other means which ensures proper
152
+ * structure alignment. <code>sphlib</code> code performs no dynamic
153
+ * allocation by itself.
154
+ *
155
+ * The context must be initialized before use, using the
156
+ * <code>sph_XXX_init()</code> function. This function sets the context
157
+ * state to proper initial values for hashing.
158
+ *
159
+ * Since all state data is contained within the context structure,
160
+ * <code>sphlib</code> is thread-safe and reentrant: several hash
161
+ * computations may be performed in parallel, provided that they do not
162
+ * operate on the same context. Moreover, a running computation can be
163
+ * cloned by copying the context (with a simple <code>memcpy()</code>):
164
+ * the context and its clone are then independent and may be updated
165
+ * with new data and/or closed without interfering with each other.
166
+ * Similarly, a context structure can be moved in memory at will:
167
+ * context structures contain no pointer, in particular no pointer to
168
+ * themselves.
169
+ *
170
+ * @subsection dataio Data input
171
+ *
172
+ * Hashed data is input with the <code>sph_XXX()</code> function, which
173
+ * takes as parameters a pointer to the context, a pointer to the data
174
+ * to hash, and the number of data bytes to hash. The context is updated
175
+ * with the new data.
176
+ *
177
+ * Data can be input in one or several calls, with arbitrary input lengths.
178
+ * However, it is best, performance wise, to input data by relatively big
179
+ * chunks (say a few kilobytes), because this allows <code>sphlib</code> to
180
+ * optimize things and avoid internal copying.
181
+ *
182
+ * When all data has been input, the context can be closed with
183
+ * <code>sph_XXX_close()</code>. The hash output is computed and written
184
+ * into the provided buffer. The caller must take care to provide a
185
+ * buffer of appropriate length; e.g., when using SHA-1, the output is
186
+ * a 20-byte word, therefore the output buffer must be at least 20-byte
187
+ * long.
188
+ *
189
+ * For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
190
+ * function can be used instead of <code>sph_XXX_close()</code>. This
191
+ * function can take a few extra <strong>bits</strong> to be added at
192
+ * the end of the input message. This allows hashing messages with a
193
+ * bit length which is not a multiple of 8. The extra bits are provided
194
+ * as an unsigned integer value, and a bit count. The bit count must be
195
+ * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
196
+ * 0 (bits of numerical value 128, 64, 32... down to 1), in that order.
197
+ * For instance, to add three bits of value 1, 1 and 0, the unsigned
198
+ * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
199
+ * will be 3.
200
+ *
201
+ * The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
202
+ * it evaluates to the function output size, expressed in bits. For instance,
203
+ * <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
204
+ *
205
+ * When closed, the context is automatically reinitialized and can be
206
+ * immediately used for another computation. It is not necessary to call
207
+ * <code>sph_XXX_init()</code> after a close. Note that
208
+ * <code>sph_XXX_init()</code> can still be called to "reset" a context,
209
+ * i.e. forget previously input data, and get back to the initial state.
210
+ *
211
+ * @subsection alignment Data alignment
212
+ *
213
+ * "Alignment" is a property of data, which is said to be "properly
214
+ * aligned" when its emplacement in memory is such that the data can
215
+ * be optimally read by full words. This depends on the type of access;
216
+ * basically, some hash functions will read data by 32-bit or 64-bit
217
+ * words. <code>sphlib</code> does not mandate such alignment for input
218
+ * data, but using aligned data can substantially improve performance.
219
+ *
220
+ * As a rule, it is best to input data by chunks whose length (in bytes)
221
+ * is a multiple of eight, and which begins at "generally aligned"
222
+ * addresses, such as the base address returned by a call to
223
+ * <code>malloc()</code>.
224
+ *
225
+ * @section functions Implemented functions
226
+ *
227
+ * We give here the list of implemented functions. They are grouped by
228
+ * family; to each family corresponds a specific header file. Each
229
+ * individual function has its associated "short name". Please refer to
230
+ * the documentation for that header file to get details on the hash
231
+ * function denomination and provenance.
232
+ *
233
+ * Note: the functions marked with a '(64)' in the list below are
234
+ * available only if the C compiler provides an integer type of length
235
+ * 64 bits or more. Such a type is mandatory in the latest C standard
236
+ * (ISO 9899:1999, aka "C99") and is present in several older compilers
237
+ * as well, so chances are that such a type is available.
238
+ *
239
+ * - HAVAL family: file <code>sph_haval.h</code>
240
+ * - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
241
+ * - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
242
+ * - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
243
+ * - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
244
+ * - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
245
+ * - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
246
+ * - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
247
+ * - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
248
+ * - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
249
+ * - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
250
+ * - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
251
+ * - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
252
+ * - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
253
+ * - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
254
+ * - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
255
+ * - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
256
+ * - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
257
+ * - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
258
+ * - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
259
+ * - RadioGatun family: file <code>sph_radiogatun.h</code>
260
+ * - RadioGatun[32]: short name: <code>radiogatun32</code>
261
+ * - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
262
+ * - RIPEMD family: file <code>sph_ripemd.h</code>
263
+ * - RIPEMD: short name: <code>ripemd</code>
264
+ * - RIPEMD-128: short name: <code>ripemd128</code>
265
+ * - RIPEMD-160: short name: <code>ripemd160</code>
266
+ * - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
267
+ * - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
268
+ * - SHA-2 family: file <code>sph_sha2.h</code>
269
+ * - SHA-224: short name: <code>sha224</code>
270
+ * - SHA-256: short name: <code>sha256</code>
271
+ * - SHA-384: short name: <code>sha384</code> (64)
272
+ * - SHA-512: short name: <code>sha512</code> (64)
273
+ * - Tiger family: file <code>sph_tiger.h</code>
274
+ * - Tiger: short name: <code>tiger</code> (64)
275
+ * - Tiger2: short name: <code>tiger2</code> (64)
276
+ * - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
277
+ * - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
278
+ * - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
279
+ * - WHIRLPOOL: short name: <code>whirlpool</code> (64)
280
+ *
281
+ * The fourteen second-round SHA-3 candidates are also implemented;
282
+ * when applicable, the implementations follow the "final" specifications
283
+ * as published for the third round of the SHA-3 competition (BLAKE,
284
+ * Groestl, JH, Keccak and Skein have been tweaked for third round).
285
+ *
286
+ * - BLAKE family: file <code>sph_blake.h</code>
287
+ * - BLAKE-224: short name: <code>blake224</code>
288
+ * - BLAKE-256: short name: <code>blake256</code>
289
+ * - BLAKE-384: short name: <code>blake384</code>
290
+ * - BLAKE-512: short name: <code>blake512</code>
291
+ * - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
292
+ * - BMW-224: short name: <code>bmw224</code>
293
+ * - BMW-256: short name: <code>bmw256</code>
294
+ * - BMW-384: short name: <code>bmw384</code> (64)
295
+ * - BMW-512: short name: <code>bmw512</code> (64)
296
+ * - CubeHash family: file <code>sph_cubehash.h</code> (specified as
297
+ * CubeHash16/32 in the CubeHash specification)
298
+ * - CubeHash-224: short name: <code>cubehash224</code>
299
+ * - CubeHash-256: short name: <code>cubehash256</code>
300
+ * - CubeHash-384: short name: <code>cubehash384</code>
301
+ * - CubeHash-512: short name: <code>cubehash512</code>
302
+ * - ECHO family: file <code>sph_echo.h</code>
303
+ * - ECHO-224: short name: <code>echo224</code>
304
+ * - ECHO-256: short name: <code>echo256</code>
305
+ * - ECHO-384: short name: <code>echo384</code>
306
+ * - ECHO-512: short name: <code>echo512</code>
307
+ * - Fugue family: file <code>sph_fugue.h</code>
308
+ * - Fugue-224: short name: <code>fugue224</code>
309
+ * - Fugue-256: short name: <code>fugue256</code>
310
+ * - Fugue-384: short name: <code>fugue384</code>
311
+ * - Fugue-512: short name: <code>fugue512</code>
312
+ * - Groestl family: file <code>sph_groestl.h</code>
313
+ * - Groestl-224: short name: <code>groestl224</code>
314
+ * - Groestl-256: short name: <code>groestl256</code>
315
+ * - Groestl-384: short name: <code>groestl384</code>
316
+ * - Groestl-512: short name: <code>groestl512</code>
317
+ * - Hamsi family: file <code>sph_hamsi.h</code>
318
+ * - Hamsi-224: short name: <code>hamsi224</code>
319
+ * - Hamsi-256: short name: <code>hamsi256</code>
320
+ * - Hamsi-384: short name: <code>hamsi384</code>
321
+ * - Hamsi-512: short name: <code>hamsi512</code>
322
+ * - JH family: file <code>sph_jh.h</code>
323
+ * - JH-224: short name: <code>jh224</code>
324
+ * - JH-256: short name: <code>jh256</code>
325
+ * - JH-384: short name: <code>jh384</code>
326
+ * - JH-512: short name: <code>jh512</code>
327
+ * - Keccak family: file <code>sph_keccak.h</code>
328
+ * - Keccak-224: short name: <code>keccak224</code>
329
+ * - Keccak-256: short name: <code>keccak256</code>
330
+ * - Keccak-384: short name: <code>keccak384</code>
331
+ * - Keccak-512: short name: <code>keccak512</code>
332
+ * - Luffa family: file <code>sph_luffa.h</code>
333
+ * - Luffa-224: short name: <code>luffa224</code>
334
+ * - Luffa-256: short name: <code>luffa256</code>
335
+ * - Luffa-384: short name: <code>luffa384</code>
336
+ * - Luffa-512: short name: <code>luffa512</code>
337
+ * - Shabal family: file <code>sph_shabal.h</code>
338
+ * - Shabal-192: short name: <code>shabal192</code>
339
+ * - Shabal-224: short name: <code>shabal224</code>
340
+ * - Shabal-256: short name: <code>shabal256</code>
341
+ * - Shabal-384: short name: <code>shabal384</code>
342
+ * - Shabal-512: short name: <code>shabal512</code>
343
+ * - SHAvite-3 family: file <code>sph_shavite.h</code>
344
+ * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
345
+ * short name: <code>shavite224</code>
346
+ * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
347
+ * short name: <code>shavite256</code>
348
+ * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
349
+ * short name: <code>shavite384</code>
350
+ * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
351
+ * short name: <code>shavite512</code>
352
+ * - SIMD family: file <code>sph_simd.h</code>
353
+ * - SIMD-224: short name: <code>simd224</code>
354
+ * - SIMD-256: short name: <code>simd256</code>
355
+ * - SIMD-384: short name: <code>simd384</code>
356
+ * - SIMD-512: short name: <code>simd512</code>
357
+ * - Skein family: file <code>sph_skein.h</code>
358
+ * - Skein-224 (nominally specified as Skein-512-224): short name:
359
+ * <code>skein224</code> (64)
360
+ * - Skein-256 (nominally specified as Skein-512-256): short name:
361
+ * <code>skein256</code> (64)
362
+ * - Skein-384 (nominally specified as Skein-512-384): short name:
363
+ * <code>skein384</code> (64)
364
+ * - Skein-512 (nominally specified as Skein-512-512): short name:
365
+ * <code>skein512</code> (64)
366
+ *
367
+ * For the second-round SHA-3 candidates, the functions are as specified
368
+ * for round 2, i.e. with the "tweaks" that some candidates added
369
+ * between round 1 and round 2. Also, some of the submitted packages for
370
+ * round 2 contained errors, in the specification, reference code, or
371
+ * both. <code>sphlib</code> implements the corrected versions.
372
+ */
373
+
374
+ /** @hideinitializer
375
+ * Unsigned integer type whose length is at least 32 bits; on most
376
+ * architectures, it will have a width of exactly 32 bits. Unsigned C
377
+ * types implement arithmetics modulo a power of 2; use the
378
+ * <code>SPH_T32()</code> macro to ensure that the value is truncated
379
+ * to exactly 32 bits. Unless otherwise specified, all macros and
380
+ * functions which accept <code>sph_u32</code> values assume that these
381
+ * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
382
+ * where <code>sph_u32</code> is larger than that.
383
+ */
384
+ typedef __arch_dependant__ sph_u32;
385
+
386
+ /** @hideinitializer
387
+ * Signed integer type corresponding to <code>sph_u32</code>; it has
388
+ * width 32 bits or more.
389
+ */
390
+ typedef __arch_dependant__ sph_s32;
391
+
392
+ /** @hideinitializer
393
+ * Unsigned integer type whose length is at least 64 bits; on most
394
+ * architectures which feature such a type, it will have a width of
395
+ * exactly 64 bits. C99-compliant platforms will have this type; it
396
+ * is also defined when the GNU compiler (gcc) is used, and on
397
+ * platforms where <code>unsigned long</code> is large enough. If this
398
+ * type is not available, then some hash functions which depend on
399
+ * a 64-bit type will not be available (most notably SHA-384, SHA-512,
400
+ * Tiger and WHIRLPOOL).
401
+ */
402
+ typedef __arch_dependant__ sph_u64;
403
+
404
+ /** @hideinitializer
405
+ * Signed integer type corresponding to <code>sph_u64</code>; it has
406
+ * width 64 bits or more.
407
+ */
408
+ typedef __arch_dependant__ sph_s64;
409
+
410
+ /**
411
+ * This macro expands the token <code>x</code> into a suitable
412
+ * constant expression of type <code>sph_u32</code>. Depending on
413
+ * how this type is defined, a suffix such as <code>UL</code> may
414
+ * be appended to the argument.
415
+ *
416
+ * @param x the token to expand into a suitable constant expression
417
+ */
418
+ #define SPH_C32(x)
419
+
420
+ /**
421
+ * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
422
+ * a no-op, recognized as such by the compiler.
423
+ *
424
+ * @param x the value to truncate (of type <code>sph_u32</code>)
425
+ */
426
+ #define SPH_T32(x)
427
+
428
+ /**
429
+ * Rotate a 32-bit value by a number of bits to the left. The rotate
430
+ * count must reside between 1 and 31. This macro assumes that its
431
+ * first argument fits in 32 bits (no extra bit allowed on machines where
432
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
433
+ * several times.
434
+ *
435
+ * @param x the value to rotate (of type <code>sph_u32</code>)
436
+ * @param n the rotation count (between 1 and 31, inclusive)
437
+ */
438
+ #define SPH_ROTL32(x, n)
439
+
440
+ /**
441
+ * Rotate a 32-bit value by a number of bits to the right. The rotate
442
+ * count must reside between 1 and 31. This macro assumes that its
443
+ * first argument fits in 32 bits (no extra bit allowed on machines where
444
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
445
+ * several times.
446
+ *
447
+ * @param x the value to rotate (of type <code>sph_u32</code>)
448
+ * @param n the rotation count (between 1 and 31, inclusive)
449
+ */
450
+ #define SPH_ROTR32(x, n)
451
+
452
+ /**
453
+ * This macro is defined on systems for which a 64-bit type has been
454
+ * detected, and is used for <code>sph_u64</code>.
455
+ */
456
+ #define SPH_64
457
+
458
+ /**
459
+ * This macro is defined on systems for which the "native" integer size is
460
+ * 64 bits (64-bit values fit in one register).
461
+ */
462
+ #define SPH_64_TRUE
463
+
464
+ /**
465
+ * This macro expands the token <code>x</code> into a suitable
466
+ * constant expression of type <code>sph_u64</code>. Depending on
467
+ * how this type is defined, a suffix such as <code>ULL</code> may
468
+ * be appended to the argument. This macro is defined only if a
469
+ * 64-bit type was detected and used for <code>sph_u64</code>.
470
+ *
471
+ * @param x the token to expand into a suitable constant expression
472
+ */
473
+ #define SPH_C64(x)
474
+
475
+ /**
476
+ * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
477
+ * a no-op, recognized as such by the compiler. This macro is defined only
478
+ * if a 64-bit type was detected and used for <code>sph_u64</code>.
479
+ *
480
+ * @param x the value to truncate (of type <code>sph_u64</code>)
481
+ */
482
+ #define SPH_T64(x)
483
+
484
+ /**
485
+ * Rotate a 64-bit value by a number of bits to the left. The rotate
486
+ * count must reside between 1 and 63. This macro assumes that its
487
+ * first argument fits in 64 bits (no extra bit allowed on machines where
488
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
489
+ * several times. This macro is defined only if a 64-bit type was detected
490
+ * and used for <code>sph_u64</code>.
491
+ *
492
+ * @param x the value to rotate (of type <code>sph_u64</code>)
493
+ * @param n the rotation count (between 1 and 63, inclusive)
494
+ */
495
+ #define SPH_ROTL64(x, n)
496
+
497
+ /**
498
+ * Rotate a 64-bit value by a number of bits to the right. The rotate
499
+ * count must reside between 1 and 63. This macro assumes that its
500
+ * first argument fits in 64 bits (no extra bit allowed on machines where
501
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
502
+ * several times. This macro is defined only if a 64-bit type was detected
503
+ * and used for <code>sph_u64</code>.
504
+ *
505
+ * @param x the value to rotate (of type <code>sph_u64</code>)
506
+ * @param n the rotation count (between 1 and 63, inclusive)
507
+ */
508
+ #define SPH_ROTR64(x, n)
509
+
510
+ /**
511
+ * This macro evaluates to <code>inline</code> or an equivalent construction,
512
+ * if available on the compilation platform, or to nothing otherwise. This
513
+ * is used to declare inline functions, for which the compiler should
514
+ * endeavour to include the code directly in the caller. Inline functions
515
+ * are typically defined in header files as replacement for macros.
516
+ */
517
+ #define SPH_INLINE
518
+
519
+ /**
520
+ * This macro is defined if the platform has been detected as using
521
+ * little-endian convention. This implies that the <code>sph_u32</code>
522
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
523
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
524
+ */
525
+ #define SPH_LITTLE_ENDIAN
526
+
527
+ /**
528
+ * This macro is defined if the platform has been detected as using
529
+ * big-endian convention. This implies that the <code>sph_u32</code>
530
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
531
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
532
+ */
533
+ #define SPH_BIG_ENDIAN
534
+
535
+ /**
536
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
537
+ * can be read from and written to memory efficiently in little-endian
538
+ * convention. This is the case for little-endian platforms, and also
539
+ * for the big-endian platforms which have special little-endian access
540
+ * opcodes (e.g. Ultrasparc).
541
+ */
542
+ #define SPH_LITTLE_FAST
543
+
544
+ /**
545
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
546
+ * can be read from and written to memory efficiently in big-endian
547
+ * convention. This is the case for big-endian platforms, and also
548
+ * for the little-endian platforms which have special big-endian access
549
+ * opcodes.
550
+ */
551
+ #define SPH_BIG_FAST
552
+
553
+ /**
554
+ * On some platforms, this macro is defined to an unsigned integer type
555
+ * into which pointer values may be cast. The resulting value can then
556
+ * be tested for being a multiple of 2, 4 or 8, indicating an aligned
557
+ * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
558
+ */
559
+ #define SPH_UPTR
560
+
561
+ /**
562
+ * When defined, this macro indicates that unaligned memory accesses
563
+ * are possible with only a minor penalty, and thus should be preferred
564
+ * over strategies which first copy data to an aligned buffer.
565
+ */
566
+ #define SPH_UNALIGNED
567
+
568
+ /**
569
+ * Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
570
+ * <code>0x78563412</code>). This is an inline function which resorts
571
+ * to inline assembly on some platforms, for better performance.
572
+ *
573
+ * @param x the 32-bit value to byte-swap
574
+ * @return the byte-swapped value
575
+ */
576
+ static inline sph_u32 sph_bswap32(sph_u32 x);
577
+
578
+ /**
579
+ * Byte-swap a 64-bit word. This is an inline function which resorts
580
+ * to inline assembly on some platforms, for better performance. This
581
+ * function is defined only if a suitable 64-bit type was found for
582
+ * <code>sph_u64</code>.
583
+ *
584
+ * @param x the 64-bit value to byte-swap
585
+ * @return the byte-swapped value
586
+ */
587
+ static inline sph_u64 sph_bswap64(sph_u64 x);
588
+
589
+ /**
590
+ * Decode a 16-bit unsigned value from memory, in little-endian convention
591
+ * (least significant byte comes first).
592
+ *
593
+ * @param src the source address
594
+ * @return the decoded value
595
+ */
596
+ static inline unsigned sph_dec16le(const void *src);
597
+
598
+ /**
599
+ * Encode a 16-bit unsigned value into memory, in little-endian convention
600
+ * (least significant byte comes first).
601
+ *
602
+ * @param dst the destination buffer
603
+ * @param val the value to encode
604
+ */
605
+ static inline void sph_enc16le(void *dst, unsigned val);
606
+
607
+ /**
608
+ * Decode a 16-bit unsigned value from memory, in big-endian convention
609
+ * (most significant byte comes first).
610
+ *
611
+ * @param src the source address
612
+ * @return the decoded value
613
+ */
614
+ static inline unsigned sph_dec16be(const void *src);
615
+
616
+ /**
617
+ * Encode a 16-bit unsigned value into memory, in big-endian convention
618
+ * (most significant byte comes first).
619
+ *
620
+ * @param dst the destination buffer
621
+ * @param val the value to encode
622
+ */
623
+ static inline void sph_enc16be(void *dst, unsigned val);
624
+
625
+ /**
626
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
627
+ * (least significant byte comes first).
628
+ *
629
+ * @param src the source address
630
+ * @return the decoded value
631
+ */
632
+ static inline sph_u32 sph_dec32le(const void *src);
633
+
634
+ /**
635
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
636
+ * (least significant byte comes first). This function assumes that the
637
+ * source address is suitably aligned for a direct access, if the platform
638
+ * supports such things; it can thus be marginally faster than the generic
639
+ * <code>sph_dec32le()</code> function.
640
+ *
641
+ * @param src the source address
642
+ * @return the decoded value
643
+ */
644
+ static inline sph_u32 sph_dec32le_aligned(const void *src);
645
+
646
+ /**
647
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
648
+ * (least significant byte comes first).
649
+ *
650
+ * @param dst the destination buffer
651
+ * @param val the value to encode
652
+ */
653
+ static inline void sph_enc32le(void *dst, sph_u32 val);
654
+
655
+ /**
656
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
657
+ * (least significant byte comes first). This function assumes that the
658
+ * destination address is suitably aligned for a direct access, if the
659
+ * platform supports such things; it can thus be marginally faster than
660
+ * the generic <code>sph_enc32le()</code> function.
661
+ *
662
+ * @param dst the destination buffer
663
+ * @param val the value to encode
664
+ */
665
+ static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
666
+
667
+ /**
668
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
669
+ * (most significant byte comes first).
670
+ *
671
+ * @param src the source address
672
+ * @return the decoded value
673
+ */
674
+ static inline sph_u32 sph_dec32be(const void *src);
675
+
676
+ /**
677
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
678
+ * (most significant byte comes first). This function assumes that the
679
+ * source address is suitably aligned for a direct access, if the platform
680
+ * supports such things; it can thus be marginally faster than the generic
681
+ * <code>sph_dec32be()</code> function.
682
+ *
683
+ * @param src the source address
684
+ * @return the decoded value
685
+ */
686
+ static inline sph_u32 sph_dec32be_aligned(const void *src);
687
+
688
+ /**
689
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
690
+ * (most significant byte comes first).
691
+ *
692
+ * @param dst the destination buffer
693
+ * @param val the value to encode
694
+ */
695
+ static inline void sph_enc32be(void *dst, sph_u32 val);
696
+
697
+ /**
698
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
699
+ * (most significant byte comes first). This function assumes that the
700
+ * destination address is suitably aligned for a direct access, if the
701
+ * platform supports such things; it can thus be marginally faster than
702
+ * the generic <code>sph_enc32be()</code> function.
703
+ *
704
+ * @param dst the destination buffer
705
+ * @param val the value to encode
706
+ */
707
+ static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
708
+
709
+ /**
710
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
711
+ * (least significant byte comes first). This function is defined only
712
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
713
+ *
714
+ * @param src the source address
715
+ * @return the decoded value
716
+ */
717
+ static inline sph_u64 sph_dec64le(const void *src);
718
+
719
+ /**
720
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
721
+ * (least significant byte comes first). This function assumes that the
722
+ * source address is suitably aligned for a direct access, if the platform
723
+ * supports such things; it can thus be marginally faster than the generic
724
+ * <code>sph_dec64le()</code> function. This function is defined only
725
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
726
+ *
727
+ * @param src the source address
728
+ * @return the decoded value
729
+ */
730
+ static inline sph_u64 sph_dec64le_aligned(const void *src);
731
+
732
+ /**
733
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
734
+ * (least significant byte comes first). This function is defined only
735
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
736
+ *
737
+ * @param dst the destination buffer
738
+ * @param val the value to encode
739
+ */
740
+ static inline void sph_enc64le(void *dst, sph_u64 val);
741
+
742
+ /**
743
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
744
+ * (least significant byte comes first). This function assumes that the
745
+ * destination address is suitably aligned for a direct access, if the
746
+ * platform supports such things; it can thus be marginally faster than
747
+ * the generic <code>sph_enc64le()</code> function. This function is defined
748
+ * only if a suitable 64-bit type was detected and used for
749
+ * <code>sph_u64</code>.
750
+ *
751
+ * @param dst the destination buffer
752
+ * @param val the value to encode
753
+ */
754
+ static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
755
+
756
+ /**
757
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
758
+ * (most significant byte comes first). This function is defined only
759
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
760
+ *
761
+ * @param src the source address
762
+ * @return the decoded value
763
+ */
764
+ static inline sph_u64 sph_dec64be(const void *src);
765
+
766
+ /**
767
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
768
+ * (most significant byte comes first). This function assumes that the
769
+ * source address is suitably aligned for a direct access, if the platform
770
+ * supports such things; it can thus be marginally faster than the generic
771
+ * <code>sph_dec64be()</code> function. This function is defined only
772
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
773
+ *
774
+ * @param src the source address
775
+ * @return the decoded value
776
+ */
777
+ static inline sph_u64 sph_dec64be_aligned(const void *src);
778
+
779
+ /**
780
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
781
+ * (most significant byte comes first). This function is defined only
782
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
783
+ *
784
+ * @param dst the destination buffer
785
+ * @param val the value to encode
786
+ */
787
+ static inline void sph_enc64be(void *dst, sph_u64 val);
788
+
789
+ /**
790
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
791
+ * (most significant byte comes first). This function assumes that the
792
+ * destination address is suitably aligned for a direct access, if the
793
+ * platform supports such things; it can thus be marginally faster than
794
+ * the generic <code>sph_enc64be()</code> function. This function is defined
795
+ * only if a suitable 64-bit type was detected and used for
796
+ * <code>sph_u64</code>.
797
+ *
798
+ * @param dst the destination buffer
799
+ * @param val the value to encode
800
+ */
801
+ static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
802
+
803
+ #endif
804
+
805
+ /* ============== END documentation block for Doxygen ============= */
806
+
807
+ #ifndef DOXYGEN_IGNORE
808
+
809
+ /*
810
+ * We want to define the types "sph_u32" and "sph_u64" which hold
811
+ * unsigned values of at least, respectively, 32 and 64 bits. These
812
+ * tests should select appropriate types for most platforms. The
813
+ * macro "SPH_64" is defined if a 64-bit type is supported.
814
+ */
815
+
816
#undef SPH_64
#undef SPH_64_TRUE

#if defined __STDC__ && __STDC_VERSION__ >= 199901L

/*
 * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
 * type, if any, or otherwise use a wider type (which must exist, for
 * C99 conformance). The same strategy is used for the 32-bit type.
 */

#include <stdint.h>

#ifdef UINT32_MAX
typedef uint32_t sph_u32;
typedef int32_t sph_s32;
#else
typedef uint_fast32_t sph_u32;
typedef int_fast32_t sph_s32;
#endif
#if !SPH_NO_64
#ifdef UINT64_MAX
typedef uint64_t sph_u64;
typedef int64_t sph_s64;
#else
typedef uint_fast64_t sph_u64;
typedef int_fast64_t sph_s64;
#endif
#endif

#define SPH_C32(x)    ((sph_u32)(x))
#if !SPH_NO_64
#define SPH_C64(x)    ((sph_u64)(x))
#define SPH_64  1
#endif

#else

/*
 * On non-C99 systems, we use "unsigned int" if it is wide enough,
 * "unsigned long" otherwise. This supports all "reasonable" architectures.
 * We have to be cautious: pre-C99 preprocessors handle constants
 * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
 */

#if ((UINT_MAX >> 11) >> 11) >= 0x3FF

typedef unsigned int sph_u32;
typedef int sph_s32;

#define SPH_C32(x)    ((sph_u32)(x ## U))

#else

typedef unsigned long sph_u32;
typedef long sph_s32;

#define SPH_C32(x)    ((sph_u32)(x ## UL))

#endif

#if !SPH_NO_64

/*
 * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
 * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
 * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
 * test whether "unsigned long long" is available; we also know that
 * gcc features this type, even if the libc headers do not know it.
 */

#if ((ULONG_MAX >> 31) >> 31) >= 3

typedef unsigned long sph_u64;
typedef long sph_s64;

#define SPH_C64(x)    ((sph_u64)(x ## UL))

#define SPH_64  1

#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__

typedef unsigned long long sph_u64;
typedef long long sph_s64;

#define SPH_C64(x)    ((sph_u64)(x ## ULL))

#define SPH_64  1

#else

/*
 * No 64-bit type...
 */

#endif

#endif

#endif

/*
 * If the "unsigned long" type has length 64 bits or more, then this is
 * a "true" 64-bit architecture. This is also true with Visual C on
 * amd64, even though the "long" type is limited to 32 bits.
 */
#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
#define SPH_64_TRUE   1
#endif

/*
 * Truncation and rotation of 32-bit words.
 *
 * SPH_T32(x) keeps the low 32 bits of x. SPH_ROTL32(x, n) and
 * SPH_ROTR32(x, n) rotate the 32-bit value x by n bits to the left or
 * to the right; x must already fit in 32 bits.
 *
 * Both shift counts are reduced modulo 32. Without the masks, a rotation
 * count of 0 (or 32) would shift a 32-bit operand by its full width,
 * which is undefined behaviour in C. For counts in the 1..31 range the
 * masks do not change the result.
 *
 * Implementation note: some processors have specific opcodes to perform
 * a rotation. Recent versions of gcc recognize the expressions below and
 * use the relevant opcodes, when appropriate.
 */

#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) \
	SPH_T32(((x) << ((n) & 31)) | ((x) >> ((32 - (n)) & 31)))
#define SPH_ROTR32(x, n)   SPH_ROTL32(x, (32 - (n)))

#if SPH_64

/*
 * 64-bit counterparts of the macros above; shift counts are reduced
 * modulo 64 for the same reason.
 */

#define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_ROTL64(x, n) \
	SPH_T64(((x) << ((n) & 63)) | ((x) >> ((64 - (n)) & 63)))
#define SPH_ROTR64(x, n)   SPH_ROTL64(x, (64 - (n)))

#endif
943
+
944
#ifndef DOXYGEN_IGNORE
/*
 * Define SPH_INLINE to be an "inline" qualifier, if available. We define
 * some small macro-like functions which benefit greatly from being inlined.
 *
 * The plain "inline" keyword is used only when the compiler announces
 * C99 (or later). GNU-compatible compilers running in a pre-C99 mode
 * (e.g. "gcc -ansi") do not recognize "inline" as a keyword, so the
 * alternate spelling "__inline__" is used for them. Microsoft Visual C
 * gets "__inline". On any other compiler SPH_INLINE expands to nothing.
 */
#if defined __STDC__ && __STDC_VERSION__ >= 199901L
#define SPH_INLINE inline
#elif defined __GNUC__
#define SPH_INLINE __inline__
#elif defined _MSC_VER
#define SPH_INLINE __inline
#else
#define SPH_INLINE
#endif
#endif
957
+
958
+ /*
959
+ * We define some macros which qualify the architecture. These macros
960
+ * may be explicitly set externally (e.g. as compiler parameters). The
961
+ * code below sets those macros if they are not already defined.
962
+ *
963
+ * Most macros are boolean, thus evaluate to either zero or non-zero.
964
+ * The SPH_UPTR macro is special, in that it evaluates to a C type,
965
+ * or is not defined.
966
+ *
967
+ * SPH_UPTR if defined: unsigned type to cast pointers into
968
+ *
969
+ * SPH_UNALIGNED non-zero if unaligned accesses are efficient
970
+ * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian
971
+ * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian
972
+ * SPH_LITTLE_FAST non-zero if little-endian decoding is fast
973
+ * SPH_BIG_FAST non-zero if big-endian decoding is fast
974
+ *
975
+ * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
976
+ * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
977
+ * _must_ be non-zero in those situations. The 32-bit and 64-bit types
978
+ * _must_ also have an exact width.
979
+ *
980
+ * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode
981
+ * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode
982
+ * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc
983
+ * SPH_I386_GCC x86-compatible (32-bit) with gcc
984
+ * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C
985
+ * SPH_AMD64_GCC x86-compatible (64-bit) with gcc
986
+ * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C
987
+ * SPH_PPC32_GCC PowerPC, 32-bit, with gcc
988
+ * SPH_PPC64_GCC PowerPC, 64-bit, with gcc
989
+ *
990
+ * TODO: enhance automatic detection, for more architectures and compilers.
991
+ * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
992
+ * some very fast functions (e.g. MD4) when using unaligned input data.
993
+ * The CPU-specific-with-GCC macros are useful only for inline assembly,
994
+ * normally restrained to this header file.
995
+ */
996
+
997
/*
 * Architecture detection. Exactly one branch of the chain below is
 * selected; it defines the relevant SPH_DETECT_* macros, which are
 * turned into the public SPH_* macros afterwards (unless those were
 * set externally).
 */

/*
 * 32-bit x86, aka "i386 compatible".
 */
#if defined __i386__ || defined _M_IX86

#define SPH_DETECT_UNALIGNED         1
#define SPH_DETECT_LITTLE_ENDIAN     1
#define SPH_DETECT_UPTR              sph_u32
#ifdef __GNUC__
#define SPH_DETECT_I386_GCC          1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_I386_MSVC         1
#endif

/*
 * 64-bit x86, hereafter known as "amd64".
 */
#elif defined __x86_64 || defined _M_X64

#define SPH_DETECT_UNALIGNED         1
#define SPH_DETECT_LITTLE_ENDIAN     1
#define SPH_DETECT_UPTR              sph_u64
#ifdef __GNUC__
#define SPH_DETECT_AMD64_GCC         1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_AMD64_MSVC        1
#endif

/*
 * 64-bit Sparc architecture (implies v9).
 */
#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
	|| defined __sparcv9

#define SPH_DETECT_BIG_ENDIAN        1
#define SPH_DETECT_UPTR              sph_u64
#ifdef __GNUC__
#define SPH_DETECT_SPARCV9_GCC_64    1
#define SPH_DETECT_LITTLE_FAST       1
#endif

/*
 * 32-bit Sparc.
 */
#elif (defined __sparc__ || defined __sparc) \
	&& !(defined __sparcv9 || defined __arch64__)

#define SPH_DETECT_BIG_ENDIAN        1
#define SPH_DETECT_UPTR              sph_u32
#if defined __GNUC__ && defined __sparc_v9__
#define SPH_DETECT_SPARCV9_GCC_32    1
#define SPH_DETECT_LITTLE_FAST       1
#endif

/*
 * ARM, little-endian.
 */
#elif defined __arm__ && __ARMEL__

#define SPH_DETECT_LITTLE_ENDIAN     1

/*
 * MIPS, little-endian.
 */
#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__

#define SPH_DETECT_LITTLE_ENDIAN     1

/*
 * MIPS, big-endian.
 */
#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__

#define SPH_DETECT_BIG_ENDIAN        1

/*
 * PowerPC.
 */
#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
	|| defined _ARCH_PPC

/*
 * Note: we do not declare cross-endian access to be "fast": even if
 * using inline assembly, implementation should still assume that
 * keeping the decoded word in a temporary is faster than decoding
 * it again.
 */
#if defined __GNUC__
#if SPH_64_TRUE
#define SPH_DETECT_PPC64_GCC         1
#else
#define SPH_DETECT_PPC32_GCC         1
#endif
#endif

#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN        1
#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
#define SPH_DETECT_LITTLE_ENDIAN     1
#endif

/*
 * Itanium, 64-bit.
 */
#elif defined __ia64 || defined __ia64__ \
	|| defined __itanium__ || defined _M_IA64

#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN        1
#else
#define SPH_DETECT_LITTLE_ENDIAN     1
#endif
#if defined __LP64__ || defined _LP64
#define SPH_DETECT_UPTR              sph_u64
#else
#define SPH_DETECT_UPTR              sph_u32
#endif

/*
 * Architecture not listed above: fall back on the byte order announced
 * by the compiler itself through __BYTE_ORDER__, when that macro is
 * available. Only the endianness is deduced here; nothing is assumed
 * about unaligned accesses or about an integer type able to hold a
 * pointer, so SPH_DETECT_UNALIGNED and SPH_DETECT_UPTR are left unset.
 */
#elif defined __BYTE_ORDER__ && defined __ORDER_LITTLE_ENDIAN__ \
	&& __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__

#define SPH_DETECT_LITTLE_ENDIAN     1

#elif defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ \
	&& __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

#define SPH_DETECT_BIG_ENDIAN        1

#endif
1118
+
1119
/*
 * The generic "UltraSPARC with gcc" flag is raised as soon as either
 * the 32-bit or the 64-bit variant was detected.
 */
#if defined(SPH_DETECT_SPARCV9_GCC_64) || defined(SPH_DETECT_SPARCV9_GCC_32)
#define SPH_DETECT_SPARCV9_GCC       1
#endif

/*
 * Export each detected value under its public name, unless that name
 * was already defined (e.g. externally, as a compiler parameter).
 */
#if !defined(SPH_UNALIGNED) && defined(SPH_DETECT_UNALIGNED)
#define SPH_UNALIGNED         SPH_DETECT_UNALIGNED
#endif
#if !defined(SPH_UPTR) && defined(SPH_DETECT_UPTR)
#define SPH_UPTR              SPH_DETECT_UPTR
#endif
#if !defined(SPH_LITTLE_ENDIAN) && defined(SPH_DETECT_LITTLE_ENDIAN)
#define SPH_LITTLE_ENDIAN     SPH_DETECT_LITTLE_ENDIAN
#endif
#if !defined(SPH_BIG_ENDIAN) && defined(SPH_DETECT_BIG_ENDIAN)
#define SPH_BIG_ENDIAN        SPH_DETECT_BIG_ENDIAN
#endif
#if !defined(SPH_LITTLE_FAST) && defined(SPH_DETECT_LITTLE_FAST)
#define SPH_LITTLE_FAST       SPH_DETECT_LITTLE_FAST
#endif
#if !defined(SPH_BIG_FAST) && defined(SPH_DETECT_BIG_FAST)
#define SPH_BIG_FAST          SPH_DETECT_BIG_FAST
#endif
#if !defined(SPH_SPARCV9_GCC_32) && defined(SPH_DETECT_SPARCV9_GCC_32)
#define SPH_SPARCV9_GCC_32    SPH_DETECT_SPARCV9_GCC_32
#endif
#if !defined(SPH_SPARCV9_GCC_64) && defined(SPH_DETECT_SPARCV9_GCC_64)
#define SPH_SPARCV9_GCC_64    SPH_DETECT_SPARCV9_GCC_64
#endif
#if !defined(SPH_SPARCV9_GCC) && defined(SPH_DETECT_SPARCV9_GCC)
#define SPH_SPARCV9_GCC       SPH_DETECT_SPARCV9_GCC
#endif
#if !defined(SPH_I386_GCC) && defined(SPH_DETECT_I386_GCC)
#define SPH_I386_GCC          SPH_DETECT_I386_GCC
#endif
#if !defined(SPH_I386_MSVC) && defined(SPH_DETECT_I386_MSVC)
#define SPH_I386_MSVC         SPH_DETECT_I386_MSVC
#endif
#if !defined(SPH_AMD64_GCC) && defined(SPH_DETECT_AMD64_GCC)
#define SPH_AMD64_GCC         SPH_DETECT_AMD64_GCC
#endif
#if !defined(SPH_AMD64_MSVC) && defined(SPH_DETECT_AMD64_MSVC)
#define SPH_AMD64_MSVC        SPH_DETECT_AMD64_MSVC
#endif
#if !defined(SPH_PPC32_GCC) && defined(SPH_DETECT_PPC32_GCC)
#define SPH_PPC32_GCC         SPH_DETECT_PPC32_GCC
#endif
#if !defined(SPH_PPC64_GCC) && defined(SPH_DETECT_PPC64_GCC)
#define SPH_PPC64_GCC         SPH_DETECT_PPC64_GCC
#endif

/*
 * Unless stated otherwise, decoding in the native byte order is
 * considered fast.
 */
#if !defined(SPH_LITTLE_FAST) && SPH_LITTLE_ENDIAN
#define SPH_LITTLE_FAST       1
#endif
#if !defined(SPH_BIG_FAST) && SPH_BIG_ENDIAN
#define SPH_BIG_FAST          1
#endif

/*
 * The pointer-cast based code paths need to know the byte order.
 */
#if defined(SPH_UPTR) && !SPH_LITTLE_ENDIAN && !SPH_BIG_ENDIAN
#error SPH_UPTR defined, but endianness is not known.
#endif
1179
+
1180
+ #if SPH_I386_GCC && !SPH_NO_ASM
1181
+
1182
+ /*
1183
+ * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
1184
+ * values.
1185
+ */
1186
+
1187
+ static SPH_INLINE sph_u32
1188
+ sph_bswap32(sph_u32 x)
1189
+ {
1190
+ __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1191
+ return x;
1192
+ }
1193
+
1194
+ #if SPH_64
1195
+
1196
+ static SPH_INLINE sph_u64
1197
+ sph_bswap64(sph_u64 x)
1198
+ {
1199
+ return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1200
+ | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1201
+ }
1202
+
1203
+ #endif
1204
+
1205
+ #elif SPH_AMD64_GCC && !SPH_NO_ASM
1206
+
1207
+ /*
1208
+ * On x86 64-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
1209
+ * and 64-bit values.
1210
+ */
1211
+
1212
+ static SPH_INLINE sph_u32
1213
+ sph_bswap32(sph_u32 x)
1214
+ {
1215
+ __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1216
+ return x;
1217
+ }
1218
+
1219
+ #if SPH_64
1220
+
1221
+ static SPH_INLINE sph_u64
1222
+ sph_bswap64(sph_u64 x)
1223
+ {
1224
+ __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
1225
+ return x;
1226
+ }
1227
+
1228
+ #endif
1229
+
1230
+ /*
1231
+ * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
1232
+ * to generate proper opcodes for endianness swapping with the pure C
1233
+ * implementation below.
1234
+ *
1235
+
1236
+ #elif SPH_I386_MSVC && !SPH_NO_ASM
1237
+
1238
+ static __inline sph_u32 __declspec(naked) __fastcall
1239
+ sph_bswap32(sph_u32 x)
1240
+ {
1241
+ __asm {
1242
+ bswap ecx
1243
+ mov eax,ecx
1244
+ ret
1245
+ }
1246
+ }
1247
+
1248
+ #if SPH_64
1249
+
1250
+ static SPH_INLINE sph_u64
1251
+ sph_bswap64(sph_u64 x)
1252
+ {
1253
+ return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1254
+ | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1255
+ }
1256
+
1257
+ #endif
1258
+
1259
+ *
1260
+ * [end of disabled code]
1261
+ */
1262
+
1263
+ #else
1264
+
1265
+ static SPH_INLINE sph_u32
1266
+ sph_bswap32(sph_u32 x)
1267
+ {
1268
+ x = SPH_T32((x << 16) | (x >> 16));
1269
+ x = ((x & SPH_C32(0xFF00FF00)) >> 8)
1270
+ | ((x & SPH_C32(0x00FF00FF)) << 8);
1271
+ return x;
1272
+ }
1273
+
1274
+ #if SPH_64
1275
+
1276
+ /**
1277
+ * Byte-swap a 64-bit value.
1278
+ *
1279
+ * @param x the input value
1280
+ * @return the byte-swapped value
1281
+ */
1282
+ static SPH_INLINE sph_u64
1283
+ sph_bswap64(sph_u64 x)
1284
+ {
1285
+ x = SPH_T64((x << 32) | (x >> 32));
1286
+ x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
1287
+ | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16);
1288
+ x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
1289
+ | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8);
1290
+ return x;
1291
+ }
1292
+
1293
+ #endif
1294
+
1295
+ #endif
1296
+
1297
#if SPH_SPARCV9_GCC && !SPH_NO_ASM

/*
 * On UltraSPARC systems, native ordering is big-endian, but it is
 * possible to perform little-endian read accesses by specifying the
 * address space 0x88 (ASI_PRIMARY_LITTLE). There are two ways to do so:
 *
 *  - "lda [%reg]0x88,%dst", where %reg is the register which contains
 *    the source address and %dst is the destination register;
 *
 *  - "lda [%reg+imm]%asi,%dst", which takes the address space name from
 *    the %asi register.
 *
 * The latter format is better since it combines an addition and the
 * actual access in a single opcode; but it requires the setting (and
 * subsequent resetting) of %asi, which is slow. Some operations (i.e.
 * MD5 compression function) combine many successive little-endian read
 * accesses, which may share the same %asi setting. The macros below
 * contain the appropriate inline assembly.
 */

/*
 * Declare the local variable "sph_sparcv9_asi", save the current %asi
 * value into it, then set %asi to 0x88.
 */
#define SPH_SPARCV9_SET_ASI \
	sph_u32 sph_sparcv9_asi; \
	__asm__ __volatile__ ("rd %%asi,%0\n\twr %%g0,0x88,%%asi" \
		: "=r" (sph_sparcv9_asi));

/*
 * Write back into %asi the value held by "sph_sparcv9_asi".
 */
#define SPH_SPARCV9_RESET_ASI \
	__asm__ __volatile__ ("wr %%g0,%0,%%asi" \
		: : "r" (sph_sparcv9_asi));

/*
 * Load the 32-bit word at byte offset idx*4 from "base", through the
 * address space currently named by %asi. "idx" is pasted textually into
 * the assembly operand. This is a gcc statement expression; its value
 * is the loaded word.
 */
#define SPH_SPARCV9_DEC32LE(base, idx)   ({ \
		sph_u32 sph_sparcv9_word; \
		__asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
			: "=r" (sph_sparcv9_word) \
			: "r" (base)); \
		sph_sparcv9_word; \
	})

#endif
1331
+
1332
+ static SPH_INLINE void
1333
+ sph_enc16be(void *dst, unsigned val)
1334
+ {
1335
+ ((unsigned char *)dst)[0] = (val >> 8);
1336
+ ((unsigned char *)dst)[1] = val;
1337
+ }
1338
+
1339
+ static SPH_INLINE unsigned
1340
+ sph_dec16be(const void *src)
1341
+ {
1342
+ return ((unsigned)(((const unsigned char *)src)[0]) << 8)
1343
+ | (unsigned)(((const unsigned char *)src)[1]);
1344
+ }
1345
+
1346
+ static SPH_INLINE void
1347
+ sph_enc16le(void *dst, unsigned val)
1348
+ {
1349
+ ((unsigned char *)dst)[0] = val;
1350
+ ((unsigned char *)dst)[1] = val >> 8;
1351
+ }
1352
+
1353
+ static SPH_INLINE unsigned
1354
+ sph_dec16le(const void *src)
1355
+ {
1356
+ return (unsigned)(((const unsigned char *)src)[0])
1357
+ | ((unsigned)(((const unsigned char *)src)[1]) << 8);
1358
+ }
1359
+
1360
+ /**
1361
+ * Encode a 32-bit value into the provided buffer (big endian convention).
1362
+ *
1363
+ * @param dst the destination buffer
1364
+ * @param val the 32-bit value to encode
1365
+ */
1366
+ static SPH_INLINE void
1367
+ sph_enc32be(void *dst, sph_u32 val)
1368
+ {
1369
+ #if defined SPH_UPTR
1370
+ #if SPH_UNALIGNED
1371
+ #if SPH_LITTLE_ENDIAN
1372
+ val = sph_bswap32(val);
1373
+ #endif
1374
+ *(sph_u32 *)dst = val;
1375
+ #else
1376
+ if (((SPH_UPTR)dst & 3) == 0) {
1377
+ #if SPH_LITTLE_ENDIAN
1378
+ val = sph_bswap32(val);
1379
+ #endif
1380
+ *(sph_u32 *)dst = val;
1381
+ } else {
1382
+ ((unsigned char *)dst)[0] = (val >> 24);
1383
+ ((unsigned char *)dst)[1] = (val >> 16);
1384
+ ((unsigned char *)dst)[2] = (val >> 8);
1385
+ ((unsigned char *)dst)[3] = val;
1386
+ }
1387
+ #endif
1388
+ #else
1389
+ ((unsigned char *)dst)[0] = (val >> 24);
1390
+ ((unsigned char *)dst)[1] = (val >> 16);
1391
+ ((unsigned char *)dst)[2] = (val >> 8);
1392
+ ((unsigned char *)dst)[3] = val;
1393
+ #endif
1394
+ }
1395
+
1396
+ /**
1397
+ * Encode a 32-bit value into the provided buffer (big endian convention).
1398
+ * The destination buffer must be properly aligned.
1399
+ *
1400
+ * @param dst the destination buffer (32-bit aligned)
1401
+ * @param val the value to encode
1402
+ */
1403
+ static SPH_INLINE void
1404
+ sph_enc32be_aligned(void *dst, sph_u32 val)
1405
+ {
1406
+ #if SPH_LITTLE_ENDIAN
1407
+ *(sph_u32 *)dst = sph_bswap32(val);
1408
+ #elif SPH_BIG_ENDIAN
1409
+ *(sph_u32 *)dst = val;
1410
+ #else
1411
+ ((unsigned char *)dst)[0] = (val >> 24);
1412
+ ((unsigned char *)dst)[1] = (val >> 16);
1413
+ ((unsigned char *)dst)[2] = (val >> 8);
1414
+ ((unsigned char *)dst)[3] = val;
1415
+ #endif
1416
+ }
1417
+
1418
+ /**
1419
+ * Decode a 32-bit value from the provided buffer (big endian convention).
1420
+ *
1421
+ * @param src the source buffer
1422
+ * @return the decoded value
1423
+ */
1424
+ static SPH_INLINE sph_u32
1425
+ sph_dec32be(const void *src)
1426
+ {
1427
+ #if defined SPH_UPTR
1428
+ #if SPH_UNALIGNED
1429
+ #if SPH_LITTLE_ENDIAN
1430
+ return sph_bswap32(*(const sph_u32 *)src);
1431
+ #else
1432
+ return *(const sph_u32 *)src;
1433
+ #endif
1434
+ #else
1435
+ if (((SPH_UPTR)src & 3) == 0) {
1436
+ #if SPH_LITTLE_ENDIAN
1437
+ return sph_bswap32(*(const sph_u32 *)src);
1438
+ #else
1439
+ return *(const sph_u32 *)src;
1440
+ #endif
1441
+ } else {
1442
+ return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1443
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1444
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1445
+ | (sph_u32)(((const unsigned char *)src)[3]);
1446
+ }
1447
+ #endif
1448
+ #else
1449
+ return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1450
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1451
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1452
+ | (sph_u32)(((const unsigned char *)src)[3]);
1453
+ #endif
1454
+ }
1455
+
1456
+ /**
1457
+ * Decode a 32-bit value from the provided buffer (big endian convention).
1458
+ * The source buffer must be properly aligned.
1459
+ *
1460
+ * @param src the source buffer (32-bit aligned)
1461
+ * @return the decoded value
1462
+ */
1463
+ static SPH_INLINE sph_u32
1464
+ sph_dec32be_aligned(const void *src)
1465
+ {
1466
+ #if SPH_LITTLE_ENDIAN
1467
+ return sph_bswap32(*(const sph_u32 *)src);
1468
+ #elif SPH_BIG_ENDIAN
1469
+ return *(const sph_u32 *)src;
1470
+ #else
1471
+ return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1472
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1473
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1474
+ | (sph_u32)(((const unsigned char *)src)[3]);
1475
+ #endif
1476
+ }
1477
+
1478
+ /**
1479
+ * Encode a 32-bit value into the provided buffer (little endian convention).
1480
+ *
1481
+ * @param dst the destination buffer
1482
+ * @param val the 32-bit value to encode
1483
+ */
1484
+ static SPH_INLINE void
1485
+ sph_enc32le(void *dst, sph_u32 val)
1486
+ {
1487
+ #if defined SPH_UPTR
1488
+ #if SPH_UNALIGNED
1489
+ #if SPH_BIG_ENDIAN
1490
+ val = sph_bswap32(val);
1491
+ #endif
1492
+ *(sph_u32 *)dst = val;
1493
+ #else
1494
+ if (((SPH_UPTR)dst & 3) == 0) {
1495
+ #if SPH_BIG_ENDIAN
1496
+ val = sph_bswap32(val);
1497
+ #endif
1498
+ *(sph_u32 *)dst = val;
1499
+ } else {
1500
+ ((unsigned char *)dst)[0] = val;
1501
+ ((unsigned char *)dst)[1] = (val >> 8);
1502
+ ((unsigned char *)dst)[2] = (val >> 16);
1503
+ ((unsigned char *)dst)[3] = (val >> 24);
1504
+ }
1505
+ #endif
1506
+ #else
1507
+ ((unsigned char *)dst)[0] = val;
1508
+ ((unsigned char *)dst)[1] = (val >> 8);
1509
+ ((unsigned char *)dst)[2] = (val >> 16);
1510
+ ((unsigned char *)dst)[3] = (val >> 24);
1511
+ #endif
1512
+ }
1513
+
1514
+ /**
1515
+ * Encode a 32-bit value into the provided buffer (little endian convention).
1516
+ * The destination buffer must be properly aligned.
1517
+ *
1518
+ * @param dst the destination buffer (32-bit aligned)
1519
+ * @param val the value to encode
1520
+ */
1521
+ static SPH_INLINE void
1522
+ sph_enc32le_aligned(void *dst, sph_u32 val)
1523
+ {
1524
+ #if SPH_LITTLE_ENDIAN
1525
+ *(sph_u32 *)dst = val;
1526
+ #elif SPH_BIG_ENDIAN
1527
+ *(sph_u32 *)dst = sph_bswap32(val);
1528
+ #else
1529
+ ((unsigned char *)dst)[0] = val;
1530
+ ((unsigned char *)dst)[1] = (val >> 8);
1531
+ ((unsigned char *)dst)[2] = (val >> 16);
1532
+ ((unsigned char *)dst)[3] = (val >> 24);
1533
+ #endif
1534
+ }
1535
+
1536
+ /**
1537
+ * Decode a 32-bit value from the provided buffer (little endian convention).
1538
+ *
1539
+ * @param src the source buffer
1540
+ * @return the decoded value
1541
+ */
1542
+ static SPH_INLINE sph_u32
1543
+ sph_dec32le(const void *src)
1544
+ {
1545
+ #if defined SPH_UPTR
1546
+ #if SPH_UNALIGNED
1547
+ #if SPH_BIG_ENDIAN
1548
+ return sph_bswap32(*(const sph_u32 *)src);
1549
+ #else
1550
+ return *(const sph_u32 *)src;
1551
+ #endif
1552
+ #else
1553
+ if (((SPH_UPTR)src & 3) == 0) {
1554
+ #if SPH_BIG_ENDIAN
1555
+ #if SPH_SPARCV9_GCC && !SPH_NO_ASM
1556
+ sph_u32 tmp;
1557
+
1558
+ /*
1559
+ * "__volatile__" is needed here because without it,
1560
+ * gcc-3.4.3 miscompiles the code and performs the
1561
+ * access before the test on the address, thus triggering
1562
+ * a bus error...
1563
+ */
1564
+ __asm__ __volatile__ (
1565
+ "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1566
+ return tmp;
1567
+ /*
1568
+ * On PowerPC, this turns out not to be worth the effort: the inline
1569
+ * assembly makes GCC optimizer uncomfortable, which tends to nullify
1570
+ * the decoding gains.
1571
+ *
1572
+ * For most hash functions, using this inline assembly trick changes
1573
+ * hashing speed by less than 5% and often _reduces_ it. The biggest
1574
+ * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
1575
+ * less then 10%. The speed gain on CubeHash is probably due to the
1576
+ * chronic shortage of registers that CubeHash endures; for the other
1577
+ * functions, the generic code appears to be efficient enough already.
1578
+ *
1579
+ #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1580
+ sph_u32 tmp;
1581
+
1582
+ __asm__ __volatile__ (
1583
+ "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1584
+ return tmp;
1585
+ */
1586
+ #else
1587
+ return sph_bswap32(*(const sph_u32 *)src);
1588
+ #endif
1589
+ #else
1590
+ return *(const sph_u32 *)src;
1591
+ #endif
1592
+ } else {
1593
+ return (sph_u32)(((const unsigned char *)src)[0])
1594
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1595
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1596
+ | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1597
+ }
1598
+ #endif
1599
+ #else
1600
+ return (sph_u32)(((const unsigned char *)src)[0])
1601
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1602
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1603
+ | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1604
+ #endif
1605
+ }
1606
+
1607
+ /**
1608
+ * Decode a 32-bit value from the provided buffer (little endian convention).
1609
+ * The source buffer must be properly aligned.
1610
+ *
1611
+ * @param src the source buffer (32-bit aligned)
1612
+ * @return the decoded value
1613
+ */
1614
+ static SPH_INLINE sph_u32
1615
+ sph_dec32le_aligned(const void *src)
1616
+ {
1617
+ #if SPH_LITTLE_ENDIAN
1618
+ return *(const sph_u32 *)src;
1619
+ #elif SPH_BIG_ENDIAN
1620
+ #if SPH_SPARCV9_GCC && !SPH_NO_ASM
1621
+ sph_u32 tmp;
1622
+
1623
+ __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1624
+ return tmp;
1625
+ /*
1626
+ * Not worth it generally.
1627
+ *
1628
+ #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1629
+ sph_u32 tmp;
1630
+
1631
+ __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1632
+ return tmp;
1633
+ */
1634
+ #else
1635
+ return sph_bswap32(*(const sph_u32 *)src);
1636
+ #endif
1637
+ #else
1638
+ return (sph_u32)(((const unsigned char *)src)[0])
1639
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1640
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1641
+ | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1642
+ #endif
1643
+ }
1644
+
1645
+ #if SPH_64
1646
+
1647
+ /**
1648
+ * Encode a 64-bit value into the provided buffer (big endian convention).
1649
+ *
1650
+ * @param dst the destination buffer
1651
+ * @param val the 64-bit value to encode
1652
+ */
1653
+ static SPH_INLINE void
1654
+ sph_enc64be(void *dst, sph_u64 val)
1655
+ {
1656
+ #if defined SPH_UPTR
1657
+ #if SPH_UNALIGNED
1658
+ #if SPH_LITTLE_ENDIAN
1659
+ val = sph_bswap64(val);
1660
+ #endif
1661
+ *(sph_u64 *)dst = val;
1662
+ #else
1663
+ if (((SPH_UPTR)dst & 7) == 0) {
1664
+ #if SPH_LITTLE_ENDIAN
1665
+ val = sph_bswap64(val);
1666
+ #endif
1667
+ *(sph_u64 *)dst = val;
1668
+ } else {
1669
+ ((unsigned char *)dst)[0] = (val >> 56);
1670
+ ((unsigned char *)dst)[1] = (val >> 48);
1671
+ ((unsigned char *)dst)[2] = (val >> 40);
1672
+ ((unsigned char *)dst)[3] = (val >> 32);
1673
+ ((unsigned char *)dst)[4] = (val >> 24);
1674
+ ((unsigned char *)dst)[5] = (val >> 16);
1675
+ ((unsigned char *)dst)[6] = (val >> 8);
1676
+ ((unsigned char *)dst)[7] = val;
1677
+ }
1678
+ #endif
1679
+ #else
1680
+ ((unsigned char *)dst)[0] = (val >> 56);
1681
+ ((unsigned char *)dst)[1] = (val >> 48);
1682
+ ((unsigned char *)dst)[2] = (val >> 40);
1683
+ ((unsigned char *)dst)[3] = (val >> 32);
1684
+ ((unsigned char *)dst)[4] = (val >> 24);
1685
+ ((unsigned char *)dst)[5] = (val >> 16);
1686
+ ((unsigned char *)dst)[6] = (val >> 8);
1687
+ ((unsigned char *)dst)[7] = val;
1688
+ #endif
1689
+ }
1690
+
1691
+ /**
1692
+ * Encode a 64-bit value into the provided buffer (big endian convention).
1693
+ * The destination buffer must be properly aligned.
1694
+ *
1695
+ * @param dst the destination buffer (64-bit aligned)
1696
+ * @param val the value to encode
1697
+ */
1698
+ static SPH_INLINE void
1699
+ sph_enc64be_aligned(void *dst, sph_u64 val)
1700
+ {
1701
+ #if SPH_LITTLE_ENDIAN
1702
+ *(sph_u64 *)dst = sph_bswap64(val);
1703
+ #elif SPH_BIG_ENDIAN
1704
+ *(sph_u64 *)dst = val;
1705
+ #else
1706
+ ((unsigned char *)dst)[0] = (val >> 56);
1707
+ ((unsigned char *)dst)[1] = (val >> 48);
1708
+ ((unsigned char *)dst)[2] = (val >> 40);
1709
+ ((unsigned char *)dst)[3] = (val >> 32);
1710
+ ((unsigned char *)dst)[4] = (val >> 24);
1711
+ ((unsigned char *)dst)[5] = (val >> 16);
1712
+ ((unsigned char *)dst)[6] = (val >> 8);
1713
+ ((unsigned char *)dst)[7] = val;
1714
+ #endif
1715
+ }
1716
+
1717
+ /**
1718
+ * Decode a 64-bit value from the provided buffer (big endian convention).
1719
+ *
1720
+ * @param src the source buffer
1721
+ * @return the decoded value
1722
+ */
1723
+ static SPH_INLINE sph_u64
1724
+ sph_dec64be(const void *src)
1725
+ {
1726
+ #if defined SPH_UPTR
1727
+ #if SPH_UNALIGNED
1728
+ #if SPH_LITTLE_ENDIAN
1729
+ return sph_bswap64(*(const sph_u64 *)src);
1730
+ #else
1731
+ return *(const sph_u64 *)src;
1732
+ #endif
1733
+ #else
1734
+ if (((SPH_UPTR)src & 7) == 0) {
1735
+ #if SPH_LITTLE_ENDIAN
1736
+ return sph_bswap64(*(const sph_u64 *)src);
1737
+ #else
1738
+ return *(const sph_u64 *)src;
1739
+ #endif
1740
+ } else {
1741
+ return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1742
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1743
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1744
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1745
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1746
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1747
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1748
+ | (sph_u64)(((const unsigned char *)src)[7]);
1749
+ }
1750
+ #endif
1751
+ #else
1752
+ return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1753
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1754
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1755
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1756
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1757
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1758
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1759
+ | (sph_u64)(((const unsigned char *)src)[7]);
1760
+ #endif
1761
+ }
1762
+
1763
+ /**
1764
+ * Decode a 64-bit value from the provided buffer (big endian convention).
1765
+ * The source buffer must be properly aligned.
1766
+ *
1767
+ * @param src the source buffer (64-bit aligned)
1768
+ * @return the decoded value
1769
+ */
1770
+ static SPH_INLINE sph_u64
1771
+ sph_dec64be_aligned(const void *src)
1772
+ {
1773
+ #if SPH_LITTLE_ENDIAN
1774
+ return sph_bswap64(*(const sph_u64 *)src);
1775
+ #elif SPH_BIG_ENDIAN
1776
+ return *(const sph_u64 *)src;
1777
+ #else
1778
+ return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1779
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1780
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1781
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1782
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1783
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1784
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1785
+ | (sph_u64)(((const unsigned char *)src)[7]);
1786
+ #endif
1787
+ }
1788
+
1789
+ /**
1790
+ * Encode a 64-bit value into the provided buffer (little endian convention).
1791
+ *
1792
+ * @param dst the destination buffer
1793
+ * @param val the 64-bit value to encode
1794
+ */
1795
+ static SPH_INLINE void
1796
+ sph_enc64le(void *dst, sph_u64 val)
1797
+ {
1798
+ #if defined SPH_UPTR
1799
+ #if SPH_UNALIGNED
1800
+ #if SPH_BIG_ENDIAN
1801
+ val = sph_bswap64(val);
1802
+ #endif
1803
+ *(sph_u64 *)dst = val;
1804
+ #else
1805
+ if (((SPH_UPTR)dst & 7) == 0) {
1806
+ #if SPH_BIG_ENDIAN
1807
+ val = sph_bswap64(val);
1808
+ #endif
1809
+ *(sph_u64 *)dst = val;
1810
+ } else {
1811
+ ((unsigned char *)dst)[0] = val;
1812
+ ((unsigned char *)dst)[1] = (val >> 8);
1813
+ ((unsigned char *)dst)[2] = (val >> 16);
1814
+ ((unsigned char *)dst)[3] = (val >> 24);
1815
+ ((unsigned char *)dst)[4] = (val >> 32);
1816
+ ((unsigned char *)dst)[5] = (val >> 40);
1817
+ ((unsigned char *)dst)[6] = (val >> 48);
1818
+ ((unsigned char *)dst)[7] = (val >> 56);
1819
+ }
1820
+ #endif
1821
+ #else
1822
+ ((unsigned char *)dst)[0] = val;
1823
+ ((unsigned char *)dst)[1] = (val >> 8);
1824
+ ((unsigned char *)dst)[2] = (val >> 16);
1825
+ ((unsigned char *)dst)[3] = (val >> 24);
1826
+ ((unsigned char *)dst)[4] = (val >> 32);
1827
+ ((unsigned char *)dst)[5] = (val >> 40);
1828
+ ((unsigned char *)dst)[6] = (val >> 48);
1829
+ ((unsigned char *)dst)[7] = (val >> 56);
1830
+ #endif
1831
+ }
1832
+
1833
+ /**
1834
+ * Encode a 64-bit value into the provided buffer (little endian convention).
1835
+ * The destination buffer must be properly aligned.
1836
+ *
1837
+ * @param dst the destination buffer (64-bit aligned)
1838
+ * @param val the value to encode
1839
+ */
1840
+ static SPH_INLINE void
1841
+ sph_enc64le_aligned(void *dst, sph_u64 val)
1842
+ {
1843
+ #if SPH_LITTLE_ENDIAN
1844
+ *(sph_u64 *)dst = val;
1845
+ #elif SPH_BIG_ENDIAN
1846
+ *(sph_u64 *)dst = sph_bswap64(val);
1847
+ #else
1848
+ ((unsigned char *)dst)[0] = val;
1849
+ ((unsigned char *)dst)[1] = (val >> 8);
1850
+ ((unsigned char *)dst)[2] = (val >> 16);
1851
+ ((unsigned char *)dst)[3] = (val >> 24);
1852
+ ((unsigned char *)dst)[4] = (val >> 32);
1853
+ ((unsigned char *)dst)[5] = (val >> 40);
1854
+ ((unsigned char *)dst)[6] = (val >> 48);
1855
+ ((unsigned char *)dst)[7] = (val >> 56);
1856
+ #endif
1857
+ }
1858
+
1859
+ /**
1860
+ * Decode a 64-bit value from the provided buffer (little endian convention).
1861
+ *
1862
+ * @param src the source buffer
1863
+ * @return the decoded value
1864
+ */
1865
+ static SPH_INLINE sph_u64
1866
+ sph_dec64le(const void *src)
1867
+ {
1868
+ #if defined SPH_UPTR
1869
+ #if SPH_UNALIGNED
1870
+ #if SPH_BIG_ENDIAN
1871
+ return sph_bswap64(*(const sph_u64 *)src);
1872
+ #else
1873
+ return *(const sph_u64 *)src;
1874
+ #endif
1875
+ #else
1876
+ if (((SPH_UPTR)src & 7) == 0) {
1877
+ #if SPH_BIG_ENDIAN
1878
+ #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1879
+ sph_u64 tmp;
1880
+
1881
+ __asm__ __volatile__ (
1882
+ "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1883
+ return tmp;
1884
+ /*
1885
+ * Not worth it generally.
1886
+ *
1887
+ #elif SPH_PPC32_GCC && !SPH_NO_ASM
1888
+ return (sph_u64)sph_dec32le_aligned(src)
1889
+ | ((sph_u64)sph_dec32le_aligned(
1890
+ (const char *)src + 4) << 32);
1891
+ #elif SPH_PPC64_GCC && !SPH_NO_ASM
1892
+ sph_u64 tmp;
1893
+
1894
+ __asm__ __volatile__ (
1895
+ "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1896
+ return tmp;
1897
+ */
1898
+ #else
1899
+ return sph_bswap64(*(const sph_u64 *)src);
1900
+ #endif
1901
+ #else
1902
+ return *(const sph_u64 *)src;
1903
+ #endif
1904
+ } else {
1905
+ return (sph_u64)(((const unsigned char *)src)[0])
1906
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1907
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1908
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1909
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1910
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1911
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1912
+ | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1913
+ }
1914
+ #endif
1915
+ #else
1916
+ return (sph_u64)(((const unsigned char *)src)[0])
1917
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1918
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1919
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1920
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1921
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1922
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1923
+ | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1924
+ #endif
1925
+ }
1926
+
1927
+ /**
1928
+ * Decode a 64-bit value from the provided buffer (little endian convention).
1929
+ * The source buffer must be properly aligned.
1930
+ *
1931
+ * @param src the source buffer (64-bit aligned)
1932
+ * @return the decoded value
1933
+ */
1934
+ static SPH_INLINE sph_u64
1935
+ sph_dec64le_aligned(const void *src)
1936
+ {
1937
+ #if SPH_LITTLE_ENDIAN
1938
+ return *(const sph_u64 *)src;
1939
+ #elif SPH_BIG_ENDIAN
1940
+ #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1941
+ sph_u64 tmp;
1942
+
1943
+ __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1944
+ return tmp;
1945
+ /*
1946
+ * Not worth it generally.
1947
+ *
1948
+ #elif SPH_PPC32_GCC && !SPH_NO_ASM
1949
+ return (sph_u64)sph_dec32le_aligned(src)
1950
+ | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
1951
+ #elif SPH_PPC64_GCC && !SPH_NO_ASM
1952
+ sph_u64 tmp;
1953
+
1954
+ __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1955
+ return tmp;
1956
+ */
1957
+ #else
1958
+ return sph_bswap64(*(const sph_u64 *)src);
1959
+ #endif
1960
+ #else
1961
+ return (sph_u64)(((const unsigned char *)src)[0])
1962
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1963
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1964
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1965
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1966
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1967
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1968
+ | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1969
+ #endif
1970
+ }
1971
+
1972
+ #endif
1973
+
1974
+ #endif /* Doxygen excluded block */
1975
+
1976
+ #endif