ttcrypt 0.0.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1976 @@
1
+ /* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
2
+ /**
3
+ * Basic type definitions.
4
+ *
5
+ * This header file defines the generic integer types that will be used
6
+ * for the implementation of hash functions; it also contains helper
7
+ * functions which encode and decode multi-byte integer values, using
8
+ * either little-endian or big-endian conventions.
9
+ *
10
+ * This file contains a compile-time test on the size of a byte
11
+ * (the <code>unsigned char</code> C type). If bytes are not octets,
12
+ * i.e. if they do not have a size of exactly 8 bits, then compilation
13
+ * is aborted. Architectures where bytes are not octets are relatively
14
+ * rare, even in the embedded devices market. We forbid non-octet bytes
15
+ * because there is no clear convention on how octet streams are encoded
16
+ * on such systems.
17
+ *
18
+ * ==========================(LICENSE BEGIN)============================
19
+ *
20
+ * Copyright (c) 2007-2010 Projet RNRT SAPHIR
21
+ *
22
+ * Permission is hereby granted, free of charge, to any person obtaining
23
+ * a copy of this software and associated documentation files (the
24
+ * "Software"), to deal in the Software without restriction, including
25
+ * without limitation the rights to use, copy, modify, merge, publish,
26
+ * distribute, sublicense, and/or sell copies of the Software, and to
27
+ * permit persons to whom the Software is furnished to do so, subject to
28
+ * the following conditions:
29
+ *
30
+ * The above copyright notice and this permission notice shall be
31
+ * included in all copies or substantial portions of the Software.
32
+ *
33
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
34
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
35
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
36
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
37
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
38
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
39
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40
+ *
41
+ * ===========================(LICENSE END)=============================
42
+ *
43
+ * @file sph_types.h
44
+ * @author Thomas Pornin <thomas.pornin@cryptolog.com>
45
+ */
46
+
47
+ #ifndef SPH_TYPES_H__
48
+ #define SPH_TYPES_H__
49
+
50
+ #include <limits.h>
51
+
52
+ /*
53
+ * All our I/O functions are defined over octet streams. We do not know
54
+ * how to handle input data if bytes are not octets.
55
+ */
56
+ #if CHAR_BIT != 8
57
+ #error This code requires 8-bit bytes
58
+ #endif
59
+
60
+ /* ============= BEGIN documentation block for Doxygen ============ */
61
+
62
+ #ifdef DOXYGEN_IGNORE
63
+
64
+ /** @mainpage sphlib C code documentation
65
+ *
66
+ * @section overview Overview
67
+ *
68
+ * <code>sphlib</code> is a library which contains implementations of
69
+ * various cryptographic hash functions. These pages have been generated
70
+ * with <a href="http://www.doxygen.org/index.html">doxygen</a> and
71
+ * document the API for the C implementations.
72
+ *
73
+ * The API is described in appropriate header files, which are available
74
+ * in the "Files" section. Each hash function family has its own header,
75
+ * whose name begins with <code>"sph_"</code> and contains the family
76
+ * name. For instance, the API for the RIPEMD hash functions is available
77
+ * in the header file <code>sph_ripemd.h</code>.
78
+ *
79
+ * @section principles API structure and conventions
80
+ *
81
+ * @subsection io Input/output conventions
82
+ *
83
+ * In all generality, hash functions operate over strings of bits.
84
+ * Individual bits are rarely encountered in C programming or actual
85
+ * communication protocols; most protocols converge on the ubiquitous
86
+ * "octet" which is a group of eight bits. Data is thus expressed as a
87
+ * stream of octets. The C programming language contains the notion of a
88
+ * "byte", which is a data unit managed under the type <code>"unsigned
89
+ * char"</code>. The C standard prescribes that a byte should hold at
90
+ * least eight bits, but possibly more. Most modern architectures, even
91
+ * in the embedded world, feature eight-bit bytes, i.e. map bytes to
92
+ * octets.
93
+ *
94
+ * Nevertheless, for some of the implemented hash functions, an extra
95
+ * API has been added, which allows the input of arbitrary sequences of
96
+ * bits: when the computation is about to be closed, 1 to 7 extra bits
97
+ * can be added. The functions for which this API is implemented include
98
+ * the SHA-2 functions and all SHA-3 candidates.
99
+ *
100
+ * <code>sphlib</code> defines hash functions which may hash octet streams,
101
+ * i.e. streams of bits where the number of bits is a multiple of eight.
102
+ * The data input functions in the <code>sphlib</code> API expect data
103
+ * as anonymous pointers (<code>"const void *"</code>) with a length
104
+ * (of type <code>"size_t"</code>) which gives the input data chunk length
105
+ * in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
106
+ * header contains a compile-time test which prevents compilation on
107
+ * architectures where this property is not met.
108
+ *
109
+ * The hash function output is also converted into bytes. All currently
110
+ * implemented hash functions have an output width which is a multiple of
111
+ * eight, and this is likely to remain true for new designs.
112
+ *
113
+ * Most hash functions internally convert input data into 32-bit or 64-bit
114
+ * words, using either little-endian or big-endian conversion. The hash
115
+ * output also often consists of such words, which are encoded into output
116
+ * bytes with a similar endianness convention. Some hash functions have
117
+ * been only loosely specified on that subject; when necessary,
118
+ * <code>sphlib</code> has been tested against published "reference"
119
+ * implementations in order to use the same conventions.
120
+ *
121
+ * @subsection shortname Function short name
122
+ *
123
+ * Each implemented hash function has a "short name" which is used
124
+ * internally to derive the identifiers for the functions and context
125
+ * structures which the function uses. For instance, MD5 has the short
126
+ * name <code>"md5"</code>. Short names are listed in the next section,
127
+ * for the implemented hash functions. In subsequent sections, the
128
+ * short name will be assumed to be <code>"XXX"</code>: replace with the
129
+ * actual hash function name to get the C identifier.
130
+ *
131
+ * Note: some functions within the same family share the same core
132
+ * elements, such as update function or context structure. Correspondingly,
133
+ * some of the defined types or functions may actually be macros which
134
+ * transparently evaluate to another type or function name.
135
+ *
136
+ * @subsection context Context structure
137
+ *
138
+ * Each implemented hash function has its own context structure, available
139
+ * under the type name <code>"sph_XXX_context"</code> for the hash function
140
+ * with short name <code>"XXX"</code>. This structure holds all needed
141
+ * state for a running hash computation.
142
+ *
143
+ * The contents of these structures are meant to be opaque, and private
144
+ * to the implementation. However, these contents are specified in the
145
+ * header files so that application code which uses <code>sphlib</code>
146
+ * may access the size of those structures.
147
+ *
148
+ * The caller is responsible for allocating the context structure,
149
+ * whether by dynamic allocation (<code>malloc()</code> or equivalent),
150
+ * static allocation (a global permanent variable), as an automatic
151
+ * variable ("on the stack"), or by any other means which ensures proper
152
+ * structure alignment. <code>sphlib</code> code performs no dynamic
153
+ * allocation by itself.
154
+ *
155
+ * The context must be initialized before use, using the
156
+ * <code>sph_XXX_init()</code> function. This function sets the context
157
+ * state to proper initial values for hashing.
158
+ *
159
+ * Since all state data is contained within the context structure,
160
+ * <code>sphlib</code> is thread-safe and reentrant: several hash
161
+ * computations may be performed in parallel, provided that they do not
162
+ * operate on the same context. Moreover, a running computation can be
163
+ * cloned by copying the context (with a simple <code>memcpy()</code>):
164
+ * the context and its clone are then independent and may be updated
165
+ * with new data and/or closed without interfering with each other.
166
+ * Similarly, a context structure can be moved in memory at will:
167
+ * context structures contain no pointer, in particular no pointer to
168
+ * themselves.
169
+ *
170
+ * @subsection dataio Data input
171
+ *
172
+ * Hashed data is input with the <code>sph_XXX()</code> function, which
173
+ * takes as parameters a pointer to the context, a pointer to the data
174
+ * to hash, and the number of data bytes to hash. The context is updated
175
+ * with the new data.
176
+ *
177
+ * Data can be input in one or several calls, with arbitrary input lengths.
178
+ * However, it is best, performance wise, to input data by relatively big
179
+ * chunks (say a few kilobytes), because this allows <code>sphlib</code> to
180
+ * optimize things and avoid internal copying.
181
+ *
182
+ * When all data has been input, the context can be closed with
183
+ * <code>sph_XXX_close()</code>. The hash output is computed and written
184
+ * into the provided buffer. The caller must take care to provide a
185
+ * buffer of appropriate length; e.g., when using SHA-1, the output is
186
+ * a 20-byte word, therefore the output buffer must be at least 20 bytes
187
+ * long.
188
+ *
189
+ * For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
190
+ * function can be used instead of <code>sph_XXX_close()</code>. This
191
+ * function can take a few extra <strong>bits</strong> to be added at
192
+ * the end of the input message. This allows hashing messages with a
193
+ * bit length which is not a multiple of 8. The extra bits are provided
194
+ * as an unsigned integer value, and a bit count. The bit count must be
195
+ * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
196
+ * 0 (bits of numerical value 128, 64, 32... down to 1), in that order.
197
+ * For instance, to add three bits of value 1, 1 and 0, the unsigned
198
+ * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
199
+ * will be 3.
200
+ *
201
+ * The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
202
+ * it evaluates to the function output size, expressed in bits. For instance,
203
+ * <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
204
+ *
205
+ * When closed, the context is automatically reinitialized and can be
206
+ * immediately used for another computation. It is not necessary to call
207
+ * <code>sph_XXX_init()</code> after a close. Note that
208
+ * <code>sph_XXX_init()</code> can still be called to "reset" a context,
209
+ * i.e. forget previously input data, and get back to the initial state.
210
+ *
211
+ * @subsection alignment Data alignment
212
+ *
213
+ * "Alignment" is a property of data, which is said to be "properly
214
+ * aligned" when its emplacement in memory is such that the data can
215
+ * be optimally read by full words. This depends on the type of access;
216
+ * basically, some hash functions will read data by 32-bit or 64-bit
217
+ * words. <code>sphlib</code> does not mandate such alignment for input
218
+ * data, but using aligned data can substantially improve performance.
219
+ *
220
+ * As a rule, it is best to input data by chunks whose length (in bytes)
221
+ * is a multiple of eight, and which begins at "generally aligned"
222
+ * addresses, such as the base address returned by a call to
223
+ * <code>malloc()</code>.
224
+ *
225
+ * @section functions Implemented functions
226
+ *
227
+ * We give here the list of implemented functions. They are grouped by
228
+ * family; to each family corresponds a specific header file. Each
229
+ * individual function has its associated "short name". Please refer to
230
+ * the documentation for that header file to get details on the hash
231
+ * function denomination and provenance.
232
+ *
233
+ * Note: the functions marked with a '(64)' in the list below are
234
+ * available only if the C compiler provides an integer type of length
235
+ * 64 bits or more. Such a type is mandatory in the latest C standard
236
+ * (ISO 9899:1999, aka "C99") and is present in several older compilers
237
+ * as well, so chances are that such a type is available.
238
+ *
239
+ * - HAVAL family: file <code>sph_haval.h</code>
240
+ * - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
241
+ * - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
242
+ * - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
243
+ * - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
244
+ * - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
245
+ * - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
246
+ * - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
247
+ * - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
248
+ * - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
249
+ * - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
250
+ * - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
251
+ * - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
252
+ * - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
253
+ * - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
254
+ * - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
255
+ * - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
256
+ * - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
257
+ * - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
258
+ * - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
259
+ * - RadioGatun family: file <code>sph_radiogatun.h</code>
260
+ * - RadioGatun[32]: short name: <code>radiogatun32</code>
261
+ * - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
262
+ * - RIPEMD family: file <code>sph_ripemd.h</code>
263
+ * - RIPEMD: short name: <code>ripemd</code>
264
+ * - RIPEMD-128: short name: <code>ripemd128</code>
265
+ * - RIPEMD-160: short name: <code>ripemd160</code>
266
+ * - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
267
+ * - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
268
+ * - SHA-2 family, 32-bit hashes: file <code>sph_sha2.h</code>
269
+ * - SHA-224: short name: <code>sha224</code>
270
+ * - SHA-256: short name: <code>sha256</code>
271
+ * - SHA-384: short name: <code>sha384</code> (64)
272
+ * - SHA-512: short name: <code>sha512</code> (64)
273
+ * - Tiger family: file <code>sph_tiger.h</code>
274
+ * - Tiger: short name: <code>tiger</code> (64)
275
+ * - Tiger2: short name: <code>tiger2</code> (64)
276
+ * - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
277
+ * - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
278
+ * - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
279
+ * - WHIRLPOOL: short name: <code>whirlpool</code> (64)
280
+ *
281
+ * The fourteen second-round SHA-3 candidates are also implemented;
282
+ * when applicable, the implementations follow the "final" specifications
283
+ * as published for the third round of the SHA-3 competition (BLAKE,
284
+ * Groestl, JH, Keccak and Skein have been tweaked for third round).
285
+ *
286
+ * - BLAKE family: file <code>sph_blake.h</code>
287
+ * - BLAKE-224: short name: <code>blake224</code>
288
+ * - BLAKE-256: short name: <code>blake256</code>
289
+ * - BLAKE-384: short name: <code>blake384</code>
290
+ * - BLAKE-512: short name: <code>blake512</code>
291
+ * - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
292
+ * - BMW-224: short name: <code>bmw224</code>
293
+ * - BMW-256: short name: <code>bmw256</code>
294
+ * - BMW-384: short name: <code>bmw384</code> (64)
295
+ * - BMW-512: short name: <code>bmw512</code> (64)
296
+ * - CubeHash family: file <code>sph_cubehash.h</code> (specified as
297
+ * CubeHash16/32 in the CubeHash specification)
298
+ * - CubeHash-224: short name: <code>cubehash224</code>
299
+ * - CubeHash-256: short name: <code>cubehash256</code>
300
+ * - CubeHash-384: short name: <code>cubehash384</code>
301
+ * - CubeHash-512: short name: <code>cubehash512</code>
302
+ * - ECHO family: file <code>sph_echo.h</code>
303
+ * - ECHO-224: short name: <code>echo224</code>
304
+ * - ECHO-256: short name: <code>echo256</code>
305
+ * - ECHO-384: short name: <code>echo384</code>
306
+ * - ECHO-512: short name: <code>echo512</code>
307
+ * - Fugue family: file <code>sph_fugue.h</code>
308
+ * - Fugue-224: short name: <code>fugue224</code>
309
+ * - Fugue-256: short name: <code>fugue256</code>
310
+ * - Fugue-384: short name: <code>fugue384</code>
311
+ * - Fugue-512: short name: <code>fugue512</code>
312
+ * - Groestl family: file <code>sph_groestl.h</code>
313
+ * - Groestl-224: short name: <code>groestl224</code>
314
+ * - Groestl-256: short name: <code>groestl256</code>
315
+ * - Groestl-384: short name: <code>groestl384</code>
316
+ * - Groestl-512: short name: <code>groestl512</code>
317
+ * - Hamsi family: file <code>sph_hamsi.h</code>
318
+ * - Hamsi-224: short name: <code>hamsi224</code>
319
+ * - Hamsi-256: short name: <code>hamsi256</code>
320
+ * - Hamsi-384: short name: <code>hamsi384</code>
321
+ * - Hamsi-512: short name: <code>hamsi512</code>
322
+ * - JH family: file <code>sph_jh.h</code>
323
+ * - JH-224: short name: <code>jh224</code>
324
+ * - JH-256: short name: <code>jh256</code>
325
+ * - JH-384: short name: <code>jh384</code>
326
+ * - JH-512: short name: <code>jh512</code>
327
+ * - Keccak family: file <code>sph_keccak.h</code>
328
+ * - Keccak-224: short name: <code>keccak224</code>
329
+ * - Keccak-256: short name: <code>keccak256</code>
330
+ * - Keccak-384: short name: <code>keccak384</code>
331
+ * - Keccak-512: short name: <code>keccak512</code>
332
+ * - Luffa family: file <code>sph_luffa.h</code>
333
+ * - Luffa-224: short name: <code>luffa224</code>
334
+ * - Luffa-256: short name: <code>luffa256</code>
335
+ * - Luffa-384: short name: <code>luffa384</code>
336
+ * - Luffa-512: short name: <code>luffa512</code>
337
+ * - Shabal family: file <code>sph_shabal.h</code>
338
+ * - Shabal-192: short name: <code>shabal192</code>
339
+ * - Shabal-224: short name: <code>shabal224</code>
340
+ * - Shabal-256: short name: <code>shabal256</code>
341
+ * - Shabal-384: short name: <code>shabal384</code>
342
+ * - Shabal-512: short name: <code>shabal512</code>
343
+ * - SHAvite-3 family: file <code>sph_shavite.h</code>
344
+ * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
345
+ * short name: <code>shavite224</code>
346
+ * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
347
+ * short name: <code>shavite256</code>
348
+ * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
349
+ * short name: <code>shavite384</code>
350
+ * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
351
+ * short name: <code>shavite512</code>
352
+ * - SIMD family: file <code>sph_simd.h</code>
353
+ * - SIMD-224: short name: <code>simd224</code>
354
+ * - SIMD-256: short name: <code>simd256</code>
355
+ * - SIMD-384: short name: <code>simd384</code>
356
+ * - SIMD-512: short name: <code>simd512</code>
357
+ * - Skein family: file <code>sph_skein.h</code>
358
+ * - Skein-224 (nominally specified as Skein-512-224): short name:
359
+ * <code>skein224</code> (64)
360
+ * - Skein-256 (nominally specified as Skein-512-256): short name:
361
+ * <code>skein256</code> (64)
362
+ * - Skein-384 (nominally specified as Skein-512-384): short name:
363
+ * <code>skein384</code> (64)
364
+ * - Skein-512 (nominally specified as Skein-512-512): short name:
365
+ * <code>skein512</code> (64)
366
+ *
367
+ * For the second-round SHA-3 candidates, the functions are as specified
368
+ * for round 2, i.e. with the "tweaks" that some candidates added
369
+ * between round 1 and round 2. Also, some of the submitted packages for
370
+ * round 2 contained errors, in the specification, reference code, or
371
+ * both. <code>sphlib</code> implements the corrected versions.
372
+ */
373
+
374
+ /** @hideinitializer
375
+ * Unsigned integer type whose length is at least 32 bits; on most
376
+ * architectures, it will have a width of exactly 32 bits. Unsigned C
377
+ * types implement arithmetics modulo a power of 2; use the
378
+ * <code>SPH_T32()</code> macro to ensure that the value is truncated
379
+ * to exactly 32 bits. Unless otherwise specified, all macros and
380
+ * functions which accept <code>sph_u32</code> values assume that these
381
+ * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
382
+ * where <code>sph_u32</code> is larger than that.
383
+ */
384
+ typedef __arch_dependant__ sph_u32;
385
+
386
+ /** @hideinitializer
387
+ * Signed integer type corresponding to <code>sph_u32</code>; it has
388
+ * width 32 bits or more.
389
+ */
390
+ typedef __arch_dependant__ sph_s32;
391
+
392
+ /** @hideinitializer
393
+ * Unsigned integer type whose length is at least 64 bits; on most
394
+ * architectures which feature such a type, it will have a width of
395
+ * exactly 64 bits. C99-compliant platforms will have this type; it
396
+ * is also defined when the GNU compiler (gcc) is used, and on
397
+ * platforms where <code>unsigned long</code> is large enough. If this
398
+ * type is not available, then some hash functions which depend on
399
+ * a 64-bit type will not be available (most notably SHA-384, SHA-512,
400
+ * Tiger and WHIRLPOOL).
401
+ */
402
+ typedef __arch_dependant__ sph_u64;
403
+
404
+ /** @hideinitializer
405
+ * Signed integer type corresponding to <code>sph_u64</code>; it has
406
+ * width 64 bits or more.
407
+ */
408
+ typedef __arch_dependant__ sph_s64;
409
+
410
+ /**
411
+ * This macro expands the token <code>x</code> into a suitable
412
+ * constant expression of type <code>sph_u32</code>. Depending on
413
+ * how this type is defined, a suffix such as <code>UL</code> may
414
+ * be appended to the argument.
415
+ *
416
+ * @param x the token to expand into a suitable constant expression
417
+ */
418
+ #define SPH_C32(x)
419
+
420
+ /**
421
+ * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
422
+ * a no-op, recognized as such by the compiler.
423
+ *
424
+ * @param x the value to truncate (of type <code>sph_u32</code>)
425
+ */
426
+ #define SPH_T32(x)
427
+
428
+ /**
429
+ * Rotate a 32-bit value by a number of bits to the left. The rotate
430
+ * count must reside between 1 and 31. This macro assumes that its
431
+ * first argument fits in 32 bits (no extra bit allowed on machines where
432
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
433
+ * several times.
434
+ *
435
+ * @param x the value to rotate (of type <code>sph_u32</code>)
436
+ * @param n the rotation count (between 1 and 31, inclusive)
437
+ */
438
+ #define SPH_ROTL32(x, n)
439
+
440
+ /**
441
+ * Rotate a 32-bit value by a number of bits to the right. The rotate
442
+ * count must reside between 1 and 31. This macro assumes that its
443
+ * first argument fits in 32 bits (no extra bit allowed on machines where
444
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
445
+ * several times.
446
+ *
447
+ * @param x the value to rotate (of type <code>sph_u32</code>)
448
+ * @param n the rotation count (between 1 and 31, inclusive)
449
+ */
450
+ #define SPH_ROTR32(x, n)
451
+
452
+ /**
453
+ * This macro is defined on systems for which a 64-bit type has been
454
+ * detected, and is used for <code>sph_u64</code>.
455
+ */
456
+ #define SPH_64
457
+
458
+ /**
459
+ * This macro is defined on systems for which the "native" integer size is
460
+ * 64 bits (64-bit values fit in one register).
461
+ */
462
+ #define SPH_64_TRUE
463
+
464
+ /**
465
+ * This macro expands the token <code>x</code> into a suitable
466
+ * constant expression of type <code>sph_u64</code>. Depending on
467
+ * how this type is defined, a suffix such as <code>ULL</code> may
468
+ * be appended to the argument. This macro is defined only if a
469
+ * 64-bit type was detected and used for <code>sph_u64</code>.
470
+ *
471
+ * @param x the token to expand into a suitable constant expression
472
+ */
473
+ #define SPH_C64(x)
474
+
475
+ /**
476
+ * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
477
+ * a no-op, recognized as such by the compiler. This macro is defined only
478
+ * if a 64-bit type was detected and used for <code>sph_u64</code>.
479
+ *
480
+ * @param x the value to truncate (of type <code>sph_u64</code>)
481
+ */
482
+ #define SPH_T64(x)
483
+
484
+ /**
485
+ * Rotate a 64-bit value by a number of bits to the left. The rotate
486
+ * count must reside between 1 and 63. This macro assumes that its
487
+ * first argument fits in 64 bits (no extra bit allowed on machines where
488
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
489
+ * several times. This macro is defined only if a 64-bit type was detected
490
+ * and used for <code>sph_u64</code>.
491
+ *
492
+ * @param x the value to rotate (of type <code>sph_u64</code>)
493
+ * @param n the rotation count (between 1 and 63, inclusive)
494
+ */
495
+ #define SPH_ROTL64(x, n)
496
+
497
+ /**
498
+ * Rotate a 64-bit value by a number of bits to the right. The rotate
499
+ * count must reside between 1 and 63. This macro assumes that its
500
+ * first argument fits in 64 bits (no extra bit allowed on machines where
501
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
502
+ * several times. This macro is defined only if a 64-bit type was detected
503
+ * and used for <code>sph_u64</code>.
504
+ *
505
+ * @param x the value to rotate (of type <code>sph_u64</code>)
506
+ * @param n the rotation count (between 1 and 63, inclusive)
507
+ */
508
+ #define SPH_ROTR64(x, n)
509
+
510
+ /**
511
+ * This macro evaluates to <code>inline</code> or an equivalent construction,
512
+ * if available on the compilation platform, or to nothing otherwise. This
513
+ * is used to declare inline functions, for which the compiler should
514
+ * endeavour to include the code directly in the caller. Inline functions
515
+ * are typically defined in header files as replacement for macros.
516
+ */
517
+ #define SPH_INLINE
518
+
519
+ /**
520
+ * This macro is defined if the platform has been detected as using
521
+ * little-endian convention. This implies that the <code>sph_u32</code>
522
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
523
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
524
+ */
525
+ #define SPH_LITTLE_ENDIAN
526
+
527
+ /**
528
+ * This macro is defined if the platform has been detected as using
529
+ * big-endian convention. This implies that the <code>sph_u32</code>
530
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
531
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
532
+ */
533
+ #define SPH_BIG_ENDIAN
534
+
535
+ /**
536
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
537
+ * can be read from and written to memory efficiently in little-endian
538
+ * convention. This is the case for little-endian platforms, and also
539
+ * for the big-endian platforms which have special little-endian access
540
+ * opcodes (e.g. Ultrasparc).
541
+ */
542
+ #define SPH_LITTLE_FAST
543
+
544
+ /**
545
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
546
+ * can be read from and written to memory efficiently in big-endian
547
+ * convention. This is the case for big-endian platforms, and also
548
+ * for the little-endian platforms which have special big-endian access
549
+ * opcodes.
550
+ */
551
+ #define SPH_BIG_FAST
552
+
553
+ /**
554
+ * On some platforms, this macro is defined to an unsigned integer type
555
+ * into which pointer values may be cast. The resulting value can then
556
+ * be tested for being a multiple of 2, 4 or 8, indicating an aligned
557
+ * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
558
+ */
559
+ #define SPH_UPTR
560
+
561
+ /**
562
+ * When defined, this macro indicates that unaligned memory accesses
563
+ * are possible with only a minor penalty, and thus should be preferred
564
+ * over strategies which first copy data to an aligned buffer.
565
+ */
566
+ #define SPH_UNALIGNED
567
+
568
+ /**
569
+ * Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
570
+ * <code>0x78563412</code>). This is an inline function which resorts
571
+ * to inline assembly on some platforms, for better performance.
572
+ *
573
+ * @param x the 32-bit value to byte-swap
574
+ * @return the byte-swapped value
575
+ */
576
+ static inline sph_u32 sph_bswap32(sph_u32 x);
577
+
578
+ /**
579
+ * Byte-swap a 64-bit word. This is an inline function which resorts
580
+ * to inline assembly on some platforms, for better performance. This
581
+ * function is defined only if a suitable 64-bit type was found for
582
+ * <code>sph_u64</code>
583
+ *
584
+ * @param x the 64-bit value to byte-swap
585
+ * @return the byte-swapped value
586
+ */
587
+ static inline sph_u64 sph_bswap64(sph_u64 x);
588
+
589
+ /**
590
+ * Decode a 16-bit unsigned value from memory, in little-endian convention
591
+ * (least significant byte comes first).
592
+ *
593
+ * @param src the source address
594
+ * @return the decoded value
595
+ */
596
+ static inline unsigned sph_dec16le(const void *src);
597
+
598
+ /**
599
+ * Encode a 16-bit unsigned value into memory, in little-endian convention
600
+ * (least significant byte comes first).
601
+ *
602
+ * @param dst the destination buffer
603
+ * @param val the value to encode
604
+ */
605
+ static inline void sph_enc16le(void *dst, unsigned val);
606
+
607
+ /**
608
+ * Decode a 16-bit unsigned value from memory, in big-endian convention
609
+ * (most significant byte comes first).
610
+ *
611
+ * @param src the source address
612
+ * @return the decoded value
613
+ */
614
+ static inline unsigned sph_dec16be(const void *src);
615
+
616
+ /**
617
+ * Encode a 16-bit unsigned value into memory, in big-endian convention
618
+ * (most significant byte comes first).
619
+ *
620
+ * @param dst the destination buffer
621
+ * @param val the value to encode
622
+ */
623
+ static inline void sph_enc16be(void *dst, unsigned val);
624
+
625
+ /**
626
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
627
+ * (least significant byte comes first).
628
+ *
629
+ * @param src the source address
630
+ * @return the decoded value
631
+ */
632
+ static inline sph_u32 sph_dec32le(const void *src);
633
+
634
+ /**
635
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
636
+ * (least significant byte comes first). This function assumes that the
637
+ * source address is suitably aligned for a direct access, if the platform
638
+ * supports such things; it can thus be marginally faster than the generic
639
+ * <code>sph_dec32le()</code> function.
640
+ *
641
+ * @param src the source address
642
+ * @return the decoded value
643
+ */
644
+ static inline sph_u32 sph_dec32le_aligned(const void *src);
645
+
646
+ /**
647
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
648
+ * (least significant byte comes first).
649
+ *
650
+ * @param dst the destination buffer
651
+ * @param val the value to encode
652
+ */
653
+ static inline void sph_enc32le(void *dst, sph_u32 val);
654
+
655
+ /**
656
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
657
+ * (least significant byte comes first). This function assumes that the
658
+ * destination address is suitably aligned for a direct access, if the
659
+ * platform supports such things; it can thus be marginally faster than
660
+ * the generic <code>sph_enc32le()</code> function.
661
+ *
662
+ * @param dst the destination buffer
663
+ * @param val the value to encode
664
+ */
665
+ static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
666
+
667
+ /**
668
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
669
+ * (most significant byte comes first).
670
+ *
671
+ * @param src the source address
672
+ * @return the decoded value
673
+ */
674
+ static inline sph_u32 sph_dec32be(const void *src);
675
+
676
+ /**
677
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
678
+ * (most significant byte comes first). This function assumes that the
679
+ * source address is suitably aligned for a direct access, if the platform
680
+ * supports such things; it can thus be marginally faster than the generic
681
+ * <code>sph_dec32be()</code> function.
682
+ *
683
+ * @param src the source address
684
+ * @return the decoded value
685
+ */
686
+ static inline sph_u32 sph_dec32be_aligned(const void *src);
687
+
688
+ /**
689
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
690
+ * (most significant byte comes first).
691
+ *
692
+ * @param dst the destination buffer
693
+ * @param val the value to encode
694
+ */
695
+ static inline void sph_enc32be(void *dst, sph_u32 val);
696
+
697
+ /**
698
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
699
+ * (most significant byte comes first). This function assumes that the
700
+ * destination address is suitably aligned for a direct access, if the
701
+ * platform supports such things; it can thus be marginally faster than
702
+ * the generic <code>sph_enc32be()</code> function.
703
+ *
704
+ * @param dst the destination buffer
705
+ * @param val the value to encode
706
+ */
707
+ static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
708
+
709
+ /**
710
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
711
+ * (least significant byte comes first). This function is defined only
712
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
713
+ *
714
+ * @param src the source address
715
+ * @return the decoded value
716
+ */
717
+ static inline sph_u64 sph_dec64le(const void *src);
718
+
719
+ /**
720
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
721
+ * (least significant byte comes first). This function assumes that the
722
+ * source address is suitably aligned for a direct access, if the platform
723
+ * supports such things; it can thus be marginally faster than the generic
724
+ * <code>sph_dec64le()</code> function. This function is defined only
725
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
726
+ *
727
+ * @param src the source address
728
+ * @return the decoded value
729
+ */
730
+ static inline sph_u64 sph_dec64le_aligned(const void *src);
731
+
732
+ /**
733
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
734
+ * (least significant byte comes first). This function is defined only
735
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
736
+ *
737
+ * @param dst the destination buffer
738
+ * @param val the value to encode
739
+ */
740
+ static inline void sph_enc64le(void *dst, sph_u64 val);
741
+
742
+ /**
743
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
744
+ * (least significant byte comes first). This function assumes that the
745
+ * destination address is suitably aligned for a direct access, if the
746
+ * platform supports such things; it can thus be marginally faster than
747
+ * the generic <code>sph_enc64le()</code> function. This function is defined
748
+ * only if a suitable 64-bit type was detected and used for
749
+ * <code>sph_u64</code>.
750
+ *
751
+ * @param dst the destination buffer
752
+ * @param val the value to encode
753
+ */
754
+ static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
755
+
756
+ /**
757
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
758
+ * (most significant byte comes first). This function is defined only
759
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
760
+ *
761
+ * @param src the source address
762
+ * @return the decoded value
763
+ */
764
+ static inline sph_u64 sph_dec64be(const void *src);
765
+
766
+ /**
767
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
768
+ * (most significant byte comes first). This function assumes that the
769
+ * source address is suitably aligned for a direct access, if the platform
770
+ * supports such things; it can thus be marginally faster than the generic
771
+ * <code>sph_dec64be()</code> function. This function is defined only
772
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
773
+ *
774
+ * @param src the source address
775
+ * @return the decoded value
776
+ */
777
+ static inline sph_u64 sph_dec64be_aligned(const void *src);
778
+
779
+ /**
780
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
781
+ * (most significant byte comes first). This function is defined only
782
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
783
+ *
784
+ * @param dst the destination buffer
785
+ * @param val the value to encode
786
+ */
787
+ static inline void sph_enc64be(void *dst, sph_u64 val);
788
+
789
+ /**
790
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
791
+ * (most significant byte comes first). This function assumes that the
792
+ * destination address is suitably aligned for a direct access, if the
793
+ * platform supports such things; it can thus be marginally faster than
794
+ * the generic <code>sph_enc64be()</code> function. This function is defined
795
+ * only if a suitable 64-bit type was detected and used for
796
+ * <code>sph_u64</code>.
797
+ *
798
+ * @param dst the destination buffer
799
+ * @param val the value to encode
800
+ */
801
+ static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
802
+
803
+ #endif
804
+
805
+ /* ============== END documentation block for Doxygen ============= */
806
+
807
+ #ifndef DOXYGEN_IGNORE
808
+
809
+ /*
810
+ * We want to define the types "sph_u32" and "sph_u64" which hold
811
+ * unsigned values of at least, respectively, 32 and 64 bits. These
812
+ * tests should select appropriate types for most platforms. The
813
+ * macro "SPH_64" is defined if a 64-bit type is supported.
814
+ */
815
+
816
+ #undef SPH_64
817
+ #undef SPH_64_TRUE
818
+
819
+ #if defined __STDC__ && __STDC_VERSION__ >= 199901L
820
+
821
+ /*
822
+ * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
823
+ * type, if any, or otherwise use a wider type (which must exist, for
824
+ * C99 conformance).
825
+ */
826
+
827
+ #include <stdint.h>
828
+
829
+ #ifdef UINT32_MAX
830
+ typedef uint32_t sph_u32;
831
+ typedef int32_t sph_s32;
832
+ #else
833
+ typedef uint_fast32_t sph_u32;
834
+ typedef int_fast32_t sph_s32;
835
+ #endif
836
+ #if !SPH_NO_64
837
+ #ifdef UINT64_MAX
838
+ typedef uint64_t sph_u64;
839
+ typedef int64_t sph_s64;
840
+ #else
841
+ typedef uint_fast64_t sph_u64;
842
+ typedef int_fast64_t sph_s64;
843
+ #endif
844
+ #endif
845
+
846
+ #define SPH_C32(x) ((sph_u32)(x))
847
+ #if !SPH_NO_64
848
+ #define SPH_C64(x) ((sph_u64)(x))
849
+ #define SPH_64 1
850
+ #endif
851
+
852
+ #else
853
+
854
+ /*
855
+ * On non-C99 systems, we use "unsigned int" if it is wide enough,
856
+ * "unsigned long" otherwise. This supports all "reasonable" architectures.
857
+ * We have to be cautious: pre-C99 preprocessors handle constants
858
+ * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
859
+ */
860
+
861
+ #if ((UINT_MAX >> 11) >> 11) >= 0x3FF
862
+
863
+ typedef unsigned int sph_u32;
864
+ typedef int sph_s32;
865
+
866
+ #define SPH_C32(x) ((sph_u32)(x ## U))
867
+
868
+ #else
869
+
870
+ typedef unsigned long sph_u32;
871
+ typedef long sph_s32;
872
+
873
+ #define SPH_C32(x) ((sph_u32)(x ## UL))
874
+
875
+ #endif
876
+
877
+ #if !SPH_NO_64
878
+
879
+ /*
880
+ * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
881
+ * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
882
+ * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
883
+ * test whether "unsigned long long" is available; we also know that
884
+ * gcc features this type, even if the libc headers do not know it.
885
+ */
886
+
887
+ #if ((ULONG_MAX >> 31) >> 31) >= 3
888
+
889
+ typedef unsigned long sph_u64;
890
+ typedef long sph_s64;
891
+
892
+ #define SPH_C64(x) ((sph_u64)(x ## UL))
893
+
894
+ #define SPH_64 1
895
+
896
+ #elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__
897
+
898
+ typedef unsigned long long sph_u64;
899
+ typedef long long sph_s64;
900
+
901
+ #define SPH_C64(x) ((sph_u64)(x ## ULL))
902
+
903
+ #define SPH_64 1
904
+
905
+ #else
906
+
907
+ /*
908
+ * No 64-bit type...
909
+ */
910
+
911
+ #endif
912
+
913
+ #endif
914
+
915
+ #endif
916
+
917
+ /*
918
+ * If the "unsigned long" type has length 64 bits or more, then this is
919
+ * a "true" 64-bit architecture. This is also true with Visual C on
920
+ * amd64, even though the "long" type is limited to 32 bits.
921
+ */
922
+ #if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
923
+ #define SPH_64_TRUE 1
924
+ #endif
925
+
926
+ /*
927
+ * Implementation note: some processors have specific opcodes to perform
928
+ * a rotation. Recent versions of gcc recognize the expressions below and
929
+ * use the relevant opcodes, when appropriate.
930
+ */
931
+
932
+ #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
933
+ #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
934
+ #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
935
+
936
+ #if SPH_64
937
+
938
+ #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
939
+ #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
940
+ #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
941
+
942
+ #endif
943
+
944
+ #ifndef DOXYGEN_IGNORE
945
+ /*
946
+ * Define SPH_INLINE to be an "inline" qualifier, if available. We define
947
+ * some small macro-like functions which benefit greatly from being inlined.
948
+ */
949
+ #if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
950
+ #define SPH_INLINE inline
951
+ #elif defined _MSC_VER
952
+ #define SPH_INLINE __inline
953
+ #else
954
+ #define SPH_INLINE
955
+ #endif
956
+ #endif
957
+
958
+ /*
959
+ * We define some macros which qualify the architecture. These macros
960
+ * may be explicitly set externally (e.g. as compiler parameters). The
961
+ * code below sets those macros if they are not already defined.
962
+ *
963
+ * Most macros are boolean, thus evaluate to either zero or non-zero.
964
+ * The SPH_UPTR macro is special, in that it evaluates to a C type,
965
+ * or is not defined.
966
+ *
967
+ * SPH_UPTR if defined: unsigned type to cast pointers into
968
+ *
969
+ * SPH_UNALIGNED non-zero if unaligned accesses are efficient
970
+ * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian
971
+ * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian
972
+ * SPH_LITTLE_FAST non-zero if little-endian decoding is fast
973
+ * SPH_BIG_FAST non-zero if big-endian decoding is fast
974
+ *
975
+ * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
976
+ * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
977
+ * _must_ be non-zero in those situations. The 32-bit and 64-bit types
978
+ * _must_ also have an exact width.
979
+ *
980
+ * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode
981
+ * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode
982
+ * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc
983
+ * SPH_I386_GCC x86-compatible (32-bit) with gcc
984
+ * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C
985
+ * SPH_AMD64_GCC x86-compatible (64-bit) with gcc
986
+ * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C
987
+ * SPH_PPC32_GCC PowerPC, 32-bit, with gcc
988
+ * SPH_PPC64_GCC PowerPC, 64-bit, with gcc
989
+ *
990
+ * TODO: enhance automatic detection, for more architectures and compilers.
991
+ * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
992
+ * some very fast functions (e.g. MD4) when using unaligned input data.
993
+ * The CPU-specific-with-GCC macros are useful only for inline assembly,
994
+ * normally restricted to this header file.
995
+ */
996
+
997
+ /*
998
+ * 32-bit x86, aka "i386 compatible".
999
+ */
1000
+ #if defined __i386__ || defined _M_IX86
1001
+
1002
+ #define SPH_DETECT_UNALIGNED 1
1003
+ #define SPH_DETECT_LITTLE_ENDIAN 1
1004
+ #define SPH_DETECT_UPTR sph_u32
1005
+ #ifdef __GNUC__
1006
+ #define SPH_DETECT_I386_GCC 1
1007
+ #endif
1008
+ #ifdef _MSC_VER
1009
+ #define SPH_DETECT_I386_MSVC 1
1010
+ #endif
1011
+
1012
+ /*
1013
+ * 64-bit x86, hereafter known as "amd64".
1014
+ */
1015
+ #elif defined __x86_64 || defined _M_X64
1016
+
1017
+ #define SPH_DETECT_UNALIGNED 1
1018
+ #define SPH_DETECT_LITTLE_ENDIAN 1
1019
+ #define SPH_DETECT_UPTR sph_u64
1020
+ #ifdef __GNUC__
1021
+ #define SPH_DETECT_AMD64_GCC 1
1022
+ #endif
1023
+ #ifdef _MSC_VER
1024
+ #define SPH_DETECT_AMD64_MSVC 1
1025
+ #endif
1026
+
1027
+ /*
1028
+ * 64-bit Sparc architecture (implies v9).
1029
+ */
1030
+ #elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
1031
+ || defined __sparcv9
1032
+
1033
+ #define SPH_DETECT_BIG_ENDIAN 1
1034
+ #define SPH_DETECT_UPTR sph_u64
1035
+ #ifdef __GNUC__
1036
+ #define SPH_DETECT_SPARCV9_GCC_64 1
1037
+ #define SPH_DETECT_LITTLE_FAST 1
1038
+ #endif
1039
+
1040
+ /*
1041
+ * 32-bit Sparc.
1042
+ */
1043
+ #elif (defined __sparc__ || defined __sparc) \
1044
+ && !(defined __sparcv9 || defined __arch64__)
1045
+
1046
+ #define SPH_DETECT_BIG_ENDIAN 1
1047
+ #define SPH_DETECT_UPTR sph_u32
1048
+ #if defined __GNUC__ && defined __sparc_v9__
1049
+ #define SPH_DETECT_SPARCV9_GCC_32 1
1050
+ #define SPH_DETECT_LITTLE_FAST 1
1051
+ #endif
1052
+
1053
+ /*
1054
+ * ARM, little-endian.
1055
+ */
1056
+ #elif defined __arm__ && __ARMEL__
1057
+
1058
+ #define SPH_DETECT_LITTLE_ENDIAN 1
1059
+
1060
+ /*
1061
+ * MIPS, little-endian.
1062
+ */
1063
+ #elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__
1064
+
1065
+ #define SPH_DETECT_LITTLE_ENDIAN 1
1066
+
1067
+ /*
1068
+ * MIPS, big-endian.
1069
+ */
1070
+ #elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__
1071
+
1072
+ #define SPH_DETECT_BIG_ENDIAN 1
1073
+
1074
+ /*
1075
+ * PowerPC.
1076
+ */
1077
+ #elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
1078
+ || defined _ARCH_PPC
1079
+
1080
+ /*
1081
+ * Note: we do not declare cross-endian access to be "fast": even if
1082
+ * using inline assembly, implementations should still assume that
1083
+ * keeping the decoded word in a temporary is faster than decoding
1084
+ * it again.
1085
+ */
1086
+ #if defined __GNUC__
1087
+ #if SPH_64_TRUE
1088
+ #define SPH_DETECT_PPC64_GCC 1
1089
+ #else
1090
+ #define SPH_DETECT_PPC32_GCC 1
1091
+ #endif
1092
+ #endif
1093
+
1094
+ #if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
1095
+ #define SPH_DETECT_BIG_ENDIAN 1
1096
+ #elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
1097
+ #define SPH_DETECT_LITTLE_ENDIAN 1
1098
+ #endif
1099
+
1100
+ /*
1101
+ * Itanium, 64-bit.
1102
+ */
1103
+ #elif defined __ia64 || defined __ia64__ \
1104
+ || defined __itanium__ || defined _M_IA64
1105
+
1106
+ #if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
1107
+ #define SPH_DETECT_BIG_ENDIAN 1
1108
+ #else
1109
+ #define SPH_DETECT_LITTLE_ENDIAN 1
1110
+ #endif
1111
+ #if defined __LP64__ || defined _LP64
1112
+ #define SPH_DETECT_UPTR sph_u64
1113
+ #else
1114
+ #define SPH_DETECT_UPTR sph_u32
1115
+ #endif
1116
+
1117
+ #endif
1118
+
1119
+ #if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
1120
+ #define SPH_DETECT_SPARCV9_GCC 1
1121
+ #endif
1122
+
1123
+ #if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
1124
+ #define SPH_UNALIGNED SPH_DETECT_UNALIGNED
1125
+ #endif
1126
+ #if defined SPH_DETECT_UPTR && !defined SPH_UPTR
1127
+ #define SPH_UPTR SPH_DETECT_UPTR
1128
+ #endif
1129
+ #if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
1130
+ #define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN
1131
+ #endif
1132
+ #if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
1133
+ #define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN
1134
+ #endif
1135
+ #if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
1136
+ #define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST
1137
+ #endif
1138
+ #if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
1139
+ #define SPH_BIG_FAST SPH_DETECT_BIG_FAST
1140
+ #endif
1141
+ #if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
1142
+ #define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32
1143
+ #endif
1144
+ #if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
1145
+ #define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64
1146
+ #endif
1147
+ #if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
1148
+ #define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC
1149
+ #endif
1150
+ #if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
1151
+ #define SPH_I386_GCC SPH_DETECT_I386_GCC
1152
+ #endif
1153
+ #if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
1154
+ #define SPH_I386_MSVC SPH_DETECT_I386_MSVC
1155
+ #endif
1156
+ #if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
1157
+ #define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC
1158
+ #endif
1159
+ #if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
1160
+ #define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC
1161
+ #endif
1162
+ #if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
1163
+ #define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC
1164
+ #endif
1165
+ #if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
1166
+ #define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC
1167
+ #endif
1168
+
1169
+ #if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
1170
+ #define SPH_LITTLE_FAST 1
1171
+ #endif
1172
+ #if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
1173
+ #define SPH_BIG_FAST 1
1174
+ #endif
1175
+
1176
+ #if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
1177
+ #error SPH_UPTR defined, but endianness is not known.
1178
+ #endif
1179
+
1180
+ #if SPH_I386_GCC && !SPH_NO_ASM
1181
+
1182
+ /*
1183
+ * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
1184
+ * values.
1185
+ */
1186
+
1187
+ static SPH_INLINE sph_u32
1188
+ sph_bswap32(sph_u32 x)
1189
+ {
1190
+ __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1191
+ return x;
1192
+ }
1193
+
1194
+ #if SPH_64
1195
+
1196
+ static SPH_INLINE sph_u64
1197
+ sph_bswap64(sph_u64 x)
1198
+ {
1199
+ return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1200
+ | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1201
+ }
1202
+
1203
+ #endif
1204
+
1205
+ #elif SPH_AMD64_GCC && !SPH_NO_ASM
1206
+
1207
+ /*
1208
+ * On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to
1209
+ * byte-swap 32-bit and 64-bit values, respectively.
1210
+ */
1211
+
1212
+ static SPH_INLINE sph_u32
1213
+ sph_bswap32(sph_u32 x)
1214
+ {
1215
+ __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1216
+ return x;
1217
+ }
1218
+
1219
+ #if SPH_64
1220
+
1221
+ static SPH_INLINE sph_u64
1222
+ sph_bswap64(sph_u64 x)
1223
+ {
1224
+ __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
1225
+ return x;
1226
+ }
1227
+
1228
+ #endif
1229
+
1230
+ /*
1231
+ * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
1232
+ * to generate proper opcodes for endianness swapping with the pure C
1233
+ * implementation below.
1234
+ *
1235
+
1236
+ #elif SPH_I386_MSVC && !SPH_NO_ASM
1237
+
1238
+ static __inline sph_u32 __declspec(naked) __fastcall
1239
+ sph_bswap32(sph_u32 x)
1240
+ {
1241
+ __asm {
1242
+ bswap ecx
1243
+ mov eax,ecx
1244
+ ret
1245
+ }
1246
+ }
1247
+
1248
+ #if SPH_64
1249
+
1250
+ static SPH_INLINE sph_u64
1251
+ sph_bswap64(sph_u64 x)
1252
+ {
1253
+ return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1254
+ | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1255
+ }
1256
+
1257
+ #endif
1258
+
1259
+ *
1260
+ * [end of disabled code]
1261
+ */
1262
+
1263
+ #else
1264
+
1265
+ static SPH_INLINE sph_u32
1266
+ sph_bswap32(sph_u32 x)
1267
+ {
1268
+ x = SPH_T32((x << 16) | (x >> 16));
1269
+ x = ((x & SPH_C32(0xFF00FF00)) >> 8)
1270
+ | ((x & SPH_C32(0x00FF00FF)) << 8);
1271
+ return x;
1272
+ }
1273
+
1274
+ #if SPH_64
1275
+
1276
+ /**
1277
+ * Byte-swap a 64-bit value.
1278
+ *
1279
+ * @param x the input value
1280
+ * @return the byte-swapped value
1281
+ */
1282
+ static SPH_INLINE sph_u64
1283
+ sph_bswap64(sph_u64 x)
1284
+ {
1285
+ x = SPH_T64((x << 32) | (x >> 32));
1286
+ x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
1287
+ | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16);
1288
+ x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
1289
+ | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8);
1290
+ return x;
1291
+ }
1292
+
1293
+ #endif
1294
+
1295
+ #endif
1296
+
1297
+ #if SPH_SPARCV9_GCC && !SPH_NO_ASM
1298
+
1299
+ /*
1300
+ * On UltraSPARC systems, native ordering is big-endian, but it is
1301
+ * possible to perform little-endian read accesses by specifying the
1302
+ * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
1303
+ * the opcode "lda [%reg]0x88,%dst", where %reg is the register which
1304
+ * contains the source address and %dst is the destination register,
1305
+ * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
1306
+ * to get the address space name. The latter format is better since it
1307
+ * combines an addition and the actual access in a single opcode; but
1308
+ * it requires the setting (and subsequent resetting) of %asi, which is
1309
+ * slow. Some operations (e.g. the MD5 compression function) combine many
1310
+ * successive little-endian read accesses, which may share the same
1311
+ * %asi setting. The macros below contain the appropriate inline
1312
+ * assembly.
1313
+ */
1314
+
1315
+ #define SPH_SPARCV9_SET_ASI \
1316
+ sph_u32 sph_sparcv9_asi; \
1317
+ __asm__ __volatile__ ( \
1318
+ "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi));
1319
+
1320
+ #define SPH_SPARCV9_RESET_ASI \
1321
+ __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi));
1322
+
1323
+ #define SPH_SPARCV9_DEC32LE(base, idx) ({ \
1324
+ sph_u32 sph_sparcv9_tmp; \
1325
+ __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
1326
+ : "=r" (sph_sparcv9_tmp) : "r" (base)); \
1327
+ sph_sparcv9_tmp; \
1328
+ })
1329
+
1330
+ #endif
1331
+
1332
+ static SPH_INLINE void
1333
+ sph_enc16be(void *dst, unsigned val)
1334
+ {
1335
+ ((unsigned char *)dst)[0] = (val >> 8);
1336
+ ((unsigned char *)dst)[1] = val;
1337
+ }
1338
+
1339
+ static SPH_INLINE unsigned
1340
+ sph_dec16be(const void *src)
1341
+ {
1342
+ return ((unsigned)(((const unsigned char *)src)[0]) << 8)
1343
+ | (unsigned)(((const unsigned char *)src)[1]);
1344
+ }
1345
+
1346
+ static SPH_INLINE void
1347
+ sph_enc16le(void *dst, unsigned val)
1348
+ {
1349
+ ((unsigned char *)dst)[0] = val;
1350
+ ((unsigned char *)dst)[1] = val >> 8;
1351
+ }
1352
+
1353
+ static SPH_INLINE unsigned
1354
+ sph_dec16le(const void *src)
1355
+ {
1356
+ return (unsigned)(((const unsigned char *)src)[0])
1357
+ | ((unsigned)(((const unsigned char *)src)[1]) << 8);
1358
+ }
1359
+
1360
+ /**
1361
+ * Encode a 32-bit value into the provided buffer (big endian convention).
1362
+ *
1363
+ * @param dst the destination buffer
1364
+ * @param val the 32-bit value to encode
1365
+ */
1366
+ static SPH_INLINE void
1367
+ sph_enc32be(void *dst, sph_u32 val)
1368
+ {
1369
+ #if defined SPH_UPTR
1370
+ #if SPH_UNALIGNED
1371
+ #if SPH_LITTLE_ENDIAN
1372
+ val = sph_bswap32(val);
1373
+ #endif
1374
+ *(sph_u32 *)dst = val;
1375
+ #else
1376
+ if (((SPH_UPTR)dst & 3) == 0) {
1377
+ #if SPH_LITTLE_ENDIAN
1378
+ val = sph_bswap32(val);
1379
+ #endif
1380
+ *(sph_u32 *)dst = val;
1381
+ } else {
1382
+ ((unsigned char *)dst)[0] = (val >> 24);
1383
+ ((unsigned char *)dst)[1] = (val >> 16);
1384
+ ((unsigned char *)dst)[2] = (val >> 8);
1385
+ ((unsigned char *)dst)[3] = val;
1386
+ }
1387
+ #endif
1388
+ #else
1389
+ ((unsigned char *)dst)[0] = (val >> 24);
1390
+ ((unsigned char *)dst)[1] = (val >> 16);
1391
+ ((unsigned char *)dst)[2] = (val >> 8);
1392
+ ((unsigned char *)dst)[3] = val;
1393
+ #endif
1394
+ }
1395
+
1396
+ /**
1397
+ * Encode a 32-bit value into the provided buffer (big endian convention).
1398
+ * The destination buffer must be properly aligned.
1399
+ *
1400
+ * @param dst the destination buffer (32-bit aligned)
1401
+ * @param val the value to encode
1402
+ */
1403
+ static SPH_INLINE void
1404
+ sph_enc32be_aligned(void *dst, sph_u32 val)
1405
+ {
1406
+ #if SPH_LITTLE_ENDIAN
1407
+ *(sph_u32 *)dst = sph_bswap32(val);
1408
+ #elif SPH_BIG_ENDIAN
1409
+ *(sph_u32 *)dst = val;
1410
+ #else
1411
+ ((unsigned char *)dst)[0] = (val >> 24);
1412
+ ((unsigned char *)dst)[1] = (val >> 16);
1413
+ ((unsigned char *)dst)[2] = (val >> 8);
1414
+ ((unsigned char *)dst)[3] = val;
1415
+ #endif
1416
+ }
1417
+
1418
+ /**
1419
+ * Decode a 32-bit value from the provided buffer (big endian convention).
1420
+ *
1421
+ * @param src the source buffer
1422
+ * @return the decoded value
1423
+ */
1424
+ static SPH_INLINE sph_u32
1425
+ sph_dec32be(const void *src)
1426
+ {
1427
+ #if defined SPH_UPTR
1428
+ #if SPH_UNALIGNED
1429
+ #if SPH_LITTLE_ENDIAN
1430
+ return sph_bswap32(*(const sph_u32 *)src);
1431
+ #else
1432
+ return *(const sph_u32 *)src;
1433
+ #endif
1434
+ #else
1435
+ if (((SPH_UPTR)src & 3) == 0) {
1436
+ #if SPH_LITTLE_ENDIAN
1437
+ return sph_bswap32(*(const sph_u32 *)src);
1438
+ #else
1439
+ return *(const sph_u32 *)src;
1440
+ #endif
1441
+ } else {
1442
+ return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1443
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1444
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1445
+ | (sph_u32)(((const unsigned char *)src)[3]);
1446
+ }
1447
+ #endif
1448
+ #else
1449
+ return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1450
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1451
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1452
+ | (sph_u32)(((const unsigned char *)src)[3]);
1453
+ #endif
1454
+ }
1455
+
1456
+ /**
1457
+ * Decode a 32-bit value from the provided buffer (big endian convention).
1458
+ * The source buffer must be properly aligned.
1459
+ *
1460
+ * @param src the source buffer (32-bit aligned)
1461
+ * @return the decoded value
1462
+ */
1463
+ static SPH_INLINE sph_u32
1464
+ sph_dec32be_aligned(const void *src)
1465
+ {
1466
+ #if SPH_LITTLE_ENDIAN
1467
+ return sph_bswap32(*(const sph_u32 *)src);
1468
+ #elif SPH_BIG_ENDIAN
1469
+ return *(const sph_u32 *)src;
1470
+ #else
1471
+ return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1472
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1473
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1474
+ | (sph_u32)(((const unsigned char *)src)[3]);
1475
+ #endif
1476
+ }
1477
+
1478
+ /**
1479
+ * Encode a 32-bit value into the provided buffer (little endian convention).
1480
+ *
1481
+ * @param dst the destination buffer
1482
+ * @param val the 32-bit value to encode
1483
+ */
1484
+ static SPH_INLINE void
1485
+ sph_enc32le(void *dst, sph_u32 val)
1486
+ {
1487
+ #if defined SPH_UPTR
1488
+ #if SPH_UNALIGNED
1489
+ #if SPH_BIG_ENDIAN
1490
+ val = sph_bswap32(val);
1491
+ #endif
1492
+ *(sph_u32 *)dst = val;
1493
+ #else
1494
+ if (((SPH_UPTR)dst & 3) == 0) {
1495
+ #if SPH_BIG_ENDIAN
1496
+ val = sph_bswap32(val);
1497
+ #endif
1498
+ *(sph_u32 *)dst = val;
1499
+ } else {
1500
+ ((unsigned char *)dst)[0] = val;
1501
+ ((unsigned char *)dst)[1] = (val >> 8);
1502
+ ((unsigned char *)dst)[2] = (val >> 16);
1503
+ ((unsigned char *)dst)[3] = (val >> 24);
1504
+ }
1505
+ #endif
1506
+ #else
1507
+ ((unsigned char *)dst)[0] = val;
1508
+ ((unsigned char *)dst)[1] = (val >> 8);
1509
+ ((unsigned char *)dst)[2] = (val >> 16);
1510
+ ((unsigned char *)dst)[3] = (val >> 24);
1511
+ #endif
1512
+ }
1513
+
1514
+ /**
1515
+ * Encode a 32-bit value into the provided buffer (little endian convention).
1516
+ * The destination buffer must be properly aligned.
1517
+ *
1518
+ * @param dst the destination buffer (32-bit aligned)
1519
+ * @param val the value to encode
1520
+ */
1521
+ static SPH_INLINE void
1522
+ sph_enc32le_aligned(void *dst, sph_u32 val)
1523
+ {
1524
+ #if SPH_LITTLE_ENDIAN
1525
+ *(sph_u32 *)dst = val;
1526
+ #elif SPH_BIG_ENDIAN
1527
+ *(sph_u32 *)dst = sph_bswap32(val);
1528
+ #else
1529
+ ((unsigned char *)dst)[0] = val;
1530
+ ((unsigned char *)dst)[1] = (val >> 8);
1531
+ ((unsigned char *)dst)[2] = (val >> 16);
1532
+ ((unsigned char *)dst)[3] = (val >> 24);
1533
+ #endif
1534
+ }
1535
+
1536
+ /**
1537
+ * Decode a 32-bit value from the provided buffer (little endian convention).
1538
+ *
1539
+ * @param src the source buffer
1540
+ * @return the decoded value
1541
+ */
1542
+ static SPH_INLINE sph_u32
1543
+ sph_dec32le(const void *src)
1544
+ {
1545
+ #if defined SPH_UPTR
1546
+ #if SPH_UNALIGNED
1547
+ #if SPH_BIG_ENDIAN
1548
+ return sph_bswap32(*(const sph_u32 *)src);
1549
+ #else
1550
+ return *(const sph_u32 *)src;
1551
+ #endif
1552
+ #else
1553
+ if (((SPH_UPTR)src & 3) == 0) {
1554
+ #if SPH_BIG_ENDIAN
1555
+ #if SPH_SPARCV9_GCC && !SPH_NO_ASM
1556
+ sph_u32 tmp;
1557
+
1558
+ /*
1559
+ * "__volatile__" is needed here because without it,
1560
+ * gcc-3.4.3 miscompiles the code and performs the
1561
+ * access before the test on the address, thus triggering
1562
+ * a bus error...
1563
+ */
1564
+ __asm__ __volatile__ (
1565
+ "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1566
+ return tmp;
1567
+ /*
1568
+ * On PowerPC, this turns out not to be worth the effort: the inline
1569
+ * assembly makes GCC optimizer uncomfortable, which tends to nullify
1570
+ * the decoding gains.
1571
+ *
1572
+ * For most hash functions, using this inline assembly trick changes
1573
+ * hashing speed by less than 5% and often _reduces_ it. The biggest
1574
+ * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
1575
+ * less than 10%. The speed gain on CubeHash is probably due to the
1576
+ * chronic shortage of registers that CubeHash endures; for the other
1577
+ * functions, the generic code appears to be efficient enough already.
1578
+ *
1579
+ #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1580
+ sph_u32 tmp;
1581
+
1582
+ __asm__ __volatile__ (
1583
+ "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1584
+ return tmp;
1585
+ */
1586
+ #else
1587
+ return sph_bswap32(*(const sph_u32 *)src);
1588
+ #endif
1589
+ #else
1590
+ return *(const sph_u32 *)src;
1591
+ #endif
1592
+ } else {
1593
+ return (sph_u32)(((const unsigned char *)src)[0])
1594
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1595
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1596
+ | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1597
+ }
1598
+ #endif
1599
+ #else
1600
+ return (sph_u32)(((const unsigned char *)src)[0])
1601
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1602
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1603
+ | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1604
+ #endif
1605
+ }
1606
+
1607
+ /**
1608
+ * Decode a 32-bit value from the provided buffer (little endian convention).
1609
+ * The source buffer must be properly aligned: when SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN is set, it is read as a whole sph_u32 and no alignment test is performed here.
1610
+ * When neither macro is set, the value is assembled from src[0] (least significant) through src[3].
1611
+ * @param src the source buffer (32-bit aligned)
1612
+ * @return the decoded value
1613
+ */
1614
+ static SPH_INLINE sph_u32
1615
+ sph_dec32le_aligned(const void *src)
1616
+ {
1617
+ #if SPH_LITTLE_ENDIAN /* the word is returned exactly as loaded */
1618
+ return *(const sph_u32 *)src;
1619
+ #elif SPH_BIG_ENDIAN
1620
+ #if SPH_SPARCV9_GCC && !SPH_NO_ASM /* NOTE(review): ASI 0x88 presumably selects a little-endian load; confirm against the SPARC V9 manual */
1621
+ sph_u32 tmp;
1622
+
1623
+ __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1624
+ return tmp;
1625
+ /*
1626
+ * Not worth it generally. The PowerPC variant below sits inside this comment and is never compiled.
1627
+ *
1628
+ #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1629
+ sph_u32 tmp;
1630
+
1631
+ __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1632
+ return tmp;
1633
+ */
1634
+ #else /* generic path under SPH_BIG_ENDIAN: load the word, then sph_bswap32() it */
1635
+ return sph_bswap32(*(const sph_u32 *)src);
1636
+ #endif
1637
+ #else /* neither endianness macro set: decode one byte at a time */
1638
+ return (sph_u32)(((const unsigned char *)src)[0])
1639
+ | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1640
+ | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1641
+ | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1642
+ #endif
1643
+ }
1644
+
1645
+ #if SPH_64
1646
+
1647
+ /**
1648
+ * Encode a 64-bit value into the provided buffer (big endian convention).
1649
+ * The value is stored either as one sph_u64 (byte-swapped first when SPH_LITTLE_ENDIAN is set) or byte by byte with the most significant byte in dst[0].
1650
+ * @param dst the destination buffer
1651
+ * @param val the 64-bit value to encode
1652
+ */
1653
+ static SPH_INLINE void
1654
+ sph_enc64be(void *dst, sph_u64 val)
1655
+ {
1656
+ #if defined SPH_UPTR /* SPH_UPTR: integer type the pointer is cast to for the alignment test below */
1657
+ #if SPH_UNALIGNED /* store the whole word directly, without any alignment test */
1658
+ #if SPH_LITTLE_ENDIAN
1659
+ val = sph_bswap64(val);
1660
+ #endif
1661
+ *(sph_u64 *)dst = val; /* NOTE(review): stored unswapped whenever SPH_LITTLE_ENDIAN is unset; presumably SPH_UPTR implies a known endianness - confirm */
1662
+ #else /* SPH_UNALIGNED not set: test the address before storing a whole word */
1663
+ if (((SPH_UPTR)dst & 7) == 0) { /* three low address bits are zero: whole-word store */
1664
+ #if SPH_LITTLE_ENDIAN
1665
+ val = sph_bswap64(val);
1666
+ #endif
1667
+ *(sph_u64 *)dst = val;
1668
+ } else { /* address not a multiple of 8: write dst[0]..dst[7] individually */
1669
+ ((unsigned char *)dst)[0] = (val >> 56);
1670
+ ((unsigned char *)dst)[1] = (val >> 48);
1671
+ ((unsigned char *)dst)[2] = (val >> 40);
1672
+ ((unsigned char *)dst)[3] = (val >> 32);
1673
+ ((unsigned char *)dst)[4] = (val >> 24);
1674
+ ((unsigned char *)dst)[5] = (val >> 16);
1675
+ ((unsigned char *)dst)[6] = (val >> 8);
1676
+ ((unsigned char *)dst)[7] = val;
1677
+ }
1678
+ #endif
1679
+ #else /* SPH_UPTR not defined: always write dst[0]..dst[7] individually */
1680
+ ((unsigned char *)dst)[0] = (val >> 56);
1681
+ ((unsigned char *)dst)[1] = (val >> 48);
1682
+ ((unsigned char *)dst)[2] = (val >> 40);
1683
+ ((unsigned char *)dst)[3] = (val >> 32);
1684
+ ((unsigned char *)dst)[4] = (val >> 24);
1685
+ ((unsigned char *)dst)[5] = (val >> 16);
1686
+ ((unsigned char *)dst)[6] = (val >> 8);
1687
+ ((unsigned char *)dst)[7] = val;
1688
+ #endif
1689
+ }
1690
+
1691
+ /**
1692
+ * Encode a 64-bit value into the provided buffer (big endian convention).
1693
+ * The destination buffer must be properly aligned: when SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN is set, it is written as a whole sph_u64 and no alignment test is performed here.
1694
+ * When neither macro is set, dst[0] (most significant) through dst[7] are written individually.
1695
+ * @param dst the destination buffer (64-bit aligned)
1696
+ * @param val the value to encode
1697
+ */
1698
+ static SPH_INLINE void
1699
+ sph_enc64be_aligned(void *dst, sph_u64 val)
1700
+ {
1701
+ #if SPH_LITTLE_ENDIAN /* byte-swap with sph_bswap64(), then store the whole word */
1702
+ *(sph_u64 *)dst = sph_bswap64(val);
1703
+ #elif SPH_BIG_ENDIAN /* store the whole word unchanged */
1704
+ *(sph_u64 *)dst = val;
1705
+ #else /* neither endianness macro set: encode one byte at a time */
1706
+ ((unsigned char *)dst)[0] = (val >> 56);
1707
+ ((unsigned char *)dst)[1] = (val >> 48);
1708
+ ((unsigned char *)dst)[2] = (val >> 40);
1709
+ ((unsigned char *)dst)[3] = (val >> 32);
1710
+ ((unsigned char *)dst)[4] = (val >> 24);
1711
+ ((unsigned char *)dst)[5] = (val >> 16);
1712
+ ((unsigned char *)dst)[6] = (val >> 8);
1713
+ ((unsigned char *)dst)[7] = val;
1714
+ #endif
1715
+ }
1716
+
1717
+ /**
1718
+ * Decode a 64-bit value from the provided buffer (big endian convention).
1719
+ * The value is read either as one sph_u64 (byte-swapped when SPH_LITTLE_ENDIAN is set) or byte by byte with src[0] as the most significant byte.
1720
+ * @param src the source buffer
1721
+ * @return the decoded value
1722
+ */
1723
+ static SPH_INLINE sph_u64
1724
+ sph_dec64be(const void *src)
1725
+ {
1726
+ #if defined SPH_UPTR /* SPH_UPTR: integer type the pointer is cast to for the alignment test below */
1727
+ #if SPH_UNALIGNED /* read the whole word directly, without any alignment test */
1728
+ #if SPH_LITTLE_ENDIAN
1729
+ return sph_bswap64(*(const sph_u64 *)src);
1730
+ #else
1731
+ return *(const sph_u64 *)src;
1732
+ #endif
1733
+ #else /* SPH_UNALIGNED not set: test the address before reading a whole word */
1734
+ if (((SPH_UPTR)src & 7) == 0) { /* three low address bits are zero: whole-word read */
1735
+ #if SPH_LITTLE_ENDIAN
1736
+ return sph_bswap64(*(const sph_u64 *)src);
1737
+ #else
1738
+ return *(const sph_u64 *)src;
1739
+ #endif
1740
+ } else { /* address not a multiple of 8: assemble the value one byte at a time */
1741
+ return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1742
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1743
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1744
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1745
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1746
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1747
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1748
+ | (sph_u64)(((const unsigned char *)src)[7]);
1749
+ }
1750
+ #endif
1751
+ #else /* SPH_UPTR not defined: always assemble the value one byte at a time */
1752
+ return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1753
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1754
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1755
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1756
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1757
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1758
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1759
+ | (sph_u64)(((const unsigned char *)src)[7]);
1760
+ #endif
1761
+ }
1762
+
1763
+ /**
1764
+ * Decode a 64-bit value from the provided buffer (big endian convention).
1765
+ * The source buffer must be properly aligned: when SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN is set, it is read as a whole sph_u64 and no alignment test is performed here.
1766
+ * When neither macro is set, the value is assembled from src[0] (most significant) through src[7].
1767
+ * @param src the source buffer (64-bit aligned)
1768
+ * @return the decoded value
1769
+ */
1770
+ static SPH_INLINE sph_u64
1771
+ sph_dec64be_aligned(const void *src)
1772
+ {
1773
+ #if SPH_LITTLE_ENDIAN /* load the whole word, then byte-swap it with sph_bswap64() */
1774
+ return sph_bswap64(*(const sph_u64 *)src);
1775
+ #elif SPH_BIG_ENDIAN /* the word is returned exactly as loaded */
1776
+ return *(const sph_u64 *)src;
1777
+ #else /* neither endianness macro set: decode one byte at a time */
1778
+ return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1779
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1780
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1781
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1782
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1783
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1784
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1785
+ | (sph_u64)(((const unsigned char *)src)[7]);
1786
+ #endif
1787
+ }
1788
+
1789
+ /**
1790
+ * Encode a 64-bit value into the provided buffer (little endian convention).
1791
+ * The value is stored either as one sph_u64 (byte-swapped first when SPH_BIG_ENDIAN is set) or byte by byte with the least significant byte in dst[0].
1792
+ * @param dst the destination buffer
1793
+ * @param val the 64-bit value to encode
1794
+ */
1795
+ static SPH_INLINE void
1796
+ sph_enc64le(void *dst, sph_u64 val)
1797
+ {
1798
+ #if defined SPH_UPTR /* SPH_UPTR: integer type the pointer is cast to for the alignment test below */
1799
+ #if SPH_UNALIGNED /* store the whole word directly, without any alignment test */
1800
+ #if SPH_BIG_ENDIAN
1801
+ val = sph_bswap64(val);
1802
+ #endif
1803
+ *(sph_u64 *)dst = val; /* NOTE(review): stored unswapped whenever SPH_BIG_ENDIAN is unset; presumably SPH_UPTR implies a known endianness - confirm */
1804
+ #else /* SPH_UNALIGNED not set: test the address before storing a whole word */
1805
+ if (((SPH_UPTR)dst & 7) == 0) { /* three low address bits are zero: whole-word store */
1806
+ #if SPH_BIG_ENDIAN
1807
+ val = sph_bswap64(val);
1808
+ #endif
1809
+ *(sph_u64 *)dst = val;
1810
+ } else { /* address not a multiple of 8: write dst[0]..dst[7] individually */
1811
+ ((unsigned char *)dst)[0] = val;
1812
+ ((unsigned char *)dst)[1] = (val >> 8);
1813
+ ((unsigned char *)dst)[2] = (val >> 16);
1814
+ ((unsigned char *)dst)[3] = (val >> 24);
1815
+ ((unsigned char *)dst)[4] = (val >> 32);
1816
+ ((unsigned char *)dst)[5] = (val >> 40);
1817
+ ((unsigned char *)dst)[6] = (val >> 48);
1818
+ ((unsigned char *)dst)[7] = (val >> 56);
1819
+ }
1820
+ #endif
1821
+ #else /* SPH_UPTR not defined: always write dst[0]..dst[7] individually */
1822
+ ((unsigned char *)dst)[0] = val;
1823
+ ((unsigned char *)dst)[1] = (val >> 8);
1824
+ ((unsigned char *)dst)[2] = (val >> 16);
1825
+ ((unsigned char *)dst)[3] = (val >> 24);
1826
+ ((unsigned char *)dst)[4] = (val >> 32);
1827
+ ((unsigned char *)dst)[5] = (val >> 40);
1828
+ ((unsigned char *)dst)[6] = (val >> 48);
1829
+ ((unsigned char *)dst)[7] = (val >> 56);
1830
+ #endif
1831
+ }
1832
+
1833
+ /**
1834
+ * Encode a 64-bit value into the provided buffer (little endian convention).
1835
+ * The destination buffer must be properly aligned: when SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN is set, it is written as a whole sph_u64 and no alignment test is performed here.
1836
+ * When neither macro is set, dst[0] (least significant) through dst[7] are written individually.
1837
+ * @param dst the destination buffer (64-bit aligned)
1838
+ * @param val the value to encode
1839
+ */
1840
+ static SPH_INLINE void
1841
+ sph_enc64le_aligned(void *dst, sph_u64 val)
1842
+ {
1843
+ #if SPH_LITTLE_ENDIAN /* store the whole word unchanged */
1844
+ *(sph_u64 *)dst = val;
1845
+ #elif SPH_BIG_ENDIAN /* byte-swap with sph_bswap64(), then store the whole word */
1846
+ *(sph_u64 *)dst = sph_bswap64(val);
1847
+ #else /* neither endianness macro set: encode one byte at a time */
1848
+ ((unsigned char *)dst)[0] = val;
1849
+ ((unsigned char *)dst)[1] = (val >> 8);
1850
+ ((unsigned char *)dst)[2] = (val >> 16);
1851
+ ((unsigned char *)dst)[3] = (val >> 24);
1852
+ ((unsigned char *)dst)[4] = (val >> 32);
1853
+ ((unsigned char *)dst)[5] = (val >> 40);
1854
+ ((unsigned char *)dst)[6] = (val >> 48);
1855
+ ((unsigned char *)dst)[7] = (val >> 56);
1856
+ #endif
1857
+ }
1858
+
1859
+ /**
1860
+ * Decode a 64-bit value from the provided buffer (little endian convention).
1861
+ * The value is read either as one sph_u64 (converted when SPH_BIG_ENDIAN is set) or byte by byte with src[0] as the least significant byte.
1862
+ * @param src the source buffer
1863
+ * @return the decoded value
1864
+ */
1865
+ static SPH_INLINE sph_u64
1866
+ sph_dec64le(const void *src)
1867
+ {
1868
+ #if defined SPH_UPTR /* SPH_UPTR: integer type the pointer is cast to for the alignment test below */
1869
+ #if SPH_UNALIGNED /* read the whole word directly, without any alignment test */
1870
+ #if SPH_BIG_ENDIAN
1871
+ return sph_bswap64(*(const sph_u64 *)src);
1872
+ #else
1873
+ return *(const sph_u64 *)src;
1874
+ #endif
1875
+ #else /* SPH_UNALIGNED not set: test the address before reading a whole word */
1876
+ if (((SPH_UPTR)src & 7) == 0) { /* three low address bits are zero: whole-word read */
1877
+ #if SPH_BIG_ENDIAN
1878
+ #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM /* NOTE(review): ASI 0x88 presumably selects a little-endian load; confirm against the SPARC V9 manual */
1879
+ sph_u64 tmp;
1880
+
1881
+ __asm__ __volatile__ (
1882
+ "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1883
+ return tmp;
1884
+ /*
1885
+ * Not worth it generally. The PowerPC variants below sit inside this comment and are never compiled.
1886
+ *
1887
+ #elif SPH_PPC32_GCC && !SPH_NO_ASM
1888
+ return (sph_u64)sph_dec32le_aligned(src)
1889
+ | ((sph_u64)sph_dec32le_aligned(
1890
+ (const char *)src + 4) << 32);
1891
+ #elif SPH_PPC64_GCC && !SPH_NO_ASM
1892
+ sph_u64 tmp;
1893
+
1894
+ __asm__ __volatile__ (
1895
+ "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1896
+ return tmp;
1897
+ */
1898
+ #else /* generic path under SPH_BIG_ENDIAN: load the word, then sph_bswap64() it */
1899
+ return sph_bswap64(*(const sph_u64 *)src);
1900
+ #endif
1901
+ #else
1902
+ return *(const sph_u64 *)src;
1903
+ #endif
1904
+ } else { /* address not a multiple of 8: assemble the value one byte at a time */
1905
+ return (sph_u64)(((const unsigned char *)src)[0])
1906
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1907
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1908
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1909
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1910
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1911
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1912
+ | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1913
+ }
1914
+ #endif
1915
+ #else /* SPH_UPTR not defined: always assemble the value one byte at a time */
1916
+ return (sph_u64)(((const unsigned char *)src)[0])
1917
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1918
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1919
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1920
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1921
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1922
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1923
+ | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1924
+ #endif
1925
+ }
1926
+
1927
+ /**
1928
+ * Decode a 64-bit value from the provided buffer (little endian convention).
1929
+ * The source buffer must be properly aligned: when SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN is set, it is read as a whole sph_u64 and no alignment test is performed here.
1930
+ * When neither macro is set, the value is assembled from src[0] (least significant) through src[7].
1931
+ * @param src the source buffer (64-bit aligned)
1932
+ * @return the decoded value
1933
+ */
1934
+ static SPH_INLINE sph_u64
1935
+ sph_dec64le_aligned(const void *src)
1936
+ {
1937
+ #if SPH_LITTLE_ENDIAN /* the word is returned exactly as loaded */
1938
+ return *(const sph_u64 *)src;
1939
+ #elif SPH_BIG_ENDIAN
1940
+ #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM /* NOTE(review): ASI 0x88 presumably selects a little-endian load; confirm against the SPARC V9 manual */
1941
+ sph_u64 tmp;
1942
+
1943
+ __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1944
+ return tmp;
1945
+ /*
1946
+ * Not worth it generally. The PowerPC variants below sit inside this comment and are never compiled.
1947
+ *
1948
+ #elif SPH_PPC32_GCC && !SPH_NO_ASM
1949
+ return (sph_u64)sph_dec32le_aligned(src)
1950
+ | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
1951
+ #elif SPH_PPC64_GCC && !SPH_NO_ASM
1952
+ sph_u64 tmp;
1953
+
1954
+ __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1955
+ return tmp;
1956
+ */
1957
+ #else /* generic path under SPH_BIG_ENDIAN: load the word, then sph_bswap64() it */
1958
+ return sph_bswap64(*(const sph_u64 *)src);
1959
+ #endif
1960
+ #else /* neither endianness macro set: decode one byte at a time */
1961
+ return (sph_u64)(((const unsigned char *)src)[0])
1962
+ | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1963
+ | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1964
+ | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1965
+ | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1966
+ | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1967
+ | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1968
+ | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1969
+ #endif
1970
+ }
1971
+
1972
+ #endif
1973
+
1974
+ #endif /* Doxygen excluded block */
1975
+
1976
+ #endif