xxhash 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/CHANGELOG.md +9 -0
- data/README.md +10 -2
- data/ext/xxhash/libxxhash.c +181 -89
- data/ext/xxhash/libxxhash.h +21 -16
- data/ext/xxhash/xxhash.cc +37 -0
- data/lib/xxhash.rb +12 -0
- data/lib/xxhash/version.rb +1 -1
- data/test/xxhash_test.rb +19 -0
- metadata +5 -5
- data/CHANGELOG +0 -5
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
### 0.2.0 (September 4, 2013)
|
|
2
|
+
* xxHash updated to [r32](https://code.google.com/p/xxhash/source/detail?r=32)
|
|
3
|
+
* add `XXhash.xxh32_stream` method (by [@maltoe](https://github.com/maltoe))
|
|
4
|
+
|
|
5
|
+
### 0.1.1 (June 4, 2013)
|
|
6
|
+
* remove .bundle from gem
|
|
7
|
+
|
|
8
|
+
### 0.1.0 (May 16, 2013)
|
|
9
|
+
* xxHash updated to [r29](https://code.google.com/p/xxhash/source/detail?r=29)
|
data/README.md
CHANGED
|
@@ -17,6 +17,14 @@ seed = 12345
|
|
|
17
17
|
XXhash.xxh32(text, seed) # => 3834992036
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
+
You can use it with `IO` objects too:
|
|
21
|
+
|
|
22
|
+
```ruby
|
|
23
|
+
XXhash.xxh32_stream(StringIO.new('test'), 123) # => 2758658570
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Note that you can also pass a chunk size as the third parameter (it is 32 bytes by default).
|
|
27
|
+
|
|
20
28
|
### Supported Ruby versions
|
|
21
29
|
|
|
22
30
|
- MRI 1.9.3
|
|
@@ -26,7 +34,7 @@ Note: It doesn't work on JRuby as it uses C extension.
|
|
|
26
34
|
|
|
27
35
|
### Versioning
|
|
28
36
|
|
|
29
|
-
Version 0.
|
|
37
|
+
Version 0.2.0 is equal to [r32](https://code.google.com/p/xxhash/source/detail?r=32)
|
|
30
38
|
|
|
31
39
|
## Contributing
|
|
32
40
|
|
|
@@ -38,5 +46,5 @@ Version 0.1.0 is equal to [r29](https://code.google.com/p/xxhash/source/detail?r
|
|
|
38
46
|
|
|
39
47
|
### Copyright
|
|
40
48
|
|
|
41
|
-
Copyright (c)
|
|
49
|
+
Copyright (c) 2013 Vasiliy Ermolovich. See LICENSE.txt for
|
|
42
50
|
further details.
|
data/ext/xxhash/libxxhash.c
CHANGED
|
@@ -31,13 +31,20 @@ You can contact the author at :
|
|
|
31
31
|
*/
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
|
|
35
34
|
//**************************************
|
|
36
35
|
// Tuning parameters
|
|
37
36
|
//**************************************
|
|
37
|
+
// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
|
|
38
|
+
// For other CPUs, the compiler will be more cautious and insert extra code to ensure aligned access is respected.
|
|
39
|
+
// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
|
|
40
|
+
// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
|
|
41
|
+
#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
|
|
42
|
+
# define XXH_USE_UNALIGNED_ACCESS 1
|
|
43
|
+
#endif
|
|
44
|
+
|
|
38
45
|
// XXH_ACCEPT_NULL_INPUT_POINTER :
|
|
39
|
-
// If the input pointer is a null pointer, xxHash default behavior is to
|
|
40
|
-
//
|
|
46
|
+
// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
|
|
47
|
+
// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
|
|
41
48
|
// This option has a very small performance cost (only measurable on small inputs).
|
|
42
49
|
// By default, this option is disabled. To enable it, uncomment below define :
|
|
43
50
|
//#define XXH_ACCEPT_NULL_INPUT_POINTER 1
|
|
@@ -45,49 +52,44 @@ You can contact the author at :
|
|
|
45
52
|
// XXH_FORCE_NATIVE_FORMAT :
|
|
46
53
|
// By default, xxHash library provides endian-independent hash values, based on little-endian convention.
|
|
47
54
|
// Results are therefore identical for little-endian and big-endian CPU.
|
|
48
|
-
// This comes at a
|
|
49
|
-
// Should endian-independance be of no importance
|
|
55
|
+
// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
|
56
|
+
// Should endian-independence be of no importance for your application, you may set the #define below to 1.
|
|
50
57
|
// It will improve speed for Big-endian CPU.
|
|
51
58
|
// This option has no impact on Little_Endian CPU.
|
|
52
|
-
|
|
53
|
-
|
|
59
|
+
#define XXH_FORCE_NATIVE_FORMAT 0
|
|
54
60
|
|
|
55
61
|
|
|
56
62
|
//**************************************
|
|
57
|
-
//
|
|
63
|
+
// Compiler Specific Options
|
|
58
64
|
//**************************************
|
|
59
|
-
|
|
60
|
-
#
|
|
61
|
-
#
|
|
65
|
+
// Disable some Visual warning messages
|
|
66
|
+
#ifdef _MSC_VER // Visual Studio
|
|
67
|
+
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
|
|
68
|
+
#endif
|
|
62
69
|
|
|
70
|
+
#ifdef _MSC_VER // Visual Studio
|
|
71
|
+
# define forceinline static __forceinline
|
|
72
|
+
#else
|
|
73
|
+
# ifdef __GNUC__
|
|
74
|
+
# define forceinline static inline __attribute__((always_inline))
|
|
75
|
+
# else
|
|
76
|
+
# define forceinline static inline
|
|
77
|
+
# endif
|
|
78
|
+
#endif
|
|
63
79
|
|
|
64
80
|
|
|
65
81
|
//**************************************
|
|
66
|
-
//
|
|
82
|
+
// Includes & Memory related functions
|
|
67
83
|
//**************************************
|
|
68
|
-
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
#
|
|
76
|
-
|
|
77
|
-
# endif
|
|
78
|
-
#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
|
|
79
|
-
# define XXH_BIG_ENDIAN 1
|
|
80
|
-
#elif defined(__sparc) || defined(__sparc__) \
|
|
81
|
-
|| defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
|
|
82
|
-
|| defined(__hpux) || defined(__hppa) \
|
|
83
|
-
|| defined(_MIPSEB) || defined(__s390__)
|
|
84
|
-
# define XXH_BIG_ENDIAN 1
|
|
85
|
-
#endif
|
|
86
|
-
|
|
87
|
-
#if !defined(XXH_BIG_ENDIAN)
|
|
88
|
-
// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
|
|
89
|
-
# define XXH_BIG_ENDIAN 0
|
|
90
|
-
#endif
|
|
84
|
+
#include "libxxhash.h"
|
|
85
|
+
// Modify the local functions below should you wish to use some other memory related routines
|
|
86
|
+
// for malloc(), free()
|
|
87
|
+
#include <stdlib.h>
|
|
88
|
+
forceinline void* XXH_malloc(size_t s) { return malloc(s); }
|
|
89
|
+
forceinline void XXH_free (void* p) { free(p); }
|
|
90
|
+
// for memcpy()
|
|
91
|
+
#include <string.h>
|
|
92
|
+
forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
|
|
91
93
|
|
|
92
94
|
|
|
93
95
|
//**************************************
|
|
@@ -101,21 +103,43 @@ You can contact the author at :
|
|
|
101
103
|
typedef int32_t S32;
|
|
102
104
|
typedef uint64_t U64;
|
|
103
105
|
#else
|
|
104
|
-
typedef unsigned char
|
|
105
|
-
typedef unsigned short
|
|
106
|
-
typedef unsigned int
|
|
107
|
-
typedef signed int
|
|
108
|
-
typedef unsigned long long
|
|
106
|
+
typedef unsigned char BYTE;
|
|
107
|
+
typedef unsigned short U16;
|
|
108
|
+
typedef unsigned int U32;
|
|
109
|
+
typedef signed int S32;
|
|
110
|
+
typedef unsigned long long U64;
|
|
109
111
|
#endif
|
|
110
112
|
|
|
113
|
+
#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
|
|
114
|
+
# define _PACKED __attribute__ ((packed))
|
|
115
|
+
#else
|
|
116
|
+
# define _PACKED
|
|
117
|
+
#endif
|
|
111
118
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
119
|
+
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
|
|
120
|
+
# ifdef __IBMC__
|
|
121
|
+
# pragma pack(1)
|
|
122
|
+
# else
|
|
123
|
+
# pragma pack(push, 1)
|
|
124
|
+
# endif
|
|
125
|
+
#endif
|
|
126
|
+
|
|
127
|
+
typedef struct _U32_S { U32 v; } _PACKED U32_S;
|
|
128
|
+
|
|
129
|
+
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
|
|
130
|
+
# pragma pack(pop)
|
|
131
|
+
#endif
|
|
132
|
+
|
|
133
|
+
#define A32(x) (((U32_S *)(x))->v)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
//***************************************
|
|
137
|
+
// Compiler-specific Functions and Macros
|
|
138
|
+
//***************************************
|
|
115
139
|
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
|
116
140
|
|
|
117
|
-
// Note :
|
|
118
|
-
#if defined(
|
|
141
|
+
// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
|
|
142
|
+
#if defined(_MSC_VER)
|
|
119
143
|
# define XXH_rotl32(x,r) _rotl(x,r)
|
|
120
144
|
#else
|
|
121
145
|
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
|
@@ -145,31 +169,48 @@ static inline U32 XXH_swap32 (U32 x) {
|
|
|
145
169
|
|
|
146
170
|
|
|
147
171
|
//**************************************
|
|
148
|
-
// Macros
|
|
172
|
+
// Architecture Macros
|
|
149
173
|
//**************************************
|
|
150
|
-
|
|
174
|
+
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
|
|
175
|
+
#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
|
|
176
|
+
static const int one = 1;
|
|
177
|
+
# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one))
|
|
178
|
+
#endif
|
|
151
179
|
|
|
152
180
|
|
|
181
|
+
//**************************************
|
|
182
|
+
// Macros
|
|
183
|
+
//**************************************
|
|
184
|
+
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations
|
|
185
|
+
|
|
153
186
|
|
|
154
187
|
//****************************
|
|
155
|
-
//
|
|
188
|
+
// Memory reads
|
|
156
189
|
//****************************
|
|
190
|
+
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
|
|
157
191
|
|
|
158
|
-
U32
|
|
192
|
+
forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
|
|
159
193
|
{
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
194
|
+
if (align==XXH_unaligned)
|
|
195
|
+
return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
|
|
196
|
+
else
|
|
197
|
+
return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
|
|
201
|
+
|
|
166
202
|
|
|
203
|
+
//****************************
|
|
204
|
+
// Simple Hash Functions
|
|
205
|
+
//****************************
|
|
206
|
+
forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align)
|
|
207
|
+
{
|
|
167
208
|
const BYTE* p = (const BYTE*)input;
|
|
168
209
|
const BYTE* const bEnd = p + len;
|
|
169
210
|
U32 h32;
|
|
170
211
|
|
|
171
212
|
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
|
172
|
-
if (p==NULL) { len=0; p=(const BYTE*)16; }
|
|
213
|
+
if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
|
|
173
214
|
#endif
|
|
174
215
|
|
|
175
216
|
if (len>=16)
|
|
@@ -182,10 +223,10 @@ U32 XXH32(const void* input, int len, U32 seed)
|
|
|
182
223
|
|
|
183
224
|
do
|
|
184
225
|
{
|
|
185
|
-
v1 +=
|
|
186
|
-
v2 +=
|
|
187
|
-
v3 +=
|
|
188
|
-
v4 +=
|
|
226
|
+
v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
|
|
227
|
+
v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
|
|
228
|
+
v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
|
|
229
|
+
v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
|
|
189
230
|
} while (p<=limit);
|
|
190
231
|
|
|
191
232
|
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
|
|
@@ -199,8 +240,8 @@ U32 XXH32(const void* input, int len, U32 seed)
|
|
|
199
240
|
|
|
200
241
|
while (p<=bEnd-4)
|
|
201
242
|
{
|
|
202
|
-
h32 +=
|
|
203
|
-
h32
|
|
243
|
+
h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
|
|
244
|
+
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
|
|
204
245
|
p+=4;
|
|
205
246
|
}
|
|
206
247
|
|
|
@@ -218,7 +259,33 @@ U32 XXH32(const void* input, int len, U32 seed)
|
|
|
218
259
|
h32 ^= h32 >> 16;
|
|
219
260
|
|
|
220
261
|
return h32;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
U32 XXH32(const void* input, int len, U32 seed)
|
|
266
|
+
{
|
|
267
|
+
#if 0
|
|
268
|
+
// Simple version, good for code maintenance, but unfortunately slow for small inputs
|
|
269
|
+
void* state = XXH32_init(seed);
|
|
270
|
+
XXH32_update(state, input, len);
|
|
271
|
+
return XXH32_digest(state);
|
|
272
|
+
#else
|
|
273
|
+
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
|
221
274
|
|
|
275
|
+
# if !defined(XXH_USE_UNALIGNED_ACCESS)
|
|
276
|
+
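    if ((((size_t)input) & 3))   // NOTE(review): this test is true when input is NOT 4-byte aligned, yet the branch takes the XXH_aligned path; the condition looks inverted (expected !(... & 3)) - confirm against upstream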
|
|
277
|
+
{
|
|
278
|
+
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
|
279
|
+
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
|
|
280
|
+
else
|
|
281
|
+
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
|
|
282
|
+
}
|
|
283
|
+
# endif
|
|
284
|
+
|
|
285
|
+
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
|
286
|
+
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
|
|
287
|
+
else
|
|
288
|
+
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
|
|
222
289
|
#endif
|
|
223
290
|
}
|
|
224
291
|
|
|
@@ -229,21 +296,25 @@ U32 XXH32(const void* input, int len, U32 seed)
|
|
|
229
296
|
|
|
230
297
|
struct XXH_state32_t
|
|
231
298
|
{
|
|
299
|
+
U64 total_len;
|
|
232
300
|
U32 seed;
|
|
233
301
|
U32 v1;
|
|
234
302
|
U32 v2;
|
|
235
303
|
U32 v3;
|
|
236
304
|
U32 v4;
|
|
237
|
-
U64 total_len;
|
|
238
|
-
char memory[16];
|
|
239
305
|
int memsize;
|
|
306
|
+
char memory[16];
|
|
240
307
|
};
|
|
241
308
|
|
|
242
309
|
|
|
243
|
-
int XXH32_sizeofState()
|
|
310
|
+
int XXH32_sizeofState()
|
|
311
|
+
{
|
|
312
|
+
XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough
|
|
313
|
+
return sizeof(struct XXH_state32_t);
|
|
314
|
+
}
|
|
244
315
|
|
|
245
316
|
|
|
246
|
-
XXH_errorcode XXH32_resetState(void* state_in,
|
|
317
|
+
XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
|
|
247
318
|
{
|
|
248
319
|
struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
|
|
249
320
|
state->seed = seed;
|
|
@@ -253,19 +324,19 @@ XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed)
|
|
|
253
324
|
state->v4 = seed - PRIME32_1;
|
|
254
325
|
state->total_len = 0;
|
|
255
326
|
state->memsize = 0;
|
|
256
|
-
return
|
|
327
|
+
return XXH_OK;
|
|
257
328
|
}
|
|
258
329
|
|
|
259
330
|
|
|
260
331
|
void* XXH32_init (U32 seed)
|
|
261
332
|
{
|
|
262
|
-
|
|
333
|
+
void* state = XXH_malloc (sizeof(struct XXH_state32_t));
|
|
263
334
|
XXH32_resetState(state, seed);
|
|
264
|
-
return
|
|
335
|
+
return state;
|
|
265
336
|
}
|
|
266
337
|
|
|
267
338
|
|
|
268
|
-
XXH_errorcode
|
|
339
|
+
forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
|
|
269
340
|
{
|
|
270
341
|
struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
|
|
271
342
|
const BYTE* p = (const BYTE*)input;
|
|
@@ -279,20 +350,20 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
|
|
|
279
350
|
|
|
280
351
|
if (state->memsize + len < 16) // fill in tmp buffer
|
|
281
352
|
{
|
|
282
|
-
|
|
353
|
+
XXH_memcpy(state->memory + state->memsize, input, len);
|
|
283
354
|
state->memsize += len;
|
|
284
|
-
return
|
|
355
|
+
return XXH_OK;
|
|
285
356
|
}
|
|
286
357
|
|
|
287
358
|
if (state->memsize) // some data left from previous update
|
|
288
359
|
{
|
|
289
|
-
|
|
360
|
+
XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
|
|
290
361
|
{
|
|
291
362
|
const U32* p32 = (const U32*)state->memory;
|
|
292
|
-
state->v1 +=
|
|
293
|
-
state->v2 +=
|
|
294
|
-
state->v3 +=
|
|
295
|
-
state->v4 +=
|
|
363
|
+
state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
|
|
364
|
+
state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
|
|
365
|
+
state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
|
|
366
|
+
state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
|
|
296
367
|
}
|
|
297
368
|
p += 16-state->memsize;
|
|
298
369
|
state->memsize = 0;
|
|
@@ -308,10 +379,10 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
|
|
|
308
379
|
|
|
309
380
|
do
|
|
310
381
|
{
|
|
311
|
-
v1 +=
|
|
312
|
-
v2 +=
|
|
313
|
-
v3 +=
|
|
314
|
-
v4 +=
|
|
382
|
+
v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
|
|
383
|
+
v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
|
|
384
|
+
v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
|
|
385
|
+
v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
|
|
315
386
|
} while (p<=limit);
|
|
316
387
|
|
|
317
388
|
state->v1 = v1;
|
|
@@ -322,22 +393,32 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
|
|
|
322
393
|
|
|
323
394
|
if (p < bEnd)
|
|
324
395
|
{
|
|
325
|
-
|
|
396
|
+
XXH_memcpy(state->memory, p, bEnd-p);
|
|
326
397
|
state->memsize = (int)(bEnd-p);
|
|
327
398
|
}
|
|
328
399
|
|
|
329
|
-
return
|
|
400
|
+
return XXH_OK;
|
|
330
401
|
}
|
|
331
402
|
|
|
403
|
+
XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
|
|
404
|
+
{
|
|
405
|
+
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
|
332
406
|
|
|
333
|
-
|
|
407
|
+
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
|
408
|
+
return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
|
|
409
|
+
else
|
|
410
|
+
return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
|
|
334
416
|
{
|
|
335
417
|
struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
|
|
336
|
-
BYTE * p
|
|
418
|
+
const BYTE * p = (const BYTE*)state->memory;
|
|
337
419
|
BYTE* bEnd = (BYTE*)state->memory + state->memsize;
|
|
338
420
|
U32 h32;
|
|
339
421
|
|
|
340
|
-
|
|
341
422
|
if (state->total_len >= 16)
|
|
342
423
|
{
|
|
343
424
|
h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
|
|
@@ -351,8 +432,8 @@ U32 XXH32_intermediateDigest (void* state_in)
|
|
|
351
432
|
|
|
352
433
|
while (p<=bEnd-4)
|
|
353
434
|
{
|
|
354
|
-
h32 +=
|
|
355
|
-
h32
|
|
435
|
+
h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
|
|
436
|
+
h32 = XXH_rotl32(h32, 17) * PRIME32_4;
|
|
356
437
|
p+=4;
|
|
357
438
|
}
|
|
358
439
|
|
|
@@ -373,11 +454,22 @@ U32 XXH32_intermediateDigest (void* state_in)
|
|
|
373
454
|
}
|
|
374
455
|
|
|
375
456
|
|
|
457
|
+
U32 XXH32_intermediateDigest (void* state_in)
|
|
458
|
+
{
|
|
459
|
+
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
|
460
|
+
|
|
461
|
+
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
|
462
|
+
return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
|
|
463
|
+
else
|
|
464
|
+
return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
|
|
376
468
|
U32 XXH32_digest (void* state_in)
|
|
377
469
|
{
|
|
378
470
|
U32 h32 = XXH32_intermediateDigest(state_in);
|
|
379
471
|
|
|
380
|
-
|
|
472
|
+
XXH_free(state_in);
|
|
381
473
|
|
|
382
474
|
return h32;
|
|
383
475
|
}
|
data/ext/xxhash/libxxhash.h
CHANGED
|
@@ -27,8 +27,8 @@
|
|
|
27
27
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
28
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
You can contact the author at :
|
|
31
|
+
- xxHash source repository : http://code.google.com/p/xxhash/
|
|
32
32
|
*/
|
|
33
33
|
|
|
34
34
|
/* Notice extracted from xxHash homepage :
|
|
@@ -67,7 +67,7 @@ extern "C" {
|
|
|
67
67
|
//****************************
|
|
68
68
|
// Type
|
|
69
69
|
//****************************
|
|
70
|
-
typedef enum {
|
|
70
|
+
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
|
|
@@ -79,13 +79,13 @@ unsigned int XXH32 (const void* input, int len, unsigned int seed);
|
|
|
79
79
|
|
|
80
80
|
/*
|
|
81
81
|
XXH32() :
|
|
82
|
-
|
|
82
|
+
Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
|
|
83
83
|
The memory between input & input+len must be valid (allocated and read-accessible).
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
84
|
+
"seed" can be used to alter the result predictably.
|
|
85
|
+
This function successfully passes all SMHasher tests.
|
|
86
|
+
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
|
|
87
|
+
Note that "len" is type "int", which means it is limited to 2^31-1.
|
|
88
|
+
If your data is larger, use the advanced functions below.
|
|
89
89
|
*/
|
|
90
90
|
|
|
91
91
|
|
|
@@ -122,14 +122,19 @@ Memory will be freed by XXH32_digest().
|
|
|
122
122
|
|
|
123
123
|
|
|
124
124
|
int XXH32_sizeofState();
|
|
125
|
-
XXH_errorcode XXH32_resetState(void*
|
|
125
|
+
XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
|
|
126
|
+
|
|
127
|
+
#define XXH32_SIZEOFSTATE 48
|
|
128
|
+
typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
|
|
126
129
|
/*
|
|
127
|
-
These functions
|
|
128
|
-
|
|
130
|
+
These functions allow user application to make its own allocation for state.
|
|
131
|
+
|
|
132
|
+
XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state.
|
|
133
|
+
Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
|
|
134
|
+
This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state.
|
|
129
135
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
This pointer must be provided as 'state_in' into XXH32_resetState(), which initializes the state.
|
|
136
|
+
For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
|
|
137
|
+
use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
|
|
133
138
|
*/
|
|
134
139
|
|
|
135
140
|
|
|
@@ -138,7 +143,7 @@ unsigned int XXH32_intermediateDigest (void* state);
|
|
|
138
143
|
This function does the same as XXH32_digest(), generating a 32-bit hash,
|
|
139
144
|
but preserves the memory context.
|
|
140
145
|
This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update().
|
|
141
|
-
To free memory context, use XXH32_digest().
|
|
146
|
+
To free memory context, use XXH32_digest(), or free().
|
|
142
147
|
*/
|
|
143
148
|
|
|
144
149
|
|
data/ext/xxhash/xxhash.cc
CHANGED
|
@@ -10,10 +10,47 @@ extern "C" VALUE xxhash_xxh32(VALUE mod, VALUE input, VALUE seed)
|
|
|
10
10
|
return ULL2NUM(XXH32(StringValuePtr(input), RSTRING_LEN(input), NUM2ULL(seed)));
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
+
extern "C" void xxhash_streaming_hash_free(void* state)
|
|
14
|
+
{
|
|
15
|
+
// Digest frees the memory.
|
|
16
|
+
(void) XXH32_digest(state);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
extern "C" VALUE xxhash_streaming_hash_new(VALUE klass, VALUE seed)
|
|
20
|
+
{
|
|
21
|
+
void* state = XXH32_init(NUM2ULL(seed));
|
|
22
|
+
return Data_Wrap_Struct(klass, 0, xxhash_streaming_hash_free, state);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
extern "C" VALUE xxhash_streaming_hash_update(VALUE self, VALUE data)
|
|
26
|
+
{
|
|
27
|
+
void* state;
|
|
28
|
+
Data_Get_Struct(self, void, state);
|
|
29
|
+
|
|
30
|
+
XXH32_update(state, StringValuePtr(data), RSTRING_LEN(data));
|
|
31
|
+
return Qnil;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
extern "C" VALUE xxhash_streaming_hash_digest(VALUE self)
|
|
35
|
+
{
|
|
36
|
+
void* state;
|
|
37
|
+
Data_Get_Struct(self, void, state);
|
|
38
|
+
|
|
39
|
+
// Do not free memory now.
|
|
40
|
+
return ULL2NUM(XXH32_intermediateDigest(state));
|
|
41
|
+
}
|
|
42
|
+
|
|
13
43
|
extern "C" void Init_xxhash()
|
|
14
44
|
{
|
|
15
45
|
VALUE mXXhash = rb_define_module("XXhash");
|
|
16
46
|
VALUE mInternal = rb_define_module_under(mXXhash, "Internal");
|
|
17
47
|
|
|
18
48
|
rb_define_singleton_method(mInternal, "xxh32", (ruby_method*) &xxhash_xxh32, 2);
|
|
49
|
+
|
|
50
|
+
VALUE cStreamingHash = rb_define_class_under(mInternal, "StreamingHash", rb_cObject);
|
|
51
|
+
|
|
52
|
+
rb_define_singleton_method(cStreamingHash, "new", (ruby_method*) &xxhash_streaming_hash_new, 1);
|
|
53
|
+
rb_define_method(cStreamingHash, "update", (ruby_method*) &xxhash_streaming_hash_update, 1);
|
|
54
|
+
rb_define_method(cStreamingHash, "digest", (ruby_method*) &xxhash_streaming_hash_digest, 0);
|
|
55
|
+
rb_define_method(cStreamingHash, "intermediate_digest", (ruby_method*) &xxhash_streaming_hash_digest, 0);
|
|
19
56
|
}
|
data/lib/xxhash.rb
CHANGED
|
@@ -5,4 +5,16 @@ module XXhash
|
|
|
5
5
|
def self.xxh32(input, seed)
|
|
6
6
|
Internal.xxh32(input, seed)
|
|
7
7
|
end
|
|
8
|
+
|
|
9
|
+
def self.xxh32_stream(io, seed, chunk_size = 32)
|
|
10
|
+
raise ArgumentError, 'first argument should be IO' if !io.is_a?(IO) && !io.is_a?(StringIO)
|
|
11
|
+
|
|
12
|
+
hash = Internal::StreamingHash.new(seed)
|
|
13
|
+
|
|
14
|
+
while chunk = io.read(chunk_size)
|
|
15
|
+
hash.update(chunk)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
hash.digest
|
|
19
|
+
end
|
|
8
20
|
end
|
data/lib/xxhash/version.rb
CHANGED
data/test/xxhash_test.rb
CHANGED
|
@@ -1,7 +1,26 @@
|
|
|
1
1
|
require 'test_helper'
|
|
2
|
+
require 'stringio'
|
|
2
3
|
|
|
3
4
|
describe XXhash do
|
|
4
5
|
it 'returns hash' do
|
|
5
6
|
assert_equal 2758658570, XXhash.xxh32('test', 123)
|
|
6
7
|
end
|
|
8
|
+
|
|
9
|
+
describe 'StreamingHash' do
|
|
10
|
+
it 'rises ArgumentError if forst argument is not IO object' do
|
|
11
|
+
assert_raises(ArgumentError) do
|
|
12
|
+
XXhash.xxh32_stream('test', 123)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it 'returns the hash for streamed strings' do
|
|
17
|
+
assert_equal 2758658570, XXhash.xxh32_stream(StringIO.new('test'), 123)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it 'returns the hash for streamed files' do
|
|
21
|
+
h1 = XXhash.xxh32(File.read(__FILE__), 123)
|
|
22
|
+
h2 = XXhash.xxh32_stream(File.open(__FILE__), 123)
|
|
23
|
+
assert_equal h1, h2
|
|
24
|
+
end
|
|
25
|
+
end
|
|
7
26
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xxhash
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-
|
|
12
|
+
date: 2013-09-04 00:00:00.000000000 Z
|
|
13
13
|
dependencies: []
|
|
14
14
|
description: Ruby wrapper for xxHash lib
|
|
15
15
|
email:
|
|
@@ -21,7 +21,7 @@ extra_rdoc_files: []
|
|
|
21
21
|
files:
|
|
22
22
|
- .gitignore
|
|
23
23
|
- .travis.yml
|
|
24
|
-
- CHANGELOG
|
|
24
|
+
- CHANGELOG.md
|
|
25
25
|
- Gemfile
|
|
26
26
|
- LICENSE.txt
|
|
27
27
|
- README.md
|
|
@@ -49,7 +49,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
49
49
|
version: '0'
|
|
50
50
|
segments:
|
|
51
51
|
- 0
|
|
52
|
-
hash: -
|
|
52
|
+
hash: -3728750430932162062
|
|
53
53
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
54
54
|
none: false
|
|
55
55
|
requirements:
|
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
58
58
|
version: '0'
|
|
59
59
|
segments:
|
|
60
60
|
- 0
|
|
61
|
-
hash: -
|
|
61
|
+
hash: -3728750430932162062
|
|
62
62
|
requirements: []
|
|
63
63
|
rubyforge_project:
|
|
64
64
|
rubygems_version: 1.8.24
|