xxhash 0.1.1 → 0.2.0
- data/.gitignore +1 -0
- data/CHANGELOG.md +9 -0
- data/README.md +10 -2
- data/ext/xxhash/libxxhash.c +181 -89
- data/ext/xxhash/libxxhash.h +21 -16
- data/ext/xxhash/xxhash.cc +37 -0
- data/lib/xxhash.rb +12 -0
- data/lib/xxhash/version.rb +1 -1
- data/test/xxhash_test.rb +19 -0
- metadata +5 -5
- data/CHANGELOG +0 -5
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,9 @@
+### 0.2.0 (September 4, 2013)
+* xxHash updated to [r32](https://code.google.com/p/xxhash/source/detail?r=32)
+* add `XXhash.xxh32_stream` method (by [@maltoe](https://github.com/maltoe))
+
+### 0.1.1 (June 4, 2013)
+* remove .bundle from gem
+
+### 0.1.0 (May 16, 2013)
+* xxHash updated to [r29](https://code.google.com/p/xxhash/source/detail?r=29)
data/README.md
CHANGED
@@ -17,6 +17,14 @@ seed = 12345
 XXhash.xxh32(text, seed) # => 3834992036
 ```
 
+You can use it with `IO` objects too:
+
+```ruby
+XXhash.xxh32_stream(StringIO.new('test'), 123) # => 2758658570
+```
+
+Note that you can also pass a chunk size as third param (it's 32 bytes by default)
+
 ### Supported Ruby versions
 
 - MRI 1.9.3
@@ -26,7 +34,7 @@ Note: It doesn't work on JRuby as it uses C extension.
 
 ### Versioning
 
-Version 0.1.0 is equal to [r29](https://code.google.com/p/xxhash/source/detail?r=29)
+Version 0.2.0 is equal to [r32](https://code.google.com/p/xxhash/source/detail?r32)
 
 ## Contributing
 
@@ -38,5 +46,5 @@ Version 0.1.0 is equal to [r29](https://code.google.com/p/xxhash/source/detail?r=29)
 
 ### Copyright
 
-Copyright (c)
+Copyright (c) 2013 Vasiliy Ermolovich. See LICENSE.txt for
 further details.
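The chunk-size note in the README hunk above has no example of its own; a minimal sketch of passing the third argument (the file name here is hypothetical):

```ruby
require 'xxhash'

# Read the IO in 1 KB chunks instead of the default 32 bytes.
XXhash.xxh32_stream(File.open('data.bin', 'rb'), 12345, 1024)
```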
data/ext/xxhash/libxxhash.c
CHANGED
@@ -31,13 +31,20 @@ You can contact the author at :
 */
 
 
-
 //**************************************
 // Tuning parameters
 //**************************************
+// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
+// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
+// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
+// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
+#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USE_UNALIGNED_ACCESS 1
+#endif
+
 // XXH_ACCEPT_NULL_INPUT_POINTER :
-// If the input pointer is a null pointer, xxHash default behavior is to
-//
+// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
 // This option has a very small performance cost (only measurable on small inputs).
 // By default, this option is disabled. To enable it, uncomment below define :
 //#define XXH_ACCEPT_NULL_INPUT_POINTER 1
@@ -45,49 +52,44 @@ You can contact the author at :
 // XXH_FORCE_NATIVE_FORMAT :
 // By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
 // Results are therefore identical for little-endian and big-endian CPU.
-// This comes at a
-// Should endian-independance be of no importance
+// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+// Should endian-independance be of no importance for your application, you may set the #define below to 1.
 // It will improve speed for Big-endian CPU.
 // This option has no impact on Little_Endian CPU.
-
-
+#define XXH_FORCE_NATIVE_FORMAT 0
 
 
 //**************************************
-//
+// Compiler Specific Options
 //**************************************
-
-#
-#
+// Disable some Visual warning messages
+#ifdef _MSC_VER  // Visual Studio
+#  pragma warning(disable : 4127)  // disable: C4127: conditional expression is constant
+#endif
 
+#ifdef _MSC_VER  // Visual Studio
+#  define forceinline static __forceinline
+#else
+#  ifdef __GNUC__
+#    define forceinline static inline __attribute__((always_inline))
+#  else
+#    define forceinline static inline
+#  endif
+#endif
 
 
 //**************************************
-//
+// Includes & Memory related functions
 //**************************************
-
-//
-
-
-
-
-
-#
-
-#  endif
-#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
-#  define XXH_BIG_ENDIAN 1
-#elif defined(__sparc) || defined(__sparc__) \
-   || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
-   || defined(__hpux) || defined(__hppa) \
-   || defined(_MIPSEB) || defined(__s390__)
-#  define XXH_BIG_ENDIAN 1
-#endif
-
-#if !defined(XXH_BIG_ENDIAN)
-// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
-#  define XXH_BIG_ENDIAN 0
-#endif
+#include "libxxhash.h"
+// Modify the local functions below should you wish to use some other memory related routines
+// for malloc(), free()
+#include <stdlib.h>
+forceinline void* XXH_malloc(size_t s) { return malloc(s); }
+forceinline void  XXH_free  (void* p)  { free(p); }
+// for memcpy()
+#include <string.h>
+forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 
 
 //**************************************
@@ -101,21 +103,43 @@ You can contact the author at :
 typedef int32_t S32;
 typedef uint64_t U64;
 #else
-typedef unsigned char
-typedef unsigned short
-typedef unsigned int
-typedef signed int
-typedef unsigned long long
+typedef unsigned char      BYTE;
+typedef unsigned short     U16;
+typedef unsigned int       U32;
+typedef   signed int       S32;
+typedef unsigned long long U64;
 #endif
 
+#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
+#  define _PACKED __attribute__ ((packed))
+#else
+#  define _PACKED
+#endif
 
-
-
-
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  ifdef __IBMC__
+#    pragma pack(1)
+#  else
+#    pragma pack(push, 1)
+#  endif
+#endif
+
+typedef struct _U32_S { U32 v; } _PACKED U32_S;
+
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  pragma pack(pop)
+#endif
+
+#define A32(x) (((U32_S *)(x))->v)
+
+
+//***************************************
+// Compiler-specific Functions and Macros
+//***************************************
 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 
-// Note :
-#if defined(
+// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
+#if defined(_MSC_VER)
 #  define XXH_rotl32(x,r) _rotl(x,r)
 #else
 #  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
@@ -145,31 +169,48 @@ static inline U32 XXH_swap32 (U32 x) {
 
 
 //**************************************
-// Macros
+// Architecture Macros
 //**************************************
-
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
+static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
+#endif
 
 
+//**************************************
+// Macros
+//**************************************
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    // use only *after* variable declarations
+
 
 //****************************
-//
+// Memory reads
 //****************************
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
 
-U32
+forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
 {
-
-
-
-
-
-
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
+    else
+        return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
+}
+
+forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
+
 
+//****************************
+// Simple Hash Functions
+//****************************
+forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
     U32 h32;
 
 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
-    if (p==NULL) { len=0; p=(const BYTE*)16; }
+    if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
 #endif
 
     if (len>=16)
@@ -182,10 +223,10 @@ U32 XXH32(const void* input, int len, U32 seed)
 
         do
         {
-            v1 +=
-            v2 +=
-            v3 +=
-            v4 +=
+            v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
+            v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
+            v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
+            v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
        } while (p<=limit);
 
        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
@@ -199,8 +240,8 @@ U32 XXH32(const void* input, int len, U32 seed)
 
     while (p<=bEnd-4)
     {
-        h32 +=
-        h32
+        h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
        p+=4;
     }
 
@@ -218,7 +259,33 @@ U32 XXH32(const void* input, int len, U32 seed)
     h32 ^= h32 >> 16;
 
     return h32;
+}
+
+
+U32 XXH32(const void* input, int len, U32 seed)
+{
+#if 0
+    // Simple version, good for code maintenance, but unfortunately slow for small inputs
+    void* state = XXH32_init(seed);
+    XXH32_update(state, input, len);
+    return XXH32_digest(state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
 
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 3))   // Input is aligned, let's leverage the speed advantage
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
 #endif
 }
 
@@ -229,21 +296,25 @@ U32 XXH32(const void* input, int len, U32 seed)
 
 struct XXH_state32_t
 {
+    U64 total_len;
     U32 seed;
     U32 v1;
     U32 v2;
     U32 v3;
     U32 v4;
-    U64 total_len;
-    char memory[16];
     int memsize;
+    char memory[16];
 };
 
 
-int XXH32_sizeofState()
+int XXH32_sizeofState()
+{
+    XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   // A compilation error here means XXH32_SIZEOFSTATE is not large enough
+    return sizeof(struct XXH_state32_t);
+}
 
 
-XXH_errorcode XXH32_resetState(void* state_in,
+XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
 {
     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
     state->seed = seed;
@@ -253,19 +324,19 @@ XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed)
     state->v4 = seed - PRIME32_1;
     state->total_len = 0;
     state->memsize = 0;
-    return
+    return XXH_OK;
 }
 
 
 void* XXH32_init (U32 seed)
 {
-
+    void* state = XXH_malloc (sizeof(struct XXH_state32_t));
     XXH32_resetState(state, seed);
-    return
+    return state;
 }
 
 
-XXH_errorcode
+forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
 {
     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
     const BYTE* p = (const BYTE*)input;
@@ -279,20 +350,20 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
 
     if (state->memsize + len < 16)   // fill in tmp buffer
     {
-
+        XXH_memcpy(state->memory + state->memsize, input, len);
         state->memsize += len;
-        return
+        return XXH_OK;
     }
 
     if (state->memsize)   // some data left from previous update
     {
-
+        XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
         {
             const U32* p32 = (const U32*)state->memory;
-            state->v1 +=
-            state->v2 +=
-            state->v3 +=
-            state->v4 +=
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
        }
        p += 16-state->memsize;
        state->memsize = 0;
@@ -308,10 +379,10 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
 
         do
         {
-            v1 +=
-            v2 +=
-            v3 +=
-            v4 +=
+            v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
+            v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
+            v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
+            v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
        } while (p<=limit);
 
        state->v1 = v1;
@@ -322,22 +393,32 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
 
     if (p < bEnd)
     {
-
+        XXH_memcpy(state->memory, p, bEnd-p);
        state->memsize = (int)(bEnd-p);
     }
 
-    return
+    return XXH_OK;
 }
 
+XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
 
-
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
 {
     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
-    BYTE * p
+    const BYTE * p   = (const BYTE*)state->memory;
     BYTE* bEnd = (BYTE*)state->memory + state->memsize;
     U32 h32;
 
-
     if (state->total_len >= 16)
     {
         h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
@@ -351,8 +432,8 @@ U32 XXH32_intermediateDigest (void* state_in)
 
     while (p<=bEnd-4)
     {
-        h32 +=
-        h32
+        h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
        p+=4;
     }
 
@@ -373,11 +454,22 @@ U32 XXH32_intermediateDigest (void* state_in)
 }
 
 
+U32 XXH32_intermediateDigest (void* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
+}
+
+
 U32 XXH32_digest (void* state_in)
 {
     U32 h32 = XXH32_intermediateDigest(state_in);
 
-
+    XXH_free(state_in);
 
     return h32;
 }
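The `#if 0` branch of the new one-shot `XXH32()` above spells out that it is equivalent to the stateful init/update/digest sequence. Seen from the Ruby wrapper, that equivalence is why the one-shot and streaming helpers agree; a small sketch, with the expected value taken from the gem's test suite below:

```ruby
require 'xxhash'
require 'stringio'

seed = 123
data = 'test'

XXhash.xxh32(data, seed)                      # => 2758658570
XXhash.xxh32_stream(StringIO.new(data), seed) # => 2758658570
```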
data/ext/xxhash/libxxhash.h
CHANGED
@@ -27,8 +27,8 @@
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-
-
+    You can contact the author at :
+    - xxHash source repository : http://code.google.com/p/xxhash/
 */
 
 /* Notice extracted from xxHash homepage :
@@ -67,7 +67,7 @@ extern "C" {
 //****************************
 // Type
 //****************************
-typedef enum {
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 
 
 
@@ -79,13 +79,13 @@ unsigned int XXH32 (const void* input, int len, unsigned int seed);
 
 /*
 XXH32() :
-
+    Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
     The memory between input & input+len must be valid (allocated and read-accessible).
-
-
-
-
-
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+    Note that "len" is type "int", which means it is limited to 2^31-1.
+    If your data is larger, use the advanced functions below.
 */
 
 
@@ -122,14 +122,19 @@ Memory will be freed by XXH32_digest().
 
 
 int           XXH32_sizeofState();
-XXH_errorcode XXH32_resetState(void*
+XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
+
+#define       XXH32_SIZEOFSTATE 48
+typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
 /*
-These functions
-
+These functions allow user application to make its own allocation for state.
+
+XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state.
+Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
+This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state.
 
-
-
-This pointer must be provided as 'state_in' into XXH32_resetState(), which initializes the state.
+For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
+use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
 */
 
 
@@ -138,7 +143,7 @@ unsigned int XXH32_intermediateDigest (void* state);
 This function does the same as XXH32_digest(), generating a 32-bit hash,
 but preserve memory context.
 This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update().
-To free memory context, use XXH32_digest().
+To free memory context, use XXH32_digest(), or free().
 */
 
 
data/ext/xxhash/xxhash.cc
CHANGED
@@ -10,10 +10,47 @@ extern "C" VALUE xxhash_xxh32(VALUE mod, VALUE input, VALUE seed)
   return ULL2NUM(XXH32(StringValuePtr(input), RSTRING_LEN(input), NUM2ULL(seed)));
 }
 
+extern "C" void xxhash_streaming_hash_free(void* state)
+{
+  // Digest frees the memory.
+  (void) XXH32_digest(state);
+}
+
+extern "C" VALUE xxhash_streaming_hash_new(VALUE klass, VALUE seed)
+{
+  void* state = XXH32_init(NUM2ULL(seed));
+  return Data_Wrap_Struct(klass, 0, xxhash_streaming_hash_free, state);
+}
+
+extern "C" VALUE xxhash_streaming_hash_update(VALUE self, VALUE data)
+{
+  void* state;
+  Data_Get_Struct(self, void, state);
+
+  XXH32_update(state, StringValuePtr(data), RSTRING_LEN(data));
+  return Qnil;
+}
+
+extern "C" VALUE xxhash_streaming_hash_digest(VALUE self)
+{
+  void* state;
+  Data_Get_Struct(self, void, state);
+
+  // Do not free memory now.
+  return ULL2NUM(XXH32_intermediateDigest(state));
+}
+
 extern "C" void Init_xxhash()
 {
   VALUE mXXhash = rb_define_module("XXhash");
   VALUE mInternal = rb_define_module_under(mXXhash, "Internal");
 
   rb_define_singleton_method(mInternal, "xxh32", (ruby_method*) &xxhash_xxh32, 2);
+
+  VALUE cStreamingHash = rb_define_class_under(mInternal, "StreamingHash", rb_cObject);
+
+  rb_define_singleton_method(cStreamingHash, "new", (ruby_method*) &xxhash_streaming_hash_new, 1);
+  rb_define_method(cStreamingHash, "update", (ruby_method*) &xxhash_streaming_hash_update, 1);
+  rb_define_method(cStreamingHash, "digest", (ruby_method*) &xxhash_streaming_hash_digest, 0);
+  rb_define_method(cStreamingHash, "intermediate_digest", (ruby_method*) &xxhash_streaming_hash_digest, 0);
 }
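The extension above registers the stateful C API as `XXhash::Internal::StreamingHash`; a hedged sketch of driving it directly (the `xxh32_stream` wrapper added in the next file runs exactly this loop). Note that `digest` maps to `XXH32_intermediateDigest`, so the underlying C state is only released by the GC free callback:

```ruby
require 'xxhash'

hash = XXhash::Internal::StreamingHash.new(123)
hash.update('te')
hash.update('st')
hash.digest # => 2758658570, same as XXhash.xxh32('test', 123)
```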
data/lib/xxhash.rb
CHANGED
@@ -5,4 +5,16 @@ module XXhash
   def self.xxh32(input, seed)
     Internal.xxh32(input, seed)
   end
+
+  def self.xxh32_stream(io, seed, chunk_size = 32)
+    raise ArgumentError, 'first argument should be IO' if !io.is_a?(IO) && !io.is_a?(StringIO)
+
+    hash = Internal::StreamingHash.new(seed)
+
+    while chunk = io.read(chunk_size)
+      hash.update(chunk)
+    end
+
+    hash.digest
+  end
 end
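Since the wrapper accepts any `IO` (not only files or `StringIO`), streaming from a pipe also works; a sketch assuming the write end is closed first so `io.read` eventually returns nil and ends the loop:

```ruby
require 'xxhash'

rd, wr = IO.pipe
wr.write('test')
wr.close

# Reads the pipe in the default 32-byte chunks until EOF.
XXhash.xxh32_stream(rd, 123) # => 2758658570
```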
data/lib/xxhash/version.rb
CHANGED
data/test/xxhash_test.rb
CHANGED
@@ -1,7 +1,26 @@
 require 'test_helper'
+require 'stringio'
 
 describe XXhash do
   it 'returns hash' do
     assert_equal 2758658570, XXhash.xxh32('test', 123)
   end
+
+  describe 'StreamingHash' do
+    it 'rises ArgumentError if forst argument is not IO object' do
+      assert_raises(ArgumentError) do
+        XXhash.xxh32_stream('test', 123)
+      end
+    end
+
+    it 'returns the hash for streamed strings' do
+      assert_equal 2758658570, XXhash.xxh32_stream(StringIO.new('test'), 123)
+    end
+
+    it 'returns the hash for streamed files' do
+      h1 = XXhash.xxh32(File.read(__FILE__), 123)
+      h2 = XXhash.xxh32_stream(File.open(__FILE__), 123)
+      assert_equal h1, h2
+    end
+  end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: xxhash
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-09-04 00:00:00.000000000 Z
 dependencies: []
 description: Ruby wrapper for xxHash lib
 email:
@@ -21,7 +21,7 @@ extra_rdoc_files: []
 files:
 - .gitignore
 - .travis.yml
-- CHANGELOG
+- CHANGELOG.md
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -49,7 +49,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -
+      hash: -3728750430932162062
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
      - 0
-      hash: -
+      hash: -3728750430932162062
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.24