xxhash 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ lib/xxhash/xxhash.so
data/CHANGELOG.md ADDED
@@ -0,0 +1,9 @@
1
+ ### 0.2.0 (September 4, 2013)
2
+ * xxHash updated to [r32](https://code.google.com/p/xxhash/source/detail?r=32)
3
+ * add `XXhash.xxh32_stream` method (by [@maltoe](https://github.com/maltoe))
4
+
5
+ ### 0.1.1 (June 4, 2013)
6
+ * remove .bundle from gem
7
+
8
+ ### 0.1.0 (May 16, 2013)
9
+ * xxHash updated to [r29](https://code.google.com/p/xxhash/source/detail?r=29)
data/README.md CHANGED
@@ -17,6 +17,14 @@ seed = 12345
17
17
  XXhash.xxh32(text, seed) # => 3834992036
18
18
  ```
19
19
 
20
+ You can use it with `IO` objects too:
21
+
22
+ ```ruby
23
+ XXhash.xxh32_stream(StringIO.new('test'), 123) # => 2758658570
24
+ ```
25
+
26
+ Note that you can also pass a chunk size as the third parameter (it defaults to 32 bytes).
27
+
20
28
  ### Supported Ruby versions
21
29
 
22
30
  - MRI 1.9.3
@@ -26,7 +34,7 @@ Note: It doesn't work on JRuby as it uses C extension.
26
34
 
27
35
  ### Versioning
28
36
 
29
- Version 0.1.0 is equal to [r29](https://code.google.com/p/xxhash/source/detail?r=29)
37
+ Version 0.2.0 is equal to [r32](https://code.google.com/p/xxhash/source/detail?r=32)
30
38
 
31
39
  ## Contributing
32
40
 
@@ -38,5 +46,5 @@ Version 0.1.0 is equal to [r29](https://code.google.com/p/xxhash/source/detail?r
38
46
 
39
47
  ### Copyright
40
48
 
41
- Copyright (c) 2012 Vasiliy Ermolovich. See LICENSE.txt for
49
+ Copyright (c) 2013 Vasiliy Ermolovich. See LICENSE.txt for
42
50
  further details.
@@ -31,13 +31,20 @@ You can contact the author at :
31
31
  */
32
32
 
33
33
 
34
-
35
34
  //**************************************
36
35
  // Tuning parameters
37
36
  //**************************************
37
+ // Unaligned memory access is automatically enabled for "common" CPU, such as x86.
38
+ // For other CPUs, the compiler will be more cautious and insert extra code to ensure aligned access is respected.
39
+ // If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
40
+ // You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
41
+ #if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
42
+ # define XXH_USE_UNALIGNED_ACCESS 1
43
+ #endif
44
+
38
45
  // XXH_ACCEPT_NULL_INPUT_POINTER :
39
- // If the input pointer is a null pointer, xxHash default behavior is to crash, since it is a bad input.
40
- // If this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
46
+ // If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
47
+ // When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
41
48
  // This option has a very small performance cost (only measurable on small inputs).
42
49
  // By default, this option is disabled. To enable it, uncomment below define :
43
50
  //#define XXH_ACCEPT_NULL_INPUT_POINTER 1
@@ -45,49 +52,44 @@ You can contact the author at :
45
52
  // XXH_FORCE_NATIVE_FORMAT :
46
53
  // By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
47
54
  // Results are therefore identical for little-endian and big-endian CPU.
48
- // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
49
- // Should endian-independance be of no importance to your application, you may uncomment the #define below
55
+ // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
56
+ // Should endian-independence be of no importance for your application, you may set the #define below to 1.
50
57
  // It will improve speed for Big-endian CPU.
51
58
  // This option has no impact on Little_Endian CPU.
52
- //#define XXH_FORCE_NATIVE_FORMAT 1
53
-
59
+ #define XXH_FORCE_NATIVE_FORMAT 0
54
60
 
55
61
 
56
62
  //**************************************
57
- // Includes
63
+ // Compiler Specific Options
58
64
  //**************************************
59
- #include <stdlib.h> // for malloc(), free()
60
- #include <string.h> // for memcpy()
61
- #include "libxxhash.h"
65
+ // Disable some Visual warning messages
66
+ #ifdef _MSC_VER // Visual Studio
67
+ # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
68
+ #endif
62
69
 
70
+ #ifdef _MSC_VER // Visual Studio
71
+ # define forceinline static __forceinline
72
+ #else
73
+ # ifdef __GNUC__
74
+ # define forceinline static inline __attribute__((always_inline))
75
+ # else
76
+ # define forceinline static inline
77
+ # endif
78
+ #endif
63
79
 
64
80
 
65
81
  //**************************************
66
- // CPU Feature Detection
82
+ // Includes & Memory related functions
67
83
  //**************************************
68
- // Little Endian or Big Endian ?
69
- // You can overwrite the #define below if you know your architecture endianess
70
- #if defined(XXH_FORCE_NATIVE_FORMAT) && (XXH_FORCE_NATIVE_FORMAT==1)
71
- // Force native format. The result will be endian dependant.
72
- # define XXH_BIG_ENDIAN 0
73
- #elif defined (__GLIBC__)
74
- # include <endian.h>
75
- # if (__BYTE_ORDER == __BIG_ENDIAN)
76
- # define XXH_BIG_ENDIAN 1
77
- # endif
78
- #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
79
- # define XXH_BIG_ENDIAN 1
80
- #elif defined(__sparc) || defined(__sparc__) \
81
- || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
82
- || defined(__hpux) || defined(__hppa) \
83
- || defined(_MIPSEB) || defined(__s390__)
84
- # define XXH_BIG_ENDIAN 1
85
- #endif
86
-
87
- #if !defined(XXH_BIG_ENDIAN)
88
- // Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
89
- # define XXH_BIG_ENDIAN 0
90
- #endif
84
+ #include "libxxhash.h"
85
+ // Modify the local functions below should you wish to use some other memory related routines
86
+ // for malloc(), free()
87
+ #include <stdlib.h>
88
+ forceinline void* XXH_malloc(size_t s) { return malloc(s); }
89
+ forceinline void XXH_free (void* p) { free(p); }
90
+ // for memcpy()
91
+ #include <string.h>
92
+ forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
91
93
 
92
94
 
93
95
  //**************************************
@@ -101,21 +103,43 @@ You can contact the author at :
101
103
  typedef int32_t S32;
102
104
  typedef uint64_t U64;
103
105
  #else
104
- typedef unsigned char BYTE;
105
- typedef unsigned short U16;
106
- typedef unsigned int U32;
107
- typedef signed int S32;
108
- typedef unsigned long long U64;
106
+ typedef unsigned char BYTE;
107
+ typedef unsigned short U16;
108
+ typedef unsigned int U32;
109
+ typedef signed int S32;
110
+ typedef unsigned long long U64;
109
111
  #endif
110
112
 
113
+ #if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
114
+ # define _PACKED __attribute__ ((packed))
115
+ #else
116
+ # define _PACKED
117
+ #endif
111
118
 
112
- //**************************************
113
- // Compiler-specific Options & Functions
114
- //**************************************
119
+ #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
120
+ # ifdef __IBMC__
121
+ # pragma pack(1)
122
+ # else
123
+ # pragma pack(push, 1)
124
+ # endif
125
+ #endif
126
+
127
+ typedef struct _U32_S { U32 v; } _PACKED U32_S;
128
+
129
+ #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
130
+ # pragma pack(pop)
131
+ #endif
132
+
133
+ #define A32(x) (((U32_S *)(x))->v)
134
+
135
+
136
+ //***************************************
137
+ // Compiler-specific Functions and Macros
138
+ //***************************************
115
139
  #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
116
140
 
117
- // Note : under GCC, it may sometimes be faster to enable the (2nd) macro definition, instead of using win32 intrinsic
118
- #if defined(_WIN32)
141
+ // Note : although _rotl exists for minGW (GCC under windows), performance seems poor
142
+ #if defined(_MSC_VER)
119
143
  # define XXH_rotl32(x,r) _rotl(x,r)
120
144
  #else
121
145
  # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
@@ -145,31 +169,48 @@ static inline U32 XXH_swap32 (U32 x) {
145
169
 
146
170
 
147
171
  //**************************************
148
- // Macros
172
+ // Architecture Macros
149
173
  //**************************************
150
- #define XXH_LE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(*(U32*)(p)) : *(U32*)(p))
174
+ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
175
+ #ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
176
+ static const int one = 1;
177
+ # define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one))
178
+ #endif
151
179
 
152
180
 
181
+ //**************************************
182
+ // Macros
183
+ //**************************************
184
+ #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations
185
+
153
186
 
154
187
  //****************************
155
- // Simple Hash Functions
188
+ // Memory reads
156
189
  //****************************
190
+ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
157
191
 
158
- U32 XXH32(const void* input, int len, U32 seed)
192
+ forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
159
193
  {
160
- #if 0
161
- // Simple version, good for code maintenance, but unfortunately slow for small inputs
162
- void* state = XXH32_init(seed);
163
- XXH32_update(state, input, len);
164
- return XXH32_digest(state);
165
- #else
194
+ if (align==XXH_unaligned)
195
+ return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
196
+ else
197
+ return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
198
+ }
199
+
200
+ forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
201
+
166
202
 
203
+ //****************************
204
+ // Simple Hash Functions
205
+ //****************************
206
+ forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align)
207
+ {
167
208
  const BYTE* p = (const BYTE*)input;
168
209
  const BYTE* const bEnd = p + len;
169
210
  U32 h32;
170
211
 
171
212
  #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
172
- if (p==NULL) { len=0; p=(const BYTE*)16; }
213
+ if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
173
214
  #endif
174
215
 
175
216
  if (len>=16)
@@ -182,10 +223,10 @@ U32 XXH32(const void* input, int len, U32 seed)
182
223
 
183
224
  do
184
225
  {
185
- v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
186
- v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
187
- v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
188
- v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
226
+ v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
227
+ v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
228
+ v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
229
+ v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
189
230
  } while (p<=limit);
190
231
 
191
232
  h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
@@ -199,8 +240,8 @@ U32 XXH32(const void* input, int len, U32 seed)
199
240
 
200
241
  while (p<=bEnd-4)
201
242
  {
202
- h32 += XXH_LE32(p) * PRIME32_3;
203
- h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
243
+ h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
244
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
204
245
  p+=4;
205
246
  }
206
247
 
@@ -218,7 +259,33 @@ U32 XXH32(const void* input, int len, U32 seed)
218
259
  h32 ^= h32 >> 16;
219
260
 
220
261
  return h32;
262
+ }
263
+
264
+
265
+ U32 XXH32(const void* input, int len, U32 seed)
266
+ {
267
+ #if 0
268
+ // Simple version, good for code maintenance, but unfortunately slow for small inputs
269
+ void* state = XXH32_init(seed);
270
+ XXH32_update(state, input, len);
271
+ return XXH32_digest(state);
272
+ #else
273
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
221
274
 
275
+ # if !defined(XXH_USE_UNALIGNED_ACCESS)
276
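+ if ((((size_t)input) & 3)) // Input is aligned, let's leverage the speed advantage -- NOTE(review): as written this test is non-zero only when input is NOT 4-byte aligned, which contradicts the comment; the intended check is presumably (((size_t)input) & 3) == 0 -- confirm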
277
+ {
278
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
279
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
280
+ else
281
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
282
+ }
283
+ # endif
284
+
285
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
286
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
287
+ else
288
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
222
289
  #endif
223
290
  }
224
291
 
@@ -229,21 +296,25 @@ U32 XXH32(const void* input, int len, U32 seed)
229
296
 
230
297
  struct XXH_state32_t
231
298
  {
299
+ U64 total_len;
232
300
  U32 seed;
233
301
  U32 v1;
234
302
  U32 v2;
235
303
  U32 v3;
236
304
  U32 v4;
237
- U64 total_len;
238
- char memory[16];
239
305
  int memsize;
306
+ char memory[16];
240
307
  };
241
308
 
242
309
 
243
- int XXH32_sizeofState() { return sizeof(struct XXH_state32_t); }
310
+ int XXH32_sizeofState()
311
+ {
312
+ XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough
313
+ return sizeof(struct XXH_state32_t);
314
+ }
244
315
 
245
316
 
246
- XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed)
317
+ XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
247
318
  {
248
319
  struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
249
320
  state->seed = seed;
@@ -253,19 +324,19 @@ XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed)
253
324
  state->v4 = seed - PRIME32_1;
254
325
  state->total_len = 0;
255
326
  state->memsize = 0;
256
- return OK;
327
+ return XXH_OK;
257
328
  }
258
329
 
259
330
 
260
331
  void* XXH32_init (U32 seed)
261
332
  {
262
- struct XXH_state32_t * state = (struct XXH_state32_t *) malloc (sizeof(struct XXH_state32_t));
333
+ void* state = XXH_malloc (sizeof(struct XXH_state32_t));
263
334
  XXH32_resetState(state, seed);
264
- return (void*)state;
335
+ return state;
265
336
  }
266
337
 
267
338
 
268
- XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
339
+ forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
269
340
  {
270
341
  struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
271
342
  const BYTE* p = (const BYTE*)input;
@@ -279,20 +350,20 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
279
350
 
280
351
  if (state->memsize + len < 16) // fill in tmp buffer
281
352
  {
282
- memcpy(state->memory + state->memsize, input, len);
353
+ XXH_memcpy(state->memory + state->memsize, input, len);
283
354
  state->memsize += len;
284
- return OK;
355
+ return XXH_OK;
285
356
  }
286
357
 
287
358
  if (state->memsize) // some data left from previous update
288
359
  {
289
- memcpy(state->memory + state->memsize, input, 16-state->memsize);
360
+ XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
290
361
  {
291
362
  const U32* p32 = (const U32*)state->memory;
292
- state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
293
- state->v2 += XXH_LE32(p32) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
294
- state->v3 += XXH_LE32(p32) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
295
- state->v4 += XXH_LE32(p32) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
363
+ state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
364
+ state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
365
+ state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
366
+ state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
296
367
  }
297
368
  p += 16-state->memsize;
298
369
  state->memsize = 0;
@@ -308,10 +379,10 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
308
379
 
309
380
  do
310
381
  {
311
- v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
312
- v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
313
- v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
314
- v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
382
+ v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
383
+ v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
384
+ v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
385
+ v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
315
386
  } while (p<=limit);
316
387
 
317
388
  state->v1 = v1;
@@ -322,22 +393,32 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
322
393
 
323
394
  if (p < bEnd)
324
395
  {
325
- memcpy(state->memory, p, bEnd-p);
396
+ XXH_memcpy(state->memory, p, bEnd-p);
326
397
  state->memsize = (int)(bEnd-p);
327
398
  }
328
399
 
329
- return OK;
400
+ return XXH_OK;
330
401
  }
331
402
 
403
+ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
404
+ {
405
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
332
406
 
333
- U32 XXH32_intermediateDigest (void* state_in)
407
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
408
+ return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
409
+ else
410
+ return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
411
+ }
412
+
413
+
414
+
415
+ forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
334
416
  {
335
417
  struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
336
- BYTE * p = (BYTE*)state->memory;
418
+ const BYTE * p = (const BYTE*)state->memory;
337
419
  BYTE* bEnd = (BYTE*)state->memory + state->memsize;
338
420
  U32 h32;
339
421
 
340
-
341
422
  if (state->total_len >= 16)
342
423
  {
343
424
  h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
@@ -351,8 +432,8 @@ U32 XXH32_intermediateDigest (void* state_in)
351
432
 
352
433
  while (p<=bEnd-4)
353
434
  {
354
- h32 += XXH_LE32(p) * PRIME32_3;
355
- h32 = XXH_rotl32(h32, 17) * PRIME32_4;
435
+ h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
436
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4;
356
437
  p+=4;
357
438
  }
358
439
 
@@ -373,11 +454,22 @@ U32 XXH32_intermediateDigest (void* state_in)
373
454
  }
374
455
 
375
456
 
457
+ U32 XXH32_intermediateDigest (void* state_in)
458
+ {
459
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
460
+
461
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
462
+ return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
463
+ else
464
+ return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
465
+ }
466
+
467
+
376
468
  U32 XXH32_digest (void* state_in)
377
469
  {
378
470
  U32 h32 = XXH32_intermediateDigest(state_in);
379
471
 
380
- free(state_in);
472
+ XXH_free(state_in);
381
473
 
382
474
  return h32;
383
475
  }
@@ -27,8 +27,8 @@
27
27
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
28
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
29
 
30
- You can contact the author at :
31
- - xxHash source repository : http://code.google.com/p/xxhash/
30
+ You can contact the author at :
31
+ - xxHash source repository : http://code.google.com/p/xxhash/
32
32
  */
33
33
 
34
34
  /* Notice extracted from xxHash homepage :
@@ -67,7 +67,7 @@ extern "C" {
67
67
  //****************************
68
68
  // Type
69
69
  //****************************
70
- typedef enum { OK=0, XXH_ERROR } XXH_errorcode;
70
+ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
71
71
 
72
72
 
73
73
 
@@ -79,13 +79,13 @@ unsigned int XXH32 (const void* input, int len, unsigned int seed);
79
79
 
80
80
  /*
81
81
  XXH32() :
82
- Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
82
+ Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
83
83
  The memory between input & input+len must be valid (allocated and read-accessible).
84
- "seed" can be used to alter the result predictably.
85
- This function successfully passes all SMHasher tests.
86
- Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
87
- Note that "len" is type "int", which means it is limited to 2^31-1.
88
- If your data is larger, use the advanced functions below.
84
+ "seed" can be used to alter the result predictably.
85
+ This function successfully passes all SMHasher tests.
86
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
87
+ Note that "len" is type "int", which means it is limited to 2^31-1.
88
+ If your data is larger, use the advanced functions below.
89
89
  */
90
90
 
91
91
 
@@ -122,14 +122,19 @@ Memory will be freed by XXH32_digest().
122
122
 
123
123
 
124
124
  int XXH32_sizeofState();
125
- XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed);
125
+ XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
126
+
127
+ #define XXH32_SIZEOFSTATE 48
128
+ typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
126
129
  /*
127
- These functions are the basic elements of XXH32_init();
128
- The objective is to allow user application to make its own allocation.
130
+ These functions allow user application to make its own allocation for state.
131
+
132
+ XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state.
133
+ Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
134
+ This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state.
129
135
 
130
- XXH32_sizeofState() is used to know how much space must be allocated by the application.
131
- This space must be referenced by a void* pointer.
132
- This pointer must be provided as 'state_in' into XXH32_resetState(), which initializes the state.
136
+ For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
137
+ use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
133
138
  */
134
139
 
135
140
 
@@ -138,7 +143,7 @@ unsigned int XXH32_intermediateDigest (void* state);
138
143
  This function does the same as XXH32_digest(), generating a 32-bit hash,
139
144
  but preserve memory context.
140
145
  This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update().
141
- To free memory context, use XXH32_digest().
146
+ To free memory context, use XXH32_digest(), or free().
142
147
  */
143
148
 
144
149
 
data/ext/xxhash/xxhash.cc CHANGED
@@ -10,10 +10,47 @@ extern "C" VALUE xxhash_xxh32(VALUE mod, VALUE input, VALUE seed)
10
10
  return ULL2NUM(XXH32(StringValuePtr(input), RSTRING_LEN(input), NUM2ULL(seed)));
11
11
  }
12
12
 
13
+ extern "C" void xxhash_streaming_hash_free(void* state)
14
+ {
15
+ // Digest frees the memory.
16
+ (void) XXH32_digest(state);
17
+ }
18
+
19
+ extern "C" VALUE xxhash_streaming_hash_new(VALUE klass, VALUE seed)
20
+ {
21
+ void* state = XXH32_init(NUM2ULL(seed));
22
+ return Data_Wrap_Struct(klass, 0, xxhash_streaming_hash_free, state);
23
+ }
24
+
25
+ extern "C" VALUE xxhash_streaming_hash_update(VALUE self, VALUE data)
26
+ {
27
+ void* state;
28
+ Data_Get_Struct(self, void, state);
29
+
30
+ XXH32_update(state, StringValuePtr(data), RSTRING_LEN(data));
31
+ return Qnil;
32
+ }
33
+
34
+ extern "C" VALUE xxhash_streaming_hash_digest(VALUE self)
35
+ {
36
+ void* state;
37
+ Data_Get_Struct(self, void, state);
38
+
39
+ // Do not free memory now.
40
+ return ULL2NUM(XXH32_intermediateDigest(state));
41
+ }
42
+
13
43
  extern "C" void Init_xxhash()
14
44
  {
15
45
  VALUE mXXhash = rb_define_module("XXhash");
16
46
  VALUE mInternal = rb_define_module_under(mXXhash, "Internal");
17
47
 
18
48
  rb_define_singleton_method(mInternal, "xxh32", (ruby_method*) &xxhash_xxh32, 2);
49
+
50
+ VALUE cStreamingHash = rb_define_class_under(mInternal, "StreamingHash", rb_cObject);
51
+
52
+ rb_define_singleton_method(cStreamingHash, "new", (ruby_method*) &xxhash_streaming_hash_new, 1);
53
+ rb_define_method(cStreamingHash, "update", (ruby_method*) &xxhash_streaming_hash_update, 1);
54
+ rb_define_method(cStreamingHash, "digest", (ruby_method*) &xxhash_streaming_hash_digest, 0);
55
+ rb_define_method(cStreamingHash, "intermediate_digest", (ruby_method*) &xxhash_streaming_hash_digest, 0);
19
56
  }
data/lib/xxhash.rb CHANGED
@@ -5,4 +5,16 @@ module XXhash
5
5
  def self.xxh32(input, seed)
6
6
  Internal.xxh32(input, seed)
7
7
  end
8
+
9
+ def self.xxh32_stream(io, seed, chunk_size = 32)
10
+ raise ArgumentError, 'first argument should be IO' if !io.is_a?(IO) && !io.is_a?(StringIO)
11
+
12
+ hash = Internal::StreamingHash.new(seed)
13
+
14
+ while chunk = io.read(chunk_size)
15
+ hash.update(chunk)
16
+ end
17
+
18
+ hash.digest
19
+ end
8
20
  end
@@ -1,3 +1,3 @@
1
1
  module XXhash
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/test/xxhash_test.rb CHANGED
@@ -1,7 +1,26 @@
1
1
  require 'test_helper'
2
+ require 'stringio'
2
3
 
3
4
  describe XXhash do
4
5
  it 'returns hash' do
5
6
  assert_equal 2758658570, XXhash.xxh32('test', 123)
6
7
  end
8
+
9
+ describe 'StreamingHash' do
10
+ it 'rises ArgumentError if forst argument is not IO object' do
11
+ assert_raises(ArgumentError) do
12
+ XXhash.xxh32_stream('test', 123)
13
+ end
14
+ end
15
+
16
+ it 'returns the hash for streamed strings' do
17
+ assert_equal 2758658570, XXhash.xxh32_stream(StringIO.new('test'), 123)
18
+ end
19
+
20
+ it 'returns the hash for streamed files' do
21
+ h1 = XXhash.xxh32(File.read(__FILE__), 123)
22
+ h2 = XXhash.xxh32_stream(File.open(__FILE__), 123)
23
+ assert_equal h1, h2
24
+ end
25
+ end
7
26
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xxhash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-04 00:00:00.000000000 Z
12
+ date: 2013-09-04 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Ruby wrapper for xxHash lib
15
15
  email:
@@ -21,7 +21,7 @@ extra_rdoc_files: []
21
21
  files:
22
22
  - .gitignore
23
23
  - .travis.yml
24
- - CHANGELOG
24
+ - CHANGELOG.md
25
25
  - Gemfile
26
26
  - LICENSE.txt
27
27
  - README.md
@@ -49,7 +49,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
49
49
  version: '0'
50
50
  segments:
51
51
  - 0
52
- hash: -2669924421598786933
52
+ hash: -3728750430932162062
53
53
  required_rubygems_version: !ruby/object:Gem::Requirement
54
54
  none: false
55
55
  requirements:
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  version: '0'
59
59
  segments:
60
60
  - 0
61
- hash: -2669924421598786933
61
+ hash: -3728750430932162062
62
62
  requirements: []
63
63
  rubyforge_project:
64
64
  rubygems_version: 1.8.24
data/CHANGELOG DELETED
@@ -1,5 +0,0 @@
1
- ### 0.1.1 (June 4, 2013)
2
- * remove .bundle from gem
3
-
4
- ### 0.1.0 (May 16, 2013)
5
- * xxHash updated to [r29](https://code.google.com/p/xxhash/source/detail?r=28)