cityhash 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,3 +3,4 @@ rvm:
3
3
  - 1.8.7
4
4
  - 1.9.2
5
5
  - 1.9.3
6
+ - ruby-head
@@ -0,0 +1,5 @@
1
+ ## 0.7.0 (October 25, 2012) ##
2
+
3
+ ### enhancements
4
+ * add CityHash.hash32 method
5
+ * update libcityhash to 1.1. See [NEWS](http://code.google.com/p/cityhash/source/browse/trunk/NEWS)
data/README.md CHANGED
@@ -15,11 +15,12 @@ text = "test"
15
15
  seed1 = 12345
16
16
  seed2 = 54321
17
17
 
18
- CityHash.hash64(text) # => 17703940110308125106
19
- CityHash.hash64(text, seed1) # => 14900027982776226655
20
- CityHash.hash64(text, seed1, seed2) # => 11136353178704814373
21
- CityHash.hash128(text) # => 1800071687761605184910580728449884026697
22
- CityHash.hash128(text, seed1) # => 6087407617808651818174120599816915369
18
+ CityHash.hash32(text) # => 1633095781
19
+ CityHash.hash64(text) # => 8581389452482819506
20
+ CityHash.hash64(text, seed1) # => 9154302171269876511
21
+ CityHash.hash64(text, seed1, seed2) # => 4854399283587686019
22
+ CityHash.hash128(text) # => 124124989950401219618153994964897029896
23
+ CityHash.hash128(text, seed1) # => 101668641288246442316643001405184598611
23
24
  ```
24
25
 
25
26
  ### Contributing to cityhash
@@ -27,7 +27,7 @@
27
27
  // possible hash functions, by using SIMD instructions, or by
28
28
  // compromising on hash quality.
29
29
 
30
- #include "city.h"
30
+ #include <city.h>
31
31
 
32
32
  #include <algorithm>
33
33
  #include <string.h> // for memcpy and memset
@@ -46,32 +46,32 @@ static uint32 UNALIGNED_LOAD32(const char *p) {
46
46
  return result;
47
47
  }
48
48
 
49
- #if !defined(WORDS_BIGENDIAN)
50
-
51
- #define uint32_in_expected_order(x) (x)
52
- #define uint64_in_expected_order(x) (x)
53
-
54
- #else
55
-
56
49
  #ifdef _MSC_VER
50
+
57
51
  #include <stdlib.h>
58
52
  #define bswap_32(x) _byteswap_ulong(x)
59
53
  #define bswap_64(x) _byteswap_uint64(x)
60
54
 
61
55
  #elif defined(__APPLE__)
56
+
62
57
  // Mac OS X / Darwin features
63
58
  #include <libkern/OSByteOrder.h>
64
59
  #define bswap_32(x) OSSwapInt32(x)
65
60
  #define bswap_64(x) OSSwapInt64(x)
66
61
 
67
62
  #else
63
+
68
64
  #include <byteswap.h>
65
+
69
66
  #endif
70
67
 
68
+ #ifdef WORDS_BIGENDIAN
71
69
  #define uint32_in_expected_order(x) (bswap_32(x))
72
70
  #define uint64_in_expected_order(x) (bswap_64(x))
73
-
74
- #endif // WORDS_BIGENDIAN
71
+ #else
72
+ #define uint32_in_expected_order(x) (x)
73
+ #define uint64_in_expected_order(x) (x)
74
+ #endif
75
75
 
76
76
  #if !defined(LIKELY)
77
77
  #if HAVE_BUILTIN_EXPECT
@@ -93,7 +93,138 @@ static uint32 Fetch32(const char *p) {
93
93
  static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
94
94
  static const uint64 k1 = 0xb492b66fbe98f273ULL;
95
95
  static const uint64 k2 = 0x9ae16a3b2f90404fULL;
96
- static const uint64 k3 = 0xc949d7c7509e6557ULL;
96
+
97
+ // Magic numbers for 32-bit hashing. Copied from Murmur3.
// Both are used as multipliers by Mur(), Hash32Len0to4() and CityHash32().
// NOTE(review): these are declared with uint32_t while the functions that
// consume them use the project's uint32 typedef; presumably both name the
// same 32-bit unsigned type -- confirm against city.h.
98
+ static const uint32_t c1 = 0xcc9e2d51;
99
+ static const uint32_t c2 = 0x1b873593;
100
+
101
+ // A 32-bit to 32-bit integer hash copied from Murmur3.
102
// Scrambles h with three xor-shift steps (>> 16, >> 13, >> 16) separated by
// two multiplications and returns the result. Each Hash32Len* helper passes
// its combined value through this function as its last step.
+ static uint32 fmix(uint32 h)
103
+ {
104
+ h ^= h >> 16;
105
+ h *= 0x85ebca6b;
106
+ h ^= h >> 13;
107
+ h *= 0xc2b2ae35;
108
+ h ^= h >> 16;
109
+ return h;
110
+ }
111
+
112
// Rotates the 32-bit value `val` right by `shift` bits and returns it.
// NOTE(review): only shift == 0 is guarded. The callers visible in this hunk
// pass the constants 11, 17, 18 and 19; negative or >= 32 shifts are not
// handled here.
+ static uint32 Rotate32(uint32 val, int shift) {
113
+ // Avoid shifting by 32: doing so yields an undefined result.
114
+ return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
115
+ }
116
+
117
// Rotates three lvalues in place: afterwards `a` holds the old `c`, `b` holds
// the old `a`, and `c` holds the old `b`. The #undef drops any earlier
// definition of the name; the do { ... } while (0) wrapper lets the macro be
// used as a single statement. `a` is expanded twice, so pass plain variables.
+ #undef PERMUTE3
118
+ #define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
119
+
120
// Folds the 32-bit word `a` into the running state `h` and returns the new
// state. `a` is multiplied by c1, rotated right by 17 and multiplied by c2
// before being xor-ed into h; h is then rotated right by 19 and mapped to
// h * 5 + 0xe6546b64.
+ static uint32 Mur(uint32 a, uint32 h) {
121
+ // Helper from Murmur3 for combining two 32-bit values.
122
+ a *= c1;
123
+ a = Rotate32(a, 17);
124
+ a *= c2;
125
+ h ^= a;
126
+ h = Rotate32(h, 19);
127
+ return h * 5 + 0xe6546b64;
128
+ }
129
+
130
// Hashes an input of 13..24 bytes (the range CityHash32 routes here) from six
// 4-byte reads: two at the front (offsets 0 and 4), two around the midpoint
// (len/2 - 4 and len/2) and two at the back (len - 8 and len - 4). For that
// length range every read stays inside [s, s + len) and the reads may overlap.
+ static uint32 Hash32Len13to24(const char *s, size_t len) {
131
+ uint32 a = Fetch32(s - 4 + (len >> 1));
132
+ uint32 b = Fetch32(s + 4);
133
+ uint32 c = Fetch32(s + len - 8);
134
+ uint32 d = Fetch32(s + (len >> 1));
135
+ uint32 e = Fetch32(s);
136
+ uint32 f = Fetch32(s + len - 4);
137
// The length seeds the state (truncated to 32 bits by the assignment).
+ uint32 h = len;
138
+
139
// Fold the six words into h in the order a, b, c, d, e, f, then finalize.
+ return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
140
+ }
141
+
142
// Hashes an input of at most 4 bytes (CityHash32 sends len <= 4 here), one
// byte at a time. With len == 0 the loop is skipped and the initial values
// b = 0, c = 9 are hashed together with the length.
// NOTE(review): s[i] is a plain char, whose signedness is implementation-
// defined in C/C++, so bytes >= 0x80 may contribute different values on
// different platforms -- confirm whether a fixed signedness is intended.
// NOTE(review): `int i` is compared with the size_t `len` (signed/unsigned
// comparison); harmless for len <= 4 but likely to draw a compiler warning.
+ static uint32 Hash32Len0to4(const char *s, size_t len) {
143
+ uint32 b = 0;
144
+ uint32 c = 9;
145
+ for (int i = 0; i < len; i++) {
146
+ b = b * c1 + s[i];
147
+ c ^= b;
148
+ }
149
+ return fmix(Mur(b, Mur(len, c)));
150
+ }
151
+
152
// Hashes an input of 5..12 bytes (the range CityHash32 routes here) from
// three 4-byte reads: the first four bytes, the last four bytes, and one word
// at offset 0 or 4. The state is seeded from len (a), len * 5 (b and d) and 9 (c).
+ static uint32 Hash32Len5to12(const char *s, size_t len) {
153
+ uint32 a = len, b = len * 5, c = 9, d = b;
154
+ a += Fetch32(s);
155
+ b += Fetch32(s + len - 4);
156
// (len >> 1) & 4 is 0 for len 5..7 and 4 for len 8..12, so this read
// never runs past the end of the buffer.
+ c += Fetch32(s + ((len >> 1) & 4));
157
+ return fmix(Mur(c, Mur(b, Mur(a, d))));
158
+ }
159
+
160
// Public 32-bit hash of the `len` bytes starting at `s`.
// Inputs of up to 24 bytes are dispatched to the length-specific helpers
// (0..4, 5..12, 13..24). Longer inputs seed three state words (h, g, f) from
// the last 20 bytes, then consume the input from the front in 20-byte steps,
// and finally fold g and f into h, which is returned.
+ uint32 CityHash32(const char *s, size_t len) {
161
+ if (len <= 24) {
162
+ return len <= 12 ?
163
+ (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
164
+ Hash32Len13to24(s, len);
165
+ }
166
+
167
+ // len > 24
168
// State seeds derived from the length.
+ uint32 h = len, g = c1 * len, f = g;
169
// Five pre-mixed words taken from the last 20 bytes of the input.
+ uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
170
+ uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
171
+ uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
172
+ uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
173
+ uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
174
// Fold the tail words into the state: a0 and a2 into h, a1 and a3 into g,
// a4 into f.
+ h ^= a0;
175
+ h = Rotate32(h, 19);
176
+ h = h * 5 + 0xe6546b64;
177
+ h ^= a2;
178
+ h = Rotate32(h, 19);
179
+ h = h * 5 + 0xe6546b64;
180
+ g ^= a1;
181
+ g = Rotate32(g, 19);
182
+ g = g * 5 + 0xe6546b64;
183
+ g ^= a3;
184
+ g = Rotate32(g, 19);
185
+ g = g * 5 + 0xe6546b64;
186
+ f += a4;
187
+ f = Rotate32(f, 19);
188
+ f = f * 5 + 0xe6546b64;
189
// len > 24 here, so iters >= 1 and the do/while below is safe to enter.
// The loop reads 20 * iters <= len - 1 bytes from the front of the input.
+ size_t iters = (len - 1) / 20;
190
+ do {
191
// These locals shadow the tail words a0..a4 declared above.
+ uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
192
+ uint32 a1 = Fetch32(s + 4);
193
+ uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
194
+ uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
195
+ uint32 a4 = Fetch32(s + 16);
196
+ h ^= a0;
197
+ h = Rotate32(h, 18);
198
+ h = h * 5 + 0xe6546b64;
199
+ f += a1;
200
+ f = Rotate32(f, 19);
201
+ f = f * c1;
202
+ g += a2;
203
+ g = Rotate32(g, 18);
204
+ g = g * 5 + 0xe6546b64;
205
+ h ^= a3 + a1;
206
+ h = Rotate32(h, 19);
207
+ h = h * 5 + 0xe6546b64;
208
+ g ^= a4;
209
+ g = bswap_32(g) * 5;
210
+ h += a4 * 5;
211
+ h = bswap_32(h);
212
+ f += a0;
213
// Rotate the roles of the three state words before the next chunk.
+ PERMUTE3(f, h, g);
214
+ s += 20;
215
+ } while (--iters != 0);
216
// Final mixing: scramble g and f, then fold each into h in turn.
+ g = Rotate32(g, 11) * c1;
217
+ g = Rotate32(g, 17) * c1;
218
+ f = Rotate32(f, 11) * c1;
219
+ f = Rotate32(f, 17) * c1;
220
+ h = Rotate32(h + g, 19);
221
+ h = h * 5 + 0xe6546b64;
222
+ h = Rotate32(h, 17) * c1;
223
+ h = Rotate32(h + f, 19);
224
+ h = h * 5 + 0xe6546b64;
225
+ h = Rotate32(h, 17) * c1;
226
+ return h;
227
+ }
97
228
 
98
229
  // Bitwise right rotate. Normally this will compile to a single
99
230
  // instruction, especially if the shift is a manifest constant.
@@ -102,13 +233,6 @@ static uint64 Rotate(uint64 val, int shift) {
102
233
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
103
234
  }
104
235
 
105
- // Equivalent to Rotate(), but requires the second arg to be non-zero.
106
- // On x86-64, and probably others, it's possible for this to compile
107
- // to a single instruction if both args are already in registers.
108
- static uint64 RotateByAtLeast1(uint64 val, int shift) {
109
- return (val >> shift) | (val << (64 - shift));
110
- }
111
-
112
236
  // Xors `val` with its own bits shifted right by 47 and returns the result,
  // so the top 17 bits of the input also influence the low bits.
  static uint64 ShiftMix(uint64 val) {
113
237
  return val ^ (val >> 47);
114
238
  }
@@ -117,15 +241,29 @@ static uint64 HashLen16(uint64 u, uint64 v) {
117
241
  return Hash128to64(uint128(u, v));
118
242
  }
119
243
 
244
// Overload of HashLen16 that takes the multiplier explicitly. Combines u and
// v into one 64-bit value using two rounds of "xor, multiply by mul, xor with
// self >> 47" followed by a final multiplication by mul. The callers added in
// this diff pass mul = k2 + len * 2.
+ static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
245
+ // Murmur-inspired hashing.
246
+ uint64 a = (u ^ v) * mul;
247
+ a ^= (a >> 47);
248
+ uint64 b = (v ^ a) * mul;
249
+ b ^= (b >> 47);
250
+ b *= mul;
251
+ return b;
252
+ }
253
+
120
254
  static uint64 HashLen0to16(const char *s, size_t len) {
121
- if (len > 8) {
122
- uint64 a = Fetch64(s);
255
+ if (len >= 8) {
256
+ uint64 mul = k2 + len * 2;
257
+ uint64 a = Fetch64(s) + k2;
123
258
  uint64 b = Fetch64(s + len - 8);
124
- return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
259
+ uint64 c = Rotate(b, 37) * mul + a;
260
+ uint64 d = (Rotate(a, 25) + b) * mul;
261
+ return HashLen16(c, d, mul);
125
262
  }
126
263
  if (len >= 4) {
264
+ uint64 mul = k2 + len * 2;
127
265
  uint64 a = Fetch32(s);
128
- return HashLen16(len + (a << 3), Fetch32(s + len - 4));
266
+ return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
129
267
  }
130
268
  if (len > 0) {
131
269
  uint8 a = s[0];
@@ -133,7 +271,7 @@ static uint64 HashLen0to16(const char *s, size_t len) {
133
271
  uint8 c = s[len - 1];
134
272
  uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
135
273
  uint32 z = len + (static_cast<uint32>(c) << 2);
136
- return ShiftMix(y * k2 ^ z * k3) * k2;
274
+ return ShiftMix(y * k2 ^ z * k0) * k2;
137
275
  }
138
276
  return k2;
139
277
  }
@@ -141,12 +279,13 @@ static uint64 HashLen0to16(const char *s, size_t len) {
141
279
  // This probably works well for 16-byte strings as well, but it may be overkill
142
280
  // in that case.
143
281
  static uint64 HashLen17to32(const char *s, size_t len) {
282
+ uint64 mul = k2 + len * 2;
144
283
  uint64 a = Fetch64(s) * k1;
145
284
  uint64 b = Fetch64(s + 8);
146
- uint64 c = Fetch64(s + len - 8) * k2;
147
- uint64 d = Fetch64(s + len - 16) * k0;
148
- return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
149
- a + Rotate(b ^ k3, 20) - c + len);
285
+ uint64 c = Fetch64(s + len - 8) * mul;
286
+ uint64 d = Fetch64(s + len - 16) * k2;
287
+ return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
288
+ a + Rotate(b + k2, 18) + c, mul);
150
289
  }
151
290
 
152
291
  // Return a 16-byte hash for 48 bytes. Quick and dirty.
@@ -175,26 +314,24 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(
175
314
 
176
315
  // Return an 8-byte hash for 33 to 64 bytes.
177
316
  static uint64 HashLen33to64(const char *s, size_t len) {
178
- uint64 z = Fetch64(s + 24);
179
- uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
180
- uint64 b = Rotate(a + z, 52);
181
- uint64 c = Rotate(a, 37);
182
- a += Fetch64(s + 8);
183
- c += Rotate(a, 7);
184
- a += Fetch64(s + 16);
185
- uint64 vf = a + z;
186
- uint64 vs = b + Rotate(a, 31) + c;
187
- a = Fetch64(s + 16) + Fetch64(s + len - 32);
188
- z = Fetch64(s + len - 8);
189
- b = Rotate(a + z, 52);
190
- c = Rotate(a, 37);
191
- a += Fetch64(s + len - 24);
192
- c += Rotate(a, 7);
193
- a += Fetch64(s + len - 16);
194
- uint64 wf = a + z;
195
- uint64 ws = b + Rotate(a, 31) + c;
196
- uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
197
- return ShiftMix(r * k0 + vs) * k2;
317
+ uint64 mul = k2 + len * 2;
318
+ uint64 a = Fetch64(s) * k2;
319
+ uint64 b = Fetch64(s + 8);
320
+ uint64 c = Fetch64(s + len - 24);
321
+ uint64 d = Fetch64(s + len - 32);
322
+ uint64 e = Fetch64(s + 16) * k2;
323
+ uint64 f = Fetch64(s + 24) * 9;
324
+ uint64 g = Fetch64(s + len - 8);
325
+ uint64 h = Fetch64(s + len - 16) * mul;
326
+ uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
327
+ uint64 v = ((a + g) ^ d) + f + 1;
328
+ uint64 w = bswap_64((u + v) * mul) + h;
329
+ uint64 x = Rotate(e + f, 42) + c;
330
+ uint64 y = (bswap_64((v + w) * mul) + g) * mul;
331
+ uint64 z = e + f + c;
332
+ a = bswap_64((x + z) * mul + y) + b;
333
+ b = ShiftMix((z + a) * mul + d + h) * mul;
334
+ return b + x;
198
335
  }
199
336
 
200
337
  uint64 CityHash64(const char *s, size_t len) {
@@ -315,7 +452,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
315
452
  len -= 128;
316
453
  } while (LIKELY(len >= 128));
317
454
  x += Rotate(v.first + z, 49) * k0;
318
- z += Rotate(w.first, 37) * k0;
455
+ y = y * k0 + Rotate(w.second, 37);
456
+ z = z * k0 + Rotate(w.first, 27);
457
+ w.first *= 9;
458
+ v.first *= k0;
319
459
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
320
460
  for (size_t tail_done = 0; tail_done < len; ) {
321
461
  tail_done += 32;
@@ -325,6 +465,7 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
325
465
  z += w.second + Fetch64(s + len - tail_done);
326
466
  w.second += v.first;
327
467
  v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
468
+ v.first *= k0;
328
469
  }
329
470
  // At this point our 56 bytes of state should contain more than
330
471
  // enough information for a strong 128-bit hash. We use two
@@ -336,19 +477,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
336
477
  }
337
478
 
338
479
  uint128 CityHash128(const char *s, size_t len) {
339
- if (len >= 16) {
340
- return CityHash128WithSeed(s + 16,
341
- len - 16,
342
- uint128(Fetch64(s) ^ k3,
343
- Fetch64(s + 8)));
344
- } else if (len >= 8) {
345
- return CityHash128WithSeed(NULL,
346
- 0,
347
- uint128(Fetch64(s) ^ (len * k0),
348
- Fetch64(s + len - 8) ^ k1));
349
- } else {
350
- return CityHash128WithSeed(s, len, uint128(k0, k1));
351
- }
480
+ return len >= 16 ?
481
+ CityHash128WithSeed(s + 16, len - 16,
482
+ uint128(Fetch64(s), Fetch64(s + 8) + k0)) :
483
+ CityHash128WithSeed(s, len, uint128(k0, k1));
352
484
  }
353
485
 
354
486
  #ifdef __SSE4_2__
@@ -363,60 +495,79 @@ static void CityHashCrc256Long(const char *s, size_t len,
363
495
  uint64 c = result[0] = HashLen16(b, len);
364
496
  uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
365
497
  uint64 e = Fetch64(s + 184) + seed;
366
- uint64 f = seed;
498
+ uint64 f = 0;
367
499
  uint64 g = 0;
368
- uint64 h = 0;
369
- uint64 i = 0;
370
- uint64 j = 0;
371
- uint64 t = c + d;
500
+ uint64 h = c + d;
501
+ uint64 x = seed;
502
+ uint64 y = 0;
503
+ uint64 z = 0;
372
504
 
373
505
  // 240 bytes of input per iter.
374
506
  size_t iters = len / 240;
375
507
  len -= iters * 240;
376
508
  do {
377
- #define CHUNK(multiplier, z) \
378
- { \
379
- uint64 old_a = a; \
380
- a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \
381
- b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \
382
- c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \
383
- d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \
384
- e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \
385
- t = old_a; \
386
- } \
387
- f = _mm_crc32_u64(f, a); \
388
- g = _mm_crc32_u64(g, b); \
389
- h = _mm_crc32_u64(h, c); \
390
- i = _mm_crc32_u64(i, d); \
391
- j = _mm_crc32_u64(j, e); \
509
+ #undef CHUNK
510
+ #define CHUNK(r) \
511
+ PERMUTE3(x, z, y); \
512
+ b += Fetch64(s); \
513
+ c += Fetch64(s + 8); \
514
+ d += Fetch64(s + 16); \
515
+ e += Fetch64(s + 24); \
516
+ f += Fetch64(s + 32); \
517
+ a += b; \
518
+ h += f; \
519
+ b += c; \
520
+ f += d; \
521
+ g += e; \
522
+ e += z; \
523
+ g += x; \
524
+ z = _mm_crc32_u64(z, b + g); \
525
+ y = _mm_crc32_u64(y, e + h); \
526
+ x = _mm_crc32_u64(x, f + a); \
527
+ e = Rotate(e, r); \
528
+ c += e; \
392
529
  s += 40
393
530
 
394
- CHUNK(1, 1); CHUNK(k0, 0);
395
- CHUNK(1, 1); CHUNK(k0, 0);
396
- CHUNK(1, 1); CHUNK(k0, 0);
531
+ CHUNK(0); PERMUTE3(a, h, c);
532
+ CHUNK(33); PERMUTE3(a, h, f);
533
+ CHUNK(0); PERMUTE3(b, h, f);
534
+ CHUNK(42); PERMUTE3(b, h, d);
535
+ CHUNK(0); PERMUTE3(b, h, e);
536
+ CHUNK(33); PERMUTE3(a, h, e);
397
537
  } while (--iters > 0);
398
538
 
399
539
  while (len >= 40) {
400
- CHUNK(k0, 0);
540
+ CHUNK(29);
541
+ e ^= Rotate(a, 20);
542
+ h += Rotate(b, 30);
543
+ g ^= Rotate(c, 40);
544
+ f += Rotate(d, 34);
545
+ PERMUTE3(c, h, g);
401
546
  len -= 40;
402
547
  }
403
548
  if (len > 0) {
404
549
  s = s + len - 40;
405
- CHUNK(k0, 0);
550
+ CHUNK(33);
551
+ e ^= Rotate(a, 43);
552
+ h += Rotate(b, 42);
553
+ g ^= Rotate(c, 41);
554
+ f += Rotate(d, 40);
406
555
  }
407
- j += i << 32;
408
- a = HashLen16(a, j);
409
- h += g << 32;
410
- b += h;
411
- c = HashLen16(c, f) + i;
556
+ result[0] ^= h;
557
+ result[1] ^= g;
558
+ g += h;
559
+ a = HashLen16(a, g + z);
560
+ x += y << 32;
561
+ b += x;
562
+ c = HashLen16(c, z) + h;
412
563
  d = HashLen16(d, e + result[0]);
413
- j += e;
414
- i += HashLen16(h, t);
415
- e = HashLen16(a, d) + j;
416
- f = HashLen16(b, c) + a;
417
- g = HashLen16(j, i) + c;
418
- result[0] = e + f + g + h;
419
- a = ShiftMix((a + g) * k0) * k0 + b;
564
+ g += e;
565
+ h += HashLen16(x, f);
566
+ e = HashLen16(a, d) + g;
567
+ z = HashLen16(b, c) + a;
568
+ y = HashLen16(g, h) + c;
569
+ result[0] = e + z + y + x;
570
+ a = ShiftMix((a + y) * k0) * k0 + b;
420
571
  result[1] += a + result[0];
421
572
  a = ShiftMix(a * k0) * k0 + c;
422
573
  result[2] = a + result[1];
@@ -20,21 +20,40 @@
20
20
  //
21
21
  // CityHash, by Geoff Pike and Jyrki Alakuijala
22
22
  //
23
- // This file provides a few functions for hashing strings. On x86-64
24
- // hardware in 2011, CityHash64() is faster than other high-quality
25
- // hash functions, such as Murmur. This is largely due to higher
26
- // instruction-level parallelism. CityHash64() and CityHash128() also perform
27
- // well on hash-quality tests.
23
+ // http://code.google.com/p/cityhash/
28
24
  //
29
- // CityHash128() is optimized for relatively long strings and returns
30
- // a 128-bit hash. For strings more than about 2000 bytes it can be
31
- // faster than CityHash64().
25
+ // This file provides a few functions for hashing strings. All of them are
26
+ // high-quality functions in the sense that they pass standard tests such
27
+ // as Austin Appleby's SMHasher. They are also fast.
28
+ //
29
+ // For 64-bit x86 code, on short strings, we don't know of anything faster than
30
+ // CityHash64 that is of comparable quality. We believe our nearest competitor
31
+ // is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
32
+ // tables and most other hashing (excluding cryptography).
33
+ //
34
+ // For 64-bit x86 code, on long strings, the picture is more complicated.
35
+ // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
36
+ // CityHashCrc128 appears to be faster than all competitors of comparable
37
+ // quality. CityHash128 is also good but not quite as fast. We believe our
38
+ // nearest competitor is Bob Jenkins' Spooky. We don't have great data for
39
+ // other 64-bit CPUs, but for long strings we know that Spooky is slightly
40
+ // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
41
+ // Note that CityHashCrc128 is declared in citycrc.h.
42
+ //
43
+ // For 32-bit x86 code, we don't know of anything faster than CityHash32 that
44
+ // is of comparable quality. We believe our nearest competitor is Murmur3A.
45
+ // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
32
46
  //
33
47
  // Functions in the CityHash family are not suitable for cryptography.
34
48
  //
35
- // WARNING: This code has not been tested on big-endian platforms!
49
+ // Please see CityHash's README file for more details on our performance
50
+ // measurements and so on.
51
+ //
52
+ // WARNING: This code has been only lightly tested on big-endian platforms!
36
53
  // It is known to work well on little-endian platforms that have a small penalty
37
54
  // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
55
+ // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
56
+ // bug reports are welcome.
38
57
  //
39
58
  // By the way, for some hash functions, given strings a and b, the hash
40
59
  // of a+b is easily derived from the hashes of a and b. This property
@@ -56,23 +75,26 @@ inline uint64 Uint128Low64(const uint128& x) { return x.first; }
56
75
  inline uint64 Uint128High64(const uint128& x) { return x.second; }
57
76
 
58
77
  // Hash function for a byte array.
59
- extern "C" uint64 CityHash64(const char *buf, size_t len);
78
+ uint64 CityHash64(const char *buf, size_t len);
60
79
 
61
80
  // Hash function for a byte array. For convenience, a 64-bit seed is also
62
81
  // hashed into the result.
63
- extern "C" uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
82
+ uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
64
83
 
65
84
  // Hash function for a byte array. For convenience, two seeds are also
66
85
  // hashed into the result.
67
- extern "C" uint64 CityHash64WithSeeds(const char *buf, size_t len,
86
+ uint64 CityHash64WithSeeds(const char *buf, size_t len,
68
87
  uint64 seed0, uint64 seed1);
69
88
 
70
89
  // Hash function for a byte array.
71
- extern "C" uint128 CityHash128(const char *s, size_t len);
90
+ uint128 CityHash128(const char *s, size_t len);
72
91
 
73
92
  // Hash function for a byte array. For convenience, a 128-bit seed is also
74
93
  // hashed into the result.
75
- extern "C" uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
94
+ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
95
+
96
+ // Hash function for a byte array. Most useful in 32-bit binaries.
97
+ uint32 CityHash32(const char *buf, size_t len);
76
98
 
77
99
  // Hash 128 input bits down to 64 bits of output.
78
100
  // This is intended to be a reasonably good hash function.
@@ -5,6 +5,11 @@
5
5
  // calling rb_define_method()
6
6
  typedef VALUE (ruby_method)(...);
7
7
 
8
// Native implementation behind CityHash::Internal.hash32 (registered in
// Init_cityhash with arity 1). Hashes RSTRING_LEN(input) bytes starting at
// StringValuePtr(input) with CityHash32 and returns the value as a Ruby
// integer. `mod` is not used.
// NOTE(review): ULL2NUM is applied to a 32-bit result, mirroring
// cityhash_hash64; UINT2NUM would presumably be sufficient -- confirm.
+ extern "C" VALUE cityhash_hash32(VALUE mod, VALUE input)
9
+ {
10
+ return ULL2NUM(CityHash32(StringValuePtr(input), RSTRING_LEN(input)));
11
+ }
12
+
8
13
  extern "C" VALUE cityhash_hash64(VALUE mod, VALUE input)
9
14
  {
10
15
  return ULL2NUM(CityHash64(StringValuePtr(input), RSTRING_LEN(input)));
@@ -38,6 +43,8 @@ extern "C" void Init_cityhash()
38
43
  VALUE mCityHash = rb_define_module("CityHash");
39
44
  VALUE mInternal = rb_define_module_under(mCityHash, "Internal");
40
45
 
46
+ rb_define_singleton_method(mInternal, "hash32", (ruby_method*) &cityhash_hash32, 1);
47
+
41
48
  rb_define_singleton_method(mInternal, "hash64", (ruby_method*) &cityhash_hash64, 1);
42
49
  rb_define_singleton_method(mInternal, "hash64_with_seed", (ruby_method*) &cityhash_hash64_with_seed, 2);
43
50
  rb_define_singleton_method(mInternal, "hash64_with_seeds", (ruby_method*) &cityhash_hash64_with_seeds, 3);
@@ -5,6 +5,10 @@ module CityHash
5
5
  LOW64_MASK = 0x0000000000000000ffffffffffffffff
6
6
  HIGH64_MASK = 0xffffffffffffffff0000000000000000
7
7
 
8
# Returns the 32-bit CityHash of +input+ by delegating to the native
# Internal.hash32 binding. Unlike hash64, it accepts no seed arguments.
+ def self.hash32(input)
9
+ Internal.hash32(input)
10
+ end
11
+
8
12
  def self.hash64(input, seed1=nil, seed2=nil)
9
13
  return Internal.hash64(input) if seed1.nil?
10
14
  return Internal.hash64_with_seed(input, seed1.to_i) if seed2.nil?
@@ -1,3 +1,3 @@
1
1
  module CityHash
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
  end
@@ -1,24 +1,28 @@
1
1
  require 'test_helper'
2
2
 
3
3
  describe CityHash do
4
+ it 'returns 32bit hash' do
5
+ assert_equal 1633095781, CityHash.hash32("test")
6
+ end
7
+
4
8
  it 'returns 64bit hash' do
5
- assert_equal 17703940110308125106, CityHash.hash64("test")
9
+ assert_equal 8581389452482819506, CityHash.hash64("test")
6
10
  end
7
11
 
8
12
  it "returns 64bit hash with a seed" do
9
- assert_equal 14900027982776226655, CityHash.hash64("test", 12345)
13
+ assert_equal 9154302171269876511, CityHash.hash64("test", 12345)
10
14
  end
11
15
 
12
16
  it "returns 64bit hash with seeds" do
13
- assert_equal 11136353178704814373, CityHash.hash64("test", 12345, 54321)
17
+ assert_equal 4854399283587686019, CityHash.hash64("test", 12345, 54321)
14
18
  end
15
19
 
16
20
  it "returns 128bit hash" do
17
- assert_equal 1800071687761605184910580728449884026697, CityHash.hash128("test")
21
+ assert_equal 124124989950401219618153994964897029896, CityHash.hash128("test")
18
22
  end
19
23
 
20
24
  it "returns 128bit hash with seed" do
21
25
  seed = (123 << 64) | 123
22
- assert_equal 1631427474705635869517741677842296176559, CityHash.hash128("test", seed)
26
+ assert_equal 1834994000056895780313918994795281207519, CityHash.hash128("test", seed)
23
27
  end
24
28
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cityhash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-07 00:00:00.000000000Z
12
+ date: 2012-10-25 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ruby bindings for google's cityhash
15
15
  email:
@@ -21,6 +21,7 @@ extra_rdoc_files: []
21
21
  files:
22
22
  - .gitignore
23
23
  - .travis.yml
24
+ - CHANGELOG.md
24
25
  - Gemfile
25
26
  - LICENSE.txt
26
27
  - README.md
@@ -48,7 +49,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
48
49
  version: '0'
49
50
  segments:
50
51
  - 0
51
- hash: 268501841747995875
52
+ hash: 4171946996269433700
52
53
  required_rubygems_version: !ruby/object:Gem::Requirement
53
54
  none: false
54
55
  requirements:
@@ -57,11 +58,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  version: '0'
58
59
  segments:
59
60
  - 0
60
- hash: 268501841747995875
61
+ hash: 4171946996269433700
61
62
  requirements: []
62
63
  rubyforge_project: cityhash
63
- rubygems_version: 1.8.19
64
+ rubygems_version: 1.8.24
64
65
  signing_key:
65
66
  specification_version: 3
66
67
  summary: ruby bindings for google's cityhash
67
- test_files: []
68
+ test_files:
69
+ - test/cityhash_test.rb
70
+ - test/test_helper.rb