cityhash 0.6.0 → 0.7.0

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -3,3 +3,4 @@ rvm:
3
3
  - 1.8.7
4
4
  - 1.9.2
5
5
  - 1.9.3
6
+ - ruby-head
@@ -0,0 +1,5 @@
1
+ ## 0.7.0 (October 25, 2012) ##
2
+
3
+ ### enhancements
4
+ * add CityHash.hash32 method
5
+ * update libcityhash to 1.1. See [NEWS](http://code.google.com/p/cityhash/source/browse/trunk/NEWS)
data/README.md CHANGED
@@ -15,11 +15,12 @@ text = "test"
15
15
  seed1 = 12345
16
16
  seed2 = 54321
17
17
 
18
- CityHash.hash64(text) # => 17703940110308125106
19
- CityHash.hash64(text, seed1) # => 14900027982776226655
20
- CityHash.hash64(text, seed1, seed2) # => 11136353178704814373
21
- CityHash.hash128(text) # => 1800071687761605184910580728449884026697
22
- CityHash.hash128(text, seed1) # => 6087407617808651818174120599816915369
18
+ CityHash.hash32(text) # => 1633095781
19
+ CityHash.hash64(text) # => 8581389452482819506
20
+ CityHash.hash64(text, seed1) # => 9154302171269876511
21
+ CityHash.hash64(text, seed1, seed2) # => 4854399283587686019
22
+ CityHash.hash128(text) # => 124124989950401219618153994964897029896
23
+ CityHash.hash128(text, seed1) # => 101668641288246442316643001405184598611
23
24
  ```
24
25
 
25
26
  ### Contributing to cityhash
@@ -27,7 +27,7 @@
27
27
  // possible hash functions, by using SIMD instructions, or by
28
28
  // compromising on hash quality.
29
29
 
30
- #include "city.h"
30
+ #include <city.h>
31
31
 
32
32
  #include <algorithm>
33
33
  #include <string.h> // for memcpy and memset
@@ -46,32 +46,32 @@ static uint32 UNALIGNED_LOAD32(const char *p) {
46
46
  return result;
47
47
  }
48
48
 
49
- #if !defined(WORDS_BIGENDIAN)
50
-
51
- #define uint32_in_expected_order(x) (x)
52
- #define uint64_in_expected_order(x) (x)
53
-
54
- #else
55
-
56
49
  #ifdef _MSC_VER
50
+
57
51
  #include <stdlib.h>
58
52
  #define bswap_32(x) _byteswap_ulong(x)
59
53
  #define bswap_64(x) _byteswap_uint64(x)
60
54
 
61
55
  #elif defined(__APPLE__)
56
+
62
57
  // Mac OS X / Darwin features
63
58
  #include <libkern/OSByteOrder.h>
64
59
  #define bswap_32(x) OSSwapInt32(x)
65
60
  #define bswap_64(x) OSSwapInt64(x)
66
61
 
67
62
  #else
63
+
68
64
  #include <byteswap.h>
65
+
69
66
  #endif
70
67
 
68
+ #ifdef WORDS_BIGENDIAN
71
69
  #define uint32_in_expected_order(x) (bswap_32(x))
72
70
  #define uint64_in_expected_order(x) (bswap_64(x))
73
-
74
- #endif // WORDS_BIGENDIAN
71
+ #else
72
+ #define uint32_in_expected_order(x) (x)
73
+ #define uint64_in_expected_order(x) (x)
74
+ #endif
75
75
 
76
76
  #if !defined(LIKELY)
77
77
  #if HAVE_BUILTIN_EXPECT
@@ -93,7 +93,138 @@ static uint32 Fetch32(const char *p) {
93
93
  static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
94
94
  static const uint64 k1 = 0xb492b66fbe98f273ULL;
95
95
  static const uint64 k2 = 0x9ae16a3b2f90404fULL;
96
- static const uint64 k3 = 0xc949d7c7509e6557ULL;
96
+
97
+ // Magic numbers for 32-bit hashing. Copied from Murmur3.
98
+ static const uint32_t c1 = 0xcc9e2d51;
99
+ static const uint32_t c2 = 0x1b873593;
100
+
101
+ // A 32-bit to 32-bit integer hash copied from Murmur3.
102
+ static uint32 fmix(uint32 h)
103
+ {
104
+ h ^= h >> 16;
105
+ h *= 0x85ebca6b;
106
+ h ^= h >> 13;
107
+ h *= 0xc2b2ae35;
108
+ h ^= h >> 16;
109
+ return h;
110
+ }
111
+
112
+ static uint32 Rotate32(uint32 val, int shift) {
113
+ // Avoid shifting by 32: doing so yields an undefined result.
114
+ return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
115
+ }
116
+
117
+ #undef PERMUTE3
118
+ #define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
119
+
120
+ static uint32 Mur(uint32 a, uint32 h) {
121
+ // Helper from Murmur3 for combining two 32-bit values.
122
+ a *= c1;
123
+ a = Rotate32(a, 17);
124
+ a *= c2;
125
+ h ^= a;
126
+ h = Rotate32(h, 19);
127
+ return h * 5 + 0xe6546b64;
128
+ }
129
+
130
+ static uint32 Hash32Len13to24(const char *s, size_t len) {
131
+ uint32 a = Fetch32(s - 4 + (len >> 1));
132
+ uint32 b = Fetch32(s + 4);
133
+ uint32 c = Fetch32(s + len - 8);
134
+ uint32 d = Fetch32(s + (len >> 1));
135
+ uint32 e = Fetch32(s);
136
+ uint32 f = Fetch32(s + len - 4);
137
+ uint32 h = len;
138
+
139
+ return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
140
+ }
141
+
142
+ static uint32 Hash32Len0to4(const char *s, size_t len) {
143
+ uint32 b = 0;
144
+ uint32 c = 9;
145
+ for (int i = 0; i < len; i++) {
146
+ b = b * c1 + s[i];
147
+ c ^= b;
148
+ }
149
+ return fmix(Mur(b, Mur(len, c)));
150
+ }
151
+
152
+ static uint32 Hash32Len5to12(const char *s, size_t len) {
153
+ uint32 a = len, b = len * 5, c = 9, d = b;
154
+ a += Fetch32(s);
155
+ b += Fetch32(s + len - 4);
156
+ c += Fetch32(s + ((len >> 1) & 4));
157
+ return fmix(Mur(c, Mur(b, Mur(a, d))));
158
+ }
159
+
160
+ uint32 CityHash32(const char *s, size_t len) {
161
+ if (len <= 24) {
162
+ return len <= 12 ?
163
+ (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
164
+ Hash32Len13to24(s, len);
165
+ }
166
+
167
+ // len > 24
168
+ uint32 h = len, g = c1 * len, f = g;
169
+ uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
170
+ uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
171
+ uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
172
+ uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
173
+ uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
174
+ h ^= a0;
175
+ h = Rotate32(h, 19);
176
+ h = h * 5 + 0xe6546b64;
177
+ h ^= a2;
178
+ h = Rotate32(h, 19);
179
+ h = h * 5 + 0xe6546b64;
180
+ g ^= a1;
181
+ g = Rotate32(g, 19);
182
+ g = g * 5 + 0xe6546b64;
183
+ g ^= a3;
184
+ g = Rotate32(g, 19);
185
+ g = g * 5 + 0xe6546b64;
186
+ f += a4;
187
+ f = Rotate32(f, 19);
188
+ f = f * 5 + 0xe6546b64;
189
+ size_t iters = (len - 1) / 20;
190
+ do {
191
+ uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
192
+ uint32 a1 = Fetch32(s + 4);
193
+ uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
194
+ uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
195
+ uint32 a4 = Fetch32(s + 16);
196
+ h ^= a0;
197
+ h = Rotate32(h, 18);
198
+ h = h * 5 + 0xe6546b64;
199
+ f += a1;
200
+ f = Rotate32(f, 19);
201
+ f = f * c1;
202
+ g += a2;
203
+ g = Rotate32(g, 18);
204
+ g = g * 5 + 0xe6546b64;
205
+ h ^= a3 + a1;
206
+ h = Rotate32(h, 19);
207
+ h = h * 5 + 0xe6546b64;
208
+ g ^= a4;
209
+ g = bswap_32(g) * 5;
210
+ h += a4 * 5;
211
+ h = bswap_32(h);
212
+ f += a0;
213
+ PERMUTE3(f, h, g);
214
+ s += 20;
215
+ } while (--iters != 0);
216
+ g = Rotate32(g, 11) * c1;
217
+ g = Rotate32(g, 17) * c1;
218
+ f = Rotate32(f, 11) * c1;
219
+ f = Rotate32(f, 17) * c1;
220
+ h = Rotate32(h + g, 19);
221
+ h = h * 5 + 0xe6546b64;
222
+ h = Rotate32(h, 17) * c1;
223
+ h = Rotate32(h + f, 19);
224
+ h = h * 5 + 0xe6546b64;
225
+ h = Rotate32(h, 17) * c1;
226
+ return h;
227
+ }
97
228
 
98
229
  // Bitwise right rotate. Normally this will compile to a single
99
230
  // instruction, especially if the shift is a manifest constant.
@@ -102,13 +233,6 @@ static uint64 Rotate(uint64 val, int shift) {
102
233
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
103
234
  }
104
235
 
105
- // Equivalent to Rotate(), but requires the second arg to be non-zero.
106
- // On x86-64, and probably others, it's possible for this to compile
107
- // to a single instruction if both args are already in registers.
108
- static uint64 RotateByAtLeast1(uint64 val, int shift) {
109
- return (val >> shift) | (val << (64 - shift));
110
- }
111
-
112
236
  static uint64 ShiftMix(uint64 val) {
113
237
  return val ^ (val >> 47);
114
238
  }
@@ -117,15 +241,29 @@ static uint64 HashLen16(uint64 u, uint64 v) {
117
241
  return Hash128to64(uint128(u, v));
118
242
  }
119
243
 
244
+ static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
245
+ // Murmur-inspired hashing.
246
+ uint64 a = (u ^ v) * mul;
247
+ a ^= (a >> 47);
248
+ uint64 b = (v ^ a) * mul;
249
+ b ^= (b >> 47);
250
+ b *= mul;
251
+ return b;
252
+ }
253
+
120
254
  static uint64 HashLen0to16(const char *s, size_t len) {
121
- if (len > 8) {
122
- uint64 a = Fetch64(s);
255
+ if (len >= 8) {
256
+ uint64 mul = k2 + len * 2;
257
+ uint64 a = Fetch64(s) + k2;
123
258
  uint64 b = Fetch64(s + len - 8);
124
- return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
259
+ uint64 c = Rotate(b, 37) * mul + a;
260
+ uint64 d = (Rotate(a, 25) + b) * mul;
261
+ return HashLen16(c, d, mul);
125
262
  }
126
263
  if (len >= 4) {
264
+ uint64 mul = k2 + len * 2;
127
265
  uint64 a = Fetch32(s);
128
- return HashLen16(len + (a << 3), Fetch32(s + len - 4));
266
+ return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
129
267
  }
130
268
  if (len > 0) {
131
269
  uint8 a = s[0];
@@ -133,7 +271,7 @@ static uint64 HashLen0to16(const char *s, size_t len) {
133
271
  uint8 c = s[len - 1];
134
272
  uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
135
273
  uint32 z = len + (static_cast<uint32>(c) << 2);
136
- return ShiftMix(y * k2 ^ z * k3) * k2;
274
+ return ShiftMix(y * k2 ^ z * k0) * k2;
137
275
  }
138
276
  return k2;
139
277
  }
@@ -141,12 +279,13 @@ static uint64 HashLen0to16(const char *s, size_t len) {
141
279
  // This probably works well for 16-byte strings as well, but it may be overkill
142
280
  // in that case.
143
281
  static uint64 HashLen17to32(const char *s, size_t len) {
282
+ uint64 mul = k2 + len * 2;
144
283
  uint64 a = Fetch64(s) * k1;
145
284
  uint64 b = Fetch64(s + 8);
146
- uint64 c = Fetch64(s + len - 8) * k2;
147
- uint64 d = Fetch64(s + len - 16) * k0;
148
- return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
149
- a + Rotate(b ^ k3, 20) - c + len);
285
+ uint64 c = Fetch64(s + len - 8) * mul;
286
+ uint64 d = Fetch64(s + len - 16) * k2;
287
+ return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
288
+ a + Rotate(b + k2, 18) + c, mul);
150
289
  }
151
290
 
152
291
  // Return a 16-byte hash for 48 bytes. Quick and dirty.
@@ -175,26 +314,24 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(
175
314
 
176
315
  // Return an 8-byte hash for 33 to 64 bytes.
177
316
  static uint64 HashLen33to64(const char *s, size_t len) {
178
- uint64 z = Fetch64(s + 24);
179
- uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
180
- uint64 b = Rotate(a + z, 52);
181
- uint64 c = Rotate(a, 37);
182
- a += Fetch64(s + 8);
183
- c += Rotate(a, 7);
184
- a += Fetch64(s + 16);
185
- uint64 vf = a + z;
186
- uint64 vs = b + Rotate(a, 31) + c;
187
- a = Fetch64(s + 16) + Fetch64(s + len - 32);
188
- z = Fetch64(s + len - 8);
189
- b = Rotate(a + z, 52);
190
- c = Rotate(a, 37);
191
- a += Fetch64(s + len - 24);
192
- c += Rotate(a, 7);
193
- a += Fetch64(s + len - 16);
194
- uint64 wf = a + z;
195
- uint64 ws = b + Rotate(a, 31) + c;
196
- uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
197
- return ShiftMix(r * k0 + vs) * k2;
317
+ uint64 mul = k2 + len * 2;
318
+ uint64 a = Fetch64(s) * k2;
319
+ uint64 b = Fetch64(s + 8);
320
+ uint64 c = Fetch64(s + len - 24);
321
+ uint64 d = Fetch64(s + len - 32);
322
+ uint64 e = Fetch64(s + 16) * k2;
323
+ uint64 f = Fetch64(s + 24) * 9;
324
+ uint64 g = Fetch64(s + len - 8);
325
+ uint64 h = Fetch64(s + len - 16) * mul;
326
+ uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
327
+ uint64 v = ((a + g) ^ d) + f + 1;
328
+ uint64 w = bswap_64((u + v) * mul) + h;
329
+ uint64 x = Rotate(e + f, 42) + c;
330
+ uint64 y = (bswap_64((v + w) * mul) + g) * mul;
331
+ uint64 z = e + f + c;
332
+ a = bswap_64((x + z) * mul + y) + b;
333
+ b = ShiftMix((z + a) * mul + d + h) * mul;
334
+ return b + x;
198
335
  }
199
336
 
200
337
  uint64 CityHash64(const char *s, size_t len) {
@@ -315,7 +452,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
315
452
  len -= 128;
316
453
  } while (LIKELY(len >= 128));
317
454
  x += Rotate(v.first + z, 49) * k0;
318
- z += Rotate(w.first, 37) * k0;
455
+ y = y * k0 + Rotate(w.second, 37);
456
+ z = z * k0 + Rotate(w.first, 27);
457
+ w.first *= 9;
458
+ v.first *= k0;
319
459
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
320
460
  for (size_t tail_done = 0; tail_done < len; ) {
321
461
  tail_done += 32;
@@ -325,6 +465,7 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
325
465
  z += w.second + Fetch64(s + len - tail_done);
326
466
  w.second += v.first;
327
467
  v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
468
+ v.first *= k0;
328
469
  }
329
470
  // At this point our 56 bytes of state should contain more than
330
471
  // enough information for a strong 128-bit hash. We use two
@@ -336,19 +477,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
336
477
  }
337
478
 
338
479
  uint128 CityHash128(const char *s, size_t len) {
339
- if (len >= 16) {
340
- return CityHash128WithSeed(s + 16,
341
- len - 16,
342
- uint128(Fetch64(s) ^ k3,
343
- Fetch64(s + 8)));
344
- } else if (len >= 8) {
345
- return CityHash128WithSeed(NULL,
346
- 0,
347
- uint128(Fetch64(s) ^ (len * k0),
348
- Fetch64(s + len - 8) ^ k1));
349
- } else {
350
- return CityHash128WithSeed(s, len, uint128(k0, k1));
351
- }
480
+ return len >= 16 ?
481
+ CityHash128WithSeed(s + 16, len - 16,
482
+ uint128(Fetch64(s), Fetch64(s + 8) + k0)) :
483
+ CityHash128WithSeed(s, len, uint128(k0, k1));
352
484
  }
353
485
 
354
486
  #ifdef __SSE4_2__
@@ -363,60 +495,79 @@ static void CityHashCrc256Long(const char *s, size_t len,
363
495
  uint64 c = result[0] = HashLen16(b, len);
364
496
  uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
365
497
  uint64 e = Fetch64(s + 184) + seed;
366
- uint64 f = seed;
498
+ uint64 f = 0;
367
499
  uint64 g = 0;
368
- uint64 h = 0;
369
- uint64 i = 0;
370
- uint64 j = 0;
371
- uint64 t = c + d;
500
+ uint64 h = c + d;
501
+ uint64 x = seed;
502
+ uint64 y = 0;
503
+ uint64 z = 0;
372
504
 
373
505
  // 240 bytes of input per iter.
374
506
  size_t iters = len / 240;
375
507
  len -= iters * 240;
376
508
  do {
377
- #define CHUNK(multiplier, z) \
378
- { \
379
- uint64 old_a = a; \
380
- a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \
381
- b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \
382
- c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \
383
- d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \
384
- e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \
385
- t = old_a; \
386
- } \
387
- f = _mm_crc32_u64(f, a); \
388
- g = _mm_crc32_u64(g, b); \
389
- h = _mm_crc32_u64(h, c); \
390
- i = _mm_crc32_u64(i, d); \
391
- j = _mm_crc32_u64(j, e); \
509
+ #undef CHUNK
510
+ #define CHUNK(r) \
511
+ PERMUTE3(x, z, y); \
512
+ b += Fetch64(s); \
513
+ c += Fetch64(s + 8); \
514
+ d += Fetch64(s + 16); \
515
+ e += Fetch64(s + 24); \
516
+ f += Fetch64(s + 32); \
517
+ a += b; \
518
+ h += f; \
519
+ b += c; \
520
+ f += d; \
521
+ g += e; \
522
+ e += z; \
523
+ g += x; \
524
+ z = _mm_crc32_u64(z, b + g); \
525
+ y = _mm_crc32_u64(y, e + h); \
526
+ x = _mm_crc32_u64(x, f + a); \
527
+ e = Rotate(e, r); \
528
+ c += e; \
392
529
  s += 40
393
530
 
394
- CHUNK(1, 1); CHUNK(k0, 0);
395
- CHUNK(1, 1); CHUNK(k0, 0);
396
- CHUNK(1, 1); CHUNK(k0, 0);
531
+ CHUNK(0); PERMUTE3(a, h, c);
532
+ CHUNK(33); PERMUTE3(a, h, f);
533
+ CHUNK(0); PERMUTE3(b, h, f);
534
+ CHUNK(42); PERMUTE3(b, h, d);
535
+ CHUNK(0); PERMUTE3(b, h, e);
536
+ CHUNK(33); PERMUTE3(a, h, e);
397
537
  } while (--iters > 0);
398
538
 
399
539
  while (len >= 40) {
400
- CHUNK(k0, 0);
540
+ CHUNK(29);
541
+ e ^= Rotate(a, 20);
542
+ h += Rotate(b, 30);
543
+ g ^= Rotate(c, 40);
544
+ f += Rotate(d, 34);
545
+ PERMUTE3(c, h, g);
401
546
  len -= 40;
402
547
  }
403
548
  if (len > 0) {
404
549
  s = s + len - 40;
405
- CHUNK(k0, 0);
550
+ CHUNK(33);
551
+ e ^= Rotate(a, 43);
552
+ h += Rotate(b, 42);
553
+ g ^= Rotate(c, 41);
554
+ f += Rotate(d, 40);
406
555
  }
407
- j += i << 32;
408
- a = HashLen16(a, j);
409
- h += g << 32;
410
- b += h;
411
- c = HashLen16(c, f) + i;
556
+ result[0] ^= h;
557
+ result[1] ^= g;
558
+ g += h;
559
+ a = HashLen16(a, g + z);
560
+ x += y << 32;
561
+ b += x;
562
+ c = HashLen16(c, z) + h;
412
563
  d = HashLen16(d, e + result[0]);
413
- j += e;
414
- i += HashLen16(h, t);
415
- e = HashLen16(a, d) + j;
416
- f = HashLen16(b, c) + a;
417
- g = HashLen16(j, i) + c;
418
- result[0] = e + f + g + h;
419
- a = ShiftMix((a + g) * k0) * k0 + b;
564
+ g += e;
565
+ h += HashLen16(x, f);
566
+ e = HashLen16(a, d) + g;
567
+ z = HashLen16(b, c) + a;
568
+ y = HashLen16(g, h) + c;
569
+ result[0] = e + z + y + x;
570
+ a = ShiftMix((a + y) * k0) * k0 + b;
420
571
  result[1] += a + result[0];
421
572
  a = ShiftMix(a * k0) * k0 + c;
422
573
  result[2] = a + result[1];
@@ -20,21 +20,40 @@
20
20
  //
21
21
  // CityHash, by Geoff Pike and Jyrki Alakuijala
22
22
  //
23
- // This file provides a few functions for hashing strings. On x86-64
24
- // hardware in 2011, CityHash64() is faster than other high-quality
25
- // hash functions, such as Murmur. This is largely due to higher
26
- // instruction-level parallelism. CityHash64() and CityHash128() also perform
27
- // well on hash-quality tests.
23
+ // http://code.google.com/p/cityhash/
28
24
  //
29
- // CityHash128() is optimized for relatively long strings and returns
30
- // a 128-bit hash. For strings more than about 2000 bytes it can be
31
- // faster than CityHash64().
25
+ // This file provides a few functions for hashing strings. All of them are
26
+ // high-quality functions in the sense that they pass standard tests such
27
+ // as Austin Appleby's SMHasher. They are also fast.
28
+ //
29
+ // For 64-bit x86 code, on short strings, we don't know of anything faster than
30
+ // CityHash64 that is of comparable quality. We believe our nearest competitor
31
+ // is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
32
+ // tables and most other hashing (excluding cryptography).
33
+ //
34
+ // For 64-bit x86 code, on long strings, the picture is more complicated.
35
+ // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
36
+ // CityHashCrc128 appears to be faster than all competitors of comparable
37
+ // quality. CityHash128 is also good but not quite as fast. We believe our
38
+ // nearest competitor is Bob Jenkins' Spooky. We don't have great data for
39
+ // other 64-bit CPUs, but for long strings we know that Spooky is slightly
40
+ // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
41
+ // Note that CityHashCrc128 is declared in citycrc.h.
42
+ //
43
+ // For 32-bit x86 code, we don't know of anything faster than CityHash32 that
44
+ // is of comparable quality. We believe our nearest competitor is Murmur3A.
45
+ // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
32
46
  //
33
47
  // Functions in the CityHash family are not suitable for cryptography.
34
48
  //
35
- // WARNING: This code has not been tested on big-endian platforms!
49
+ // Please see CityHash's README file for more details on our performance
50
+ // measurements and so on.
51
+ //
52
+ // WARNING: This code has been only lightly tested on big-endian platforms!
36
53
  // It is known to work well on little-endian platforms that have a small penalty
37
54
  // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
55
+ // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
56
+ // bug reports are welcome.
38
57
  //
39
58
  // By the way, for some hash functions, given strings a and b, the hash
40
59
  // of a+b is easily derived from the hashes of a and b. This property
@@ -56,23 +75,26 @@ inline uint64 Uint128Low64(const uint128& x) { return x.first; }
56
75
  inline uint64 Uint128High64(const uint128& x) { return x.second; }
57
76
 
58
77
  // Hash function for a byte array.
59
- extern "C" uint64 CityHash64(const char *buf, size_t len);
78
+ uint64 CityHash64(const char *buf, size_t len);
60
79
 
61
80
  // Hash function for a byte array. For convenience, a 64-bit seed is also
62
81
  // hashed into the result.
63
- extern "C" uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
82
+ uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
64
83
 
65
84
  // Hash function for a byte array. For convenience, two seeds are also
66
85
  // hashed into the result.
67
- extern "C" uint64 CityHash64WithSeeds(const char *buf, size_t len,
86
+ uint64 CityHash64WithSeeds(const char *buf, size_t len,
68
87
  uint64 seed0, uint64 seed1);
69
88
 
70
89
  // Hash function for a byte array.
71
- extern "C" uint128 CityHash128(const char *s, size_t len);
90
+ uint128 CityHash128(const char *s, size_t len);
72
91
 
73
92
  // Hash function for a byte array. For convenience, a 128-bit seed is also
74
93
  // hashed into the result.
75
- extern "C" uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
94
+ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
95
+
96
+ // Hash function for a byte array. Most useful in 32-bit binaries.
97
+ uint32 CityHash32(const char *buf, size_t len);
76
98
 
77
99
  // Hash 128 input bits down to 64 bits of output.
78
100
  // This is intended to be a reasonably good hash function.
@@ -5,6 +5,11 @@
5
5
  // calling rb_define_method()
6
6
  typedef VALUE (ruby_method)(...);
7
7
 
8
+ extern "C" VALUE cityhash_hash32(VALUE mod, VALUE input)
9
+ {
10
+ return ULL2NUM(CityHash32(StringValuePtr(input), RSTRING_LEN(input)));
11
+ }
12
+
8
13
  extern "C" VALUE cityhash_hash64(VALUE mod, VALUE input)
9
14
  {
10
15
  return ULL2NUM(CityHash64(StringValuePtr(input), RSTRING_LEN(input)));
@@ -38,6 +43,8 @@ extern "C" void Init_cityhash()
38
43
  VALUE mCityHash = rb_define_module("CityHash");
39
44
  VALUE mInternal = rb_define_module_under(mCityHash, "Internal");
40
45
 
46
+ rb_define_singleton_method(mInternal, "hash32", (ruby_method*) &cityhash_hash32, 1);
47
+
41
48
  rb_define_singleton_method(mInternal, "hash64", (ruby_method*) &cityhash_hash64, 1);
42
49
  rb_define_singleton_method(mInternal, "hash64_with_seed", (ruby_method*) &cityhash_hash64_with_seed, 2);
43
50
  rb_define_singleton_method(mInternal, "hash64_with_seeds", (ruby_method*) &cityhash_hash64_with_seeds, 3);
@@ -5,6 +5,10 @@ module CityHash
5
5
  LOW64_MASK = 0x0000000000000000ffffffffffffffff
6
6
  HIGH64_MASK = 0xffffffffffffffff0000000000000000
7
7
 
8
+ def self.hash32(input)
9
+ Internal.hash32(input)
10
+ end
11
+
8
12
  def self.hash64(input, seed1=nil, seed2=nil)
9
13
  return Internal.hash64(input) if seed1.nil?
10
14
  return Internal.hash64_with_seed(input, seed1.to_i) if seed2.nil?
@@ -1,3 +1,3 @@
1
1
  module CityHash
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
  end
@@ -1,24 +1,28 @@
1
1
  require 'test_helper'
2
2
 
3
3
  describe CityHash do
4
+ it 'returns 32bit hash' do
5
+ assert_equal 1633095781, CityHash.hash32("test")
6
+ end
7
+
4
8
  it 'returns 64bit hash' do
5
- assert_equal 17703940110308125106, CityHash.hash64("test")
9
+ assert_equal 8581389452482819506, CityHash.hash64("test")
6
10
  end
7
11
 
8
12
  it "returns 64bit hash with a seed" do
9
- assert_equal 14900027982776226655, CityHash.hash64("test", 12345)
13
+ assert_equal 9154302171269876511, CityHash.hash64("test", 12345)
10
14
  end
11
15
 
12
16
  it "returns 64bit hash with seeds" do
13
- assert_equal 11136353178704814373, CityHash.hash64("test", 12345, 54321)
17
+ assert_equal 4854399283587686019, CityHash.hash64("test", 12345, 54321)
14
18
  end
15
19
 
16
20
  it "returns 128bit hash" do
17
- assert_equal 1800071687761605184910580728449884026697, CityHash.hash128("test")
21
+ assert_equal 124124989950401219618153994964897029896, CityHash.hash128("test")
18
22
  end
19
23
 
20
24
  it "returns 128bit hash with seed" do
21
25
  seed = (123 << 64) | 123
22
- assert_equal 1631427474705635869517741677842296176559, CityHash.hash128("test", seed)
26
+ assert_equal 1834994000056895780313918994795281207519, CityHash.hash128("test", seed)
23
27
  end
24
28
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cityhash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-07 00:00:00.000000000Z
12
+ date: 2012-10-25 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ruby bindings for google's cityhash
15
15
  email:
@@ -21,6 +21,7 @@ extra_rdoc_files: []
21
21
  files:
22
22
  - .gitignore
23
23
  - .travis.yml
24
+ - CHANGELOG.md
24
25
  - Gemfile
25
26
  - LICENSE.txt
26
27
  - README.md
@@ -48,7 +49,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
48
49
  version: '0'
49
50
  segments:
50
51
  - 0
51
- hash: 268501841747995875
52
+ hash: 4171946996269433700
52
53
  required_rubygems_version: !ruby/object:Gem::Requirement
53
54
  none: false
54
55
  requirements:
@@ -57,11 +58,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  version: '0'
58
59
  segments:
59
60
  - 0
60
- hash: 268501841747995875
61
+ hash: 4171946996269433700
61
62
  requirements: []
62
63
  rubyforge_project: cityhash
63
- rubygems_version: 1.8.19
64
+ rubygems_version: 1.8.24
64
65
  signing_key:
65
66
  specification_version: 3
66
67
  summary: ruby bindings for google's cityhash
67
- test_files: []
68
+ test_files:
69
+ - test/cityhash_test.rb
70
+ - test/test_helper.rb