cityhash 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -0
- data/CHANGELOG.md +5 -0
- data/README.md +6 -5
- data/ext/cityhash/city.cc +249 -98
- data/ext/cityhash/city.h +36 -14
- data/ext/cityhash/cityhash.cc +7 -0
- data/lib/cityhash.rb +4 -0
- data/lib/cityhash/version.rb +1 -1
- data/test/cityhash_test.rb +9 -5
- metadata +9 -6
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -15,11 +15,12 @@ text = "test"
|
|
15
15
|
seed1 = 12345
|
16
16
|
seed2 = 54321
|
17
17
|
|
18
|
-
CityHash.
|
19
|
-
CityHash.hash64(text
|
20
|
-
CityHash.hash64(text, seed1
|
21
|
-
CityHash.
|
22
|
-
CityHash.hash128(text
|
18
|
+
CityHash.hash32(text) # => 1633095781
|
19
|
+
CityHash.hash64(text) # => 8581389452482819506
|
20
|
+
CityHash.hash64(text, seed1) # => 9154302171269876511
|
21
|
+
CityHash.hash64(text, seed1, seed2) # => 4854399283587686019
|
22
|
+
CityHash.hash128(text) # => 124124989950401219618153994964897029896
|
23
|
+
CityHash.hash128(text, seed1) # => 101668641288246442316643001405184598611
|
23
24
|
```
|
24
25
|
|
25
26
|
### Contributing to cityhash
|
data/ext/cityhash/city.cc
CHANGED
@@ -27,7 +27,7 @@
|
|
27
27
|
// possible hash functions, by using SIMD instructions, or by
|
28
28
|
// compromising on hash quality.
|
29
29
|
|
30
|
-
#include
|
30
|
+
#include <city.h>
|
31
31
|
|
32
32
|
#include <algorithm>
|
33
33
|
#include <string.h> // for memcpy and memset
|
@@ -46,32 +46,32 @@ static uint32 UNALIGNED_LOAD32(const char *p) {
|
|
46
46
|
return result;
|
47
47
|
}
|
48
48
|
|
49
|
-
#if !defined(WORDS_BIGENDIAN)
|
50
|
-
|
51
|
-
#define uint32_in_expected_order(x) (x)
|
52
|
-
#define uint64_in_expected_order(x) (x)
|
53
|
-
|
54
|
-
#else
|
55
|
-
|
56
49
|
// Pick a byte-swap primitive for the current toolchain.  Each branch must
// provide both bswap_32 and bswap_64.
#if defined(_MSC_VER)

#include <stdlib.h>
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)

#elif defined(__APPLE__)

// Mac OS X / Darwin features
#include <libkern/OSByteOrder.h>
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)

#else

// Everything else: <byteswap.h> is expected to supply bswap_32 / bswap_64.
#include <byteswap.h>

#endif

// Values loaded from memory are byte-swapped only when the build defines
// WORDS_BIGENDIAN; otherwise they are used exactly as loaded.
#if defined(WORDS_BIGENDIAN)
#define uint32_in_expected_order(x) (bswap_32(x))
#define uint64_in_expected_order(x) (bswap_64(x))
#else
#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)
#endif
|
75
75
|
|
76
76
|
#if !defined(LIKELY)
|
77
77
|
#if HAVE_BUILTIN_EXPECT
|
@@ -93,7 +93,138 @@ static uint32 Fetch32(const char *p) {
|
|
93
93
|
// 64-bit multiplier constants shared by the 64-bit and 128-bit hash routines
// in this file.
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
static const uint64 k1 = 0xb492b66fbe98f273ULL;
static const uint64 k2 = 0x9ae16a3b2f90404fULL;

// Magic numbers for 32-bit hashing.  Copied from Murmur3.
static const uint32_t c1 = 0xcc9e2d51;
static const uint32_t c2 = 0x1b873593;
|
100
|
+
|
101
|
+
// A 32-bit to 32-bit integer hash copied from Murmur3.
|
102
|
+
// Final avalanche step for 32-bit hashes: three xor-shifts (by 16, 13, 16)
// interleaved with two multiplications.  Returns the mixed value of h.
static uint32 fmix(uint32 h) {
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}
|
111
|
+
|
112
|
+
// Bitwise right rotation of a 32-bit value by `shift` bits.
static uint32 Rotate32(uint32 val, int shift) {
  // A zero rotation would need a left shift by 32, which is undefined, so it
  // is handled separately.
  if (shift == 0) {
    return val;
  }
  const uint32 low_part = val >> shift;
  const uint32 high_part = val << (32 - shift);
  return low_part | high_part;
}
|
116
|
+
|
117
|
+
// Rotates three lvalues in place: afterwards a holds the old c, b holds the
// old a, and c holds the old b.  Wrapped in do/while(0) so it behaves as a
// single statement.
#undef PERMUTE3
#define PERMUTE3(a, b, c) \
  do {                    \
    std::swap(a, b);      \
    std::swap(a, c);      \
  } while (0)
|
119
|
+
|
120
|
+
// Helper from Murmur3 for combining two 32-bit values: `a` is scrambled
// (multiply by c1, rotate by 17, multiply by c2) and folded into the running
// state `h`, which is then rotated by 19 and passed through h * 5 + constant.
static uint32 Mur(uint32 a, uint32 h) {
  const uint32 scrambled = Rotate32(a * c1, 17) * c2;
  const uint32 folded = Rotate32(h ^ scrambled, 19);
  return folded * 5 + 0xe6546b64;
}
|
129
|
+
|
130
|
+
// 32-bit hash used by CityHash32 for inputs of 13 to 24 bytes.  Six 32-bit
// words are read from the start, the middle and the end of the buffer and
// folded one after another into a state seeded with the length.
static uint32 Hash32Len13to24(const char *s, size_t len) {
  const size_t half = len >> 1;
  // Listed in the order in which they are mixed into the state.
  const uint32 words[6] = {
    Fetch32(s - 4 + half),
    Fetch32(s + 4),
    Fetch32(s + len - 8),
    Fetch32(s + half),
    Fetch32(s),
    Fetch32(s + len - 4),
  };

  uint32 state = static_cast<uint32>(len);
  for (int i = 0; i < 6; i++) {
    state = Mur(words[i], state);
  }
  return fmix(state);
}
|
141
|
+
|
142
|
+
// 32-bit hash used by CityHash32 for inputs of 0 to 4 bytes.
//
// Each byte is folded into `b` (multiply by c1, add the byte) and `b` is
// xor-ed into `c`; the length is mixed in at the end.
//
// The byte is read through an explicit `signed char` so the result does not
// depend on whether plain `char` is signed on the target: bytes >= 0x80 are
// always sign-extended, which matches what the previous code produced on
// signed-char platforms.  The loop index is size_t to match `len` and avoid a
// signed/unsigned comparison.
static uint32 Hash32Len0to4(const char *s, size_t len) {
  uint32 b = 0;
  uint32 c = 9;
  for (size_t i = 0; i < len; i++) {
    const signed char v = static_cast<signed char>(s[i]);
    b = b * c1 + v;
    c ^= b;
  }
  return fmix(Mur(b, Mur(static_cast<uint32>(len), c)));
}
|
151
|
+
|
152
|
+
// 32-bit hash used by CityHash32 for inputs of 5 to 12 bytes.  Three 32-bit
// words (first, last, and one at offset 0 or 4 depending on the length) are
// added to length-derived seeds and then combined.
static uint32 Hash32Len5to12(const char *s, size_t len) {
  const uint32 length = static_cast<uint32>(len);
  const uint32 length_x5 = static_cast<uint32>(len * 5);

  const uint32 head = length + Fetch32(s);
  const uint32 tail = length_x5 + Fetch32(s + len - 4);
  // (len >> 1) & 4 is 4 when len is 8..12 and 0 when len is 5..7.
  const uint32 mid = 9 + Fetch32(s + ((len >> 1) & 4));

  return fmix(Mur(mid, Mur(tail, Mur(head, length_x5))));
}
|
159
|
+
|
160
|
+
// 32-bit hash of the `len` bytes starting at `s`.
//
// Inputs of up to 24 bytes are handed to the length-specific helpers.  Longer
// inputs keep three 32-bit lanes (h, g, f): the lanes are first seeded from
// the last 20 bytes, then the input is consumed from the front in 20-byte
// steps, and finally the lanes are merged into h.
uint32 CityHash32(const char *s, size_t len) {
  if (len <= 4) {
    return Hash32Len0to4(s, len);
  }
  if (len <= 12) {
    return Hash32Len5to12(s, len);
  }
  if (len <= 24) {
    return Hash32Len13to24(s, len);
  }

  // len > 24
  uint32 h = static_cast<uint32>(len);
  uint32 g = static_cast<uint32>(c1 * len);
  uint32 f = g;

  // Seed the lanes from the tail.  Mur(w, lane) performs exactly the
  // "scramble w, xor into lane, rotate by 19, * 5 + constant" sequence.
  h = Mur(Fetch32(s + len - 4), h);
  h = Mur(Fetch32(s + len - 16), h);
  g = Mur(Fetch32(s + len - 8), g);
  g = Mur(Fetch32(s + len - 12), g);
  // The f lane adds (rather than xors) its scrambled word, so it is spelled out.
  f += Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
  f = Rotate32(f, 19) * 5 + 0xe6546b64;

  // len > 24 guarantees at least one iteration.
  size_t remaining = (len - 1) / 20;
  do {
    const uint32 w0 = Rotate32(Fetch32(s) * c1, 17) * c2;
    const uint32 w1 = Fetch32(s + 4);
    const uint32 w2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
    const uint32 w3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
    const uint32 w4 = Fetch32(s + 16);

    h = Rotate32(h ^ w0, 18) * 5 + 0xe6546b64;
    f = Rotate32(f + w1, 19) * c1;
    g = Rotate32(g + w2, 18) * 5 + 0xe6546b64;
    h = Rotate32(h ^ (w3 + w1), 19) * 5 + 0xe6546b64;
    g ^= w4;
    g = bswap_32(g) * 5;
    h += w4 * 5;
    h = bswap_32(h);
    f += w0;
    PERMUTE3(f, h, g);
    s += 20;
  } while (--remaining != 0);

  // Final merge of the three lanes into h.
  g = Rotate32(Rotate32(g, 11) * c1, 17) * c1;
  f = Rotate32(Rotate32(f, 11) * c1, 17) * c1;
  h = Rotate32(h + g, 19) * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  h = Rotate32(h + f, 19) * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  return h;
}
|
97
228
|
|
98
229
|
// Bitwise right rotate. Normally this will compile to a single
|
99
230
|
// instruction, especially if the shift is a manifest constant.
|
@@ -102,13 +233,6 @@ static uint64 Rotate(uint64 val, int shift) {
|
|
102
233
|
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
|
103
234
|
}
|
104
235
|
|
105
|
-
// Equivalent to Rotate(), but requires the second arg to be non-zero.
|
106
|
-
// On x86-64, and probably others, it's possible for this to compile
|
107
|
-
// to a single instruction if both args are already in registers.
|
108
|
-
static uint64 RotateByAtLeast1(uint64 val, int shift) {
|
109
|
-
return (val >> shift) | (val << (64 - shift));
|
110
|
-
}
|
111
|
-
|
112
236
|
// Xors the value with itself shifted right by 47 bits, folding the top
// 17 bits into the low bits.
static uint64 ShiftMix(uint64 val) {
  const uint64 top_bits = val >> 47;
  return val ^ top_bits;
}
|
@@ -117,15 +241,29 @@ static uint64 HashLen16(uint64 u, uint64 v) {
|
|
117
241
|
return Hash128to64(uint128(u, v));
|
118
242
|
}
|
119
243
|
|
244
|
+
// Murmur-inspired hashing: reduces the pair (u, v) to one 64-bit value using
// the caller-supplied multiplier `mul`.  Two multiply / xor-shift-by-47
// rounds are followed by a final multiplication.
static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
  const uint64 first = (u ^ v) * mul;
  const uint64 first_mixed = first ^ (first >> 47);
  const uint64 second = (v ^ first_mixed) * mul;
  const uint64 second_mixed = second ^ (second >> 47);
  return second_mixed * mul;
}
|
253
|
+
|
120
254
|
static uint64 HashLen0to16(const char *s, size_t len) {
|
121
|
-
if (len
|
122
|
-
uint64
|
255
|
+
if (len >= 8) {
|
256
|
+
uint64 mul = k2 + len * 2;
|
257
|
+
uint64 a = Fetch64(s) + k2;
|
123
258
|
uint64 b = Fetch64(s + len - 8);
|
124
|
-
|
259
|
+
uint64 c = Rotate(b, 37) * mul + a;
|
260
|
+
uint64 d = (Rotate(a, 25) + b) * mul;
|
261
|
+
return HashLen16(c, d, mul);
|
125
262
|
}
|
126
263
|
if (len >= 4) {
|
264
|
+
uint64 mul = k2 + len * 2;
|
127
265
|
uint64 a = Fetch32(s);
|
128
|
-
return HashLen16(len + (a << 3), Fetch32(s + len - 4));
|
266
|
+
return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
|
129
267
|
}
|
130
268
|
if (len > 0) {
|
131
269
|
uint8 a = s[0];
|
@@ -133,7 +271,7 @@ static uint64 HashLen0to16(const char *s, size_t len) {
|
|
133
271
|
uint8 c = s[len - 1];
|
134
272
|
uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
|
135
273
|
uint32 z = len + (static_cast<uint32>(c) << 2);
|
136
|
-
return ShiftMix(y * k2 ^ z *
|
274
|
+
return ShiftMix(y * k2 ^ z * k0) * k2;
|
137
275
|
}
|
138
276
|
return k2;
|
139
277
|
}
|
@@ -141,12 +279,13 @@ static uint64 HashLen0to16(const char *s, size_t len) {
|
|
141
279
|
// This probably works well for 16-byte strings as well, but it may be overkill
|
142
280
|
// in that case.
|
143
281
|
// 64-bit hash for inputs of 17 to 32 bytes: reads the first 16 and the last
// 16 bytes as four 64-bit words and reduces them with the three-argument
// HashLen16, using a length-dependent multiplier.
static uint64 HashLen17to32(const char *s, size_t len) {
  const uint64 mul = k2 + len * 2;

  const uint64 a = Fetch64(s) * k1;
  const uint64 b = Fetch64(s + 8);
  const uint64 c = Fetch64(s + len - 8) * mul;
  const uint64 d = Fetch64(s + len - 16) * k2;

  const uint64 left = Rotate(a + b, 43) + Rotate(c, 30) + d;
  const uint64 right = a + Rotate(b + k2, 18) + c;
  return HashLen16(left, right, mul);
}
|
151
290
|
|
152
291
|
// Return a 16-byte hash for 48 bytes. Quick and dirty.
|
@@ -175,26 +314,24 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(
|
|
175
314
|
|
176
315
|
// Return an 8-byte hash for 33 to 64 bytes.
|
177
316
|
// Return an 8-byte hash for 33 to 64 bytes.  The first 32 and the last 32
// bytes are read as eight 64-bit words (they overlap when len < 64) and mixed
// with rotations, byte swaps and a length-dependent multiplier.
static uint64 HashLen33to64(const char *s, size_t len) {
  const uint64 mul = k2 + len * 2;
  const char *const end = s + len;

  // Words taken from the front of the input.
  uint64 a = Fetch64(s) * k2;
  uint64 b = Fetch64(s + 8);
  const uint64 e = Fetch64(s + 16) * k2;
  const uint64 f = Fetch64(s + 24) * 9;

  // Words taken from the back of the input.
  const uint64 d = Fetch64(end - 32);
  const uint64 c = Fetch64(end - 24);
  const uint64 h = Fetch64(end - 16) * mul;
  const uint64 g = Fetch64(end - 8);

  const uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
  const uint64 v = ((a + g) ^ d) + f + 1;
  const uint64 w = bswap_64((u + v) * mul) + h;
  const uint64 x = Rotate(e + f, 42) + c;
  const uint64 y = (bswap_64((v + w) * mul) + g) * mul;
  const uint64 z = e + f + c;

  // a and b are reused as accumulators for the last two steps.
  a = bswap_64((x + z) * mul + y) + b;
  b = ShiftMix((z + a) * mul + d + h) * mul;
  return b + x;
}
|
199
336
|
|
200
337
|
uint64 CityHash64(const char *s, size_t len) {
|
@@ -315,7 +452,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
|
|
315
452
|
len -= 128;
|
316
453
|
} while (LIKELY(len >= 128));
|
317
454
|
x += Rotate(v.first + z, 49) * k0;
|
318
|
-
|
455
|
+
y = y * k0 + Rotate(w.second, 37);
|
456
|
+
z = z * k0 + Rotate(w.first, 27);
|
457
|
+
w.first *= 9;
|
458
|
+
v.first *= k0;
|
319
459
|
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
|
320
460
|
for (size_t tail_done = 0; tail_done < len; ) {
|
321
461
|
tail_done += 32;
|
@@ -325,6 +465,7 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
|
|
325
465
|
z += w.second + Fetch64(s + len - tail_done);
|
326
466
|
w.second += v.first;
|
327
467
|
v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
|
468
|
+
v.first *= k0;
|
328
469
|
}
|
329
470
|
// At this point our 56 bytes of state should contain more than
|
330
471
|
// enough information for a strong 128-bit hash. We use two
|
@@ -336,19 +477,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
|
|
336
477
|
}
|
337
478
|
|
338
479
|
// 128-bit hash of the `len` bytes starting at `s`.
//
// With at least 16 bytes of input, the first 16 bytes become the seed (the
// second word offset by k0) and the rest of the buffer is hashed with
// CityHash128WithSeed.  Shorter inputs are hashed whole with the fixed seed
// (k0, k1).
uint128 CityHash128(const char *s, size_t len) {
  if (len >= 16) {
    const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
    return CityHash128WithSeed(s + 16, len - 16, seed);
  }
  return CityHash128WithSeed(s, len, uint128(k0, k1));
}
|
353
485
|
|
354
486
|
#ifdef __SSE4_2__
|
@@ -363,60 +495,79 @@ static void CityHashCrc256Long(const char *s, size_t len,
|
|
363
495
|
uint64 c = result[0] = HashLen16(b, len);
|
364
496
|
uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
|
365
497
|
uint64 e = Fetch64(s + 184) + seed;
|
366
|
-
uint64 f =
|
498
|
+
uint64 f = 0;
|
367
499
|
uint64 g = 0;
|
368
|
-
uint64 h =
|
369
|
-
uint64
|
370
|
-
uint64
|
371
|
-
uint64
|
500
|
+
uint64 h = c + d;
|
501
|
+
uint64 x = seed;
|
502
|
+
uint64 y = 0;
|
503
|
+
uint64 z = 0;
|
372
504
|
|
373
505
|
// 240 bytes of input per iter.
|
374
506
|
size_t iters = len / 240;
|
375
507
|
len -= iters * 240;
|
376
508
|
do {
|
377
|
-
#
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
509
|
+
#undef CHUNK
|
510
|
+
#define CHUNK(r) \
|
511
|
+
PERMUTE3(x, z, y); \
|
512
|
+
b += Fetch64(s); \
|
513
|
+
c += Fetch64(s + 8); \
|
514
|
+
d += Fetch64(s + 16); \
|
515
|
+
e += Fetch64(s + 24); \
|
516
|
+
f += Fetch64(s + 32); \
|
517
|
+
a += b; \
|
518
|
+
h += f; \
|
519
|
+
b += c; \
|
520
|
+
f += d; \
|
521
|
+
g += e; \
|
522
|
+
e += z; \
|
523
|
+
g += x; \
|
524
|
+
z = _mm_crc32_u64(z, b + g); \
|
525
|
+
y = _mm_crc32_u64(y, e + h); \
|
526
|
+
x = _mm_crc32_u64(x, f + a); \
|
527
|
+
e = Rotate(e, r); \
|
528
|
+
c += e; \
|
392
529
|
s += 40
|
393
530
|
|
394
|
-
CHUNK(
|
395
|
-
CHUNK(
|
396
|
-
CHUNK(
|
531
|
+
CHUNK(0); PERMUTE3(a, h, c);
|
532
|
+
CHUNK(33); PERMUTE3(a, h, f);
|
533
|
+
CHUNK(0); PERMUTE3(b, h, f);
|
534
|
+
CHUNK(42); PERMUTE3(b, h, d);
|
535
|
+
CHUNK(0); PERMUTE3(b, h, e);
|
536
|
+
CHUNK(33); PERMUTE3(a, h, e);
|
397
537
|
} while (--iters > 0);
|
398
538
|
|
399
539
|
while (len >= 40) {
|
400
|
-
CHUNK(
|
540
|
+
CHUNK(29);
|
541
|
+
e ^= Rotate(a, 20);
|
542
|
+
h += Rotate(b, 30);
|
543
|
+
g ^= Rotate(c, 40);
|
544
|
+
f += Rotate(d, 34);
|
545
|
+
PERMUTE3(c, h, g);
|
401
546
|
len -= 40;
|
402
547
|
}
|
403
548
|
if (len > 0) {
|
404
549
|
s = s + len - 40;
|
405
|
-
CHUNK(
|
550
|
+
CHUNK(33);
|
551
|
+
e ^= Rotate(a, 43);
|
552
|
+
h += Rotate(b, 42);
|
553
|
+
g ^= Rotate(c, 41);
|
554
|
+
f += Rotate(d, 40);
|
406
555
|
}
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
556
|
+
result[0] ^= h;
|
557
|
+
result[1] ^= g;
|
558
|
+
g += h;
|
559
|
+
a = HashLen16(a, g + z);
|
560
|
+
x += y << 32;
|
561
|
+
b += x;
|
562
|
+
c = HashLen16(c, z) + h;
|
412
563
|
d = HashLen16(d, e + result[0]);
|
413
|
-
|
414
|
-
|
415
|
-
e = HashLen16(a, d) +
|
416
|
-
|
417
|
-
|
418
|
-
result[0] = e +
|
419
|
-
a = ShiftMix((a +
|
564
|
+
g += e;
|
565
|
+
h += HashLen16(x, f);
|
566
|
+
e = HashLen16(a, d) + g;
|
567
|
+
z = HashLen16(b, c) + a;
|
568
|
+
y = HashLen16(g, h) + c;
|
569
|
+
result[0] = e + z + y + x;
|
570
|
+
a = ShiftMix((a + y) * k0) * k0 + b;
|
420
571
|
result[1] += a + result[0];
|
421
572
|
a = ShiftMix(a * k0) * k0 + c;
|
422
573
|
result[2] = a + result[1];
|
data/ext/cityhash/city.h
CHANGED
@@ -20,21 +20,40 @@
|
|
20
20
|
//
|
21
21
|
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
22
22
|
//
|
23
|
-
//
|
24
|
-
// hardware in 2011, CityHash64() is faster than other high-quality
|
25
|
-
// hash functions, such as Murmur. This is largely due to higher
|
26
|
-
// instruction-level parallelism. CityHash64() and CityHash128() also perform
|
27
|
-
// well on hash-quality tests.
|
23
|
+
// http://code.google.com/p/cityhash/
|
28
24
|
//
|
29
|
-
//
|
30
|
-
//
|
31
|
-
//
|
25
|
+
// This file provides a few functions for hashing strings. All of them are
|
26
|
+
// high-quality functions in the sense that they pass standard tests such
|
27
|
+
// as Austin Appleby's SMHasher. They are also fast.
|
28
|
+
//
|
29
|
+
// For 64-bit x86 code, on short strings, we don't know of anything faster than
|
30
|
+
// CityHash64 that is of comparable quality. We believe our nearest competitor
|
31
|
+
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
|
32
|
+
// tables and most other hashing (excluding cryptography).
|
33
|
+
//
|
34
|
+
// For 64-bit x86 code, on long strings, the picture is more complicated.
|
35
|
+
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
|
36
|
+
// CityHashCrc128 appears to be faster than all competitors of comparable
|
37
|
+
// quality. CityHash128 is also good but not quite as fast. We believe our
|
38
|
+
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
|
39
|
+
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
|
40
|
+
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
|
41
|
+
// Note that CityHashCrc128 is declared in citycrc.h.
|
42
|
+
//
|
43
|
+
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
|
44
|
+
// is of comparable quality. We believe our nearest competitor is Murmur3A.
|
45
|
+
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
|
32
46
|
//
|
33
47
|
// Functions in the CityHash family are not suitable for cryptography.
|
34
48
|
//
|
35
|
-
//
|
49
|
+
// Please see CityHash's README file for more details on our performance
|
50
|
+
// measurements and so on.
|
51
|
+
//
|
52
|
+
// WARNING: This code has been only lightly tested on big-endian platforms!
|
36
53
|
// It is known to work well on little-endian platforms that have a small penalty
|
37
54
|
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
|
55
|
+
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
|
56
|
+
// bug reports are welcome.
|
38
57
|
//
|
39
58
|
// By the way, for some hash functions, given strings a and b, the hash
|
40
59
|
// of a+b is easily derived from the hashes of a and b. This property
|
@@ -56,23 +75,26 @@ inline uint64 Uint128Low64(const uint128& x) { return x.first; }
|
|
56
75
|
inline uint64 Uint128High64(const uint128& x) { return x.second; }
|
57
76
|
|
58
77
|
// Hash function for a byte array.
|
59
|
-
|
78
|
+
uint64 CityHash64(const char *buf, size_t len);
|
60
79
|
|
61
80
|
// Hash function for a byte array. For convenience, a 64-bit seed is also
|
62
81
|
// hashed into the result.
|
63
|
-
|
82
|
+
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
|
64
83
|
|
65
84
|
// Hash function for a byte array. For convenience, two seeds are also
|
66
85
|
// hashed into the result.
|
67
|
-
|
86
|
+
uint64 CityHash64WithSeeds(const char *buf, size_t len,
|
68
87
|
uint64 seed0, uint64 seed1);
|
69
88
|
|
70
89
|
// Hash function for a byte array.
|
71
|
-
|
90
|
+
uint128 CityHash128(const char *s, size_t len);
|
72
91
|
|
73
92
|
// Hash function for a byte array. For convenience, a 128-bit seed is also
|
74
93
|
// hashed into the result.
|
75
|
-
|
94
|
+
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
|
95
|
+
|
96
|
+
// Hash function for a byte array. Most useful in 32-bit binaries.
|
97
|
+
uint32 CityHash32(const char *buf, size_t len);
|
76
98
|
|
77
99
|
// Hash 128 input bits down to 64 bits of output.
|
78
100
|
// This is intended to be a reasonably good hash function.
|
data/ext/cityhash/cityhash.cc
CHANGED
@@ -5,6 +5,11 @@
|
|
5
5
|
// calling rb_define_method()
|
6
6
|
typedef VALUE (ruby_method)(...);
|
7
7
|
|
8
|
+
// Ruby binding for CityHash32: CityHash::Internal.hash32(input).
//
// mod   - receiver module (unused).
// input - value to hash; it is coerced to a String first, so a non-String
//         argument is converted (or rejected by Ruby) before its buffer is
//         touched.
//
// Returns the 32-bit hash as a Ruby Integer.
extern "C" VALUE cityhash_hash32(VALUE mod, VALUE input)
{
  // Coerce before reading pointer and length.  Passing StringValuePtr(input)
  // and RSTRING_LEN(input) as two arguments of one call leaves their
  // evaluation order unspecified, so the length could be read from a value
  // that has not been converted to a String yet.
  StringValue(input);
  return ULL2NUM(CityHash32(RSTRING_PTR(input), RSTRING_LEN(input)));
}
|
12
|
+
|
8
13
|
extern "C" VALUE cityhash_hash64(VALUE mod, VALUE input)
|
9
14
|
{
|
10
15
|
return ULL2NUM(CityHash64(StringValuePtr(input), RSTRING_LEN(input)));
|
@@ -38,6 +43,8 @@ extern "C" void Init_cityhash()
|
|
38
43
|
VALUE mCityHash = rb_define_module("CityHash");
|
39
44
|
VALUE mInternal = rb_define_module_under(mCityHash, "Internal");
|
40
45
|
|
46
|
+
rb_define_singleton_method(mInternal, "hash32", (ruby_method*) &cityhash_hash32, 1);
|
47
|
+
|
41
48
|
rb_define_singleton_method(mInternal, "hash64", (ruby_method*) &cityhash_hash64, 1);
|
42
49
|
rb_define_singleton_method(mInternal, "hash64_with_seed", (ruby_method*) &cityhash_hash64_with_seed, 2);
|
43
50
|
rb_define_singleton_method(mInternal, "hash64_with_seeds", (ruby_method*) &cityhash_hash64_with_seeds, 3);
|
data/lib/cityhash.rb
CHANGED
@@ -5,6 +5,10 @@ module CityHash
|
|
5
5
|
LOW64_MASK = 0x0000000000000000ffffffffffffffff
|
6
6
|
HIGH64_MASK = 0xffffffffffffffff0000000000000000
|
7
7
|
|
8
|
+
# Returns the 32-bit CityHash of +input+ by delegating to the native
# extension method Internal.hash32.
def self.hash32(input)
  Internal.hash32(input)
end
|
11
|
+
|
8
12
|
def self.hash64(input, seed1=nil, seed2=nil)
|
9
13
|
return Internal.hash64(input) if seed1.nil?
|
10
14
|
return Internal.hash64_with_seed(input, seed1.to_i) if seed2.nil?
|
data/lib/cityhash/version.rb
CHANGED
data/test/cityhash_test.rb
CHANGED
@@ -1,24 +1,28 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
3
|
# Pins the hash of the string "test" for every public entry point of CityHash.
describe CityHash do
  it 'returns 32bit hash' do
    expected = 1633095781
    assert_equal(expected, CityHash.hash32("test"))
  end

  it 'returns 64bit hash' do
    expected = 8581389452482819506
    assert_equal(expected, CityHash.hash64("test"))
  end

  it "returns 64bit hash with a seed" do
    expected = 9154302171269876511
    assert_equal(expected, CityHash.hash64("test", 12345))
  end

  it "returns 64bit hash with seeds" do
    expected = 4854399283587686019
    assert_equal(expected, CityHash.hash64("test", 12345, 54321))
  end

  it "returns 128bit hash" do
    expected = 124124989950401219618153994964897029896
    assert_equal(expected, CityHash.hash128("test"))
  end

  it "returns 128bit hash with seed" do
    # The seed packs 123 into both 64-bit halves.
    seed = (123 << 64) | 123
    # NOTE(review): this expected value has 40 decimal digits, which is larger
    # than 2**128 - confirm how hash128 combines its two 64-bit halves when a
    # seed is given.
    expected = 1834994000056895780313918994795281207519
    assert_equal(expected, CityHash.hash128("test", seed))
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cityhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-10-25 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ruby bindings for google's cityhash
|
15
15
|
email:
|
@@ -21,6 +21,7 @@ extra_rdoc_files: []
|
|
21
21
|
files:
|
22
22
|
- .gitignore
|
23
23
|
- .travis.yml
|
24
|
+
- CHANGELOG.md
|
24
25
|
- Gemfile
|
25
26
|
- LICENSE.txt
|
26
27
|
- README.md
|
@@ -48,7 +49,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
48
49
|
version: '0'
|
49
50
|
segments:
|
50
51
|
- 0
|
51
|
-
hash:
|
52
|
+
hash: 4171946996269433700
|
52
53
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
54
|
none: false
|
54
55
|
requirements:
|
@@ -57,11 +58,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
57
58
|
version: '0'
|
58
59
|
segments:
|
59
60
|
- 0
|
60
|
-
hash:
|
61
|
+
hash: 4171946996269433700
|
61
62
|
requirements: []
|
62
63
|
rubyforge_project: cityhash
|
63
|
-
rubygems_version: 1.8.
|
64
|
+
rubygems_version: 1.8.24
|
64
65
|
signing_key:
|
65
66
|
specification_version: 3
|
66
67
|
summary: ruby bindings for google's cityhash
|
67
|
-
test_files:
|
68
|
+
test_files:
|
69
|
+
- test/cityhash_test.rb
|
70
|
+
- test/test_helper.rb
|