cityhash 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -0
- data/CHANGELOG.md +5 -0
- data/README.md +6 -5
- data/ext/cityhash/city.cc +249 -98
- data/ext/cityhash/city.h +36 -14
- data/ext/cityhash/cityhash.cc +7 -0
- data/lib/cityhash.rb +4 -0
- data/lib/cityhash/version.rb +1 -1
- data/test/cityhash_test.rb +9 -5
- metadata +9 -6
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
|
@@ -15,11 +15,12 @@ text = "test"
|
|
|
15
15
|
seed1 = 12345
|
|
16
16
|
seed2 = 54321
|
|
17
17
|
|
|
18
|
-
CityHash.
|
|
19
|
-
CityHash.hash64(text
|
|
20
|
-
CityHash.hash64(text, seed1
|
|
21
|
-
CityHash.
|
|
22
|
-
CityHash.hash128(text
|
|
18
|
+
CityHash.hash32(text) # => 1633095781
|
|
19
|
+
CityHash.hash64(text) # => 8581389452482819506
|
|
20
|
+
CityHash.hash64(text, seed1) # => 9154302171269876511
|
|
21
|
+
CityHash.hash64(text, seed1, seed2) # => 4854399283587686019
|
|
22
|
+
CityHash.hash128(text) # => 124124989950401219618153994964897029896
|
|
23
|
+
CityHash.hash128(text, seed1) # => 101668641288246442316643001405184598611
|
|
23
24
|
```
|
|
24
25
|
|
|
25
26
|
### Contributing to cityhash
|
data/ext/cityhash/city.cc
CHANGED
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
// possible hash functions, by using SIMD instructions, or by
|
|
28
28
|
// compromising on hash quality.
|
|
29
29
|
|
|
30
|
-
#include
|
|
30
|
+
#include <city.h>
|
|
31
31
|
|
|
32
32
|
#include <algorithm>
|
|
33
33
|
#include <string.h> // for memcpy and memset
|
|
@@ -46,32 +46,32 @@ static uint32 UNALIGNED_LOAD32(const char *p) {
|
|
|
46
46
|
return result;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
#if !defined(WORDS_BIGENDIAN)
|
|
50
|
-
|
|
51
|
-
#define uint32_in_expected_order(x) (x)
|
|
52
|
-
#define uint64_in_expected_order(x) (x)
|
|
53
|
-
|
|
54
|
-
#else
|
|
55
|
-
|
|
56
49
|
#ifdef _MSC_VER
|
|
50
|
+
|
|
57
51
|
#include <stdlib.h>
|
|
58
52
|
#define bswap_32(x) _byteswap_ulong(x)
|
|
59
53
|
#define bswap_64(x) _byteswap_uint64(x)
|
|
60
54
|
|
|
61
55
|
#elif defined(__APPLE__)
|
|
56
|
+
|
|
62
57
|
// Mac OS X / Darwin features
|
|
63
58
|
#include <libkern/OSByteOrder.h>
|
|
64
59
|
#define bswap_32(x) OSSwapInt32(x)
|
|
65
60
|
#define bswap_64(x) OSSwapInt64(x)
|
|
66
61
|
|
|
67
62
|
#else
|
|
63
|
+
|
|
68
64
|
#include <byteswap.h>
|
|
65
|
+
|
|
69
66
|
#endif
|
|
70
67
|
|
|
68
|
+
#ifdef WORDS_BIGENDIAN
|
|
71
69
|
#define uint32_in_expected_order(x) (bswap_32(x))
|
|
72
70
|
#define uint64_in_expected_order(x) (bswap_64(x))
|
|
73
|
-
|
|
74
|
-
#endif
|
|
71
|
+
#else
|
|
72
|
+
#define uint32_in_expected_order(x) (x)
|
|
73
|
+
#define uint64_in_expected_order(x) (x)
|
|
74
|
+
#endif
|
|
75
75
|
|
|
76
76
|
#if !defined(LIKELY)
|
|
77
77
|
#if HAVE_BUILTIN_EXPECT
|
|
@@ -93,7 +93,138 @@ static uint32 Fetch32(const char *p) {
|
|
|
93
93
|
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
|
|
94
94
|
static const uint64 k1 = 0xb492b66fbe98f273ULL;
|
|
95
95
|
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
|
|
96
|
-
|
|
96
|
+
|
|
97
|
+
// Magic numbers for 32-bit hashing. Copied from Murmur3.
|
|
98
|
+
static const uint32_t c1 = 0xcc9e2d51;
|
|
99
|
+
static const uint32_t c2 = 0x1b873593;
|
|
100
|
+
|
|
101
|
+
// A 32-bit to 32-bit integer hash copied from Murmur3.
|
|
102
|
+
static uint32 fmix(uint32 h)
|
|
103
|
+
{
|
|
104
|
+
h ^= h >> 16;
|
|
105
|
+
h *= 0x85ebca6b;
|
|
106
|
+
h ^= h >> 13;
|
|
107
|
+
h *= 0xc2b2ae35;
|
|
108
|
+
h ^= h >> 16;
|
|
109
|
+
return h;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
static uint32 Rotate32(uint32 val, int shift) {
|
|
113
|
+
// Avoid shifting by 32: doing so yields an undefined result.
|
|
114
|
+
return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
#undef PERMUTE3
|
|
118
|
+
#define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
|
|
119
|
+
|
|
120
|
+
static uint32 Mur(uint32 a, uint32 h) {
|
|
121
|
+
// Helper from Murmur3 for combining two 32-bit values.
|
|
122
|
+
a *= c1;
|
|
123
|
+
a = Rotate32(a, 17);
|
|
124
|
+
a *= c2;
|
|
125
|
+
h ^= a;
|
|
126
|
+
h = Rotate32(h, 19);
|
|
127
|
+
return h * 5 + 0xe6546b64;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
static uint32 Hash32Len13to24(const char *s, size_t len) {
|
|
131
|
+
uint32 a = Fetch32(s - 4 + (len >> 1));
|
|
132
|
+
uint32 b = Fetch32(s + 4);
|
|
133
|
+
uint32 c = Fetch32(s + len - 8);
|
|
134
|
+
uint32 d = Fetch32(s + (len >> 1));
|
|
135
|
+
uint32 e = Fetch32(s);
|
|
136
|
+
uint32 f = Fetch32(s + len - 4);
|
|
137
|
+
uint32 h = len;
|
|
138
|
+
|
|
139
|
+
return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
static uint32 Hash32Len0to4(const char *s, size_t len) {
|
|
143
|
+
uint32 b = 0;
|
|
144
|
+
uint32 c = 9;
|
|
145
|
+
for (int i = 0; i < len; i++) {
|
|
146
|
+
b = b * c1 + s[i];
|
|
147
|
+
c ^= b;
|
|
148
|
+
}
|
|
149
|
+
return fmix(Mur(b, Mur(len, c)));
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
static uint32 Hash32Len5to12(const char *s, size_t len) {
|
|
153
|
+
uint32 a = len, b = len * 5, c = 9, d = b;
|
|
154
|
+
a += Fetch32(s);
|
|
155
|
+
b += Fetch32(s + len - 4);
|
|
156
|
+
c += Fetch32(s + ((len >> 1) & 4));
|
|
157
|
+
return fmix(Mur(c, Mur(b, Mur(a, d))));
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
uint32 CityHash32(const char *s, size_t len) {
|
|
161
|
+
if (len <= 24) {
|
|
162
|
+
return len <= 12 ?
|
|
163
|
+
(len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
|
|
164
|
+
Hash32Len13to24(s, len);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// len > 24
|
|
168
|
+
uint32 h = len, g = c1 * len, f = g;
|
|
169
|
+
uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
|
|
170
|
+
uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
|
|
171
|
+
uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
|
|
172
|
+
uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
|
|
173
|
+
uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
|
|
174
|
+
h ^= a0;
|
|
175
|
+
h = Rotate32(h, 19);
|
|
176
|
+
h = h * 5 + 0xe6546b64;
|
|
177
|
+
h ^= a2;
|
|
178
|
+
h = Rotate32(h, 19);
|
|
179
|
+
h = h * 5 + 0xe6546b64;
|
|
180
|
+
g ^= a1;
|
|
181
|
+
g = Rotate32(g, 19);
|
|
182
|
+
g = g * 5 + 0xe6546b64;
|
|
183
|
+
g ^= a3;
|
|
184
|
+
g = Rotate32(g, 19);
|
|
185
|
+
g = g * 5 + 0xe6546b64;
|
|
186
|
+
f += a4;
|
|
187
|
+
f = Rotate32(f, 19);
|
|
188
|
+
f = f * 5 + 0xe6546b64;
|
|
189
|
+
size_t iters = (len - 1) / 20;
|
|
190
|
+
do {
|
|
191
|
+
uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
|
|
192
|
+
uint32 a1 = Fetch32(s + 4);
|
|
193
|
+
uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
|
|
194
|
+
uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
|
|
195
|
+
uint32 a4 = Fetch32(s + 16);
|
|
196
|
+
h ^= a0;
|
|
197
|
+
h = Rotate32(h, 18);
|
|
198
|
+
h = h * 5 + 0xe6546b64;
|
|
199
|
+
f += a1;
|
|
200
|
+
f = Rotate32(f, 19);
|
|
201
|
+
f = f * c1;
|
|
202
|
+
g += a2;
|
|
203
|
+
g = Rotate32(g, 18);
|
|
204
|
+
g = g * 5 + 0xe6546b64;
|
|
205
|
+
h ^= a3 + a1;
|
|
206
|
+
h = Rotate32(h, 19);
|
|
207
|
+
h = h * 5 + 0xe6546b64;
|
|
208
|
+
g ^= a4;
|
|
209
|
+
g = bswap_32(g) * 5;
|
|
210
|
+
h += a4 * 5;
|
|
211
|
+
h = bswap_32(h);
|
|
212
|
+
f += a0;
|
|
213
|
+
PERMUTE3(f, h, g);
|
|
214
|
+
s += 20;
|
|
215
|
+
} while (--iters != 0);
|
|
216
|
+
g = Rotate32(g, 11) * c1;
|
|
217
|
+
g = Rotate32(g, 17) * c1;
|
|
218
|
+
f = Rotate32(f, 11) * c1;
|
|
219
|
+
f = Rotate32(f, 17) * c1;
|
|
220
|
+
h = Rotate32(h + g, 19);
|
|
221
|
+
h = h * 5 + 0xe6546b64;
|
|
222
|
+
h = Rotate32(h, 17) * c1;
|
|
223
|
+
h = Rotate32(h + f, 19);
|
|
224
|
+
h = h * 5 + 0xe6546b64;
|
|
225
|
+
h = Rotate32(h, 17) * c1;
|
|
226
|
+
return h;
|
|
227
|
+
}
|
|
97
228
|
|
|
98
229
|
// Bitwise right rotate. Normally this will compile to a single
|
|
99
230
|
// instruction, especially if the shift is a manifest constant.
|
|
@@ -102,13 +233,6 @@ static uint64 Rotate(uint64 val, int shift) {
|
|
|
102
233
|
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
|
|
103
234
|
}
|
|
104
235
|
|
|
105
|
-
// Equivalent to Rotate(), but requires the second arg to be non-zero.
|
|
106
|
-
// On x86-64, and probably others, it's possible for this to compile
|
|
107
|
-
// to a single instruction if both args are already in registers.
|
|
108
|
-
static uint64 RotateByAtLeast1(uint64 val, int shift) {
|
|
109
|
-
return (val >> shift) | (val << (64 - shift));
|
|
110
|
-
}
|
|
111
|
-
|
|
112
236
|
static uint64 ShiftMix(uint64 val) {
|
|
113
237
|
return val ^ (val >> 47);
|
|
114
238
|
}
|
|
@@ -117,15 +241,29 @@ static uint64 HashLen16(uint64 u, uint64 v) {
|
|
|
117
241
|
return Hash128to64(uint128(u, v));
|
|
118
242
|
}
|
|
119
243
|
|
|
244
|
+
static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
|
|
245
|
+
// Murmur-inspired hashing.
|
|
246
|
+
uint64 a = (u ^ v) * mul;
|
|
247
|
+
a ^= (a >> 47);
|
|
248
|
+
uint64 b = (v ^ a) * mul;
|
|
249
|
+
b ^= (b >> 47);
|
|
250
|
+
b *= mul;
|
|
251
|
+
return b;
|
|
252
|
+
}
|
|
253
|
+
|
|
120
254
|
static uint64 HashLen0to16(const char *s, size_t len) {
|
|
121
|
-
if (len
|
|
122
|
-
uint64
|
|
255
|
+
if (len >= 8) {
|
|
256
|
+
uint64 mul = k2 + len * 2;
|
|
257
|
+
uint64 a = Fetch64(s) + k2;
|
|
123
258
|
uint64 b = Fetch64(s + len - 8);
|
|
124
|
-
|
|
259
|
+
uint64 c = Rotate(b, 37) * mul + a;
|
|
260
|
+
uint64 d = (Rotate(a, 25) + b) * mul;
|
|
261
|
+
return HashLen16(c, d, mul);
|
|
125
262
|
}
|
|
126
263
|
if (len >= 4) {
|
|
264
|
+
uint64 mul = k2 + len * 2;
|
|
127
265
|
uint64 a = Fetch32(s);
|
|
128
|
-
return HashLen16(len + (a << 3), Fetch32(s + len - 4));
|
|
266
|
+
return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
|
|
129
267
|
}
|
|
130
268
|
if (len > 0) {
|
|
131
269
|
uint8 a = s[0];
|
|
@@ -133,7 +271,7 @@ static uint64 HashLen0to16(const char *s, size_t len) {
|
|
|
133
271
|
uint8 c = s[len - 1];
|
|
134
272
|
uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
|
|
135
273
|
uint32 z = len + (static_cast<uint32>(c) << 2);
|
|
136
|
-
return ShiftMix(y * k2 ^ z *
|
|
274
|
+
return ShiftMix(y * k2 ^ z * k0) * k2;
|
|
137
275
|
}
|
|
138
276
|
return k2;
|
|
139
277
|
}
|
|
@@ -141,12 +279,13 @@ static uint64 HashLen0to16(const char *s, size_t len) {
|
|
|
141
279
|
// This probably works well for 16-byte strings as well, but it may be overkill
|
|
142
280
|
// in that case.
|
|
143
281
|
static uint64 HashLen17to32(const char *s, size_t len) {
|
|
282
|
+
uint64 mul = k2 + len * 2;
|
|
144
283
|
uint64 a = Fetch64(s) * k1;
|
|
145
284
|
uint64 b = Fetch64(s + 8);
|
|
146
|
-
uint64 c = Fetch64(s + len - 8) *
|
|
147
|
-
uint64 d = Fetch64(s + len - 16) *
|
|
148
|
-
return HashLen16(Rotate(a
|
|
149
|
-
a + Rotate(b
|
|
285
|
+
uint64 c = Fetch64(s + len - 8) * mul;
|
|
286
|
+
uint64 d = Fetch64(s + len - 16) * k2;
|
|
287
|
+
return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
|
|
288
|
+
a + Rotate(b + k2, 18) + c, mul);
|
|
150
289
|
}
|
|
151
290
|
|
|
152
291
|
// Return a 16-byte hash for 48 bytes. Quick and dirty.
|
|
@@ -175,26 +314,24 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(
|
|
|
175
314
|
|
|
176
315
|
// Return an 8-byte hash for 33 to 64 bytes.
|
|
177
316
|
static uint64 HashLen33to64(const char *s, size_t len) {
|
|
178
|
-
uint64
|
|
179
|
-
uint64 a = Fetch64(s)
|
|
180
|
-
uint64 b =
|
|
181
|
-
uint64 c =
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
uint64
|
|
186
|
-
uint64
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
a
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
|
|
197
|
-
return ShiftMix(r * k0 + vs) * k2;
|
|
317
|
+
uint64 mul = k2 + len * 2;
|
|
318
|
+
uint64 a = Fetch64(s) * k2;
|
|
319
|
+
uint64 b = Fetch64(s + 8);
|
|
320
|
+
uint64 c = Fetch64(s + len - 24);
|
|
321
|
+
uint64 d = Fetch64(s + len - 32);
|
|
322
|
+
uint64 e = Fetch64(s + 16) * k2;
|
|
323
|
+
uint64 f = Fetch64(s + 24) * 9;
|
|
324
|
+
uint64 g = Fetch64(s + len - 8);
|
|
325
|
+
uint64 h = Fetch64(s + len - 16) * mul;
|
|
326
|
+
uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
|
|
327
|
+
uint64 v = ((a + g) ^ d) + f + 1;
|
|
328
|
+
uint64 w = bswap_64((u + v) * mul) + h;
|
|
329
|
+
uint64 x = Rotate(e + f, 42) + c;
|
|
330
|
+
uint64 y = (bswap_64((v + w) * mul) + g) * mul;
|
|
331
|
+
uint64 z = e + f + c;
|
|
332
|
+
a = bswap_64((x + z) * mul + y) + b;
|
|
333
|
+
b = ShiftMix((z + a) * mul + d + h) * mul;
|
|
334
|
+
return b + x;
|
|
198
335
|
}
|
|
199
336
|
|
|
200
337
|
uint64 CityHash64(const char *s, size_t len) {
|
|
@@ -315,7 +452,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
|
|
|
315
452
|
len -= 128;
|
|
316
453
|
} while (LIKELY(len >= 128));
|
|
317
454
|
x += Rotate(v.first + z, 49) * k0;
|
|
318
|
-
|
|
455
|
+
y = y * k0 + Rotate(w.second, 37);
|
|
456
|
+
z = z * k0 + Rotate(w.first, 27);
|
|
457
|
+
w.first *= 9;
|
|
458
|
+
v.first *= k0;
|
|
319
459
|
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
|
|
320
460
|
for (size_t tail_done = 0; tail_done < len; ) {
|
|
321
461
|
tail_done += 32;
|
|
@@ -325,6 +465,7 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
|
|
|
325
465
|
z += w.second + Fetch64(s + len - tail_done);
|
|
326
466
|
w.second += v.first;
|
|
327
467
|
v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
|
|
468
|
+
v.first *= k0;
|
|
328
469
|
}
|
|
329
470
|
// At this point our 56 bytes of state should contain more than
|
|
330
471
|
// enough information for a strong 128-bit hash. We use two
|
|
@@ -336,19 +477,10 @@ uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
|
|
|
336
477
|
}
|
|
337
478
|
|
|
338
479
|
uint128 CityHash128(const char *s, size_t len) {
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
Fetch64(s + 8)));
|
|
344
|
-
} else if (len >= 8) {
|
|
345
|
-
return CityHash128WithSeed(NULL,
|
|
346
|
-
0,
|
|
347
|
-
uint128(Fetch64(s) ^ (len * k0),
|
|
348
|
-
Fetch64(s + len - 8) ^ k1));
|
|
349
|
-
} else {
|
|
350
|
-
return CityHash128WithSeed(s, len, uint128(k0, k1));
|
|
351
|
-
}
|
|
480
|
+
return len >= 16 ?
|
|
481
|
+
CityHash128WithSeed(s + 16, len - 16,
|
|
482
|
+
uint128(Fetch64(s), Fetch64(s + 8) + k0)) :
|
|
483
|
+
CityHash128WithSeed(s, len, uint128(k0, k1));
|
|
352
484
|
}
|
|
353
485
|
|
|
354
486
|
#ifdef __SSE4_2__
|
|
@@ -363,60 +495,79 @@ static void CityHashCrc256Long(const char *s, size_t len,
|
|
|
363
495
|
uint64 c = result[0] = HashLen16(b, len);
|
|
364
496
|
uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
|
|
365
497
|
uint64 e = Fetch64(s + 184) + seed;
|
|
366
|
-
uint64 f =
|
|
498
|
+
uint64 f = 0;
|
|
367
499
|
uint64 g = 0;
|
|
368
|
-
uint64 h =
|
|
369
|
-
uint64
|
|
370
|
-
uint64
|
|
371
|
-
uint64
|
|
500
|
+
uint64 h = c + d;
|
|
501
|
+
uint64 x = seed;
|
|
502
|
+
uint64 y = 0;
|
|
503
|
+
uint64 z = 0;
|
|
372
504
|
|
|
373
505
|
// 240 bytes of input per iter.
|
|
374
506
|
size_t iters = len / 240;
|
|
375
507
|
len -= iters * 240;
|
|
376
508
|
do {
|
|
377
|
-
#
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
509
|
+
#undef CHUNK
|
|
510
|
+
#define CHUNK(r) \
|
|
511
|
+
PERMUTE3(x, z, y); \
|
|
512
|
+
b += Fetch64(s); \
|
|
513
|
+
c += Fetch64(s + 8); \
|
|
514
|
+
d += Fetch64(s + 16); \
|
|
515
|
+
e += Fetch64(s + 24); \
|
|
516
|
+
f += Fetch64(s + 32); \
|
|
517
|
+
a += b; \
|
|
518
|
+
h += f; \
|
|
519
|
+
b += c; \
|
|
520
|
+
f += d; \
|
|
521
|
+
g += e; \
|
|
522
|
+
e += z; \
|
|
523
|
+
g += x; \
|
|
524
|
+
z = _mm_crc32_u64(z, b + g); \
|
|
525
|
+
y = _mm_crc32_u64(y, e + h); \
|
|
526
|
+
x = _mm_crc32_u64(x, f + a); \
|
|
527
|
+
e = Rotate(e, r); \
|
|
528
|
+
c += e; \
|
|
392
529
|
s += 40
|
|
393
530
|
|
|
394
|
-
CHUNK(
|
|
395
|
-
CHUNK(
|
|
396
|
-
CHUNK(
|
|
531
|
+
CHUNK(0); PERMUTE3(a, h, c);
|
|
532
|
+
CHUNK(33); PERMUTE3(a, h, f);
|
|
533
|
+
CHUNK(0); PERMUTE3(b, h, f);
|
|
534
|
+
CHUNK(42); PERMUTE3(b, h, d);
|
|
535
|
+
CHUNK(0); PERMUTE3(b, h, e);
|
|
536
|
+
CHUNK(33); PERMUTE3(a, h, e);
|
|
397
537
|
} while (--iters > 0);
|
|
398
538
|
|
|
399
539
|
while (len >= 40) {
|
|
400
|
-
CHUNK(
|
|
540
|
+
CHUNK(29);
|
|
541
|
+
e ^= Rotate(a, 20);
|
|
542
|
+
h += Rotate(b, 30);
|
|
543
|
+
g ^= Rotate(c, 40);
|
|
544
|
+
f += Rotate(d, 34);
|
|
545
|
+
PERMUTE3(c, h, g);
|
|
401
546
|
len -= 40;
|
|
402
547
|
}
|
|
403
548
|
if (len > 0) {
|
|
404
549
|
s = s + len - 40;
|
|
405
|
-
CHUNK(
|
|
550
|
+
CHUNK(33);
|
|
551
|
+
e ^= Rotate(a, 43);
|
|
552
|
+
h += Rotate(b, 42);
|
|
553
|
+
g ^= Rotate(c, 41);
|
|
554
|
+
f += Rotate(d, 40);
|
|
406
555
|
}
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
556
|
+
result[0] ^= h;
|
|
557
|
+
result[1] ^= g;
|
|
558
|
+
g += h;
|
|
559
|
+
a = HashLen16(a, g + z);
|
|
560
|
+
x += y << 32;
|
|
561
|
+
b += x;
|
|
562
|
+
c = HashLen16(c, z) + h;
|
|
412
563
|
d = HashLen16(d, e + result[0]);
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
e = HashLen16(a, d) +
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
result[0] = e +
|
|
419
|
-
a = ShiftMix((a +
|
|
564
|
+
g += e;
|
|
565
|
+
h += HashLen16(x, f);
|
|
566
|
+
e = HashLen16(a, d) + g;
|
|
567
|
+
z = HashLen16(b, c) + a;
|
|
568
|
+
y = HashLen16(g, h) + c;
|
|
569
|
+
result[0] = e + z + y + x;
|
|
570
|
+
a = ShiftMix((a + y) * k0) * k0 + b;
|
|
420
571
|
result[1] += a + result[0];
|
|
421
572
|
a = ShiftMix(a * k0) * k0 + c;
|
|
422
573
|
result[2] = a + result[1];
|
data/ext/cityhash/city.h
CHANGED
|
@@ -20,21 +20,40 @@
|
|
|
20
20
|
//
|
|
21
21
|
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
|
22
22
|
//
|
|
23
|
-
//
|
|
24
|
-
// hardware in 2011, CityHash64() is faster than other high-quality
|
|
25
|
-
// hash functions, such as Murmur. This is largely due to higher
|
|
26
|
-
// instruction-level parallelism. CityHash64() and CityHash128() also perform
|
|
27
|
-
// well on hash-quality tests.
|
|
23
|
+
// http://code.google.com/p/cityhash/
|
|
28
24
|
//
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
//
|
|
25
|
+
// This file provides a few functions for hashing strings. All of them are
|
|
26
|
+
// high-quality functions in the sense that they pass standard tests such
|
|
27
|
+
// as Austin Appleby's SMHasher. They are also fast.
|
|
28
|
+
//
|
|
29
|
+
// For 64-bit x86 code, on short strings, we don't know of anything faster than
|
|
30
|
+
// CityHash64 that is of comparable quality. We believe our nearest competitor
|
|
31
|
+
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
|
|
32
|
+
// tables and most other hashing (excluding cryptography).
|
|
33
|
+
//
|
|
34
|
+
// For 64-bit x86 code, on long strings, the picture is more complicated.
|
|
35
|
+
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
|
|
36
|
+
// CityHashCrc128 appears to be faster than all competitors of comparable
|
|
37
|
+
// quality. CityHash128 is also good but not quite as fast. We believe our
|
|
38
|
+
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
|
|
39
|
+
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
|
|
40
|
+
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
|
|
41
|
+
// Note that CityHashCrc128 is declared in citycrc.h.
|
|
42
|
+
//
|
|
43
|
+
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
|
|
44
|
+
// is of comparable quality. We believe our nearest competitor is Murmur3A.
|
|
45
|
+
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
|
|
32
46
|
//
|
|
33
47
|
// Functions in the CityHash family are not suitable for cryptography.
|
|
34
48
|
//
|
|
35
|
-
//
|
|
49
|
+
// Please see CityHash's README file for more details on our performance
|
|
50
|
+
// measurements and so on.
|
|
51
|
+
//
|
|
52
|
+
// WARNING: This code has been only lightly tested on big-endian platforms!
|
|
36
53
|
// It is known to work well on little-endian platforms that have a small penalty
|
|
37
54
|
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
|
|
55
|
+
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
|
|
56
|
+
// bug reports are welcome.
|
|
38
57
|
//
|
|
39
58
|
// By the way, for some hash functions, given strings a and b, the hash
|
|
40
59
|
// of a+b is easily derived from the hashes of a and b. This property
|
|
@@ -56,23 +75,26 @@ inline uint64 Uint128Low64(const uint128& x) { return x.first; }
|
|
|
56
75
|
inline uint64 Uint128High64(const uint128& x) { return x.second; }
|
|
57
76
|
|
|
58
77
|
// Hash function for a byte array.
|
|
59
|
-
|
|
78
|
+
uint64 CityHash64(const char *buf, size_t len);
|
|
60
79
|
|
|
61
80
|
// Hash function for a byte array. For convenience, a 64-bit seed is also
|
|
62
81
|
// hashed into the result.
|
|
63
|
-
|
|
82
|
+
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
|
|
64
83
|
|
|
65
84
|
// Hash function for a byte array. For convenience, two seeds are also
|
|
66
85
|
// hashed into the result.
|
|
67
|
-
|
|
86
|
+
uint64 CityHash64WithSeeds(const char *buf, size_t len,
|
|
68
87
|
uint64 seed0, uint64 seed1);
|
|
69
88
|
|
|
70
89
|
// Hash function for a byte array.
|
|
71
|
-
|
|
90
|
+
uint128 CityHash128(const char *s, size_t len);
|
|
72
91
|
|
|
73
92
|
// Hash function for a byte array. For convenience, a 128-bit seed is also
|
|
74
93
|
// hashed into the result.
|
|
75
|
-
|
|
94
|
+
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
|
|
95
|
+
|
|
96
|
+
// Hash function for a byte array. Most useful in 32-bit binaries.
|
|
97
|
+
uint32 CityHash32(const char *buf, size_t len);
|
|
76
98
|
|
|
77
99
|
// Hash 128 input bits down to 64 bits of output.
|
|
78
100
|
// This is intended to be a reasonably good hash function.
|
data/ext/cityhash/cityhash.cc
CHANGED
|
@@ -5,6 +5,11 @@
|
|
|
5
5
|
// calling rb_define_method()
|
|
6
6
|
typedef VALUE (ruby_method)(...);
|
|
7
7
|
|
|
8
|
+
extern "C" VALUE cityhash_hash32(VALUE mod, VALUE input)
|
|
9
|
+
{
|
|
10
|
+
return ULL2NUM(CityHash32(StringValuePtr(input), RSTRING_LEN(input)));
|
|
11
|
+
}
|
|
12
|
+
|
|
8
13
|
extern "C" VALUE cityhash_hash64(VALUE mod, VALUE input)
|
|
9
14
|
{
|
|
10
15
|
return ULL2NUM(CityHash64(StringValuePtr(input), RSTRING_LEN(input)));
|
|
@@ -38,6 +43,8 @@ extern "C" void Init_cityhash()
|
|
|
38
43
|
VALUE mCityHash = rb_define_module("CityHash");
|
|
39
44
|
VALUE mInternal = rb_define_module_under(mCityHash, "Internal");
|
|
40
45
|
|
|
46
|
+
rb_define_singleton_method(mInternal, "hash32", (ruby_method*) &cityhash_hash32, 1);
|
|
47
|
+
|
|
41
48
|
rb_define_singleton_method(mInternal, "hash64", (ruby_method*) &cityhash_hash64, 1);
|
|
42
49
|
rb_define_singleton_method(mInternal, "hash64_with_seed", (ruby_method*) &cityhash_hash64_with_seed, 2);
|
|
43
50
|
rb_define_singleton_method(mInternal, "hash64_with_seeds", (ruby_method*) &cityhash_hash64_with_seeds, 3);
|
data/lib/cityhash.rb
CHANGED
|
@@ -5,6 +5,10 @@ module CityHash
|
|
|
5
5
|
LOW64_MASK = 0x0000000000000000ffffffffffffffff
|
|
6
6
|
HIGH64_MASK = 0xffffffffffffffff0000000000000000
|
|
7
7
|
|
|
8
|
+
def self.hash32(input)
|
|
9
|
+
Internal.hash32(input)
|
|
10
|
+
end
|
|
11
|
+
|
|
8
12
|
def self.hash64(input, seed1=nil, seed2=nil)
|
|
9
13
|
return Internal.hash64(input) if seed1.nil?
|
|
10
14
|
return Internal.hash64_with_seed(input, seed1.to_i) if seed2.nil?
|
data/lib/cityhash/version.rb
CHANGED
data/test/cityhash_test.rb
CHANGED
|
@@ -1,24 +1,28 @@
|
|
|
1
1
|
require 'test_helper'
|
|
2
2
|
|
|
3
3
|
describe CityHash do
|
|
4
|
+
it 'returns 32bit hash' do
|
|
5
|
+
assert_equal 1633095781, CityHash.hash32("test")
|
|
6
|
+
end
|
|
7
|
+
|
|
4
8
|
it 'returns 64bit hash' do
|
|
5
|
-
assert_equal
|
|
9
|
+
assert_equal 8581389452482819506, CityHash.hash64("test")
|
|
6
10
|
end
|
|
7
11
|
|
|
8
12
|
it "returns 64bit hash with a seed" do
|
|
9
|
-
assert_equal
|
|
13
|
+
assert_equal 9154302171269876511, CityHash.hash64("test", 12345)
|
|
10
14
|
end
|
|
11
15
|
|
|
12
16
|
it "returns 64bit hash with seeds" do
|
|
13
|
-
assert_equal
|
|
17
|
+
assert_equal 4854399283587686019, CityHash.hash64("test", 12345, 54321)
|
|
14
18
|
end
|
|
15
19
|
|
|
16
20
|
it "returns 128bit hash" do
|
|
17
|
-
assert_equal
|
|
21
|
+
assert_equal 124124989950401219618153994964897029896, CityHash.hash128("test")
|
|
18
22
|
end
|
|
19
23
|
|
|
20
24
|
it "returns 128bit hash with seed" do
|
|
21
25
|
seed = (123 << 64) | 123
|
|
22
|
-
assert_equal
|
|
26
|
+
assert_equal 1834994000056895780313918994795281207519, CityHash.hash128("test", seed)
|
|
23
27
|
end
|
|
24
28
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cityhash
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.0
|
|
4
|
+
version: 0.7.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2012-
|
|
12
|
+
date: 2012-10-25 00:00:00.000000000 Z
|
|
13
13
|
dependencies: []
|
|
14
14
|
description: ruby bindings for google's cityhash
|
|
15
15
|
email:
|
|
@@ -21,6 +21,7 @@ extra_rdoc_files: []
|
|
|
21
21
|
files:
|
|
22
22
|
- .gitignore
|
|
23
23
|
- .travis.yml
|
|
24
|
+
- CHANGELOG.md
|
|
24
25
|
- Gemfile
|
|
25
26
|
- LICENSE.txt
|
|
26
27
|
- README.md
|
|
@@ -48,7 +49,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
48
49
|
version: '0'
|
|
49
50
|
segments:
|
|
50
51
|
- 0
|
|
51
|
-
hash:
|
|
52
|
+
hash: 4171946996269433700
|
|
52
53
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
54
|
none: false
|
|
54
55
|
requirements:
|
|
@@ -57,11 +58,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
57
58
|
version: '0'
|
|
58
59
|
segments:
|
|
59
60
|
- 0
|
|
60
|
-
hash:
|
|
61
|
+
hash: 4171946996269433700
|
|
61
62
|
requirements: []
|
|
62
63
|
rubyforge_project: cityhash
|
|
63
|
-
rubygems_version: 1.8.
|
|
64
|
+
rubygems_version: 1.8.24
|
|
64
65
|
signing_key:
|
|
65
66
|
specification_version: 3
|
|
66
67
|
summary: ruby bindings for google's cityhash
|
|
67
|
-
test_files:
|
|
68
|
+
test_files:
|
|
69
|
+
- test/cityhash_test.rb
|
|
70
|
+
- test/test_helper.rb
|