city_hash 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +18 -0
- data/Rakefile +2 -0
- data/city_hash.gemspec +24 -0
- data/lib/city_hash.rb +332 -0
- data/lib/city_hash/version.rb +3 -0
- data/license.txt +20 -0
- data/readme.md +51 -0
- data/test/Makefile +10 -0
- data/test/citymain.cc +97 -0
- data/test/run.sh +18 -0
- data/test/tc_rcity.rb +96 -0
- data/test/test.zip +0 -0
- metadata +107 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/Rakefile
ADDED
data/city_hash.gemspec
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
|
3
|
+
require "city_hash/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for city_hash, a pure-Ruby port of Google's CityHash.
Gem::Specification.new do |s|
  s.name        = "city_hash"
  s.version     = CityHash::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Ashwin Ramaswamy"]
  s.email       = ["ashwin.raman9@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{CityHash for Ruby}
  s.description = %q{Google's CityHash Implementation in Ruby}
  # license.txt shipped with the gem is the MIT license; declare it so the
  # packaged metadata no longer reports an empty license list.
  s.licenses    = ["MIT"]

  # rubyforge_project= is deprecated in newer RubyGems releases; only set it
  # where the writer is still available so the spec keeps loading.
  s.rubyforge_project = "city_hash" if s.respond_to?(:rubyforge_project=)

  # The file lists are taken from git, so the gem must be built from a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency('test-unit')
  s.add_development_dependency('rubyzip')
end
|
data/lib/city_hash.rb
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# All source comments are duplicated from Google's CityHash (1.0.2)
|
|
2
|
+
# implementation at: http://code.google.com/p/cityhash/
|
|
3
|
+
|
|
4
|
+
# Pure-Ruby port of Google's CityHash (1.0.2) 64-bit and 128-bit string hashes.
#
# Input strings are always hashed as raw bytes, regardless of their encoding.
# 128-bit values (seeds and results) are plain Integers laid out as
# (first << 64) | second for results, and (high << 64) | low for seeds, where
# "first"/"second" and "low"/"high" refer to the halves of the C++ uint128 pair.
module CityHash

  # 64-bit hash of +s+.
  #
  # @param s [String] data to hash (hashed byte-wise)
  # @param seed0 [Integer, nil] optional seed; when it is the only seed given
  #   it is used like CityHash64WithSeed
  # @param seed1 [Integer, nil] optional second seed (CityHash64WithSeeds)
  # @return [Integer] value in 0...2**64
  def self.hash64(s, seed0 = nil, seed1 = nil)
    return CityHash::Internal.hash64(s) if seed0.nil?
    return CityHash::Internal.hash64WithSeed(s, seed0) if seed1.nil?
    CityHash::Internal.hash64WithSeeds(s, seed0, seed1)
  end

  # 128-bit hash of +s+.
  #
  # @param s [String] data to hash (hashed byte-wise)
  # @param seed [Integer, nil] optional 128-bit seed, (high << 64) | low
  # @return [Integer] value in 0...2**128
  def self.hash128(s, seed = nil)
    return CityHash::Internal.hash128(s) if seed.nil?
    CityHash::Internal.hash128WithSeed(s, seed)
  end

  # Implementation details. Ruby Integers are unbounded, so every value that
  # is a uint64 in the reference implementation is explicitly truncated with
  # lower64 wherever an addition or multiplication may have overflowed.
  module Internal

    # Some primes between 2^63 and 2^64 for various uses
    K0 = 0xc3a5c85c97cb3127
    K1 = 0xb492b66fbe98f273
    K2 = 0x9ae16a3b2f90404f
    K3 = 0xc949d7c7509e6557

    MASK32 = 0xffffffff
    MASK64 = 0xffffffffffffffff

    # Truncate x to its low 32 bits.
    def self.lower32(x)
      x & MASK32
    end

    # Truncate x to its low 64 bits (also maps negative values to their
    # two's-complement uint64 representation).
    def self.lower64(x)
      x & MASK64
    end

    # Everything above the low 64 bits of x.
    def self.higher64(x)
      x >> 64
    end

    # Return a copy of s tagged as binary so that length, slicing and byte
    # access all operate on bytes rather than on characters.
    def self.binary(s)
      return s if s.encoding == Encoding::BINARY
      s.dup.force_encoding(Encoding::BINARY)
    end

    # Interpret the bytes of s as an unsigned little-endian integer.
    def self.bytes(s)
      s.unpack("C*").reverse_each.inject(0) { |acc, b| (acc << 8) | b }
    end

    # Hash 128 input bits down to 64 bits of output.
    # This is intended to be a reasonably good hash function.
    def self.hash128To64(x)
      # Murmur-inspired hashing.
      kMul = 0x9ddfea08eb382d69
      lo = lower64(x)
      hi = higher64(x)
      a = lower64((lo ^ hi) * kMul)
      a ^= (a >> 47)
      b = lower64((hi ^ a) * kMul)
      b ^= (b >> 47)
      lower64(b * kMul)
    end

    # Bitwise right rotate of a 64-bit value. The input is truncated to 64
    # bits first: callers pass sums that may have carried into bit 64, and an
    # untruncated carry would otherwise leak into the rotated result.
    def self.rotate(val, shift)
      val = lower64(val)
      return val if shift == 0
      (val >> shift) | lower64(val << (64 - shift))
    end

    # Equivalent to rotate(...), but requires the second arg to be non-zero.
    def self.rotateByAtleast1(val, shift)
      val = lower64(val)
      (val >> shift) | lower64(val << (64 - shift))
    end

    def self.shiftMix(val)
      lower64(val ^ (val >> 47))
    end

    # Combine two 64-bit values into one 64-bit hash.
    def self.hashLen16(u, v)
      hash128To64((v << 64) | lower64(u))
    end

    # Hash for strings of 0 to 16 bytes.
    def self.hashLen0To16(s)
      len = s.length
      if len > 8
        a = bytes(s[0, 8])
        b = bytes(s[-8, 8])
        hashLen16(a, rotateByAtleast1(b + len, len)) ^ b
      elsif len >= 4
        a = bytes(s[0, 4])
        hashLen16(len + (a << 3), bytes(s[-4, 4]))
      elsif len > 0
        a = bytes(s[0, 1])
        b = bytes(s[len >> 1, 1])
        c = bytes(s[len - 1, 1])
        y = lower32(a + (b << 8))
        z = len + c * 4
        lower64(shiftMix(lower64(y * K2 ^ z * K3)) * K2)
      else
        K2
      end
    end

    # This probably works well for 16-byte strings as well, but it may be overkill
    # in that case.
    def self.hashLen17To32(s)
      a = lower64(bytes(s[0, 8]) * K1)
      b = bytes(s[8, 8])
      c = lower64(bytes(s[-8, 8]) * K2)
      d = lower64(bytes(s[-16, 8]) * K0)
      hashLen16(lower64(rotate(a - b, 43) + rotate(c, 30) + d),
                lower64(a + rotate(b ^ K3, 20) - c + s.length))
    end

    # Return a 16-byte hash for 48 bytes.  Quick and dirty.
    # Callers do best to use "random-looking" values for a and b.
    # The result is (first << 64) | second of the reference pair.
    def self._weakHashLen32WithSeeds(w, x, y, z, a, b)
      a = lower64(a + w)
      b = rotate(b + a + z, 21)
      c = a
      a = lower64(a + x + y)
      b = lower64(b + rotate(a, 44))
      (lower64(a + z) << 64) | lower64(b + c)
    end

    # Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
    def self.weakHashLen32WithSeeds(s, a, b)
      _weakHashLen32WithSeeds(bytes(s[0, 8]),
                              bytes(s[8, 8]),
                              bytes(s[16, 8]),
                              bytes(s[24, 8]),
                              a,
                              b)
    end

    # Return an 8-byte hash for 33 to 64 bytes.
    def self.hashLen33To64(s)
      len = s.length
      z = bytes(s[24, 8])
      a = lower64(bytes(s[0, 8]) + (len + bytes(s[-16, 8])) * K0)
      b = rotate(a + z, 52)
      c = rotate(a, 37)
      a = lower64(a + bytes(s[8, 8]))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + bytes(s[16, 8]))
      vf = lower64(a + z)
      vs = lower64(b + rotate(a, 31) + c)
      # This sum must wrap at 64 bits before it is rotated below.
      a = lower64(bytes(s[16, 8]) + bytes(s[-32, 8]))
      z = bytes(s[-8, 8])
      b = rotate(a + z, 52)
      c = rotate(a, 37)
      a = lower64(a + bytes(s[-24, 8]))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + bytes(s[-16, 8]))
      wf = lower64(a + z)
      ws = lower64(b + rotate(a, 31) + c)
      r = shiftMix(lower64((vf + ws) * K2 + (wf + vs) * K0))
      lower64(shiftMix(lower64(r * K0 + vs)) * K2)
    end

    # Return an 8-byte hash for strings longer than 64 bytes.
    def self.hashLenAbove64(s)
      len = s.length
      # For strings over 64 bytes we hash the end first, and then as we
      # loop we keep 56 bytes of state: v, w, x, y, and z.
      x = bytes(s[0, 8])
      y = bytes(s[-16, 8]) ^ K1
      z = bytes(s[-56, 8]) ^ K0
      v = weakHashLen32WithSeeds(s[-64, 32], len, y)
      w = weakHashLen32WithSeeds(s[-32, 32], lower64(len * K1), K0)

      z = lower64(z + shiftMix(lower64(v)) * K1)
      x = lower64(rotate(z + x, 39) * K1)
      y = lower64(rotate(y, 33) * K1)

      # Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
      remaining = (len - 1) & ~63
      pos = 0
      loop do
        x = lower64(rotate(x + y + higher64(v) + bytes(s[pos + 16, 8]), 37) * K1)
        y = lower64(rotate(y + lower64(v) + bytes(s[pos + 48, 8]), 42) * K1)
        x ^= lower64(w)
        y ^= higher64(v)
        z = rotate(z ^ higher64(w), 33)
        v = weakHashLen32WithSeeds(s[pos, 32], lower64(lower64(v) * K1), lower64(x + higher64(w)))
        w = weakHashLen32WithSeeds(s[pos + 32, 32], lower64(z + lower64(w)), y)
        z, x = x, z
        pos += 64
        remaining -= 64
        break if remaining == 0
      end

      hashLen16(lower64(hashLen16(higher64(v), higher64(w)) + shiftMix(y) * K1 + z),
                lower64(hashLen16(lower64(v), lower64(w)) + x))
    end

    # A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
    # of any length representable in ssize_t.  Based on City and Murmur.
    def self.cityMurmur(s, seed)
      len = s.length
      a = lower64(seed)
      b = lower64(higher64(seed))
      c = 0
      d = 0
      l = len - 16
      if l <= 0 # len <= 16
        a = lower64(shiftMix(lower64(a * K1)) * K1)
        c = lower64(b * K1 + hashLen0To16(s))
        d = shiftMix(lower64(a + (len >= 8 ? bytes(s[0, 8]) : c)))
      else
        c = hashLen16(lower64(bytes(s[-8, 8]) + K1), a)
        d = hashLen16(lower64(b + len), lower64(c + bytes(s[-16, 8])))
        a = lower64(a + d)
        pos = 0
        # Consume the input 16 bytes at a time; runs at least once.
        loop do
          a ^= lower64(shiftMix(lower64(bytes(s[pos, 8]) * K1)) * K1)
          a = lower64(a * K1)
          b ^= a
          c ^= lower64(shiftMix(lower64(bytes(s[pos + 8, 8]) * K1)) * K1)
          c = lower64(c * K1)
          d ^= c
          pos += 16
          l -= 16
          break unless l > 0
        end
      end
      a = hashLen16(a, c)
      b = hashLen16(d, b)
      ((a ^ b) << 64) | hashLen16(b, a)
    end

    # 128-bit hash of s with a 128-bit seed given as (high << 64) | low.
    def self.hash128WithSeed(s, seed)
      s = binary(s)
      len = s.length
      return cityMurmur(s, seed) if len < 128

      # We expect len >= 128 to be the common case.  Keep 56 bytes of state:
      # v, w, x, y, and z.  (vf, vs) and (wf, ws) are the two halves of v and w.
      x = lower64(seed)
      y = lower64(higher64(seed))
      z = lower64(len * K1)
      vf = lower64(lower64(rotate(y ^ K1, 49) * K1) + bytes(s[0, 8]))
      vs = lower64(lower64(rotate(vf, 42) * K1) + bytes(s[8, 8]))
      wf = lower64(lower64(rotate(y + z, 35) * K1) + x)
      ws = lower64(rotate(x + bytes(s[88, 8]), 53) * K1)

      # This is the same inner loop as CityHash64(); each pass of the outer
      # loop consumes two 64-byte chunks.
      pos = 0
      loop do
        2.times do
          x = lower64(rotate(x + y + vf + bytes(s[pos + 16, 8]), 37) * K1)
          y = lower64(rotate(y + vs + bytes(s[pos + 48, 8]), 42) * K1)
          x ^= ws
          y ^= vf
          z = rotate(z ^ wf, 33)
          v = weakHashLen32WithSeeds(s[pos, 32], lower64(vs * K1), lower64(x + wf))
          w = weakHashLen32WithSeeds(s[pos + 32, 32], lower64(z + ws), y)
          vf, vs = higher64(v), lower64(v)
          wf, ws = higher64(w), lower64(w)
          z, x = x, z
          pos += 64
        end
        len -= 128
        break if len < 128
      end

      y = lower64(y + rotate(wf, 37) * K0 + z)
      x = lower64(x + rotate(vf + z, 49) * K0)
      # If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
      tail_done = 0
      while tail_done < len
        tail_done += 32
        y = lower64(rotate(y - x, 42) * K0 + vs)
        wf = lower64(wf + bytes(s[16 - tail_done, 8]))
        x = lower64(rotate(x, 49) * K0 + wf)
        wf = lower64(wf + vf)
        v = weakHashLen32WithSeeds(s[-tail_done, 32], vf, vs)
        vf, vs = higher64(v), lower64(v)
      end
      # At this point our 48 bytes of state should contain more than
      # enough information for a strong 128-bit hash.  We use two
      # different 48-byte-to-8-byte hashes to get a 16-byte final result.
      x = hashLen16(x, vf)
      y = hashLen16(y, wf)
      hf = lower64(hashLen16(lower64(x + vs), ws) + y)
      hs = hashLen16(lower64(x + ws), lower64(y + vs))
      (hf << 64) | hs
    end

    # Internal interface routines for CityHash module

    # 64-bit hash of s; dispatches on the byte length of the input.
    def self.hash64(s)
      s = binary(s)
      len = s.length
      if len <= 16
        hashLen0To16(s)
      elsif len <= 32
        hashLen17To32(s)
      elsif len <= 64
        hashLen33To64(s)
      else
        hashLenAbove64(s)
      end
    end

    def self.hash64WithSeed(s, seed)
      hash64WithSeeds(s, K2, seed)
    end

    def self.hash64WithSeeds(s, seed0, seed1)
      hashLen16(lower64(hash64(s) - seed0), seed1)
    end

    # 128-bit hash of s; the seed is derived from the input itself.
    def self.hash128(s)
      s = binary(s)
      len = s.length
      if len >= 16
        seed = (bytes(s[8, 8]) << 64) | (bytes(s[0, 8]) ^ K3)
        hash128WithSeed(s[16..-1], seed)
      elsif len >= 8
        seed = ((bytes(s[-8, 8]) ^ K1) << 64) | (bytes(s[0, 8]) ^ lower64(len * K0))
        hash128WithSeed("", seed)
      else
        hash128WithSeed(s, (K1 << 64) | K0)
      end
    end

  end # Module Internal
end # Module CityHash
|
data/license.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright (c) 2011 ashwinr
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/readme.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# CityHash for Ruby
|
|
2
|
+
|
|
3
|
+
This is an implementation of Google's CityHash for Ruby. It supports both 64-bit and 128-bit hashes. The newer CityHashCrc routines have not yet been implemented. Please note that the code has not been optimized for speed.
|
|
4
|
+
|
|
5
|
+
## Installing CityHash
|
|
6
|
+
|
|
7
|
+
Installing CityHash is as simple as
|
|
8
|
+
|
|
9
|
+
gem install city_hash
|
|
10
|
+
|
|
11
|
+
## Using CityHash
|
|
12
|
+
|
|
13
|
+
require 'city_hash'
|
|
14
|
+
|
|
15
|
+
# Calculate a 64-bit hash
|
|
16
|
+
CityHash.hash64('New York City')
|
|
17
|
+
|
|
18
|
+
# Calculate a 64-bit hash with seed
|
|
19
|
+
CityHash.hash64('East Village', 0xef23)
|
|
20
|
+
|
|
21
|
+
# Calculate a 64-bit hash with two seeds
|
|
22
|
+
CityHash.hash64('Meatpacking', 0xba3c, 0x5acd)
|
|
23
|
+
|
|
24
|
+
# Calculate a 128-bit hash
|
|
25
|
+
CityHash.hash128('SoHo')
|
|
26
|
+
|
|
27
|
+
# Calculate a 128-bit hash with seed
|
|
28
|
+
CityHash.hash128('Upper West Side', 0x8ad1)
|
|
29
|
+
|
|
30
|
+
## Testing CityHash
|
|
31
|
+
|
|
32
|
+
The test functions generate random strings and compare the outputs of both the C and Ruby implementations. The source for these strings is Dostoevsky's 'Crime and Punishment', obtained from Project Gutenberg and compressed within test.zip.
|
|
33
|
+
|
|
34
|
+
### Prerequisites
|
|
35
|
+
|
|
36
|
+
Google's implementation of CityHash must be installed on the test system, since the test routines link against libcityhash.
|
|
37
|
+
|
|
38
|
+
This 'city_hash' gem must already be installed on the test system.
|
|
39
|
+
|
|
40
|
+
### Running the tests
|
|
41
|
+
|
|
42
|
+
cd test/
|
|
43
|
+
./run.sh
|
|
44
|
+
|
|
45
|
+
### Authors
|
|
46
|
+
|
|
47
|
+
Ashwin Ramaswamy
|
|
48
|
+
|
|
49
|
+
### Copyright
|
|
50
|
+
|
|
51
|
+
Copyright (c) 2011 ashwinr. Please see license.txt for further details.
|
data/test/Makefile
ADDED
data/test/citymain.cc
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <string>
|
|
3
|
+
#include <sstream>
|
|
4
|
+
#include <iomanip>
|
|
5
|
+
#include "city.h"
|
|
6
|
+
|
|
7
|
+
// Print the command-line synopsis and the list of supported hash-function
// selectors to stdout, then terminate the process with exit status -1.
void usage(char** argv)
{
    static const char* const kSelectorLines[] = {
        "hashfunction = 1, for CityHash64",
        " = 2, for CityHash64WithSeed",
        " = 3, for CityHash64WithSeeds",
        " = 4, for CityHash128",
        " = 5, for CityHash128WithSeed"
    };
    const unsigned lineCount = sizeof(kSelectorLines) / sizeof(kSelectorLines[0]);

    std::cout << "Usage: " << argv[0] << " <hash function> <seed1> <seed2> <hash string>" << std::endl;
    for (unsigned i = 0; i < lineCount; ++i)
    {
        std::cout << kSelectorLines[i] << std::endl;
    }
    exit(-1);
}
|
|
17
|
+
|
|
18
|
+
int main(int argc, char** argv)
|
|
19
|
+
{
|
|
20
|
+
if(argc < 3)
|
|
21
|
+
{
|
|
22
|
+
usage(argv);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
std::stringstream hss, ss1, ss2;
|
|
26
|
+
int hashFunction = -1;
|
|
27
|
+
uint64 seed1, seed2;
|
|
28
|
+
uint128 seed128;
|
|
29
|
+
std::string hashString;
|
|
30
|
+
hss << argv[1], hss >> hashFunction;
|
|
31
|
+
switch(hashFunction)
|
|
32
|
+
{
|
|
33
|
+
case 1:
|
|
34
|
+
case 4:
|
|
35
|
+
hashString = argv[2];
|
|
36
|
+
break;
|
|
37
|
+
|
|
38
|
+
case 2:
|
|
39
|
+
if(argc != 4)
|
|
40
|
+
usage(argv);
|
|
41
|
+
ss1 << argv[2], ss1 >> seed1;
|
|
42
|
+
hashString = argv[3];
|
|
43
|
+
break;
|
|
44
|
+
|
|
45
|
+
case 3:
|
|
46
|
+
case 5:
|
|
47
|
+
if(argc != 5)
|
|
48
|
+
usage(argv);
|
|
49
|
+
ss1 << argv[2], ss1 >> seed1;
|
|
50
|
+
ss2 << argv[3], ss2 >> seed2;
|
|
51
|
+
hashString = argv[4];
|
|
52
|
+
break;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
uint64 hash64;
|
|
56
|
+
uint128 hash128;
|
|
57
|
+
switch(hashFunction)
|
|
58
|
+
{
|
|
59
|
+
case 1:
|
|
60
|
+
hash64 = CityHash64(hashString.c_str(),
|
|
61
|
+
hashString.length());
|
|
62
|
+
break;
|
|
63
|
+
|
|
64
|
+
case 2:
|
|
65
|
+
hash64 = CityHash64WithSeed(hashString.c_str(),
|
|
66
|
+
hashString.length(), seed1);
|
|
67
|
+
break;
|
|
68
|
+
|
|
69
|
+
case 3:
|
|
70
|
+
hash64 = CityHash64WithSeeds(hashString.c_str(),
|
|
71
|
+
hashString.length(),
|
|
72
|
+
seed1, seed2);
|
|
73
|
+
break;
|
|
74
|
+
|
|
75
|
+
case 4:
|
|
76
|
+
hash128 = CityHash128(hashString.c_str(), hashString.length());
|
|
77
|
+
break;
|
|
78
|
+
|
|
79
|
+
case 5:
|
|
80
|
+
seed128 = uint128(seed1, seed2);
|
|
81
|
+
hash128 = CityHash128WithSeed(hashString.c_str(),
|
|
82
|
+
hashString.length(), seed128);
|
|
83
|
+
break;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if(hashFunction <= 3)
|
|
87
|
+
{
|
|
88
|
+
std::cout << "0x" << std::hex << hash64 << std::endl;
|
|
89
|
+
}
|
|
90
|
+
else
|
|
91
|
+
{
|
|
92
|
+
std::cout << "0x" << std::hex << hash128.first << std::setfill('0')
|
|
93
|
+
<< std::setw(16) << hash128.second << std::endl;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
return 0;
|
|
97
|
+
}
|
data/test/run.sh
ADDED
data/test/tc_rcity.rb
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
require 'city_hash'
require 'fileutils'
require 'test/unit'
require 'zip/zip'
|
|
6
|
+
|
|
7
|
+
=begin
|
|
8
|
+
Run a gamut of test strings against both Google's C++ and
|
|
9
|
+
our Ruby implementation, and verify the results.
|
|
10
|
+
The test verifies both 64 and 128-bit hashes with and without
|
|
11
|
+
random seeds for strings of length from 1 to 2K.
|
|
12
|
+
The strings are sourced in randomly from 'Crime and Punishment'
|
|
13
|
+
obtained from Project Gutenberg.
|
|
14
|
+
=end
|
|
15
|
+
|
|
16
|
+
# Compares the Ruby CityHash implementation against the C++ reference binary
# (./city) on random excerpts of the text file shipped in test.zip.
class TestCityHash < Test::Unit::TestCase
  # Extract test.zip below /tmp (skipping files that already exist) and open
  # every extracted .txt file for reading.
  def setup
    puts 'Unzipping contents of test.zip'
    @files = []
    Zip::ZipFile::open('./test.zip') do |zf|
      zf.each do |entry|
        fpath = File.join('/tmp', entry.name)
        FileUtils.mkdir_p(File.dirname(fpath))
        zf.extract(entry, fpath) unless File.exist?(fpath)
        @files.push(File.new(fpath, 'rb')) if fpath =~ /txt$/
      end
    end
  end

  # Close the sample files opened by setup.
  def teardown
    (@files || []).each { |f| f.close unless f.closed? }
  end

  # Read +len+ bytes starting at a random offset of +file+.
  # Raises ArgumentError when the file is too short to hold such an excerpt
  # (the previous retry loop would never terminate in that case).
  def getRandomString(file, len)
    size = file.size
    raise ArgumentError, "file is too short for a #{len}-byte sample" if len >= size
    # Uniform over all offsets with offset + len < size.
    file.pos = rand(size - len)
    file.read(len)
  end

  # Hash +s+ with the Ruby implementation.
  # function: 1 = hash64, 2 = hash64 with one seed, 3 = hash64 with two seeds,
  # 4 = hash128, anything else = hash128 seeded with (seed2 << 64) | seed1.
  def getHash(function, seed1, seed2, s)
    case function
    when 1
      CityHash.hash64(s)
    when 2
      CityHash.hash64(s, seed1)
    when 3
      CityHash.hash64(s, seed1, seed2)
    when 4
      CityHash.hash128(s)
    else
      CityHash.hash128(s, (seed2 << 64) | seed1)
    end
  end

  # Hash +s+ with the C++ reference binary. Arguments are passed as an argv
  # array so no shell is involved and no quoting or escaping is needed.
  def getReferenceHash(function, seed1, seed2, s)
    args = [function.to_s]
    args << seed1.to_s unless function == 1 || function == 4
    args << seed2.to_s if function == 3 || function == 5
    args << s
    IO.popen(['./city'] + args) { |io| io.read }.hex
  end

  def test_city_hash
    max_int_64 = 2**64-1
    puts 'Running tests'
    start = Time.now
    file = @files[0] # only a single test file
    File.open('failures.txt', 'w') do |ffile|
      (1..2048).each do |i|  # length of hash string
        2.times do           # number of iterations
          (1..5).each do |k| # all hash functions
            seed1 = rand(max_int_64)
            seed2 = rand(max_int_64)
            string = getRandomString(file, i)
            # Remove any non-ASCII bytes; /n keeps the byte-range pattern
            # valid when the source file itself is UTF-8.
            string.gsub!(/[\x80-\xff]/n, "")
            # NUL bytes cannot be passed through argv.
            string.delete!("\x00")
            # Calculate Google's C++ hash
            cHex = getReferenceHash(k, seed1, seed2, string)
            # Calculate our Ruby hash
            rHex = getHash(k, seed1, seed2, string)
            # Verify hashes
            ffile.puts "Failed hash function #{k} for string \"#{string}\" with hashes #{cHex} and #{rHex}" if(cHex != rHex)
            assert(cHex == rHex)
          end
        end
      end
    end
    elapsed = (Time.now - start)/60.0
    puts "Finished in #{'%.2f' % elapsed} minutes"
  end

end
|
data/test/test.zip
ADDED
|
Binary file
|
metadata
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: city_hash
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
prerelease: false
|
|
5
|
+
segments:
|
|
6
|
+
- 0
|
|
7
|
+
- 0
|
|
8
|
+
- 1
|
|
9
|
+
version: 0.0.1
|
|
10
|
+
platform: ruby
|
|
11
|
+
authors:
|
|
12
|
+
- Ashwin Ramaswamy
|
|
13
|
+
autorequire:
|
|
14
|
+
bindir: bin
|
|
15
|
+
cert_chain: []
|
|
16
|
+
|
|
17
|
+
date: 2011-05-17 00:00:00 -04:00
|
|
18
|
+
default_executable:
|
|
19
|
+
dependencies:
|
|
20
|
+
- !ruby/object:Gem::Dependency
|
|
21
|
+
name: test-unit
|
|
22
|
+
prerelease: false
|
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
24
|
+
none: false
|
|
25
|
+
requirements:
|
|
26
|
+
- - ">="
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
segments:
|
|
29
|
+
- 0
|
|
30
|
+
version: "0"
|
|
31
|
+
type: :development
|
|
32
|
+
version_requirements: *id001
|
|
33
|
+
- !ruby/object:Gem::Dependency
|
|
34
|
+
name: rubyzip
|
|
35
|
+
prerelease: false
|
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
|
37
|
+
none: false
|
|
38
|
+
requirements:
|
|
39
|
+
- - ">="
|
|
40
|
+
- !ruby/object:Gem::Version
|
|
41
|
+
segments:
|
|
42
|
+
- 0
|
|
43
|
+
version: "0"
|
|
44
|
+
type: :development
|
|
45
|
+
version_requirements: *id002
|
|
46
|
+
description: Google's CityHash Implementation in Ruby
|
|
47
|
+
email:
|
|
48
|
+
- ashwin.raman9@gmail.com
|
|
49
|
+
executables: []
|
|
50
|
+
|
|
51
|
+
extensions: []
|
|
52
|
+
|
|
53
|
+
extra_rdoc_files: []
|
|
54
|
+
|
|
55
|
+
files:
|
|
56
|
+
- .gitignore
|
|
57
|
+
- Gemfile
|
|
58
|
+
- Gemfile.lock
|
|
59
|
+
- Rakefile
|
|
60
|
+
- city_hash.gemspec
|
|
61
|
+
- lib/city_hash.rb
|
|
62
|
+
- lib/city_hash/version.rb
|
|
63
|
+
- license.txt
|
|
64
|
+
- readme.md
|
|
65
|
+
- test/Makefile
|
|
66
|
+
- test/citymain.cc
|
|
67
|
+
- test/run.sh
|
|
68
|
+
- test/tc_rcity.rb
|
|
69
|
+
- test/test.zip
|
|
70
|
+
has_rdoc: true
|
|
71
|
+
homepage: ""
|
|
72
|
+
licenses: []
|
|
73
|
+
|
|
74
|
+
post_install_message:
|
|
75
|
+
rdoc_options: []
|
|
76
|
+
|
|
77
|
+
require_paths:
|
|
78
|
+
- lib
|
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
80
|
+
none: false
|
|
81
|
+
requirements:
|
|
82
|
+
- - ">="
|
|
83
|
+
- !ruby/object:Gem::Version
|
|
84
|
+
segments:
|
|
85
|
+
- 0
|
|
86
|
+
version: "0"
|
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
88
|
+
none: false
|
|
89
|
+
requirements:
|
|
90
|
+
- - ">="
|
|
91
|
+
- !ruby/object:Gem::Version
|
|
92
|
+
segments:
|
|
93
|
+
- 0
|
|
94
|
+
version: "0"
|
|
95
|
+
requirements: []
|
|
96
|
+
|
|
97
|
+
rubyforge_project: city_hash
|
|
98
|
+
rubygems_version: 1.3.7
|
|
99
|
+
signing_key:
|
|
100
|
+
specification_version: 3
|
|
101
|
+
summary: CityHash for Ruby
|
|
102
|
+
test_files:
|
|
103
|
+
- test/Makefile
|
|
104
|
+
- test/citymain.cc
|
|
105
|
+
- test/run.sh
|
|
106
|
+
- test/tc_rcity.rb
|
|
107
|
+
- test/test.zip
|