city_hash 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +18 -0
- data/Rakefile +2 -0
- data/city_hash.gemspec +24 -0
- data/lib/city_hash.rb +332 -0
- data/lib/city_hash/version.rb +3 -0
- data/license.txt +20 -0
- data/readme.md +51 -0
- data/test/Makefile +10 -0
- data/test/citymain.cc +97 -0
- data/test/run.sh +18 -0
- data/test/tc_rcity.rb +96 -0
- data/test/test.zip +0 -0
- metadata +107 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/Rakefile
ADDED
data/city_hash.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "city_hash/version"
|
4
|
+
|
5
|
+
# Gem specification for city_hash, a pure-Ruby port of Google's CityHash.
Gem::Specification.new do |s|
  s.name        = "city_hash"
  s.version     = CityHash::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Ashwin Ramaswamy"]
  s.email       = ["ashwin.raman9@gmail.com"]
  # NOTE(review): the homepage is empty; RubyGems warns about this on build.
  s.homepage    = ""
  s.summary     = %q{CityHash for Ruby}
  s.description = %q{Google's CityHash Implementation in Ruby}
  # license.txt carries the MIT license text; declare it so it shows up in
  # the gem metadata.
  s.licenses    = ["MIT"]

  # The rubyforge_project setter is deprecated/removed in newer RubyGems;
  # only set it where the running RubyGems still supports it.
  s.rubyforge_project = "city_hash" if s.respond_to?(:rubyforge_project=)

  # The packaged file lists are taken from git, so the gem must be built
  # from inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency('test-unit')
  s.add_development_dependency('rubyzip')
end
|
data/lib/city_hash.rb
ADDED
@@ -0,0 +1,332 @@
|
|
1
|
+
# All source comments are duplicated from Google's CityHash (1.0.2)
|
2
|
+
# implementation at: http://code.google.com/p/cityhash/
|
3
|
+
|
4
|
+
# Pure-Ruby implementation of the CityHash 64-bit and 128-bit hash functions.
#
# Inputs are always hashed as raw bytes, so a string hashes to the same value
# whatever encoding it is tagged with. All results are non-negative Integers.
module CityHash

  # Computes a 64-bit hash of +s+.
  #
  # @param s [String] data to hash (treated as a byte string)
  # @param seed0 [Integer, nil] optional seed; when given alone it is used
  #   as the single seed of the seeded variant
  # @param seed1 [Integer, nil] optional second seed
  # @return [Integer] value in 0...2**64
  def self.hash64(s, seed0 = nil, seed1 = nil)
    return CityHash::Internal.hash64(s) if seed0.nil?
    return CityHash::Internal.hash64WithSeed(s, seed0) if seed1.nil?
    CityHash::Internal.hash64WithSeeds(s, seed0, seed1)
  end

  # Computes a 128-bit hash of +s+.
  #
  # @param s [String] data to hash (treated as a byte string)
  # @param seed [Integer, nil] optional 128-bit seed
  # @return [Integer] value in 0...2**128
  def self.hash128(s, seed = nil)
    return CityHash::Internal.hash128(s) if seed.nil?
    CityHash::Internal.hash128WithSeed(s, seed)
  end

  # Implementation details. Ruby Integers are arbitrary precision, so every
  # intermediate that the reference keeps in a uint64 is explicitly reduced
  # with lower64.
  module Internal

    # Some primes between 2^63 and 2^64 for various uses
    K0 = 0xc3a5c85c97cb3127
    K1 = 0xb492b66fbe98f273
    K2 = 0x9ae16a3b2f90404f
    K3 = 0xc949d7c7509e6557

    # Keeps the low 32 bits of x.
    def self.lower32(x)
      x & 0xffffffff
    end

    # Keeps the low 64 bits of x (also maps negative values to their
    # two's-complement 64-bit representation).
    def self.lower64(x)
      x & 0xffffffffffffffff
    end

    # Returns everything above the low 64 bits of x.
    def self.higher64(x)
      x >> 64
    end

    # Returns an unfrozen copy of s tagged as binary, so that length and
    # slicing operate on bytes rather than on (possibly multibyte) characters.
    def self.binary(s)
      s.dup.force_encoding(Encoding::BINARY)
    end

    # Interprets the bytes of s as a little-endian unsigned integer
    # (the first byte is the least significant).
    def self.bytes(s)
      value = 0
      s.each_byte.with_index { |b, i| value |= b << (8 * i) }
      value
    end

    # Hash 128 input bits down to 64 bits of output.
    # This is intended to be a reasonably good hash function.
    def self.hash128To64(x)
      # Murmur-inspired hashing.
      kMul = 0x9ddfea08eb382d69
      a = lower64((lower64(x) ^ higher64(x)) * kMul)
      a ^= (a >> 47)
      b = lower64((higher64(x) ^ a) * kMul)
      b ^= (b >> 47)
      lower64(b * kMul)
    end

    # Bitwise right rotate of a 64-bit value. The operand is reduced to 64
    # bits first; without that a carry bit above bit 63 would be shifted
    # into the result.
    def self.rotate(val, shift)
      val = lower64(val)
      return val if shift == 0
      (val >> shift) | lower64(val << (64 - shift))
    end

    # Equivalent to rotate(...), but requires the second arg to be non-zero.
    def self.rotateByAtleast1(val, shift)
      val = lower64(val)
      (val >> shift) | lower64(val << (64 - shift))
    end

    # Folds the high bits of a 64-bit value into its low bits.
    def self.shiftMix(val)
      val = lower64(val)
      val ^ (val >> 47)
    end

    # Hashes the two 64-bit words u (low) and v (high) down to 64 bits.
    def self.hashLen16(u, v)
      hash128To64((lower64(v) << 64) | lower64(u))
    end

    # Returns the 64-bit hash of a string of 0 to 16 bytes.
    def self.hashLen0To16(s)
      len = s.bytesize
      if len > 8
        a = bytes(s[0..7])
        b = bytes(s[-8..-1])
        # b + len can carry past 64 bits; it must wrap like a uint64.
        return hashLen16(a, rotateByAtleast1(lower64(b + len), len)) ^ b
      elsif len >= 4
        a = bytes(s[0..3])
        return hashLen16(len + (a << 3), bytes(s[-4..-1]))
      elsif len > 0
        a = bytes(s[0])
        b = bytes(s[len >> 1])
        c = bytes(s[len - 1])
        y = lower32(a + (b << 8))
        z = len + c * 4
        return lower64(shiftMix(lower64(y * K2 ^ z * K3)) * K2)
      end
      K2
    end

    # This probably works well for 16-byte strings as well, but it may be overkill
    # in that case.
    def self.hashLen17To32(s)
      a = lower64(bytes(s[0..7]) * K1)
      b = bytes(s[8..15])
      c = lower64(bytes(s[-8..-1]) * K2)
      d = lower64(bytes(s[-16..-9]) * K0)
      hashLen16(lower64(rotate(lower64(a - b), 43) + rotate(c, 30) + d),
                lower64(a + rotate(b ^ K3, 20) - c + s.bytesize))
    end

    # Return a 16-byte hash for 48 bytes. Quick and dirty.
    # Callers do best to use "random-looking" values for a and b.
    # The result packs the first word in the high 64 bits and the second
    # word in the low 64 bits.
    def self._weakHashLen32WithSeeds(w, x, y, z, a, b)
      a = lower64(a + w)
      b = rotate(lower64(b + a + z), 21)
      c = a
      a = lower64(a + x + y)
      b = lower64(b + rotate(a, 44))
      (lower64(a + z) << 64) | lower64(b + c)
    end

    # Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
    def self.weakHashLen32WithSeeds(s, a, b)
      _weakHashLen32WithSeeds(bytes(s[0..7]),
                              bytes(s[8..15]),
                              bytes(s[16..23]),
                              bytes(s[24..31]),
                              a,
                              b)
    end

    # Return an 8-byte hash for 33 to 64 bytes.
    def self.hashLen33To64(s)
      len = s.bytesize
      z = bytes(s[24..31])
      a = lower64(bytes(s[0..7]) + (len + bytes(s[-16..-9])) * K0)
      b = rotate(lower64(a + z), 52)
      c = rotate(a, 37)
      a = lower64(a + bytes(s[8..15]))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + bytes(s[16..23]))
      vf = lower64(a + z)
      vs = lower64(b + rotate(a, 31) + c)
      # The sum of two 64-bit words must wrap before it is rotated below.
      a = lower64(bytes(s[16..23]) + bytes(s[-32..-25]))
      z = bytes(s[-8..-1])
      b = rotate(lower64(a + z), 52)
      c = rotate(a, 37)
      a = lower64(a + bytes(s[-24..-17]))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + bytes(s[-16..-9]))
      wf = lower64(a + z)
      ws = lower64(b + rotate(a, 31) + c)
      r = shiftMix(lower64((vf + ws) * K2 + (wf + vs) * K0))
      lower64(shiftMix(lower64(r * K0 + vs)) * K2)
    end

    # Returns the 64-bit hash of a string longer than 64 bytes.
    def self.hashLenAbove64(s)
      len = s.bytesize
      # For strings over 64 bytes we hash the end first, and then as we
      # loop we keep 56 bytes of state: v, w, x, y, and z.
      x = bytes(s[0..7])
      y = bytes(s[-16..-9]) ^ K1
      z = bytes(s[-56..-49]) ^ K0
      v = weakHashLen32WithSeeds(s[-64..-1], len, y)
      w = weakHashLen32WithSeeds(s[-32..-1], lower64(len * K1), K0)

      z = lower64(z + shiftMix(lower64(v)) * K1)
      x = lower64(rotate(lower64(z + x), 39) * K1)
      y = lower64(rotate(y, 33) * K1)

      # Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
      len = (len - 1) & ~63
      loop do
        xrv = lower64(x + y + higher64(v) + bytes(s[16..23]))
        yrv = lower64(y + lower64(v) + bytes(s[48..55]))
        x = lower64(rotate(xrv, 37) * K1)
        y = lower64(rotate(yrv, 42) * K1)
        x ^= lower64(w)
        y ^= higher64(v)
        z = rotate(z ^ higher64(w), 33)
        v = weakHashLen32WithSeeds(s, lower64(lower64(v) * K1), lower64(x + higher64(w)))
        w = weakHashLen32WithSeeds(s[32..-1], lower64(z + lower64(w)), y)
        z, x = x, z
        s = s[64..-1]
        len -= 64
        break if len == 0
      end

      hashLen16(lower64(hashLen16(higher64(v), higher64(w)) + shiftMix(y) * K1 + z),
                lower64(hashLen16(lower64(v), lower64(w)) + x))
    end

    # A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
    # of any length representable in ssize_t. Based on City and Murmur.
    def self.cityMurmur(s, seed)
      len = s.bytesize
      a = lower64(seed)
      b = lower64(higher64(seed))
      c = 0
      d = 0
      l = len - 16
      if l <= 0
        a = lower64(shiftMix(lower64(a * K1)) * K1)
        c = lower64(b * K1 + hashLen0To16(s))
        d = shiftMix(lower64(a + (len >= 8 ? bytes(s[0..7]) : c)))
      else
        c = hashLen16(lower64(bytes(s[-8..-1]) + K1), a)
        d = hashLen16(lower64(b + len), lower64(c + bytes(s[-16..-9])))
        a = lower64(a + d)
        loop do
          a ^= lower64(shiftMix(lower64(bytes(s[0..7]) * K1)) * K1)
          a = lower64(a * K1)
          b ^= a
          c ^= lower64(shiftMix(lower64(bytes(s[8..15]) * K1)) * K1)
          c = lower64(c * K1)
          d ^= c
          s = s[16..-1]
          l -= 16
          break unless l > 0
        end
      end
      a = hashLen16(a, c)
      b = hashLen16(d, b)
      ((a ^ b) << 64) | hashLen16(b, a)
    end

    # Returns the 128-bit hash of s for the given 128-bit seed.
    def self.hash128WithSeed(s, seed)
      # Work on a binary copy; orig_s keeps the whole input for the tail
      # pass while s is advanced chunk by chunk.
      orig_s = binary(s)
      s = orig_s
      len = s.bytesize
      return cityMurmur(s, seed) if len < 128

      # We expect len >= 128 to be the common case. Keep 56 bytes of state:
      # v, w, x, y, and z.
      x = lower64(seed)
      y = lower64(higher64(seed))
      z = lower64(len * K1)
      vf = lower64(lower64(rotate(y ^ K1, 49) * K1) + bytes(s[0..7]))
      vs = lower64(lower64(rotate(vf, 42) * K1) + bytes(s[8..15]))
      wf = lower64(lower64(rotate(lower64(y + z), 35) * K1) + x)
      ws = lower64(rotate(lower64(x + bytes(s[88..95])), 53) * K1)

      # This is the same inner loop as CityHash64(); each pass consumes two
      # 64-byte chunks.
      loop do
        2.times do
          x = lower64(rotate(lower64(x + y + vf + bytes(s[16..23])), 37) * K1)
          y = lower64(rotate(lower64(y + vs + bytes(s[48..55])), 42) * K1)
          x ^= ws
          y ^= vf
          z = rotate(z ^ wf, 33)
          v = weakHashLen32WithSeeds(s, lower64(vs * K1), lower64(x + wf))
          w = weakHashLen32WithSeeds(s[32..-1], lower64(z + ws), y)
          vf, vs = higher64(v), lower64(v)
          wf, ws = higher64(w), lower64(w)
          z, x = x, z
          s = s[64..-1]
        end
        len -= 128
        break if len < 128
      end

      y = lower64(y + rotate(wf, 37) * K0 + z)
      x = lower64(x + rotate(lower64(vf + z), 49) * K0)
      # If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
      tail_done = 0
      while tail_done < len
        tail_done += 32
        y = lower64(rotate(lower64(y - x), 42) * K0 + vs)
        wf = lower64(wf + bytes(orig_s[(16 - tail_done)..(23 - tail_done)]))
        x = lower64(rotate(x, 49) * K0 + wf)
        wf = lower64(wf + vf)
        v = weakHashLen32WithSeeds(orig_s[-tail_done..-1], vf, vs)
        vf, vs = higher64(v), lower64(v)
      end
      # At this point our 48 bytes of state should contain more than
      # enough information for a strong 128-bit hash. We use two
      # different 48-byte-to-8-byte hashes to get a 16-byte final result.
      x = hashLen16(x, vf)
      y = hashLen16(y, wf)
      hf = lower64(hashLen16(lower64(x + vs), ws) + y)
      hs = lower64(hashLen16(lower64(x + ws), lower64(y + vs)))
      (hf << 64) | hs
    end

    # Internal interface routines for CityHash module

    # Returns the unseeded 64-bit hash of s, dispatching on its byte length.
    def self.hash64(s)
      s = binary(s)
      len = s.bytesize
      if len <= 16
        hashLen0To16(s)
      elsif len <= 32
        hashLen17To32(s)
      elsif len <= 64
        hashLen33To64(s)
      else
        hashLenAbove64(s)
      end
    end

    # Returns the 64-bit hash of s with a single seed (K2 is used as the
    # first seed of the two-seed variant).
    def self.hash64WithSeed(s, seed)
      hash64WithSeeds(s, K2, seed)
    end

    # Returns the 64-bit hash of s mixed with two seeds.
    def self.hash64WithSeeds(s, seed0, seed1)
      hashLen16(lower64(hash64(s) - seed0), seed1)
    end

    # Returns the unseeded 128-bit hash of s; the seed is derived from the
    # leading bytes of s (or from K0/K1 for inputs shorter than 8 bytes).
    def self.hash128(s)
      s = binary(s)
      len = s.bytesize
      if len >= 16
        seed = (bytes(s[8..15]) << 64) | (bytes(s[0..7]) ^ K3)
        hash128WithSeed(s[16..-1], seed)
      elsif len >= 8
        seed = (bytes(s[-8..-1]) ^ K1) << 64
        seed |= (bytes(s[0..7]) ^ lower64(len * K0))
        hash128WithSeed("", seed)
      else
        hash128WithSeed(s, (K1 << 64) | K0)
      end
    end

  end # Module Internal
end # Module CityHash
|
data/license.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 ashwinr
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/readme.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# CityHash for Ruby
|
2
|
+
|
3
|
+
This is an implementation of Google's CityHash for Ruby. It supports both 64-bit and 128-bit hashes. The newer CityHashCrc routines have not yet been implemented. Please note that the code has not been optimized for speed.
|
4
|
+
|
5
|
+
## Installing CityHash
|
6
|
+
|
7
|
+
Installing CityHash is as simple as
|
8
|
+
|
9
|
+
gem install city_hash
|
10
|
+
|
11
|
+
## Using CityHash
|
12
|
+
|
13
|
+
require 'city_hash'
|
14
|
+
|
15
|
+
# Calculate a 64-bit hash
|
16
|
+
CityHash.hash64('New York City')
|
17
|
+
|
18
|
+
# Calculate a 64-bit hash with seed
|
19
|
+
CityHash.hash64('East Village', 0xef23)
|
20
|
+
|
21
|
+
# Calculate a 64-bit hash with two seeds
|
22
|
+
CityHash.hash64('Meatpacking', 0xba3c, 0x5acd)
|
23
|
+
|
24
|
+
# Calculate a 128-bit hash
|
25
|
+
CityHash.hash128('SoHo')
|
26
|
+
|
27
|
+
# Calculate a 128-bit hash with seed
|
28
|
+
CityHash.hash128('Upper West Side', 0x8ad1)
|
29
|
+
|
30
|
+
## Testing CityHash
|
31
|
+
|
32
|
+
The test functions generate random strings and compare the outputs of both the C and Ruby implementations. The source for these strings is Dostoevsky's 'Crime and Punishment', obtained from Project Gutenberg and compressed within test.zip.
|
33
|
+
|
34
|
+
### Prerequisites
|
35
|
+
|
36
|
+
Google's implementation of CityHash must be installed on the test system, since the test routines link against libcityhash.
|
37
|
+
|
38
|
+
This 'city_hash' gem must already be installed on the test system.
|
39
|
+
|
40
|
+
### Running the tests
|
41
|
+
|
42
|
+
cd test/
|
43
|
+
./run.sh
|
44
|
+
|
45
|
+
### Authors
|
46
|
+
|
47
|
+
Ashwin Ramaswamy
|
48
|
+
|
49
|
+
### Copyright
|
50
|
+
|
51
|
+
Copyright (c) 2011 ashwinr. Please see license.txt for further details.
|
data/test/Makefile
ADDED
data/test/citymain.cc
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
#include <string>
|
3
|
+
#include <sstream>
|
4
|
+
#include <iomanip>
|
5
|
+
#include "city.h"
|
6
|
+
|
7
|
+
// Prints the command-line synopsis and the list of hash function ids to
// standard output, then terminates the process with exit status -1.
void usage(char** argv)
{
    const char* programName = argv[0];

    std::cout << "Usage: " << programName
              << " <hash function> <seed1> <seed2> <hash string>" << std::endl
              << "hashfunction = 1, for CityHash64" << std::endl
              << " = 2, for CityHash64WithSeed" << std::endl
              << " = 3, for CityHash64WithSeeds" << std::endl
              << " = 4, for CityHash128" << std::endl
              << " = 5, for CityHash128WithSeed" << std::endl;

    exit(-1);
}
|
17
|
+
|
18
|
+
int main(int argc, char** argv)
|
19
|
+
{
|
20
|
+
if(argc < 3)
|
21
|
+
{
|
22
|
+
usage(argv);
|
23
|
+
}
|
24
|
+
|
25
|
+
std::stringstream hss, ss1, ss2;
|
26
|
+
int hashFunction = -1;
|
27
|
+
uint64 seed1, seed2;
|
28
|
+
uint128 seed128;
|
29
|
+
std::string hashString;
|
30
|
+
hss << argv[1], hss >> hashFunction;
|
31
|
+
switch(hashFunction)
|
32
|
+
{
|
33
|
+
case 1:
|
34
|
+
case 4:
|
35
|
+
hashString = argv[2];
|
36
|
+
break;
|
37
|
+
|
38
|
+
case 2:
|
39
|
+
if(argc != 4)
|
40
|
+
usage(argv);
|
41
|
+
ss1 << argv[2], ss1 >> seed1;
|
42
|
+
hashString = argv[3];
|
43
|
+
break;
|
44
|
+
|
45
|
+
case 3:
|
46
|
+
case 5:
|
47
|
+
if(argc != 5)
|
48
|
+
usage(argv);
|
49
|
+
ss1 << argv[2], ss1 >> seed1;
|
50
|
+
ss2 << argv[3], ss2 >> seed2;
|
51
|
+
hashString = argv[4];
|
52
|
+
break;
|
53
|
+
}
|
54
|
+
|
55
|
+
uint64 hash64;
|
56
|
+
uint128 hash128;
|
57
|
+
switch(hashFunction)
|
58
|
+
{
|
59
|
+
case 1:
|
60
|
+
hash64 = CityHash64(hashString.c_str(),
|
61
|
+
hashString.length());
|
62
|
+
break;
|
63
|
+
|
64
|
+
case 2:
|
65
|
+
hash64 = CityHash64WithSeed(hashString.c_str(),
|
66
|
+
hashString.length(), seed1);
|
67
|
+
break;
|
68
|
+
|
69
|
+
case 3:
|
70
|
+
hash64 = CityHash64WithSeeds(hashString.c_str(),
|
71
|
+
hashString.length(),
|
72
|
+
seed1, seed2);
|
73
|
+
break;
|
74
|
+
|
75
|
+
case 4:
|
76
|
+
hash128 = CityHash128(hashString.c_str(), hashString.length());
|
77
|
+
break;
|
78
|
+
|
79
|
+
case 5:
|
80
|
+
seed128 = uint128(seed1, seed2);
|
81
|
+
hash128 = CityHash128WithSeed(hashString.c_str(),
|
82
|
+
hashString.length(), seed128);
|
83
|
+
break;
|
84
|
+
}
|
85
|
+
|
86
|
+
if(hashFunction <= 3)
|
87
|
+
{
|
88
|
+
std::cout << "0x" << std::hex << hash64 << std::endl;
|
89
|
+
}
|
90
|
+
else
|
91
|
+
{
|
92
|
+
std::cout << "0x" << std::hex << hash128.first << std::setfill('0')
|
93
|
+
<< std::setw(16) << hash128.second << std::endl;
|
94
|
+
}
|
95
|
+
|
96
|
+
return 0;
|
97
|
+
}
|
data/test/run.sh
ADDED
data/test/tc_rcity.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
#!/usr/local/bin/ruby
|
2
|
+
|
3
|
+
require 'city_hash'
|
4
|
+
require 'test/unit'
|
5
|
+
require 'zip/zip'
|
6
|
+
|
7
|
+
=begin
|
8
|
+
Run a gamut of test strings against both Google's C++ and
|
9
|
+
our Ruby implementation, and verify the results.
|
10
|
+
The test verifies both 64 and 128-bit hashes with and without
|
11
|
+
random seeds for strings of length from 1 to 2K.
|
12
|
+
The strings are sourced in randomly from 'Crime and Punishment'
|
13
|
+
obtained from Project Gutenberg.
|
14
|
+
=end
|
15
|
+
|
16
|
+
# Cross-checks the Ruby CityHash implementation against the C++ reference
# binary (./city) on random excerpts of the text file shipped in test.zip.
class TestCityHash < Test::Unit::TestCase
  # Extracts test.zip under /tmp (files already present are reused) and
  # opens every extracted .txt file as a source of test strings.
  def setup
    puts 'Unzipping contents of test.zip'
    @files = []
    Zip::ZipFile.open('./test.zip') do |zf|
      zf.each do |file|
        fpath = File.join('/tmp', file.name)
        FileUtils.mkdir_p(File.dirname(fpath))
        zf.extract(file, fpath) unless File.exist?(fpath)
        @files.push(File.new(fpath, 'rb')) if fpath =~ /txt$/
      end
    end
  end

  # Closes the source files opened by setup.
  def teardown
    @files.each { |f| f.close unless f.closed? }
  end

  # Reads len bytes starting at a random offset of file, chosen so that
  # offset + len stays below the file size.
  def getRandomString(file, len)
    size = file.size
    raise ArgumentError, "file too small for a #{len}-byte sample" if size <= len
    file.pos = rand(size - len)
    file.read(len)
  end

  # Hashes s with the Ruby implementation selected by function (1..5, same
  # numbering as the C++ driver; any other value selects the seeded
  # 128-bit hash).
  def getHash(function, seed1, seed2, s)
    case function
    when 1 then CityHash.hash64(s)
    when 2 then CityHash.hash64(s, seed1)
    when 3 then CityHash.hash64(s, seed1, seed2)
    when 4 then CityHash.hash128(s)
    else        CityHash.hash128(s, (seed2 << 64) | seed1)
    end
  end

  # For every string length from 1 to 2048, hashes two random strings with
  # each of the five hash functions and compares the Ruby result with the
  # output of the C++ driver. Mismatches are also logged to failures.txt.
  def test_city_hash
    max_int_64 = 2**64 - 1
    puts 'Running tests'
    file = @files[0] # only a single test file
    File.open('failures.txt', 'w') do |ffile|
      (1..2048).each do |len| # length of hash string
        2.times do # number of iterations
          (1..5).each do |k| # all hash functions
            seed1 = rand(max_int_64)
            seed2 = rand(max_int_64)
            # Drop all non-ASCII bytes (0x80..0xff) from the sample.
            string = getRandomString(file, len).bytes.select { |b| b < 0x80 }.pack('C*')

            # Calculate Google's C++ hash. The arguments are passed as an
            # argv array, so no shell is involved and nothing in the sample
            # text needs quoting or escaping.
            cityArgs = [k.to_s]
            cityArgs << seed1.to_s unless k == 1 || k == 4
            cityArgs << seed2.to_s if k == 3 || k == 5
            cityArgs << string
            cHex = IO.popen(['./city', *cityArgs]) { |io| io.read }.hex

            # Calculate our Ruby hash
            rHex = getHash(k, seed1, seed2, string)

            # Verify hashes
            if cHex != rHex
              ffile.puts "Failed hash function #{k} for string \"#{string}\" with hashes #{cHex} and #{rHex}"
            end
            assert(cHex == rHex)
          end
        end
      end
    end
  end

end
|
data/test/test.zip
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: city_hash
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Ashwin Ramaswamy
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-05-17 00:00:00 -04:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: test-unit
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :development
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: rubyzip
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
44
|
+
type: :development
|
45
|
+
version_requirements: *id002
|
46
|
+
description: Google's CityHash Implementation in Ruby
|
47
|
+
email:
|
48
|
+
- ashwin.raman9@gmail.com
|
49
|
+
executables: []
|
50
|
+
|
51
|
+
extensions: []
|
52
|
+
|
53
|
+
extra_rdoc_files: []
|
54
|
+
|
55
|
+
files:
|
56
|
+
- .gitignore
|
57
|
+
- Gemfile
|
58
|
+
- Gemfile.lock
|
59
|
+
- Rakefile
|
60
|
+
- city_hash.gemspec
|
61
|
+
- lib/city_hash.rb
|
62
|
+
- lib/city_hash/version.rb
|
63
|
+
- license.txt
|
64
|
+
- readme.md
|
65
|
+
- test/Makefile
|
66
|
+
- test/citymain.cc
|
67
|
+
- test/run.sh
|
68
|
+
- test/tc_rcity.rb
|
69
|
+
- test/test.zip
|
70
|
+
has_rdoc: true
|
71
|
+
homepage: ""
|
72
|
+
licenses: []
|
73
|
+
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
segments:
|
85
|
+
- 0
|
86
|
+
version: "0"
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
segments:
|
93
|
+
- 0
|
94
|
+
version: "0"
|
95
|
+
requirements: []
|
96
|
+
|
97
|
+
rubyforge_project: city_hash
|
98
|
+
rubygems_version: 1.3.7
|
99
|
+
signing_key:
|
100
|
+
specification_version: 3
|
101
|
+
summary: CityHash for Ruby
|
102
|
+
test_files:
|
103
|
+
- test/Makefile
|
104
|
+
- test/citymain.cc
|
105
|
+
- test/run.sh
|
106
|
+
- test/tc_rcity.rb
|
107
|
+
- test/test.zip
|