city_hash 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; plain http:// leaves downloads open to tampering.
source "https://rubygems.org"

# Specify your gem's dependencies in city_hash.gemspec
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,18 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ city_hash (0.0.1)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ rubyzip (0.9.4)
10
+ test-unit (2.3.0)
11
+
12
+ PLATFORMS
13
+ ruby
14
+
15
+ DEPENDENCIES
16
+ city_hash!
17
+ rubyzip
18
+ test-unit
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/city_hash.gemspec ADDED
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for city_hash. File lists are taken from `git ls-files`,
# so the gem must be built from inside a git checkout.
$:.push File.expand_path("../lib", __FILE__)
require "city_hash/version"

Gem::Specification.new do |s|
  s.name        = "city_hash"
  s.version     = CityHash::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Ashwin Ramaswamy"]
  s.email       = ["ashwin.raman9@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{CityHash for Ruby}
  s.description = %q{Google's CityHash Implementation in Ruby}
  # license.txt carries the MIT license text; declare it so tooling can see it.
  s.license     = "MIT"

  # NOTE(review): rubyforge_project= is reported as deprecated by newer
  # RubyGems releases; only set it where the writer still exists.
  s.rubyforge_project = "city_hash" if s.respond_to?(:rubyforge_project=)

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency('test-unit')
  s.add_development_dependency('rubyzip')
end
data/lib/city_hash.rb ADDED
@@ -0,0 +1,332 @@
1
+ # All source comments are duplicated from Google's CityHash (1.0.2)
2
+ # implementation at: http://code.google.com/p/cityhash/
3
+
4
module CityHash

  # Computes a 64-bit hash of +s+.
  #
  # The input is always read byte-wise, so the result depends only on the
  # string's bytes and not on its encoding tag.
  #
  # @param s [String] data to hash
  # @param seed0 [Integer, nil] with only this seed given, hash64WithSeed is used
  # @param seed1 [Integer, nil] with both seeds given, hash64WithSeeds is used
  # @return [Integer] value in 0...2**64
  def self.hash64(s, seed0 = nil, seed1 = nil)
    return CityHash::Internal.hash64(s) if seed0.nil?
    return CityHash::Internal.hash64WithSeed(s, seed0) if seed1.nil?

    CityHash::Internal.hash64WithSeeds(s, seed0, seed1)
  end

  # Computes a 128-bit hash of +s+.
  #
  # @param s [String] data to hash
  # @param seed [Integer, nil] optional 128-bit seed; its low 64 bits and the
  #   next 64 bits are used as the two seed words
  # @return [Integer] value in 0...2**128, built as (first << 64) | second
  #   from the two 64-bit result words
  def self.hash128(s, seed = nil)
    return CityHash::Internal.hash128(s) if seed.nil?

    CityHash::Internal.hash128WithSeed(s, seed)
  end

  # Implementation routines. All arithmetic is done on Ruby's unbounded
  # Integers and explicitly wrapped to 64 bits with lower64 wherever the
  # value is later rotated, shifted right, or used as a low word.
  module Internal

    # Some primes between 2^63 and 2^64 for various uses
    K0 = 0xc3a5c85c97cb3127
    K1 = 0xb492b66fbe98f273
    K2 = 0x9ae16a3b2f90404f
    K3 = 0xc949d7c7509e6557

    # Keep only the low 32 bits of x.
    def self.lower32(x)
      x & 0xffffffff
    end

    # Keep only the low 64 bits of x (also maps negatives into 0...2**64).
    def self.lower64(x)
      x & 0xffffffffffffffff
    end

    # Everything above the low 64 bits of x.
    def self.higher64(x)
      x >> 64
    end

    # Interpret the bytes of s as a little-endian unsigned integer.
    # Works on raw bytes, so multibyte characters are not kept in
    # character order the way String#reverse would keep them.
    def self.bytes(s)
      s.unpack('C*').reverse.inject(0) { |h, b| (h << 8) | b }
    end

    # Little-endian 64-bit load of the 8 bytes starting at byte offset pos.
    # A negative pos counts from the end of s.
    def self.fetch64(s, pos)
      bytes(s.byteslice(pos, 8))
    end

    # Little-endian 32-bit load of the 4 bytes starting at byte offset pos.
    # A negative pos counts from the end of s.
    def self.fetch32(s, pos)
      bytes(s.byteslice(pos, 4))
    end

    # Hash 128 input bits down to 64 bits of output.
    # This is intended to be a reasonably good hash function.
    def self.hash128To64(x)
      # Murmur-inspired hashing.
      kMul = 0x9ddfea08eb382d69
      a = lower64((lower64(x) ^ higher64(x)) * kMul)
      a ^= (a >> 47)
      b = lower64((higher64(x) ^ a) * kMul)
      b ^= (b >> 47)
      lower64(b * kMul)
    end

    # Bitwise right rotate of a 64-bit value. val must already be < 2**64.
    def self.rotate(val, shift)
      return val if shift == 0

      (val >> shift) | lower64(val << (64 - shift))
    end

    # Equivalent to rotate(...), but requires the second arg to be non-zero.
    def self.rotateByAtleast1(val, shift)
      (val >> shift) | lower64(val << (64 - shift))
    end

    def self.shiftMix(val)
      lower64(val ^ (val >> 47))
    end

    # Mix two 64-bit words into one. u must be < 2**64 because it is placed
    # in the low half of the 128-bit intermediate.
    def self.hashLen16(u, v)
      hash128To64((v << 64) | u)
    end

    # Hash for inputs of 0 to 16 bytes.
    def self.hashLen0To16(s)
      len = s.bytesize
      if len > 8
        a = fetch64(s, 0)
        b = fetch64(s, -8)
        # b + len must wrap to 64 bits before rotating, otherwise the carry
        # bit leaks into the rotated value.
        return hashLen16(a, rotateByAtleast1(lower64(b + len), len)) ^ b
      elsif len >= 4
        a = fetch32(s, 0)
        return hashLen16(len + (a << 3), fetch32(s, -4))
      elsif len > 0
        a = s.getbyte(0)
        b = s.getbyte(len >> 1)
        c = s.getbyte(len - 1)
        y = lower32(a + (b << 8))
        z = len + c * 4
        return lower64(shiftMix(lower64((y * K2) ^ (z * K3))) * K2)
      end
      K2
    end

    # This probably works well for 16-byte strings as well, but it may be overkill
    # in that case.
    def self.hashLen17To32(s)
      a = lower64(fetch64(s, 0) * K1)
      b = fetch64(s, 8)
      c = lower64(fetch64(s, -8) * K2)
      d = lower64(fetch64(s, -16) * K0)
      hashLen16(lower64(rotate(lower64(a - b), 43) + rotate(c, 30) + d),
                lower64(a + rotate(b ^ K3, 20) - c + s.bytesize))
    end

    # Return a 16-byte hash for 48 bytes. Quick and dirty.
    # Callers do best to use "random-looking" values for a and b.
    # The result packs the two 64-bit words as (first << 64) | second.
    def self._weakHashLen32WithSeeds(w, x, y, z, a, b)
      a += w
      b = rotate(lower64(b + a + z), 21)
      c = a
      a += x
      a = lower64(a + y)
      b += rotate(a, 44)
      (lower64(a + z) << 64) | lower64(b + c)
    end

    # Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
    def self.weakHashLen32WithSeeds(s, a, b)
      _weakHashLen32WithSeeds(fetch64(s, 0),
                              fetch64(s, 8),
                              fetch64(s, 16),
                              fetch64(s, 24),
                              a,
                              b)
    end

    # Return an 8-byte hash for 33 to 64 bytes.
    def self.hashLen33To64(s)
      len = s.bytesize
      z = fetch64(s, 24)
      a = lower64(fetch64(s, 0) + (len + fetch64(s, -16)) * K0)
      b = rotate(lower64(a + z), 52)
      c = rotate(a, 37)
      a = lower64(a + fetch64(s, 8))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + fetch64(s, 16))
      vf = lower64(a + z)
      vs = lower64(b + rotate(a, 31) + c)
      # The sum of two 64-bit loads can carry into bit 64; wrap it before it
      # is rotated below.
      a = lower64(fetch64(s, 16) + fetch64(s, -32))
      z = fetch64(s, -8)
      b = rotate(lower64(a + z), 52)
      c = rotate(a, 37)
      a = lower64(a + fetch64(s, -24))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + fetch64(s, -16))
      wf = lower64(a + z)
      ws = lower64(b + rotate(a, 31) + c)
      r = shiftMix(lower64((vf + ws) * K2 + (wf + vs) * K0))
      lower64(shiftMix(lower64(r * K0 + vs)) * K2)
    end

    # Hash for inputs longer than 64 bytes.
    def self.hashLenAbove64(s)
      len = s.bytesize
      # For strings over 64 bytes we hash the end first, and then as we
      # loop we keep 56 bytes of state: v, w, x, y, and z.
      x = fetch64(s, 0)
      y = fetch64(s, -16) ^ K1
      z = fetch64(s, -56) ^ K0
      v = weakHashLen32WithSeeds(s.byteslice(-64, 32), len, y)
      w = weakHashLen32WithSeeds(s.byteslice(-32, 32), lower64(len * K1), K0)

      z = lower64(z + shiftMix(lower64(v)) * K1)
      x = lower64(rotate(lower64(z + x), 39) * K1)
      y = lower64(rotate(y, 33) * K1)

      # Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
      remaining = (len - 1) & ~63
      pos = 0 # byte offset of the current 64-byte chunk
      loop do
        x = lower64(rotate(lower64(x + y + higher64(v) + fetch64(s, pos + 16)), 37) * K1)
        y = lower64(rotate(lower64(y + lower64(v) + fetch64(s, pos + 48)), 42) * K1)
        x ^= lower64(w)
        y ^= higher64(v)
        z = rotate(z ^ higher64(w), 33)
        v = weakHashLen32WithSeeds(s.byteslice(pos, 32),
                                   lower64(lower64(v) * K1),
                                   lower64(x + higher64(w)))
        w = weakHashLen32WithSeeds(s.byteslice(pos + 32, 32),
                                   lower64(z + lower64(w)),
                                   y)
        z, x = x, z
        pos += 64
        remaining -= 64
        break if remaining == 0
      end

      hashLen16(lower64(hashLen16(higher64(v), higher64(w)) + shiftMix(y) * K1 + z),
                lower64(hashLen16(lower64(v), lower64(w)) + x))
    end

    # A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
    # of any length representable in ssize_t. Based on City and Murmur.
    def self.cityMurmur(s, seed)
      len = s.bytesize
      a = lower64(seed)
      b = higher64(seed)
      c = 0
      d = 0
      l = len - 16
      if l <= 0 # len <= 16
        a = lower64(shiftMix(lower64(a * K1)) * K1)
        c = lower64(b * K1 + hashLen0To16(s))
        d = shiftMix(lower64(a + (len >= 8 ? fetch64(s, 0) : c)))
      else # len > 16
        c = hashLen16(lower64(fetch64(s, -8) + K1), a)
        d = hashLen16(lower64(b + len), lower64(c + fetch64(s, -16)))
        a = lower64(a + d)
        pos = 0 # byte offset of the current 16-byte chunk
        loop do
          a ^= lower64(shiftMix(lower64(fetch64(s, pos) * K1)) * K1)
          a = lower64(a * K1)
          b ^= a
          c ^= lower64(shiftMix(lower64(fetch64(s, pos + 8) * K1)) * K1)
          c = lower64(c * K1)
          d ^= c
          pos += 16
          l -= 16
          break unless l > 0
        end
      end
      a = hashLen16(a, c)
      b = hashLen16(d, b)
      ((a ^ b) << 64) | hashLen16(b, a)
    end

    # 128-bit hash of s with a 128-bit seed. Inputs shorter than 128 bytes
    # are delegated to cityMurmur.
    def self.hash128WithSeed(s, seed)
      len = s.bytesize
      return cityMurmur(s, seed) if len < 128

      # We expect len >= 128 to be the common case. Keep 56 bytes of state:
      # v, w, x, y, and z.
      x = lower64(seed)
      y = lower64(higher64(seed)) # wrapped so an over-wide seed cannot corrupt the rotate below
      z = lower64(len * K1)
      vf = lower64(lower64(rotate(y ^ K1, 49) * K1) + fetch64(s, 0))
      vs = lower64(lower64(rotate(vf, 42) * K1) + fetch64(s, 8))
      wf = lower64(lower64(rotate(lower64(y + z), 35) * K1) + x)
      ws = lower64(rotate(lower64(x + fetch64(s, 88)), 53) * K1)

      # This is the same inner loop as CityHash64(); each pass consumes two
      # 64-byte chunks.
      pos = 0
      loop do
        2.times do
          x = lower64(rotate(lower64(x + y + vf + fetch64(s, pos + 16)), 37) * K1)
          y = lower64(rotate(lower64(y + vs + fetch64(s, pos + 48)), 42) * K1)
          x ^= ws
          y ^= vf
          z = rotate(z ^ wf, 33)
          nv = weakHashLen32WithSeeds(s.byteslice(pos, 32), lower64(vs * K1), lower64(x + wf))
          nw = weakHashLen32WithSeeds(s.byteslice(pos + 32, 32), lower64(z + ws), y)
          vf, vs = higher64(nv), lower64(nv)
          wf, ws = higher64(nw), lower64(nw)
          z, x = x, z
          pos += 64
        end
        len -= 128
        break if len < 128
      end

      y = lower64(y + rotate(wf, 37) * K0 + z)
      x = lower64(x + rotate(lower64(vf + z), 49) * K0)
      # If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
      tail_done = 0
      while tail_done < len
        tail_done += 32
        y = lower64(rotate(lower64(y - x), 42) * K0 + vs)
        wf = lower64(wf + fetch64(s, 16 - tail_done))
        x = lower64(rotate(x, 49) * K0 + wf)
        wf = lower64(wf + vf)
        tv = weakHashLen32WithSeeds(s.byteslice(-tail_done, 32), vf, vs)
        vf, vs = higher64(tv), lower64(tv)
      end
      # At this point our 48 bytes of state should contain more than
      # enough information for a strong 128-bit hash. We use two
      # different 48-byte-to-8-byte hashes to get a 16-byte final result.
      x = hashLen16(x, vf)
      y = hashLen16(y, wf)
      hf = lower64(hashLen16(lower64(x + vs), ws) + y)
      hs = lower64(hashLen16(lower64(x + ws), lower64(y + vs)))
      (hf << 64) | hs
    end

    # Internal interface routines for CityHash module

    # Unseeded 64-bit hash; dispatches on the byte length of s.
    def self.hash64(s)
      len = s.bytesize
      if len <= 16
        hashLen0To16(s)
      elsif len <= 32
        hashLen17To32(s)
      elsif len <= 64
        hashLen33To64(s)
      else
        hashLenAbove64(s)
      end
    end

    # 64-bit hash with one seed: K2 is used as the first seed.
    def self.hash64WithSeed(s, seed)
      hash64WithSeeds(s, K2, seed)
    end

    # 64-bit hash with two seeds, mixed into the unseeded hash.
    def self.hash64WithSeeds(s, seed0, seed1)
      hashLen16(lower64(hash64(s) - seed0), seed1)
    end

    # Unseeded 128-bit hash: derives a seed from the leading bytes of s.
    def self.hash128(s)
      len = s.bytesize
      if len >= 16
        seed = (fetch64(s, 8) << 64) | (fetch64(s, 0) ^ K3)
        hash128WithSeed(s.byteslice(16, len - 16), seed)
      elsif len >= 8
        seed = ((fetch64(s, -8) ^ K1) << 64) | (fetch64(s, 0) ^ lower64(len * K0))
        hash128WithSeed("", seed)
      else
        hash128WithSeed(s, (K1 << 64) | K0)
      end
    end

  end # Module Internal
end # Module CityHash
@@ -0,0 +1,3 @@
1
module CityHash
  # Gem version string, read by city_hash.gemspec. Frozen so callers cannot
  # mutate the shared constant in place.
  VERSION = "0.0.1".freeze
end
data/license.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 ashwinr
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/readme.md ADDED
@@ -0,0 +1,51 @@
1
+ # CityHash for Ruby
2
+
3
+ This is an implementation of Google's CityHash for Ruby. It supports both 64-bit and 128-bit hashes. The newer CityHashCrc routines have not yet been implemented. Please note that the code has not been optimized for speed.
4
+
5
+ ## Installing CityHash
6
+
7
+ Installing CityHash is as simple as
8
+
9
+ gem install city_hash
10
+
11
+ ## Using CityHash
12
+
13
+ require 'city_hash'
14
+
15
+ # Calculate a 64-bit hash
16
+ CityHash.hash64('New York City')
17
+
18
+ # Calculate a 64-bit hash with seed
19
+ CityHash.hash64('East Village', 0xef23)
20
+
21
+ # Calculate a 64-bit hash with two seeds
22
+ CityHash.hash64('Meatpacking', 0xba3c, 0x5acd)
23
+
24
+ # Calculate a 128-bit hash
25
+ CityHash.hash128('SoHo')
26
+
27
+ # Calculate a 128-bit hash with seed
28
+ CityHash.hash128('Upper West Side', 0x8ad1)
29
+
30
+ ## Testing CityHash
31
+
32
+ The test functions generate random strings and compare the outputs of both the C and Ruby implementations. The source for these strings is Dostoevsky's 'Crime and Punishment', obtained from Project Gutenberg and compressed within test.zip.
33
+
34
+ ### Prerequisites
35
+
36
+ Google's implementation of CityHash must be installed on the test system, since the test driver links against libcityhash.
37
+
38
+ This 'city_hash' gem must already be installed on the test system.
39
+
40
+ ### Running the tests
41
+
42
+ cd test/
43
+ ./run.sh
44
+
45
+ ### Authors
46
+
47
+ Ashwin Ramaswamy
48
+
49
+ ### Copyright
50
+
51
+ Copyright (c) 2011 ashwinr. Please see license.txt for further details.
data/test/Makefile ADDED
@@ -0,0 +1,10 @@
1
# Builds `city`, the small C++ driver the Ruby tests compare against.
# Requires Google's CityHash (city.h and libcityhash) to be installed.
CC= g++
LOCAL_LIBS= -lcityhash
OBJS= citymain.o
OUTPUT= city

# Libraries go after the objects that reference them: linkers that resolve
# symbols in command-line order would otherwise drop -lcityhash as unused.
city: $(OBJS)
	$(CC) $(OBJS) $(LOCAL_LIBS) -o $(OUTPUT)

.PHONY: clean
clean:
	rm -f $(OUTPUT) $(OBJS)
data/test/citymain.cc ADDED
@@ -0,0 +1,97 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <sstream>
4
+ #include <iomanip>
5
+ #include "city.h"
6
+
7
// Prints the command-line synopsis and terminates the process with a
// non-zero status. The text goes to stderr so that callers capturing stdout
// (the Ruby test parses stdout as a hex hash) never mistake it for a result.
void usage(char** argv)
{
  std::cerr << "Usage: " << argv[0] << " <hash function> <seed1> <seed2> <hash string>" << std::endl;
  std::cerr << "hashfunction = 1, for CityHash64" << std::endl;
  std::cerr << " = 2, for CityHash64WithSeed" << std::endl;
  std::cerr << " = 3, for CityHash64WithSeeds" << std::endl;
  std::cerr << " = 4, for CityHash128" << std::endl;
  std::cerr << " = 5, for CityHash128WithSeed" << std::endl;
  exit(-1);
}
17
+
18
+ int main(int argc, char** argv)
19
+ {
20
+ if(argc < 3)
21
+ {
22
+ usage(argv);
23
+ }
24
+
25
+ std::stringstream hss, ss1, ss2;
26
+ int hashFunction = -1;
27
+ uint64 seed1, seed2;
28
+ uint128 seed128;
29
+ std::string hashString;
30
+ hss << argv[1], hss >> hashFunction;
31
+ switch(hashFunction)
32
+ {
33
+ case 1:
34
+ case 4:
35
+ hashString = argv[2];
36
+ break;
37
+
38
+ case 2:
39
+ if(argc != 4)
40
+ usage(argv);
41
+ ss1 << argv[2], ss1 >> seed1;
42
+ hashString = argv[3];
43
+ break;
44
+
45
+ case 3:
46
+ case 5:
47
+ if(argc != 5)
48
+ usage(argv);
49
+ ss1 << argv[2], ss1 >> seed1;
50
+ ss2 << argv[3], ss2 >> seed2;
51
+ hashString = argv[4];
52
+ break;
53
+ }
54
+
55
+ uint64 hash64;
56
+ uint128 hash128;
57
+ switch(hashFunction)
58
+ {
59
+ case 1:
60
+ hash64 = CityHash64(hashString.c_str(),
61
+ hashString.length());
62
+ break;
63
+
64
+ case 2:
65
+ hash64 = CityHash64WithSeed(hashString.c_str(),
66
+ hashString.length(), seed1);
67
+ break;
68
+
69
+ case 3:
70
+ hash64 = CityHash64WithSeeds(hashString.c_str(),
71
+ hashString.length(),
72
+ seed1, seed2);
73
+ break;
74
+
75
+ case 4:
76
+ hash128 = CityHash128(hashString.c_str(), hashString.length());
77
+ break;
78
+
79
+ case 5:
80
+ seed128 = uint128(seed1, seed2);
81
+ hash128 = CityHash128WithSeed(hashString.c_str(),
82
+ hashString.length(), seed128);
83
+ break;
84
+ }
85
+
86
+ if(hashFunction <= 3)
87
+ {
88
+ std::cout << "0x" << std::hex << hash64 << std::endl;
89
+ }
90
+ else
91
+ {
92
+ std::cout << "0x" << std::hex << hash128.first << std::setfill('0')
93
+ << std::setw(16) << hash128.second << std::endl;
94
+ }
95
+
96
+ return 0;
97
+ }
data/test/run.sh ADDED
@@ -0,0 +1,18 @@
1
#!/bin/sh
# Builds the C++ reference driver and runs the Ruby comparison tests.
# Must be run from the test/ directory.

# Compile the C version of city

if ! make
then
  echo "make failed. Exiting..." >&2
  exit 1
fi

# Test whether 'city' exists

if [ ! -f './city' ]
then
  echo "./city does not exist. Exiting..."
  # POSIX sh only accepts exit statuses in 0..255; "exit -1" is an error in
  # strict shells such as dash.
  exit 1
fi

# Run the comparison tests

ruby tc_rcity.rb
18
+
data/test/tc_rcity.rb ADDED
@@ -0,0 +1,96 @@
1
+ #!/usr/local/bin/ruby
2
+
3
+ require 'city_hash'
4
+ require 'test/unit'
5
+ require 'zip/zip'
6
+
7
+ =begin
8
+ Run a gamut of test strings against both Google's C++ and
9
+ our Ruby implementation, and verify the results.
10
+ The test verifies both 64 and 128-bit hashes with and without
11
+ random seeds for strings of length from 1 to 2K.
12
+ The strings are sourced in randomly from 'Crime and Punishment'
13
+ obtained from Project Gutenberg.
14
+ =end
15
+
16
require 'fileutils'

# Compares the Ruby implementation against the compiled ./city reference
# driver for substrings of the text file shipped in test.zip.
class TestCityHash < Test::Unit::TestCase
  # Extracts test.zip under /tmp (skipping files already present) and opens
  # every extracted .txt file for reading.
  def setup
    puts 'Unzipping contents of test.zip'
    @files = []
    Zip::ZipFile.open('./test.zip') do |zf|
      zf.each do |entry|
        fpath = File.join('/tmp', entry.name)
        FileUtils.mkdir_p(File.dirname(fpath))
        zf.extract(entry, fpath) unless File.exist?(fpath)
        @files.push(File.new(fpath, 'rb')) if fpath =~ /txt$/
      end
    end
  end

  # Closes the file handles opened by setup.
  def teardown
    @files.each { |f| f.close unless f.closed? }
  end

  # Reads len bytes from a uniformly random offset such that
  # offset + len < file.size.
  def getRandomString(file, len)
    size = file.size
    # The previous retry loop never terminated for len >= size.
    raise ArgumentError, "file too small for a #{len}-byte sample" if len >= size

    file.pos = rand(size - len)
    file.read(len)
  end

  # Dispatches to the Ruby hash selected by function (1..5, the same
  # numbering ./city uses). Function 5 packs the seeds as (seed2 << 64) | seed1.
  def getHash(function, seed1, seed2, s)
    case function
    when 1
      CityHash.hash64(s)
    when 2
      CityHash.hash64(s, seed1)
    when 3
      CityHash.hash64(s, seed1, seed2)
    when 4
      CityHash.hash128(s)
    else
      CityHash.hash128(s, (seed2 << 64) | seed1)
    end
  end

  # Runs ./city with the given arguments and parses its "0x..." output.
  # The array form of IO.popen bypasses the shell, so quotes, `$`, backticks
  # and backslashes in the sample need no escaping.
  def reference_hash(function, args)
    IO.popen(['./city', function.to_s, *args]) { |io| io.read }.hex
  end

  def test_city_hash
    max_int_64 = 2**64 - 1
    puts 'Running tests'
    file = @files[0] # only a single test file
    File.open('failures.txt', 'w') do |ffile|
      (1..2048).each do |len| # length of hash string
        2.times do # number of iterations
          (1..5).each do |k| # all hash functions
            seed1 = rand(max_int_64)
            seed2 = rand(max_int_64)
            # Drop bytes >= 0x80 (done byte-wise: a /[\x80-\xff]/ literal is
            # not a valid regexp in a UTF-8 source file).
            string = getRandomString(file, len).bytes.select { |b| b < 0x80 }.pack('C*')
            seeds = case k
                    when 1, 4 then []
                    when 2 then [seed1.to_s]
                    else [seed1.to_s, seed2.to_s]
                    end
            # Calculate Google's C++ hash
            cHex = reference_hash(k, seeds + [string])
            # Calculate our Ruby hash
            rHex = getHash(k, seed1, seed2, string)
            # Verify hashes
            if cHex != rHex
              ffile.puts "Failed hash function #{k} for string \"#{string}\" with hashes #{cHex} and #{rHex}"
              ffile.flush # keep the record even though the assertion below aborts the test
            end
            assert(cHex == rHex)
          end
        end
      end
    end
  end

end
data/test/test.zip ADDED
Binary file
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: city_hash
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Ashwin Ramaswamy
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-05-17 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: test-unit
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: rubyzip
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
44
+ type: :development
45
+ version_requirements: *id002
46
+ description: Google's CityHash Implementation in Ruby
47
+ email:
48
+ - ashwin.raman9@gmail.com
49
+ executables: []
50
+
51
+ extensions: []
52
+
53
+ extra_rdoc_files: []
54
+
55
+ files:
56
+ - .gitignore
57
+ - Gemfile
58
+ - Gemfile.lock
59
+ - Rakefile
60
+ - city_hash.gemspec
61
+ - lib/city_hash.rb
62
+ - lib/city_hash/version.rb
63
+ - license.txt
64
+ - readme.md
65
+ - test/Makefile
66
+ - test/citymain.cc
67
+ - test/run.sh
68
+ - test/tc_rcity.rb
69
+ - test/test.zip
70
+ has_rdoc: true
71
+ homepage: ""
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options: []
76
+
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ segments:
93
+ - 0
94
+ version: "0"
95
+ requirements: []
96
+
97
+ rubyforge_project: city_hash
98
+ rubygems_version: 1.3.7
99
+ signing_key:
100
+ specification_version: 3
101
+ summary: CityHash for Ruby
102
+ test_files:
103
+ - test/Makefile
104
+ - test/citymain.cc
105
+ - test/run.sh
106
+ - test/tc_rcity.rb
107
+ - test/test.zip