city_hash 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in city_hash.gemspec
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,18 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ city_hash (0.0.1)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ rubyzip (0.9.4)
10
+ test-unit (2.3.0)
11
+
12
+ PLATFORMS
13
+ ruby
14
+
15
+ DEPENDENCIES
16
+ city_hash!
17
+ rubyzip
18
+ test-unit
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
# Load Bundler and register the rake tasks provided by its gem helper.
require "bundler"

Bundler::GemHelper.install_tasks
data/city_hash.gemspec ADDED
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for city_hash. The version string is read from
# lib/city_hash/version.rb, so lib/ is put on the load path first.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)
require "city_hash/version"

Gem::Specification.new do |s|
  s.name        = "city_hash"
  s.version     = CityHash::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Ashwin Ramaswamy"]
  s.email       = ["ashwin.raman9@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{CityHash for Ruby}
  s.description = %q{Google's CityHash Implementation in Ruby}
  # license.txt carries the MIT license text; declare it in the gem metadata.
  s.licenses    = ["MIT"]

  s.rubyforge_project = "city_hash"

  # The packaged file lists are whatever git currently tracks.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Only needed to run the comparison tests under test/.
  s.add_development_dependency('test-unit')
  s.add_development_dependency('rubyzip')
end
data/lib/city_hash.rb ADDED
@@ -0,0 +1,332 @@
1
+ # All source comments are duplicated from Google's CityHash (1.0.2)
2
+ # implementation at: http://code.google.com/p/cityhash/
3
+
4
module CityHash

  # Compute a 64-bit CityHash of the bytes of +s+.
  #
  # s     - String to hash; it is hashed byte-wise, whatever its encoding.
  # seed0 - optional Integer seed. When only seed0 is given it is used as
  #         the single seed of the seeded variant.
  # seed1 - optional second Integer seed; ignored when seed0 is nil.
  #
  # Returns an Integer in 0...2**64.
  def self.hash64(s, seed0 = nil, seed1 = nil)
    return CityHash::Internal.hash64(s) if seed0.nil?
    return CityHash::Internal.hash64WithSeed(s, seed0) if seed1.nil?
    CityHash::Internal.hash64WithSeeds(s, seed0, seed1)
  end

  # Compute a 128-bit CityHash of the bytes of +s+.
  #
  # s    - String to hash; it is hashed byte-wise, whatever its encoding.
  # seed - optional 128-bit Integer seed (low 64 bits = first seed word,
  #        high 64 bits = second seed word).
  #
  # Returns an Integer in 0...2**128 whose upper 64 bits are the first word
  # of the result pair and whose lower 64 bits are the second word.
  def self.hash128(s, seed = nil)
    return CityHash::Internal.hash128(s) if seed.nil?
    CityHash::Internal.hash128WithSeed(s, seed)
  end

  # Implementation details. Ruby Integers are unbounded, so every result
  # that the reference code keeps in a uint64 is explicitly truncated with
  # lower64 before it is rotated, shifted or packed.
  module Internal

    # Some primes between 2^63 and 2^64 for various uses
    K0 = 0xc3a5c85c97cb3127
    K1 = 0xb492b66fbe98f273
    K2 = 0x9ae16a3b2f90404f
    K3 = 0xc949d7c7509e6557

    # Keep the low 32 bits of x.
    def self.lower32(x)
      x & 0xffffffff
    end

    # Keep the low 64 bits of x (emulates uint64 wrap-around).
    def self.lower64(x)
      x & 0xffffffffffffffff
    end

    # Everything above the low 64 bits of x.
    def self.higher64(x)
      x >> 64
    end

    # Return s as a binary (ASCII-8BIT) string so that length, slicing and
    # byte order all operate on bytes rather than on characters. The
    # caller's string is never modified.
    def self.toBinary(s)
      return s if s.encoding == Encoding::BINARY
      s.dup.force_encoding(Encoding::BINARY)
    end

    # Interpret the bytes of s as a little-endian unsigned integer
    # (first byte is the least significant).
    def self.bytes(s)
      h = 0
      s.bytes.reverse_each { |b| h = (h << 8) | b }
      h
    end

    # Hash 128 input bits down to 64 bits of output.
    # This is intended to be a reasonably good hash function.
    def self.hash128To64(x)
      # Murmur-inspired hashing.
      kMul = 0x9ddfea08eb382d69
      a = lower64((lower64(x) ^ higher64(x)) * kMul)
      a ^= (a >> 47)
      b = lower64((higher64(x) ^ a) * kMul)
      b ^= (b >> 47)
      lower64(b * kMul)
    end

    # Bitwise right rotate of a 64-bit value.
    # val must already be reduced to 64 bits; shift is in 0..63.
    def self.rotate(val, shift)
      return val if shift == 0
      (val >> shift) | lower64(val << (64 - shift))
    end

    # Equivalent to rotate(...), but requires the second arg to be non-zero.
    def self.rotateByAtleast1(val, shift)
      (val >> shift) | lower64(val << (64 - shift))
    end

    # Fold the high bits of a 64-bit value into its low bits.
    def self.shiftMix(val)
      lower64(val ^ (val >> 47))
    end

    # Hash the pair (u, v) down to 64 bits; u is the low word, v the high.
    def self.hashLen16(u, v)
      hash128To64((v << 64) | u)
    end

    # 64-bit hash for strings of 0 to 16 bytes.
    def self.hashLen0To16(s)
      len = s.length
      if len > 8
        a = bytes(s[0..7])
        b = bytes(s[-8..-1])
        # b + len must wrap at 64 bits before it is rotated; otherwise a
        # carry out of bit 63 leaks into the rotated value.
        return hashLen16(a, rotateByAtleast1(lower64(b + len), len)) ^ b
      elsif len >= 4
        a = bytes(s[0..3])
        return hashLen16(len + (a << 3), bytes(s[-4..-1]))
      elsif len > 0
        a = bytes(s[0])
        b = bytes(s[len >> 1])
        c = bytes(s[len - 1])
        y = lower32(a + (b << 8))
        z = len + c * 4
        return lower64(shiftMix(lower64((y * K2) ^ (z * K3))) * K2)
      end
      K2
    end

    # This probably works well for 16-byte strings as well, but it may be overkill
    # in that case.
    def self.hashLen17To32(s)
      a = lower64(bytes(s[0..7]) * K1)
      b = bytes(s[8..15])
      c = lower64(bytes(s[-8..-1]) * K2)
      d = lower64(bytes(s[-16..-9]) * K0)
      hashLen16(lower64(rotate(lower64(a - b), 43) + rotate(c, 30) + d),
                lower64(a + rotate(b ^ K3, 20) - c + s.length))
    end

    # Return a 16-byte hash for 48 bytes. Quick and dirty.
    # Callers do best to use "random-looking" values for a and b.
    # The result packs the first word in the upper 64 bits and the second
    # word in the lower 64 bits.
    def self._weakHashLen32WithSeeds(w, x, y, z, a, b)
      a = lower64(a + w)
      b = rotate(lower64(b + a + z), 21)
      c = a
      a = lower64(a + x + y)
      b += rotate(a, 44)
      (lower64(a + z) << 64) | lower64(b + c)
    end

    # Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
    def self.weakHashLen32WithSeeds(s, a, b)
      _weakHashLen32WithSeeds(bytes(s[0..7]),
                              bytes(s[8..15]),
                              bytes(s[16..23]),
                              bytes(s[24..31]),
                              a,
                              b)
    end

    # Return an 8-byte hash for 33 to 64 bytes.
    def self.hashLen33To64(s)
      len = s.length
      z = bytes(s[24..31])
      a = lower64(bytes(s[0..7]) + (len + bytes(s[-16..-9])) * K0)
      b = rotate(lower64(a + z), 52)
      c = rotate(a, 37)
      a = lower64(a + bytes(s[8..15]))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + bytes(s[16..23]))
      vf = lower64(a + z)
      vs = lower64(b + rotate(a, 31) + c)
      # The sum of two 64-bit words must wrap before it is rotated below.
      a = lower64(bytes(s[16..23]) + bytes(s[-32..-25]))
      z = bytes(s[-8..-1])
      b = rotate(lower64(a + z), 52)
      c = rotate(a, 37)
      a = lower64(a + bytes(s[-24..-17]))
      c = lower64(c + rotate(a, 7))
      a = lower64(a + bytes(s[-16..-9]))
      wf = lower64(a + z)
      ws = lower64(b + rotate(a, 31) + c)
      r = shiftMix(lower64((vf + ws) * K2 + (wf + vs) * K0))
      lower64(shiftMix(lower64(r * K0 + vs)) * K2)
    end

    # Return an 8-byte hash for strings longer than 64 bytes.
    def self.hashLenAbove64(s)
      len = s.length
      # For strings over 64 bytes we hash the end first, and then as we
      # loop we keep 56 bytes of state: v, w, x, y, and z.
      x = bytes(s[0..7])
      y = bytes(s[-16..-9]) ^ K1
      z = bytes(s[-56..-49]) ^ K0
      v = weakHashLen32WithSeeds(s[-64..-1], len, y)
      w = weakHashLen32WithSeeds(s[-32..-1], lower64(len * K1), K0)

      z = lower64(z + shiftMix(lower64(v)) * K1)
      x = lower64(rotate(lower64(z + x), 39) * K1)
      y = lower64(rotate(y, 33) * K1)

      # Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
      len = (len - 1) & ~63
      loop do
        xrv = lower64(x + y + higher64(v) + bytes(s[16..23]))
        yrv = lower64(y + lower64(v) + bytes(s[48..55]))
        x = lower64(rotate(xrv, 37) * K1)
        y = lower64(rotate(yrv, 42) * K1)
        x ^= lower64(w)
        y ^= higher64(v)
        z = rotate(z ^ higher64(w), 33)
        v = weakHashLen32WithSeeds(s, lower64(lower64(v) * K1), lower64(x + higher64(w)))
        w = weakHashLen32WithSeeds(s[32..-1], lower64(z + lower64(w)), y)
        z, x = x, z
        s = s[64..-1]
        len -= 64
        break if len == 0
      end

      hashLen16(lower64(hashLen16(higher64(v), higher64(w)) + shiftMix(y) * K1 + z),
                lower64(hashLen16(lower64(v), lower64(w)) + x))
    end

    # A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
    # of any length representable in ssize_t. Based on City and Murmur.
    def self.cityMurmur(s, seed)
      len = s.length
      a = lower64(seed)
      b = lower64(higher64(seed))
      c = 0
      d = 0
      l = len - 16
      if l <= 0
        # len <= 16
        a = lower64(shiftMix(lower64(a * K1)) * K1)
        c = lower64(b * K1 + hashLen0To16(s))
        d = shiftMix(lower64(a + (len >= 8 ? bytes(s[0..7]) : c)))
      else
        # len > 16
        c = hashLen16(lower64(bytes(s[-8..-1]) + K1), a)
        d = hashLen16(lower64(b + len), lower64(c + bytes(s[-16..-9])))
        a = lower64(a + d)
        loop do
          a ^= lower64(shiftMix(lower64(bytes(s[0..7]) * K1)) * K1)
          a = lower64(a * K1)
          b ^= a
          c ^= lower64(shiftMix(lower64(bytes(s[8..15]) * K1)) * K1)
          c = lower64(c * K1)
          d ^= c
          s = s[16..-1]
          l -= 16
          break unless l > 0
        end
      end
      a = hashLen16(a, c)
      b = hashLen16(d, b)
      ((a ^ b) << 64) | hashLen16(b, a)
    end

    # 128-bit hash of s mixed with a 128-bit seed. Seed bits above bit 127
    # are discarded.
    def self.hash128WithSeed(s, seed)
      s = toBinary(s)
      # s is advanced chunk by chunk below; the tail step needs the whole input.
      orig_s = s
      len = s.length
      return cityMurmur(s, seed) if len < 128

      # We expect len >= 128 to be the common case. Keep 56 bytes of state:
      # v, w, x, y, and z.
      x = lower64(seed)
      y = lower64(higher64(seed))
      z = lower64(len * K1)
      vf = lower64(lower64(rotate(y ^ K1, 49) * K1) + bytes(s[0..7]))
      vs = lower64(lower64(rotate(vf, 42) * K1) + bytes(s[8..15]))
      wf = lower64(lower64(rotate(lower64(y + z), 35) * K1) + x)
      ws = lower64(rotate(lower64(x + bytes(s[88..95])), 53) * K1)

      # This is the same inner loop as CityHash64(), run twice per 128 bytes.
      loop do
        2.times do
          x = lower64(rotate(lower64(x + y + vf + bytes(s[16..23])), 37) * K1)
          y = lower64(rotate(lower64(y + vs + bytes(s[48..55])), 42) * K1)
          x ^= ws
          y ^= vf
          z = rotate(z ^ wf, 33)
          v = weakHashLen32WithSeeds(s, lower64(vs * K1), lower64(x + wf))
          w = weakHashLen32WithSeeds(s[32..-1], lower64(z + ws), y)
          vf, vs = higher64(v), lower64(v)
          wf, ws = higher64(w), lower64(w)
          z, x = x, z
          s = s[64..-1]
        end
        len -= 128
        break if len < 128
      end

      y = lower64(y + rotate(wf, 37) * K0 + z)
      x = lower64(x + rotate(lower64(vf + z), 49) * K0)
      # If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
      tail_done = 0
      while tail_done < len
        tail_done += 32
        y = lower64(rotate(lower64(y - x), 42) * K0 + vs)
        wf = lower64(wf + bytes(orig_s[(16 - tail_done)..(23 - tail_done)]))
        x = lower64(rotate(x, 49) * K0 + wf)
        wf = lower64(wf + vf)
        v = weakHashLen32WithSeeds(orig_s[-tail_done..-1], vf, vs)
        vf, vs = higher64(v), lower64(v)
      end
      # At this point our 48 bytes of state should contain more than
      # enough information for a strong 128-bit hash. We use two
      # different 48-byte-to-8-byte hashes to get a 16-byte final result.
      x = hashLen16(x, vf)
      y = hashLen16(y, wf)
      hf = lower64(hashLen16(lower64(x + vs), ws) + y)
      hs = lower64(hashLen16(lower64(x + ws), lower64(y + vs)))
      (hf << 64) | hs
    end

    # Internal interface routines for CityHash module

    # Unseeded 64-bit hash; picks the routine that matches the byte length.
    def self.hash64(s)
      s = toBinary(s)
      len = s.length
      if len <= 16
        hashLen0To16(s)
      elsif len <= 32
        hashLen17To32(s)
      elsif len <= 64
        hashLen33To64(s)
      else
        hashLenAbove64(s)
      end
    end

    # 64-bit hash with one seed (K2 is used as the first seed).
    def self.hash64WithSeed(s, seed)
      hash64WithSeeds(s, K2, seed)
    end

    # 64-bit hash with two seeds.
    def self.hash64WithSeeds(s, seed0, seed1)
      hashLen16(lower64(hash64(s) - seed0), seed1)
    end

    # Unseeded 128-bit hash; derives a seed from the first bytes of s.
    def self.hash128(s)
      s = toBinary(s)
      len = s.length
      if len >= 16
        seed = (bytes(s[8..15]) << 64) | (bytes(s[0..7]) ^ K3)
        hash128WithSeed(s[16..-1], seed)
      elsif len >= 8
        seed = (bytes(s[-8..-1]) ^ K1) << 64
        seed |= (bytes(s[0..7]) ^ lower64(len * K0))
        hash128WithSeed("", seed)
      else
        hash128WithSeed(s, (K1 << 64) | K0)
      end
    end

  end # Module Internal
end # Module CityHash
@@ -0,0 +1,3 @@
1
module CityHash
  # Release version of the gem; read by city_hash.gemspec.
  # Frozen so the shared constant cannot be modified in place.
  VERSION = "0.0.1".freeze
end
data/license.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 ashwinr
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/readme.md ADDED
@@ -0,0 +1,51 @@
1
+ # CityHash for Ruby
2
+
3
+ This is an implementation of Google's CityHash for Ruby. It supports both 64-bit and 128-bit hashes. The newer CityHashCrc routines have not yet been implemented. Please note that the code has not been optimized for speed.
4
+
5
+ ## Installing CityHash
6
+
7
+ Installing CityHash is as simple as
8
+
9
+ gem install city_hash
10
+
11
+ ## Using CityHash
12
+
13
+ require 'city_hash'
14
+
15
+ # Calculate a 64-bit hash
16
+ CityHash.hash64('New York City')
17
+
18
+ # Calculate a 64-bit hash with seed
19
+ CityHash.hash64('East Village', 0xef23)
20
+
21
+ # Calculate a 64-bit hash with two seeds
22
+ CityHash.hash64('Meatpacking', 0xba3c, 0x5acd)
23
+
24
+ # Calculate a 128-bit hash
25
+ CityHash.hash128('SoHo')
26
+
27
+ # Calculate a 128-bit hash with seed
28
+ CityHash.hash128('Upper West Side', 0x8ad1)
29
+
30
+ ## Testing CityHash
31
+
32
+ The test functions generate random strings and compare the outputs of both the C and Ruby implementations. The source for these strings is Dostoevsky's 'Crime and Punishment', obtained from Project Gutenberg and compressed within test.zip.
33
+
34
+ ### Prerequisites
35
+
36
+ Google's implementation of CityHash must be installed on the test system, since the test routines link against libcityhash.
37
+
38
+ This 'city_hash' gem must already be installed on the test system.
39
+
40
+ ### Running the tests
41
+
42
+ cd test/
43
+ ./run.sh
44
+
45
+ ### Authors
46
+
47
+ Ashwin Ramaswamy
48
+
49
+ ### Copyright
50
+
51
+ Copyright (c) 2011 ashwinr. Please see license.txt for further details.
data/test/Makefile ADDED
@@ -0,0 +1,10 @@
1
# Builds ./city, the C++ driver that the Ruby tests compare against.
CC= g++
LOCAL_LIBS= -lcityhash
OBJS= citymain.o
OUTPUT= city

.PHONY: clean

# Libraries are listed after the objects that reference them: linkers that
# resolve symbols left to right would otherwise drop -lcityhash.
city: citymain.o
	$(CC) $(OBJS) $(LOCAL_LIBS) -o $(OUTPUT)

clean:
	rm -f $(OUTPUT) $(OBJS)
data/test/citymain.cc ADDED
@@ -0,0 +1,97 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <sstream>
4
+ #include <iomanip>
5
+ #include "city.h"
6
+
7
// Print the command-line synopsis and the list of hash-function selectors
// to standard output, then terminate the process with exit status -1.
void usage(char** argv)
{
    const char* program = argv[0];

    std::cout << "Usage: " << program
              << " <hash function> <seed1> <seed2> <hash string>" << std::endl
              << "hashfunction = 1, for CityHash64" << std::endl
              << " = 2, for CityHash64WithSeed" << std::endl
              << " = 3, for CityHash64WithSeeds" << std::endl
              << " = 4, for CityHash128" << std::endl
              << " = 5, for CityHash128WithSeed" << std::endl;

    exit(-1);
}
17
+
18
+ int main(int argc, char** argv)
19
+ {
20
+ if(argc < 3)
21
+ {
22
+ usage(argv);
23
+ }
24
+
25
+ std::stringstream hss, ss1, ss2;
26
+ int hashFunction = -1;
27
+ uint64 seed1, seed2;
28
+ uint128 seed128;
29
+ std::string hashString;
30
+ hss << argv[1], hss >> hashFunction;
31
+ switch(hashFunction)
32
+ {
33
+ case 1:
34
+ case 4:
35
+ hashString = argv[2];
36
+ break;
37
+
38
+ case 2:
39
+ if(argc != 4)
40
+ usage(argv);
41
+ ss1 << argv[2], ss1 >> seed1;
42
+ hashString = argv[3];
43
+ break;
44
+
45
+ case 3:
46
+ case 5:
47
+ if(argc != 5)
48
+ usage(argv);
49
+ ss1 << argv[2], ss1 >> seed1;
50
+ ss2 << argv[3], ss2 >> seed2;
51
+ hashString = argv[4];
52
+ break;
53
+ }
54
+
55
+ uint64 hash64;
56
+ uint128 hash128;
57
+ switch(hashFunction)
58
+ {
59
+ case 1:
60
+ hash64 = CityHash64(hashString.c_str(),
61
+ hashString.length());
62
+ break;
63
+
64
+ case 2:
65
+ hash64 = CityHash64WithSeed(hashString.c_str(),
66
+ hashString.length(), seed1);
67
+ break;
68
+
69
+ case 3:
70
+ hash64 = CityHash64WithSeeds(hashString.c_str(),
71
+ hashString.length(),
72
+ seed1, seed2);
73
+ break;
74
+
75
+ case 4:
76
+ hash128 = CityHash128(hashString.c_str(), hashString.length());
77
+ break;
78
+
79
+ case 5:
80
+ seed128 = uint128(seed1, seed2);
81
+ hash128 = CityHash128WithSeed(hashString.c_str(),
82
+ hashString.length(), seed128);
83
+ break;
84
+ }
85
+
86
+ if(hashFunction <= 3)
87
+ {
88
+ std::cout << "0x" << std::hex << hash64 << std::endl;
89
+ }
90
+ else
91
+ {
92
+ std::cout << "0x" << std::hex << hash128.first << std::setfill('0')
93
+ << std::setw(16) << hash128.second << std::endl;
94
+ }
95
+
96
+ return 0;
97
+ }
data/test/run.sh ADDED
@@ -0,0 +1,18 @@
1
#!/bin/sh

# Compile the C version of city; stop here if the build fails so the tests
# never run against a stale binary.

make || exit 1

# Test whether 'city' exists

if [ ! -f './city' ]
then
    echo "./city does not exist. Exiting..."
    # POSIX sh only accepts exit statuses in 0..255; "exit -1" is rejected
    # by some shells.
    exit 1
fi

# Run the comparison tests

ruby tc_rcity.rb
18
+
data/test/tc_rcity.rb ADDED
@@ -0,0 +1,96 @@
1
+ #!/usr/local/bin/ruby
2
+
3
+ require 'city_hash'
4
+ require 'test/unit'
5
+ require 'zip/zip'
6
+
7
+ =begin
8
+ Run a gamut of test strings against both Google's C++ and
9
+ our Ruby implementation, and verify the results.
10
+ The test verifies both 64 and 128-bit hashes with and without
11
+ random seeds for strings of length from 1 to 2K.
12
+ The strings are sourced in randomly from 'Crime and Punishment'
13
+ obtained from Project Gutenberg.
14
+ =end
15
+
16
# Compares the Ruby CityHash implementation against the ./city reference
# binary on random substrings of the text file(s) packed in test.zip.
class TestCityHash < Test::Unit::TestCase
  # Extract test.zip under /tmp (files already present are reused) and open
  # every extracted .txt file as a source of sample strings.
  def initialize(testFunction)
    super testFunction
    puts 'Unzipping contents of test.zip'
    @files = []
    Zip::ZipFile::open('./test.zip') do |zf|
      zf.each do |file|
        fpath = File.join('/tmp', file.name)
        FileUtils.mkdir_p(File.dirname(fpath))
        zf.extract(file, fpath) unless File.exist?(fpath)
        @files.push(File.new(fpath)) if fpath =~ /txt$/
      end
    end
  end

  # Release the sample files opened in initialize.
  def teardown
    @files.each { |f| f.close unless f.closed? }
  end

  # Read len bytes starting at a random offset that leaves at least one
  # byte after the slice.
  #
  # Raises ArgumentError when the file is not longer than len.
  def getRandomString(file, len)
    span = file.size - len
    raise ArgumentError, "file is too short for a #{len}-byte sample" unless span > 0
    file.pos = rand(span)
    file.read(len)
  end

  # Hash s with the Ruby implementation. function selects the variant:
  # 1..3 are the 64-bit hashes with zero, one and two seeds, 4 is the
  # unseeded 128-bit hash and anything else the seeded 128-bit hash.
  def getHash(function, seed1, seed2, s)
    case function
    when 1 then CityHash.hash64(s)
    when 2 then CityHash.hash64(s, seed1)
    when 3 then CityHash.hash64(s, seed1, seed2)
    when 4 then CityHash.hash128(s)
    else CityHash.hash128(s, (seed2 << 64) | seed1)
    end
  end

  # Hash s with the ./city reference binary and return the result as an
  # Integer. The arguments are passed as an argv array, so no shell is
  # involved and no characters in s need escaping.
  def getReferenceHash(function, seed1, seed2, s)
    args = [function.to_s]
    args << seed1.to_s unless function == 1 || function == 4
    args << seed2.to_s if function == 3 || function == 5
    args << s
    IO.popen(['./city', *args]) { |io| io.read }.hex
  end

  # Strings of 1 to 2048 bytes, two samples per length, all five hash
  # functions, each with fresh random seeds. Mismatches are also logged to
  # failures.txt before the assertion fires.
  def test_city_hash
    max_int_64 = 2**64-1
    puts 'Running tests'
    file = @files[0]  # only a single test file
    File.open('failures.txt', 'w') do |ffile|
      (1..2048).each do |i|    # length of hash string
        2.times do             # number of iterations
          (1..5).each do |k|   # all hash functions
            seed1 = rand(max_int_64)
            seed2 = rand(max_int_64)
            string = getRandomString(file, i)
            # Remove any non-ASCII bytes. The /n flag makes this a binary
            # regexp; without it the byte escapes are rejected when the
            # source encoding is UTF-8.
            string.gsub!(/[\x80-\xff]/n, "")
            # Calculate Google's C++ hash
            cHex = getReferenceHash(k, seed1, seed2, string)
            # Calculate our Ruby hash
            rHex = getHash(k, seed1, seed2, string)
            # Verify hashes
            ffile.puts "Failed hash function #{k} for string \"#{string}\" with hashes #{cHex} and #{rHex}" if cHex != rHex
            assert(cHex == rHex)
          end
        end
      end
    end
  end

end
data/test/test.zip ADDED
Binary file
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: city_hash
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Ashwin Ramaswamy
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-05-17 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: test-unit
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: rubyzip
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
44
+ type: :development
45
+ version_requirements: *id002
46
+ description: Google's CityHash Implementation in Ruby
47
+ email:
48
+ - ashwin.raman9@gmail.com
49
+ executables: []
50
+
51
+ extensions: []
52
+
53
+ extra_rdoc_files: []
54
+
55
+ files:
56
+ - .gitignore
57
+ - Gemfile
58
+ - Gemfile.lock
59
+ - Rakefile
60
+ - city_hash.gemspec
61
+ - lib/city_hash.rb
62
+ - lib/city_hash/version.rb
63
+ - license.txt
64
+ - readme.md
65
+ - test/Makefile
66
+ - test/citymain.cc
67
+ - test/run.sh
68
+ - test/tc_rcity.rb
69
+ - test/test.zip
70
+ has_rdoc: true
71
+ homepage: ""
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options: []
76
+
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ segments:
93
+ - 0
94
+ version: "0"
95
+ requirements: []
96
+
97
+ rubyforge_project: city_hash
98
+ rubygems_version: 1.3.7
99
+ signing_key:
100
+ specification_version: 3
101
+ summary: CityHash for Ruby
102
+ test_files:
103
+ - test/Makefile
104
+ - test/citymain.cc
105
+ - test/run.sh
106
+ - test/tc_rcity.rb
107
+ - test/test.zip