nilsimsa 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,18 @@
1
+ nilsimsa
2
+ --------
3
+ Nilsimsa is a distance based hash, which is the opposite of more familiar
4
+ hashes like MD5. Instead of small changes making a large difference in
5
+ the resulting hash (to avoid collisions), distance based hashes cause
6
+ similar values to have similar output. This is good for detecting
7
+ near-duplicate documents without having to store the original text.
8
+
9
+ Standard usage is as follows:
10
+
11
+ require 'nilsimsa'
12
+
13
+ n1 = Nilsimsa::new
14
+ text1 = "The quick brown fox"
15
+ n1.update(text1)
16
+ puts "Text '#{text1}': #{n1.hexdigest}"
17
+
18
+
data/bin/nilsimsa ADDED
@@ -0,0 +1,18 @@
1
#!/usr/bin/env ruby
# Command-line front end: prints "<hexdigest> <filename>" for every file
# named on the command line, or a usage hint when no file is given.
require 'rubygems'
require 'nilsimsa'

if ARGV.size > 0 then
  ARGV.each do |filename|
    # File.exist? replaces FileTest::exists?, which no longer exists in
    # Ruby 3.2 and later; File.exist? is available on every Ruby version.
    if File.exist?(filename) then
      n = Nilsimsa::new
      n.file(filename)
      puts n.hexdigest+" #{filename}"
    else
      puts "error: can't find '#{filename}'"
    end
  end
else
  puts "Specify a file to hash"
end
18
+
data/examples/simple.rb ADDED
@@ -0,0 +1,35 @@
1
+ require 'nilsimsa'
2
+
3
+ # Levenshtein implementation from
4
+ # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance
5
+ # Used under the GNU Free Documentation license
6
class String
  # Levenshtein (edit) distance between this string and +other+, counted in
  # Unicode code points: the minimum number of single-character insertions,
  # deletions and substitutions needed to turn one into the other.
  #
  # Only two rows of the dynamic-programming table are kept; the shorter
  # string is laid out along the row so the rows stay as small as possible.
  def levenshtein(other)
    short = unpack('U*')
    long = other.unpack('U*')
    short, long = long, short if short.length > long.length

    width = short.length
    row = (0..width).to_a
    long.each_with_index do |long_cp, i|
      above = row
      row = [i + 1] + [0] * width
      short.each_with_index do |short_cp, j|
        substitute = above[j] + (short_cp == long_cp ? 0 : 1)
        row[j + 1] = [above[j + 1] + 1, row[j] + 1, substitute].min
      end
    end
    row[width]
  end
end
24
+
25
# Hash two similar sentences, print each hex digest, then print how far
# apart the two digests are as strings (Levenshtein distance).
digests = ["The quick brown fox", "The quick red fox"].map do |text|
  hasher = Nilsimsa::new
  hasher.update(text)
  puts "'#{text}':\n #{hasher.hexdigest}"
  hasher.hexdigest
end

puts "Distance: #{digests[0].levenshtein(digests[1])}"
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require 'mkmf'
3
+
4
+ create_makefile( 'nilsimsa_native' ) # target name matches Init_nilsimsa_native() in nilsimsa.c and the require 'nilsimsa_native' in nilsimsa.rb
data/ext/nilsimsa.c ADDED
@@ -0,0 +1,312 @@
1
+ /*
2
+ * Source: Digest-Nilsimsa-0.06; _nilsimsa.c, nilsimsa.h
3
+ * Changes: 2005-04-14 Stephen Lewis <slewis@orcon.net.nz>
4
+ * - stripped superfluous code
5
+ * - some cleanups, reformatting
6
+ * - refactored to provide more appropriate ruby interface
7
+ *
8
+ * NOTE - I haven't gotten around to fixing the previous comment
9
+ * headers below this
10
+ */
11
+
12
+ /*
13
+ * chad's modifications for perl xs - Digest::Nilsimsa
14
+ *
15
+ * main() - removed (too many warnings)
16
+ * accbuf - added, practically identical to accfile()
17
+ * dprint() - added (prints debug msgs to debug.txt)
18
+ *
19
+ * $Id: _nilsimsa.c,v 1.1 2002/05/20 22:29:07 chad Exp $
20
+ */
21
+
22
+ /***************************************************************************
23
+ * main.c - nilsimsa
24
+ * -------------------
25
+ * begin : Fri Mar 16 01:41:08 EST 2001
26
+ * copyright : (C) 2001-2002 by cmeclax
27
+ * email : cmeclax@ixazon.dynip.com
28
+ ***************************************************************************/
29
+
30
+ /***************************************************************************
31
+ * *
32
+ * This program is free software; you can redistribute it and/or modify *
33
+ * it under the terms of the GNU General Public License as published by *
34
+ * the Free Software Foundation; either version 2 of the License, or *
35
+ * (at your option) any later version. *
36
+ * *
37
+ ***************************************************************************/
38
+
39
+ /* NOTE - this should really use the support code from Digest, but
40
+ * would require a ruby source tree to build in that case ( the
41
+ * required headers don't seem to be generally installed :/ )
42
+ */
43
+
44
+ #include <ruby.h>
45
+ #include <assert.h>
46
+
47
+ #define NSR_CODE_LEN 32 /* size of a nilsimsa code in bytes (32 * 8 = 256 bits, one per acc[] bucket) */
48
+ #define RB_NSR_IVAR "cdata" /* no @ prefix, so the instance variable is inaccessible from ruby code */
49
+
50
+ #define tran3(a,b,c,n) (((tran[((a)+(n))&255]^tran[(b)]*((n)+(n)+1))+tran[(c)^tran[n]])&255) /* hashes the triple (a,b,c), variant n, into an acc[] bucket index 0..255 via the tran[] table */
51
+
52
+
53
+ struct nsrecord { /* running state of one nilsimsa hash computation */
54
+ int acc[256]; /* counts each trigram's hash (bucket index comes from tran3) */
55
+ char code[NSR_CODE_LEN]; /* the nilsimsa code as a bit vector; rebuilt from acc[] by nsr_digest() */
56
+
57
+ unsigned int chcount; /* number of characters processed so far */
58
+ int lastch[4]; /* last 4 characters processed, most recent in [0]; -1 marks a slot not filled yet */
59
+ };
60
+
61
+ void nsr_init( struct nsrecord *a ); /* reset a record to its empty state */
62
+ void nsr_digest( struct nsrecord *a ); /* rebuild a->code from the counts in a->acc */
63
+ void nsr_update(struct nsrecord *, char *buf,unsigned long len); /* feed len bytes of buf into the record */
64
+ int nilsimsa(char *a,char *b); /* compare two NSR_CODE_LEN-byte codes; returns 128 minus the differing bit count */
65
+ /*void nsr_free( struct nsrecord *r );*/
66
+ static void filltran(void); /* fills tran[] */
67
+ static void fillpopcount(void); /* fills popcount[] */
68
+
69
+ static struct nsrecord *get_nsr( VALUE obj ); /* fetch the record stored in a Nilsimsa object's RB_NSR_IVAR */
70
+ VALUE rbns_init(int argc, VALUE *argv, VALUE self); /* replacement for Nilsimsa#initialize */
71
+ VALUE rbns_update(VALUE self, VALUE data); /* Nilsimsa#update */
72
+ VALUE rbns_nilsimsa(VALUE self, VALUE other); /* Nilsimsa#nilsimsa */
73
+ VALUE rbns_digest(VALUE self); /* Nilsimsa#digest */
74
+ void Init_nilsimsa_native(void); /* extension entry point: builds the tables and installs the methods above */
75
+
76
+ unsigned char tran[256], popcount[256]; /* lookup tables, filled once by filltran() / fillpopcount() from Init_nilsimsa_native() */
77
+
78
+ /* formerly clear() */
79
+ void
80
+ nsr_init( struct nsrecord *a ) {
81
+ assert( a );
82
+
83
+ memset(a->acc,0,sizeof(a->acc));
84
+ memset(a->code,0,sizeof(a->code));
85
+
86
+ a->chcount = 0;
87
+ {
88
+ int i;
89
+ for (i=0; i<4; i++) {
90
+ a->lastch[i] = -1;
91
+ }
92
+ }
93
+ }
94
+
95
+ static void
96
+ filltran(void) { /* fills the global tran[] table (256 entries) that the tran3() macro indexes */
97
+ int i,j,k;
98
+ for (i=j=0;i<256;i++) {
99
+ j=(j*53+1)&255; /* next candidate is derived from the previous entry's value */
100
+ j+=j;
101
+ if (j>255) {
102
+ j-=255;
103
+ }
104
+ for (k=0;k<i;k++) { /* compare the candidate against entries already stored */
105
+ if (j==tran[k]) {
106
+ j=(j+1)&255; /* collision: try the next value, wrapping at 256 */
107
+ k=0; /* restart the scan; the loop's k++ runs next, so the rescan resumes at tran[1] */
108
+ }
109
+ }
110
+ tran[i]=j;
111
+ }
112
+ }
113
+
114
+ static void
115
+ fillpopcount(void) {
116
+ int i,j;
117
+ memset(popcount,0,sizeof(popcount));
118
+
119
+ for (i=0;i<256;i++) {
120
+ for (j=0;j<8;j++) {
121
+ popcount[i]+=1&(i>>j);
122
+ }
123
+ }
124
+ }
125
+
126
+
127
+ /* formerly accfile() */
128
+ void
129
+ nsr_update(struct nsrecord *a, char *buf, unsigned long len) {
130
+ unsigned int idx;
131
+ int *lastch=a->lastch; // convenience
132
+
133
+ assert( a );
134
+
135
+ for(idx=0; idx<len; idx++) {
136
+ unsigned char ch = (unsigned char) buf[idx];
137
+ a->chcount++;
138
+ if (lastch[1]>=0)
139
+ a->acc[tran3(ch,lastch[0],lastch[1],0)]++;
140
+ if (lastch[2]>=0) {
141
+ a->acc[tran3(ch,lastch[0],lastch[2],1)]++;
142
+ a->acc[tran3(ch,lastch[1],lastch[2],2)]++;
143
+ }
144
+ if (lastch[3]>=0) {
145
+ a->acc[tran3(ch,lastch[0],lastch[3],3)]++;
146
+ a->acc[tran3(ch,lastch[1],lastch[3],4)]++;
147
+ a->acc[tran3(ch,lastch[2],lastch[3],5)]++;
148
+ a->acc[tran3(lastch[3],lastch[0],ch,6)]++;
149
+ a->acc[tran3(lastch[3],lastch[2],ch,7)]++;
150
+ }
151
+ lastch[3]=lastch[2];
152
+ lastch[2]=lastch[1];
153
+ lastch[1]=lastch[0];
154
+ lastch[0]=ch;
155
+ }
156
+ }
157
+
158
+ /* formerly makecode() */
159
+ void
160
+ nsr_digest(struct nsrecord *a) {
161
+ int i;
162
+ int total=0; /* total number of trigrams counted */
163
+ int threshold=0; /* mean of all numbers in acc */
164
+
165
+ assert( a );
166
+
167
+ switch (a->chcount) {
168
+ case 0:
169
+ case 1:
170
+ case 2:
171
+ break;
172
+ case 3:
173
+ total = 1;
174
+ break;
175
+ case 4:
176
+ total = 4;
177
+ break;
178
+ default:
179
+ total = (8 * a->chcount) - 28;
180
+ break;
181
+ }
182
+
183
+ threshold=total/256; /* round down because criterion is >threshold */
184
+
185
+ memset(a->code,0,sizeof(a->code));
186
+ for (i=0;i<256;i++) {
187
+ a->code[i>>3]+=((a->acc[i]>threshold)<<(i&7));
188
+ }
189
+ }
190
+
191
+ /* NOTE - assumes both of length 32 */
192
+ int
193
+ nilsimsa(char *a,char *b) {
194
+ int i,bits=0;
195
+ assert( a );
196
+ assert( b );
197
+
198
+ for (i=0;i<NSR_CODE_LEN;i++) {
199
+ bits+=popcount[255&(a[i]^b[i])];
200
+ }
201
+
202
+ return 128-bits;
203
+ }
204
+
205
+ /*
206
+ void
207
+ nsr_free( struct nsrecord *r ) {
208
+ if (r) {
209
+ free( r );
210
+ }
211
+ }
212
+ */
213
+
214
+
215
+ /*
216
+ *
217
+ * begin ruby wrapper functions
218
+ *
219
+ */
220
+
221
+ static struct nsrecord *
222
+ get_nsr( VALUE obj ) {
223
+ VALUE wrapped;
224
+ struct nsrecord *ret;
225
+
226
+ if (!RTEST( rb_funcall( obj, rb_intern( "kind_of?" ), 1,
227
+ rb_eval_string("Nilsimsa")))) {
228
+ /* FIXME should raise exception */
229
+ return NULL;
230
+ }
231
+
232
+ wrapped = rb_iv_get( obj, RB_NSR_IVAR );
233
+ if (Qnil == wrapped) {
234
+ return NULL;
235
+ }
236
+
237
+ Data_Get_Struct( wrapped, struct nsrecord, ret );
238
+ return ret;
239
+ }
240
+
241
+ VALUE
242
+ rbns_init(int argc, VALUE *argv, VALUE self) {
243
+ VALUE wrapped_nsr;
244
+ struct nsrecord *r;
245
+ wrapped_nsr = Data_Make_Struct( rb_cObject, struct nsrecord,
246
+ NULL, -1, r );
247
+ rb_iv_set( self, RB_NSR_IVAR, wrapped_nsr );
248
+ nsr_init( r );
249
+
250
+ return rb_funcall2( self, rb_intern( "old_initialize" ), argc, argv );
251
+ }
252
+
253
+ VALUE
254
+ rbns_update(VALUE self, VALUE data) {
255
+ struct nsrecord *r;
256
+ char *chdata;
257
+ long chdata_len;
258
+ r = get_nsr( self );
259
+
260
+ Check_Type( data, T_STRING );
261
+ chdata = rb_str2cstr( data, &chdata_len );
262
+ nsr_update( r, chdata, chdata_len );
263
+ return data;
264
+ }
265
+
266
+ VALUE
267
+ rbns_nilsimsa(VALUE self, VALUE other) {
268
+ long len;
269
+ char *d1;
270
+ char *d2;
271
+
272
+ d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
273
+ if (len < NSR_CODE_LEN) {
274
+ return Qnil;
275
+ }
276
+
277
+ Check_Type( other, T_STRING );
278
+ d2 = rb_str2cstr( other, &len );
279
+ if (len < NSR_CODE_LEN) {
280
+ return Qnil;
281
+ }
282
+
283
+ return INT2NUM( nilsimsa( d1, d2 ) );
284
+ }
285
+
286
+ VALUE
287
+ rbns_digest(VALUE self) {
288
+ struct nsrecord *r=get_nsr( self );
289
+
290
+ nsr_digest( r );
291
+
292
+ /* reverse a newly created string of the digest */
293
+ return rb_funcall( rb_str_new( r->code, NSR_CODE_LEN ),
294
+ rb_intern( "reverse"), 0 );
295
+ }
296
+
297
+ void
298
+ Init_nilsimsa_native(void) {
299
+ VALUE rb_cNilsimsa;
300
+ /* initialize invariant data */
301
+ filltran();
302
+ fillpopcount();
303
+
304
+ /* this grafts itself over the top of an existing Nilsimsa class */
305
+ rb_cNilsimsa = rb_eval_string( "Nilsimsa" );
306
+ /* we'll call old_initialize from our new initialize */
307
+ rb_define_alias( rb_cNilsimsa, "old_initialize", "initialize" );
308
+ rb_define_method( rb_cNilsimsa, "initialize", rbns_init, -1 );
309
+ rb_define_method( rb_cNilsimsa, "update", rbns_update, 1 );
310
+ rb_define_method( rb_cNilsimsa, "nilsimsa", rbns_nilsimsa, 1 );
311
+ rb_define_method( rb_cNilsimsa, "digest", rbns_digest, 0 );
312
+ }
data/gemspec.rb ADDED
@@ -0,0 +1,33 @@
1
require 'rake'
require 'mkmf'

# Gem specification for nilsimsa: packages the pure-Ruby implementation,
# the command-line script, the examples and the optional C extension.
SPEC = Gem::Specification.new do |spec|
  # Descriptive and source information for this gem.
  spec.name = "nilsimsa"
  spec.version = "1.0.1"
  spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
  spec.author = "Jonathan Wilkins"
  spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
  spec.has_rdoc = true
  spec.extra_rdoc_files = ["README"]
  spec.require_path = "."
  spec.autorequire = "nilsimsa.rb"

  # Package regular files only, and recognise build artefacts by their
  # extension rather than by substring: include?(".o") also matched names
  # such as "notes.old", and the bare directories matched by '*' used to
  # end up in the file list.
  build_artefacts = [".gem", ".so", ".o"]
  candidates = FileList['*', 'examples/*', 'bin/*', 'ext/*']
  spec.files = candidates.to_a.select do |filename|
    File.file?(filename) &&
      !build_artefacts.include?(File.extname(filename)) &&
      !filename.include?("Makefile")
  end
  spec.executables = ['nilsimsa']

  # optional native component
  # NOTE(review): cc_command comes from mkmf; confirm it is really falsy
  # when no compiler is installed, otherwise this branch is always taken.
  if cc_command
    spec.extensions << 'ext/extconf.rb'
  end

  puts "Building gem w/ "
  spec.files.each do |f|
    puts "- #{f}"
  end

end
data/nilsimsa.rb ADDED
@@ -0,0 +1,177 @@
1
+ # Nilsimsa hash (build 20050414)
2
+ # Ruby port (C) 2005 Martin Pirker
3
+ # released under GNU GPL V2 license
4
+ #
5
+ # inspired by Digest::Nilsimsa-0.06 from Perl CPAN and
6
+ # the original C nilsimsa-0.2.4 implementation by cmeclax
7
+ # http://ixazon.dynip.com/~cmeclax/nilsimsa.html
8
+
9
# Pure-Ruby implementation of the Nilsimsa distance-based hash.
#
# Feed data with #update (or #<< / #file), read the 32-byte code with
# #digest or #hexdigest, and compare two codes with #nilsimsa.
class Nilsimsa

  # 256-entry substitution table used by #tran3.  Stored as an Array of
  # Integers so that indexing yields a number on every Ruby version
  # (String#[] returns a one-character String from Ruby 1.9 on).
  TRAN = (
    "\x02\xD6\x9E\x6F\xF9\x1D\x04\xAB\xD0\x22\x16\x1F\xD8\x73\xA1\xAC" +
    "\x3B\x70\x62\x96\x1E\x6E\x8F\x39\x9D\x05\x14\x4A\xA6\xBE\xAE\x0E" +
    "\xCF\xB9\x9C\x9A\xC7\x68\x13\xE1\x2D\xA4\xEB\x51\x8D\x64\x6B\x50" +
    "\x23\x80\x03\x41\xEC\xBB\x71\xCC\x7A\x86\x7F\x98\xF2\x36\x5E\xEE" +
    "\x8E\xCE\x4F\xB8\x32\xB6\x5F\x59\xDC\x1B\x31\x4C\x7B\xF0\x63\x01" +
    "\x6C\xBA\x07\xE8\x12\x77\x49\x3C\xDA\x46\xFE\x2F\x79\x1C\x9B\x30" +
    "\xE3\x00\x06\x7E\x2E\x0F\x38\x33\x21\xAD\xA5\x54\xCA\xA7\x29\xFC" +
    "\x5A\x47\x69\x7D\xC5\x95\xB5\xF4\x0B\x90\xA3\x81\x6D\x25\x55\x35" +
    "\xF5\x75\x74\x0A\x26\xBF\x19\x5C\x1A\xC6\xFF\x99\x5D\x84\xAA\x66" +
    "\x3E\xAF\x78\xB3\x20\x43\xC1\xED\x24\xEA\xE6\x3F\x18\xF3\xA0\x42" +
    "\x57\x08\x53\x60\xC3\xC0\x83\x40\x82\xD7\x09\xBD\x44\x2A\x67\xA8" +
    "\x93\xE0\xC2\x56\x9F\xD9\xDD\x85\x15\xB4\x8A\x27\x28\x92\x76\xDE" +
    "\xEF\xF8\xB2\xB7\xC9\x3D\x45\x94\x4B\x11\x0D\x65\xD5\x34\x8B\x91" +
    "\x0C\xFA\x87\xE9\x7C\x5B\xB1\x4D\xE5\xD4\xCB\x10\xA2\x17\x89\xBC" +
    "\xDB\xB0\xE2\x97\x88\x52\xF7\x48\xD3\x61\x2C\x3A\x2B\xD1\x8C\xFB" +
    "\xF1\xCD\xE4\x6A\xE7\xA9\xFD\xC4\x37\xC8\xD2\xF6\xDF\x58\x72\x4E"
  ).unpack("C*").freeze

  # POPC[v] is the number of 1 bits in the byte value v (0..255).
  POPC = (0..255).map { |byte| byte.to_s(2).count("1") }.freeze

  # Creates an empty hash state and feeds every given string into it.
  def initialize(*data)
    @threshold=0; @count=0
    @acc =Array::new(256,0)
    @lastch0=@lastch1=@lastch2=@lastch3= -1

    data.each do |d| update(d) end
  end

  # Hashes the triple (a, b, c), variant n, into a bucket index 0..255.
  def tran3(a,b,c,n)
    (((TRAN[(a+n)&255]^TRAN[b]*(n+n+1))+TRAN[(c)^TRAN[n]])&255)
  end

  # Feeds the bytes of +data+ into the hash.  For every byte the buckets of
  # the trigrams it forms with the previous four bytes are incremented; the
  # @lastchN variables hold those bytes (-1 = not seen yet).
  def update(data)
    data.each_byte do |ch|
      @count +=1
      if @lastch1>-1 then
        @acc[tran3(ch,@lastch0,@lastch1,0)] +=1
      end
      if @lastch2>-1 then
        @acc[tran3(ch,@lastch0,@lastch2,1)] +=1
        @acc[tran3(ch,@lastch1,@lastch2,2)] +=1
      end
      if @lastch3>-1 then
        @acc[tran3(ch,@lastch0,@lastch3,3)] +=1
        @acc[tran3(ch,@lastch1,@lastch3,4)] +=1
        @acc[tran3(ch,@lastch2,@lastch3,5)] +=1
        @acc[tran3(@lastch3,@lastch0,ch,6)] +=1
        @acc[tran3(@lastch3,@lastch2,ch,7)] +=1
      end
      @lastch3=@lastch2
      @lastch2=@lastch1
      @lastch1=@lastch0
      @lastch0=ch
    end
  end

  # Returns the 32-byte code as a binary String.  Bit i is set when bucket i
  # holds more than the mean count (total trigrams / 256, rounded down); the
  # byte order of the result is reversed.
  def digest
    @total =
      case @count
      when 0..2 then 0
      when 3 then 1
      when 4 then 4
      else (8*@count)-28
      end
    @threshold=@total/256

    # Build the bit vector as integers, then pack it: on Ruby 1.9+ a String
    # element is not a number, so the bytes cannot be added to in place.
    bytes = Array::new(32,0)
    (0..255).each do |i|
      bytes[i>>3] |= (1<<(i&7)) if @acc[i]>@threshold
    end
    @code = bytes.pack("C*")

    @code.reverse
  end

  # The digest as a 64-character lowercase hex string.
  def hexdigest
    digest.unpack("H*")[0]
  end

  def to_s
    hexdigest
  end

  # Alias-style shortcut for #update.
  def <<(whatever)
    update(whatever)
  end

  # True when this object's current digest equals the given digest String.
  def ==(otherdigest)
    digest == otherdigest
  end

  # Feeds the contents of the file named +thisone+ into the hash, reading
  # it in binary mode in chunks.
  def file(thisone)
    File.open(thisone,"rb") do |f|
      until f.eof? do update(f.read(10480)) end
    end
  end

  # Compares this object's digest with +otherdigest+ (a 32-byte digest
  # String) and returns 128 minus the number of differing bits.  Returns
  # nil when +otherdigest+ is shorter than 32 bytes, as the native
  # implementation does.
  def nilsimsa(otherdigest)
    mine = digest.unpack("C*")
    theirs = otherdigest.unpack("C*")
    return nil if theirs.size < 32

    bits=0
    (0..31).each do |i|
      bits += POPC[mine[i]^theirs[i]]
    end
    (128-bits)
  end

end
135
+
136
# Prints one "label: true/false" line per check against known digests,
# followed by an empty line.  Uses whichever Nilsimsa implementation is
# currently loaded.
def selftest
  base_hex = '14c8118000000000030800000004042004189020001308014088003280000078'
  extended_hex = '14c811840010000c0328200108040630041890200217582d4098103280000078'

  whole = Nilsimsa::new
  whole.update("abcdefgh")
  puts "abcdefgh: #{whole.hexdigest == base_hex}"

  # the same text supplied in two pieces must hash identically
  pieces = Nilsimsa::new("abcd","efgh")
  puts "abcd efgh: #{pieces.hexdigest == base_hex}"
  puts "digest: #{whole == pieces.digest}"

  whole.update("ijk")
  puts "ijk: #{whole.hexdigest == extended_hex}"
  puts "nilsimsa: #{whole.nilsimsa(pieces.digest) == 109}"
  puts
end
148
+
149
# Entry point when this file is run directly.  With arguments, print the hex
# digest of each named file (using the native core when it can be loaded).
# Without arguments, run the self-test with the pure-Ruby code and then
# again with the compiled extension from the current directory, if present.
if __FILE__ == $0 then
  if ARGV.size>0 then
    begin # load C core - if available
      require 'nilsimsa_native'
    rescue LoadError
      # ignore lack of native module
    end

    ARGV.each do |filename|
      # File.exist? replaces FileTest::exists?, which no longer exists in
      # Ruby 3.2 and later.
      if File.exist?(filename) then
        n = Nilsimsa::new
        n.file(filename)
        puts n.hexdigest+" #{filename}"
      else
        puts "error: can't find '#{filename}'"
      end
    end
  else
    puts 'Running selftest using native ruby version'
    selftest
    begin # load C core - if available
      require './nilsimsa_native'
      puts 'Running selftest using compiled nilsimsa in current dir'
      selftest
    rescue LoadError
      puts "Couldnt run selftest with compiled nilsimsa"
    end
  end
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: nilsimsa
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.1
7
+ date: 2007-11-15 00:00:00 -08:00
8
+ summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
9
+ require_paths:
10
+ - .
11
+ email: jwilkins[at]nospam[dot]bitland[dot]net
12
+ homepage:
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: nilsimsa.rb
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Jonathan Wilkins
31
+ files:
32
+ - README
33
+ - examples
34
+ - ext
35
+ - nilsimsa.rb
36
+ - bin
37
+ - gemspec.rb
38
+ - examples/simple.rb
39
+ - bin/nilsimsa
40
+ - ext/nilsimsa.c
41
+ - ext/extconf.rb
42
+ test_files: []
43
+
44
+ rdoc_options: []
45
+
46
+ extra_rdoc_files:
47
+ - README
48
+ executables:
49
+ - nilsimsa
50
+ extensions:
51
+ - ext/extconf.rb
52
+ requirements: []
53
+
54
+ dependencies: []
55
+