jwilkins-nilsimsa 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,18 @@
1
+ nilsimsa
2
+ --------
3
+ Nilsimsa is a distance-based hash, which is the opposite of more familiar
4
+ hashes like MD5. Instead of small changes making a large difference in
5
+ the resulting hash (to avoid collisions), distance-based hashes map
6
+ similar inputs to similar outputs. This is good for detecting
7
+ near-duplicate documents without having to store the original text.
8
+
9
+ Standard usage is as follows:
10
+
11
+ require 'nilsimsa'
12
+
13
+ n1 = Nilsimsa::new
14
+ text1 = "The quick brown fox"
15
+ n1.update(text1)
16
+ puts "Text '#{text1}': #{n1.hexdigest}"
17
+
18
+
data/bin/nilsimsa ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'nilsimsa'
4
+
5
# Print the Nilsimsa hex digest of every file named on the command line,
# or a usage hint when no file was given.
if ARGV.size > 0 then
  ARGV.each do |filename|
    # File.exist? replaces FileTest::exists?, which was removed in Ruby 3.2;
    # File.exist? is also available on the old interpreters this gem targets.
    if File.exist?(filename) then
      n = Nilsimsa::new
      n.file(filename)
      puts n.hexdigest+" #{filename}"
    else
      puts "error: can't find '#{filename}'"
    end
  end
else
  puts "Specify a file to hash"
end
18
+
@@ -0,0 +1,35 @@
1
+ require 'nilsimsa'
2
+
3
+ # Levenshtein implementation from
4
+ # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance
5
+ # Used under the GNU Free Documentation license
6
class String
  # Levenshtein edit distance between this string and +other+, computed on
  # Unicode code points. Only two rows of the dynamic-programming table are
  # kept at a time, and the rows run along the shorter of the two strings.
  def levenshtein(other)
    shorter = self.unpack('U*')
    longer  = other.unpack('U*')
    shorter, longer = longer, shorter if shorter.length > longer.length
    width = shorter.length

    row = (0..width).to_a
    longer.each_with_index do |code, idx|
      above = row
      row = [idx + 1] + [0] * width
      (1..width).each do |col|
        # cost of turning the prefix into the other prefix by substitution
        substitution = above[col - 1] + (shorter[col - 1] == code ? 0 : 1)
        row[col] = [above[col] + 1, row[col - 1] + 1, substitution].min
      end
    end
    row[width]
  end
end
24
+
25
# Hash two similar sentences, print each digest, then print the Levenshtein
# distance between the two hex digests.
samples = ["The quick brown fox", "The quick red fox"]
hashers = samples.map do |text|
  hasher = Nilsimsa::new
  hasher.update(text)
  puts "'#{text}':\n #{hasher.hexdigest}"
  hasher
end

puts "Distance: #{hashers[0].hexdigest.levenshtein(hashers[1].hexdigest)}"
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require 'mkmf'
3
+
4
+ create_makefile( 'nilsimsa_native' )
data/ext/nilsimsa.c ADDED
@@ -0,0 +1,312 @@
1
+ /*
2
+ * Source: Digest-Nilsimsa-0.06; _nilsimsa.c, nilsimsa.h
3
+ * Changes: 2005-04-14 Stephen Lewis <slewis@orcon.net.nz>
4
+ * - stripped superfluous code
5
+ * - some cleanups, reformatting
6
+ * - refactored to provide more appropriate ruby interface
7
+ *
8
+ * NOTE - I haven't gotten around to fixing the previous comment
9
+ * headers below this
10
+ */
11
+
12
+ /*
13
+ * chad's modifications for perl xs - Digest::Nilsimsa
14
+ *
15
+ * main() - removed (too many warnings)
16
+ * accbuf - added, practically identical to accfile()
17
+ * dprint() - added (prints debug msgs to debug.txt)
18
+ *
19
+ * $Id: _nilsimsa.c,v 1.1 2002/05/20 22:29:07 chad Exp $
20
+ */
21
+
22
+ /***************************************************************************
23
+ * main.c - nilsimsa
24
+ * -------------------
25
+ * begin : Fri Mar 16 01:41:08 EST 2001
26
+ * copyright : (C) 2001-2002 by cmeclax
27
+ * email : cmeclax@ixazon.dynip.com
28
+ ***************************************************************************/
29
+
30
+ /***************************************************************************
31
+ * *
32
+ * This program is free software; you can redistribute it and/or modify *
33
+ * it under the terms of the GNU General Public License as published by *
34
+ * the Free Software Foundation; either version 2 of the License, or *
35
+ * (at your option) any later version. *
36
+ * *
37
+ ***************************************************************************/
38
+
39
+ /* NOTE - this should really use the support code from Digest, but
40
+ * would require a ruby source tree to build in that case ( the
41
+ * required headers don't seem to be generally installed :/ )
42
+ */
43
+
44
+ #include <ruby.h>
45
+ #include <assert.h>
46
+
47
+ #define NSR_CODE_LEN 32
48
+ #define RB_NSR_IVAR "cdata" /* no @ prefix, which makes it inaccessible from ruby */
49
+
50
+ #define tran3(a,b,c,n) (((tran[((a)+(n))&255]^tran[(b)]*((n)+(n)+1))+tran[(c)^tran[n]])&255)
51
+
52
+
53
+ struct nsrecord {
54
+ int acc[256]; /* counts each trigram's hash */
55
+ char code[NSR_CODE_LEN]; /* the nilsimsa code as a bit vector */
56
+
57
+ unsigned int chcount; /* number of characters processed so far */
58
+ int lastch[4]; /* last 4 characters processed */
59
+ };
60
+
61
+ void nsr_init( struct nsrecord *a );
62
+ void nsr_digest( struct nsrecord *a );
63
+ void nsr_update(struct nsrecord *, char *buf,unsigned long len);
64
+ int nilsimsa(char *a,char *b);
65
+ /*void nsr_free( struct nsrecord *r );*/
66
+ static void filltran(void);
67
+ static void fillpopcount(void);
68
+
69
+ static struct nsrecord *get_nsr( VALUE obj );
70
+ VALUE rbns_init(int argc, VALUE *argv, VALUE self);
71
+ VALUE rbns_update(VALUE self, VALUE data);
72
+ VALUE rbns_nilsimsa(VALUE self, VALUE other);
73
+ VALUE rbns_digest(VALUE self);
74
+ void Init_nilsimsa_native(void);
75
+
76
+ unsigned char tran[256], popcount[256];
77
+
78
+ /* formerly clear() */
79
+ void
80
+ nsr_init( struct nsrecord *a ) {
81
+ assert( a );
82
+
83
+ memset(a->acc,0,sizeof(a->acc));
84
+ memset(a->code,0,sizeof(a->code));
85
+
86
+ a->chcount = 0;
87
+ {
88
+ int i;
89
+ for (i=0; i<4; i++) {
90
+ a->lastch[i] = -1;
91
+ }
92
+ }
93
+ }
94
+
95
+ static void
96
+ filltran(void) {
97
+ int i,j,k;
98
+ for (i=j=0;i<256;i++) {
99
+ j=(j*53+1)&255;
100
+ j+=j;
101
+ if (j>255) {
102
+ j-=255;
103
+ }
104
+ for (k=0;k<i;k++) {
105
+ if (j==tran[k]) {
106
+ j=(j+1)&255;
107
+ k=0;
108
+ }
109
+ }
110
+ tran[i]=j;
111
+ }
112
+ }
113
+
114
+ static void
115
+ fillpopcount(void) {
116
+ int i,j;
117
+ memset(popcount,0,sizeof(popcount));
118
+
119
+ for (i=0;i<256;i++) {
120
+ for (j=0;j<8;j++) {
121
+ popcount[i]+=1&(i>>j);
122
+ }
123
+ }
124
+ }
125
+
126
+
127
+ /* formerly accfile() */
128
+ void
129
+ nsr_update(struct nsrecord *a, char *buf, unsigned long len) {
130
+ unsigned int idx;
131
+ int *lastch=a->lastch; // convenience
132
+
133
+ assert( a );
134
+
135
+ for(idx=0; idx<len; idx++) {
136
+ unsigned char ch = (unsigned char) buf[idx];
137
+ a->chcount++;
138
+ if (lastch[1]>=0)
139
+ a->acc[tran3(ch,lastch[0],lastch[1],0)]++;
140
+ if (lastch[2]>=0) {
141
+ a->acc[tran3(ch,lastch[0],lastch[2],1)]++;
142
+ a->acc[tran3(ch,lastch[1],lastch[2],2)]++;
143
+ }
144
+ if (lastch[3]>=0) {
145
+ a->acc[tran3(ch,lastch[0],lastch[3],3)]++;
146
+ a->acc[tran3(ch,lastch[1],lastch[3],4)]++;
147
+ a->acc[tran3(ch,lastch[2],lastch[3],5)]++;
148
+ a->acc[tran3(lastch[3],lastch[0],ch,6)]++;
149
+ a->acc[tran3(lastch[3],lastch[2],ch,7)]++;
150
+ }
151
+ lastch[3]=lastch[2];
152
+ lastch[2]=lastch[1];
153
+ lastch[1]=lastch[0];
154
+ lastch[0]=ch;
155
+ }
156
+ }
157
+
158
+ /* formerly makecode() */
159
+ void
160
+ nsr_digest(struct nsrecord *a) {
161
+ int i;
162
+ int total=0; /* total number of trigrams counted */
163
+ int threshold=0; /* mean of all numbers in acc */
164
+
165
+ assert( a );
166
+
167
+ switch (a->chcount) {
168
+ case 0:
169
+ case 1:
170
+ case 2:
171
+ break;
172
+ case 3:
173
+ total = 1;
174
+ break;
175
+ case 4:
176
+ total = 4;
177
+ break;
178
+ default:
179
+ total = (8 * a->chcount) - 28;
180
+ break;
181
+ }
182
+
183
+ threshold=total/256; /* round down because criterion is >threshold */
184
+
185
+ memset(a->code,0,sizeof(a->code));
186
+ for (i=0;i<256;i++) {
187
+ a->code[i>>3]+=((a->acc[i]>threshold)<<(i&7));
188
+ }
189
+ }
190
+
191
+ /* NOTE - assumes both of length 32 */
192
+ int
193
+ nilsimsa(char *a,char *b) {
194
+ int i,bits=0;
195
+ assert( a );
196
+ assert( b );
197
+
198
+ for (i=0;i<NSR_CODE_LEN;i++) {
199
+ bits+=popcount[255&(a[i]^b[i])];
200
+ }
201
+
202
+ return 128-bits;
203
+ }
204
+
205
+ /*
206
+ void
207
+ nsr_free( struct nsrecord *r ) {
208
+ if (r) {
209
+ free( r );
210
+ }
211
+ }
212
+ */
213
+
214
+
215
+ /*
216
+ *
217
+ * begin ruby wrapper functions
218
+ *
219
+ */
220
+
221
+ static struct nsrecord *
222
+ get_nsr( VALUE obj ) {
223
+ VALUE wrapped;
224
+ struct nsrecord *ret;
225
+
226
+ if (!RTEST( rb_funcall( obj, rb_intern( "kind_of?" ), 1,
227
+ rb_eval_string("Nilsimsa")))) {
228
+ /* FIXME should raise exception */
229
+ return NULL;
230
+ }
231
+
232
+ wrapped = rb_iv_get( obj, RB_NSR_IVAR );
233
+ if (Qnil == wrapped) {
234
+ return NULL;
235
+ }
236
+
237
+ Data_Get_Struct( wrapped, struct nsrecord, ret );
238
+ return ret;
239
+ }
240
+
241
+ VALUE
242
+ rbns_init(int argc, VALUE *argv, VALUE self) {
243
+ VALUE wrapped_nsr;
244
+ struct nsrecord *r;
245
+ wrapped_nsr = Data_Make_Struct( rb_cObject, struct nsrecord,
246
+ NULL, -1, r );
247
+ rb_iv_set( self, RB_NSR_IVAR, wrapped_nsr );
248
+ nsr_init( r );
249
+
250
+ return rb_funcall2( self, rb_intern( "old_initialize" ), argc, argv );
251
+ }
252
+
253
+ VALUE
254
+ rbns_update(VALUE self, VALUE data) {
255
+ struct nsrecord *r;
256
+ char *chdata;
257
+ long chdata_len;
258
+ r = get_nsr( self );
259
+
260
+ Check_Type( data, T_STRING );
261
+ chdata = rb_str2cstr( data, &chdata_len );
262
+ nsr_update( r, chdata, chdata_len );
263
+ return data;
264
+ }
265
+
266
+ VALUE
267
+ rbns_nilsimsa(VALUE self, VALUE other) {
268
+ long len;
269
+ char *d1;
270
+ char *d2;
271
+
272
+ d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
273
+ if (len < NSR_CODE_LEN) {
274
+ return Qnil;
275
+ }
276
+
277
+ Check_Type( other, T_STRING );
278
+ d2 = rb_str2cstr( other, &len );
279
+ if (len < NSR_CODE_LEN) {
280
+ return Qnil;
281
+ }
282
+
283
+ return INT2NUM( nilsimsa( d1, d2 ) );
284
+ }
285
+
286
+ VALUE
287
+ rbns_digest(VALUE self) {
288
+ struct nsrecord *r=get_nsr( self );
289
+
290
+ nsr_digest( r );
291
+
292
+ /* reverse a newly created string of the digest */
293
+ return rb_funcall( rb_str_new( r->code, NSR_CODE_LEN ),
294
+ rb_intern( "reverse"), 0 );
295
+ }
296
+
297
+ void
298
+ Init_nilsimsa_native(void) {
299
+ VALUE rb_cNilsimsa;
300
+ /* initialize invariant data */
301
+ filltran();
302
+ fillpopcount();
303
+
304
+ /* this grafts itself over the top of an existing Nilsimsa class */
305
+ rb_cNilsimsa = rb_eval_string( "Nilsimsa" );
306
+ /* we'll call old_initialize from our new initialize */
307
+ rb_define_alias( rb_cNilsimsa, "old_initialize", "initialize" );
308
+ rb_define_method( rb_cNilsimsa, "initialize", rbns_init, -1 );
309
+ rb_define_method( rb_cNilsimsa, "update", rbns_update, 1 );
310
+ rb_define_method( rb_cNilsimsa, "nilsimsa", rbns_nilsimsa, 1 );
311
+ rb_define_method( rb_cNilsimsa, "digest", rbns_digest, 0 );
312
+ }
data/nilsimsa.gemspec ADDED
@@ -0,0 +1,17 @@
1
# Gem specification for the nilsimsa package.
SPEC = Gem::Specification.new do |spec|
  # Descriptive and source information for this gem.
  spec.name = "nilsimsa"
  spec.version = "1.0.5"
  spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
  spec.author = "Jonathan Wilkins"
  spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
  # has_rdoc= is deprecated in RubyGems with no replacement; only call it
  # where the setter is still provided so the spec keeps loading if it goes.
  spec.has_rdoc = true if spec.respond_to?(:has_rdoc=)
  spec.extra_rdoc_files = ["README"]

  spec.files = %w(README nilsimsa.gemspec nilsimsa.rb bin/nilsimsa
                  examples/simple.rb ext/extconf.rb ext/nilsimsa.c)
  spec.executables = ['nilsimsa']

  # optional native component
  spec.extensions = ['ext/extconf.rb']
end
data/nilsimsa.rb ADDED
@@ -0,0 +1,182 @@
1
+ # Nilsimsa hash (build 20050414)
2
+ # Ruby port (C) 2005 Martin Pirker
3
+ # released under GNU GPL V2 license
4
+ #
5
+ # inspired by Digest::Nilsimsa-0.06 from Perl CPAN and
6
+ # the original C nilsimsa-0.2.4 implementation by cmeclax
7
+ # http://ixazon.dynip.com/~cmeclax/nilsimsa.html
8
+
9
# Pure-Ruby Nilsimsa hash.
#
# Feed data with #update (or #<<, #file, or the constructor arguments), then
# read the 32-byte code with #digest / #hexdigest and compare two codes with
# #nilsimsa.
#
# The lookup tables are kept as strings for compatibility; all arithmetic
# goes through the integer arrays TRAN_BYTES / POPC_BYTES so that the class
# works both on interpreters where String#[] returns an Integer and on those
# where it returns a one-character String.
class Nilsimsa

  TRAN =
    "\x02\xD6\x9E\x6F\xF9\x1D\x04\xAB\xD0\x22\x16\x1F\xD8\x73\xA1\xAC" +
    "\x3B\x70\x62\x96\x1E\x6E\x8F\x39\x9D\x05\x14\x4A\xA6\xBE\xAE\x0E" +
    "\xCF\xB9\x9C\x9A\xC7\x68\x13\xE1\x2D\xA4\xEB\x51\x8D\x64\x6B\x50" +
    "\x23\x80\x03\x41\xEC\xBB\x71\xCC\x7A\x86\x7F\x98\xF2\x36\x5E\xEE" +
    "\x8E\xCE\x4F\xB8\x32\xB6\x5F\x59\xDC\x1B\x31\x4C\x7B\xF0\x63\x01" +
    "\x6C\xBA\x07\xE8\x12\x77\x49\x3C\xDA\x46\xFE\x2F\x79\x1C\x9B\x30" +
    "\xE3\x00\x06\x7E\x2E\x0F\x38\x33\x21\xAD\xA5\x54\xCA\xA7\x29\xFC" +
    "\x5A\x47\x69\x7D\xC5\x95\xB5\xF4\x0B\x90\xA3\x81\x6D\x25\x55\x35" +
    "\xF5\x75\x74\x0A\x26\xBF\x19\x5C\x1A\xC6\xFF\x99\x5D\x84\xAA\x66" +
    "\x3E\xAF\x78\xB3\x20\x43\xC1\xED\x24\xEA\xE6\x3F\x18\xF3\xA0\x42" +
    "\x57\x08\x53\x60\xC3\xC0\x83\x40\x82\xD7\x09\xBD\x44\x2A\x67\xA8" +
    "\x93\xE0\xC2\x56\x9F\xD9\xDD\x85\x15\xB4\x8A\x27\x28\x92\x76\xDE" +
    "\xEF\xF8\xB2\xB7\xC9\x3D\x45\x94\x4B\x11\x0D\x65\xD5\x34\x8B\x91" +
    "\x0C\xFA\x87\xE9\x7C\x5B\xB1\x4D\xE5\xD4\xCB\x10\xA2\x17\x89\xBC" +
    "\xDB\xB0\xE2\x97\x88\x52\xF7\x48\xD3\x61\x2C\x3A\x2B\xD1\x8C\xFB" +
    "\xF1\xCD\xE4\x6A\xE7\xA9\xFD\xC4\x37\xC8\xD2\xF6\xDF\x58\x72\x4E"

  POPC =
    "\x00\x01\x01\x02\x01\x02\x02\x03\x01\x02\x02\x03\x02\x03\x03\x04" +
    "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
    "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
    "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
    "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
    "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
    "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
    "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
    "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" +
    "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
    "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
    "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
    "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" +
    "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
    "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" +
    "\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08"

  # Integer views of the two tables above (one Integer per byte).
  TRAN_BYTES = TRAN.unpack('C*')
  POPC_BYTES = POPC.unpack('C*')

  # Creates an empty hash state and feeds every argument to #update.
  def initialize(*data)
    @threshold=0; @count=0
    @acc =Array::new(256,0)
    @lastch0=@lastch1=@lastch2=@lastch3= -1

    data.each do |d| update(d) end
  end

  # Maps three byte values and a variant number n to one of 256 buckets.
  def tran3(a,b,c,n)
    (((TRAN_BYTES[(a+n)&255]^TRAN_BYTES[b]*(n+n+1))+TRAN_BYTES[(c)^TRAN_BYTES[n]])&255)
  end

  # Feeds every byte of +data+ into the accumulator. Each new byte is
  # combined with the up-to-four previously seen bytes; history slots that
  # still hold -1 are skipped.
  def update(data)
    data.each_byte do |ch|
      @count +=1
      if @lastch1>-1 then
        @acc[tran3(ch,@lastch0,@lastch1,0)] +=1
      end
      if @lastch2>-1 then
        @acc[tran3(ch,@lastch0,@lastch2,1)] +=1
        @acc[tran3(ch,@lastch1,@lastch2,2)] +=1
      end
      if @lastch3>-1 then
        @acc[tran3(ch,@lastch0,@lastch3,3)] +=1
        @acc[tran3(ch,@lastch1,@lastch3,4)] +=1
        @acc[tran3(ch,@lastch2,@lastch3,5)] +=1
        @acc[tran3(@lastch3,@lastch0,ch,6)] +=1
        @acc[tran3(@lastch3,@lastch2,ch,7)] +=1
      end
      # slide the history window by one byte
      @lastch3=@lastch2
      @lastch2=@lastch1
      @lastch1=@lastch0
      @lastch0=ch
    end
  end

  # Returns the 32-byte code as a binary String. Bit i of the unreversed
  # code is set when bucket i holds more than the rounded-down mean count;
  # the byte order is reversed before returning.
  def digest
    # "when ... then" is used because the colon form ("when 3 :") is not
    # accepted by Ruby 1.9 and later.
    @total = case @count
             when 0..2 then 0
             when 3 then 1
             when 4 then 4
             else (8*@count)-28
             end
    @threshold=@total/256

    # Build the bits in an Integer array; strings can no longer be used as
    # mutable byte arrays via String#[]= with integers.
    bytes = Array::new(32,0)
    (0..255).each do |i|
      bytes[i>>3] |= (1<<(i&7)) if @acc[i]>@threshold
    end
    @code = bytes.pack('C*')

    @code.reverse
  end

  # The digest as 64 lowercase hex characters.
  def hexdigest
    digest.unpack("H*")[0]
  end

  def to_s
    hexdigest
  end

  # Alias for #update.
  def <<(whatever)
    update(whatever)
  end

  # True when the current digest equals the given binary digest String.
  def ==(otherdigest)
    digest == otherdigest
  end

  # Feeds the contents of the named file, read in binary mode in chunks.
  def file(thisone)
    File.open(thisone,"rb") do |f|
      until f.eof? do update(f.read(10480)) end
    end
  end

  # Compares the current digest with another binary digest String and
  # returns 128 minus the number of differing bits. Returns nil when
  # +otherdigest+ is shorter than 32 bytes, as the native implementation does.
  def nilsimsa(otherdigest)
    mine = digest.unpack('C*')
    theirs = otherdigest.unpack('C*')
    return nil if theirs.length < 32

    bits=0
    (0..31).each do |i|
      bits += POPC_BYTES[mine[i]^theirs[i]]
    end
    (128-bits)
  end

end
135
+
136
# Runs the built-in known-answer checks and prints one true/false line per
# check, followed by a blank line.
def selftest
  short_hex = '14c8118000000000030800000004042004189020001308014088003280000078'
  long_hex  = '14c811840010000c0328200108040630041890200217582d4098103280000078'

  whole = Nilsimsa::new
  whole.update("abcdefgh")
  puts "abcdefgh: #{whole.hexdigest==short_hex}"

  pieces = Nilsimsa::new("abcd","efgh")
  puts "abcd efgh: #{pieces.hexdigest==short_hex}"
  puts "digest: #{whole == pieces.digest}"

  whole.update("ijk")
  puts "ijk: #{whole.hexdigest==long_hex}"
  puts "nilsimsa: #{whole.nilsimsa(pieces.digest)==109}"
  puts
end
148
+
149
# Command-line entry point. With arguments: print the hex digest of each
# named file, using the native core when it can be loaded. Without
# arguments: run the selftest with the pure-Ruby code, then again with the
# native core if one can be loaded.
if __FILE__ == $0 then
  if ARGV.size>0 then
    begin # load C core - if available
      require 'nilsimsa_native'
    rescue LoadError
      # ignore lack of native module
    end

    ARGV.each do |filename|
      # File.exist? replaces FileTest::exists?, which was removed in Ruby 3.2
      if File.exist?(filename) then
        n = Nilsimsa::new
        n.file(filename)
        puts n.hexdigest+" #{filename}"
      else
        puts "error: can't find '#{filename}'"
      end
    end
  else
    puts 'Running selftest using native ruby version'
    selftest
    begin # load C core - if available
      # A built extension carries a platform suffix, so look for that as
      # well as for the bare name the original check tested.
      if File.exist?('./nilsimsa_native') ||
         !Dir.glob('./nilsimsa_native.{so,bundle,dll}').empty?
        require './nilsimsa_native'
        puts 'Running selftest using compiled nilsimsa in current dir'
      else
        require 'nilsimsa_native'
        puts 'Running selftest using compiled nilsimsa'
      end
      selftest
    rescue LoadError
      puts "Couldnt run selftest with compiled nilsimsa"
    end
  end
end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jwilkins-nilsimsa
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Jonathan Wilkins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-05-16 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: jwilkins[at]nospam[dot]bitland[dot]net
18
+ executables:
19
+ - nilsimsa
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - README
26
+ - nilsimsa.gemspec
27
+ - nilsimsa.rb
28
+ - bin/nilsimsa
29
+ - examples/simple.rb
30
+ - ext/extconf.rb
31
+ - ext/nilsimsa.c
32
+ has_rdoc: true
33
+ homepage:
34
+ licenses:
35
+ post_install_message:
36
+ rdoc_options: []
37
+
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ requirements: []
53
+
54
+ rubyforge_project:
55
+ rubygems_version: 1.3.5
56
+ signing_key:
57
+ specification_version: 2
58
+ summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
59
+ test_files: []
60
+