nilsimsa 1.0.1 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +14 -0
- data/{README → README.md} +2 -0
- data/Rakefile +19 -0
- data/bin/nilsimsa +0 -1
- data/ext/extconf.rb +7 -0
- data/ext/nilsimsa.c +26 -7
- data/{nilsimsa.rb → lib/nilsimsa.rb} +27 -54
- data/nilsimsa.gemspec +20 -0
- metadata +49 -48
- data/gemspec.rb +0 -33
data/Gemfile
ADDED
data/{README → README.md}
RENAMED
@@ -1,5 +1,7 @@
|
|
1
1
|
nilsimsa
|
2
2
|
--------
|
3
|
+
[![Build Status](https://secure.travis-ci.org/jwilkins/nilsimsa.png)](http://travis-ci.org/jwilkins/nilsimsa)
|
4
|
+
|
3
5
|
Nilsimsa is a distance based hash, which is the opposite of more familiar
|
4
6
|
hashes like MD5. Instead of small changes making a large difference in
|
5
7
|
the resulting hash (to avoid collisions), distance based hashes cause
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler.require
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
|
5
|
+
RSpec::Core::RakeTask.new(:spec)
|
6
|
+
|
7
|
+
desc 'Default: run specs'
|
8
|
+
task :default => :spec
|
9
|
+
|
10
|
+
task :cleanbuild do
|
11
|
+
`rm ext/*.o ext/Makefile`
|
12
|
+
if RUBY_VERSION =~ /^1.9/
|
13
|
+
`cd ext && ruby extconf.rb && make`
|
14
|
+
elsif RUBY_VERSION =~ /^1.8/
|
15
|
+
`cd ext && ruby extconf.rb && make`
|
16
|
+
else
|
17
|
+
puts "Ruby version #{RUBY_VERSION}? Can't help you.. "
|
18
|
+
end
|
19
|
+
end
|
data/bin/nilsimsa
CHANGED
data/ext/extconf.rb
CHANGED
data/ext/nilsimsa.c
CHANGED
@@ -49,6 +49,15 @@
|
|
49
49
|
|
50
50
|
#define tran3(a,b,c,n) (((tran[((a)+(n))&255]^tran[(b)]*((n)+(n)+1))+tran[(c)^tran[n]])&255)
|
51
51
|
|
52
|
+
#ifdef HAVE_RUBY_IO_H
|
53
|
+
#ifndef RSTRING_PTR
|
54
|
+
#define RSTRING_PTR(s) (RSTRING(s))
|
55
|
+
#endif
|
56
|
+
#ifndef RSTRING_LEN
|
57
|
+
#define RSTRING_LEN(s) (RSTRING(s))
|
58
|
+
#endif
|
59
|
+
#else
|
60
|
+
#endif
|
52
61
|
|
53
62
|
struct nsrecord {
|
54
63
|
int acc[256]; /* counts each trigram's hash */
|
@@ -256,10 +265,13 @@ rbns_update(VALUE self, VALUE data) {
|
|
256
265
|
char *chdata;
|
257
266
|
long chdata_len;
|
258
267
|
r = get_nsr( self );
|
268
|
+
VALUE str;
|
259
269
|
|
260
270
|
Check_Type( data, T_STRING );
|
261
|
-
|
262
|
-
|
271
|
+
|
272
|
+
str = StringValue(data);
|
273
|
+
//chdata = rb_str2cstr( data, &chdata_len );
|
274
|
+
nsr_update( r, (RSTRING_PTR(str)), (RSTRING_LEN(str)) );
|
263
275
|
return data;
|
264
276
|
}
|
265
277
|
|
@@ -269,18 +281,25 @@ rbns_nilsimsa(VALUE self, VALUE other) {
|
|
269
281
|
char *d1;
|
270
282
|
char *d2;
|
271
283
|
|
272
|
-
|
273
|
-
|
284
|
+
VALUE str1;
|
285
|
+
VALUE str2;
|
286
|
+
|
287
|
+
//d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
|
288
|
+
str1 = rb_funcall( self, rb_intern( "digest" ), 0 );
|
289
|
+
str1 = StringValue(str1);
|
290
|
+
if(RSTRING_LEN(str1) < NSR_CODE_LEN) {
|
274
291
|
return Qnil;
|
275
292
|
}
|
276
293
|
|
277
294
|
Check_Type( other, T_STRING );
|
278
|
-
d2 = rb_str2cstr( other, &len );
|
279
|
-
|
295
|
+
//d2 = rb_str2cstr( other, &len );
|
296
|
+
str2 = StringValue( other);
|
297
|
+
if (RSTRING_LEN(str2) < NSR_CODE_LEN) {
|
280
298
|
return Qnil;
|
281
299
|
}
|
282
300
|
|
283
|
-
return INT2NUM( nilsimsa( d1, d2 ) );
|
301
|
+
//return INT2NUM( nilsimsa( d1, d2 ) );
|
302
|
+
return INT2NUM( nilsimsa( RSTRING_PTR(str1), RSTRING_PTR(str2)) );
|
284
303
|
}
|
285
304
|
|
286
305
|
VALUE
|
@@ -5,7 +5,6 @@
|
|
5
5
|
# inspired by Digest::Nilsimsa-0.06 from Perl CPAN and
|
6
6
|
# the original C nilsimsa-0.2.4 implementation by cmeclax
|
7
7
|
# http://ixazon.dynip.com/~cmeclax/nilsimsa.html
|
8
|
-
|
9
8
|
class Nilsimsa
|
10
9
|
|
11
10
|
TRAN =
|
@@ -45,16 +44,20 @@ class Nilsimsa
|
|
45
44
|
"\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08"
|
46
45
|
|
47
46
|
def initialize(*data)
|
48
|
-
@threshold=0;
|
49
|
-
@
|
47
|
+
@threshold=0;
|
48
|
+
@count=0
|
49
|
+
@acc = Array::new(256,0)
|
50
50
|
@lastch0=@lastch1=@lastch2=@lastch3= -1
|
51
51
|
|
52
52
|
data.each do |d| update(d) end if data && (data.size>0)
|
53
53
|
end
|
54
54
|
|
55
|
-
def
|
55
|
+
def tran3_orig(a,b,c,n)
|
56
56
|
(((TRAN[(a+n)&255]^TRAN[b]*(n+n+1))+TRAN[(c)^TRAN[n]])&255)
|
57
57
|
end
|
58
|
+
def tran3(a,b,c,n)
|
59
|
+
((((TRAN[(a+n)&255].ord)^(TRAN[b].ord)*(n+n+1))+(TRAN[(c)^(TRAN[n].ord)]).ord)&255)
|
60
|
+
end
|
58
61
|
|
59
62
|
def update(data)
|
60
63
|
data.each_byte do |ch|
|
@@ -83,19 +86,25 @@ class Nilsimsa
|
|
83
86
|
def digest
|
84
87
|
@total=0;
|
85
88
|
case @count
|
86
|
-
when 0..2
|
87
|
-
when 3
|
88
|
-
when 4
|
89
|
-
else
|
90
|
-
@total +=(8*@count)-28
|
89
|
+
when 0..2 then ;
|
90
|
+
when 3 then @total +=1
|
91
|
+
when 4 then @total +=4
|
92
|
+
else @total +=(8*@count)-28
|
91
93
|
end
|
92
94
|
@threshold=@total/256
|
93
95
|
|
94
|
-
@code=
|
95
|
-
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" <<
|
96
|
-
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
|
96
|
+
@code="\x00"*32
|
97
97
|
(0..255).each do |i|
|
98
|
-
|
98
|
+
offset = i>>3
|
99
|
+
cur_val = @code[offset].ord
|
100
|
+
@code[offset] = (cur_val + ( ((@acc[i].ord>@threshold)?(1):(0))<<(i&7) )).chr
|
101
|
+
# cv = @code[i>>3].ord
|
102
|
+
# if @acc[i] > @threshold
|
103
|
+
# #@code[i>>3]+=( (((@acc[i])>@threshold)?(1):(0))<<(i&7) )
|
104
|
+
# @code[cv] = (@code[cv].ord + (1 <<(i&7))).chr
|
105
|
+
# else
|
106
|
+
# @code[cv] = (@code[cv].ord + (0 <<(i&7))).chr
|
107
|
+
# end
|
99
108
|
end
|
100
109
|
|
101
110
|
@code[0..31].reverse
|
@@ -126,52 +135,16 @@ class Nilsimsa
|
|
126
135
|
def nilsimsa(otherdigest)
|
127
136
|
bits=0; myd=digest
|
128
137
|
(0..31).each do |i|
|
129
|
-
bits += POPC[255&myd[i]^otherdigest[i]]
|
138
|
+
bits += POPC[255&myd[i].ord^otherdigest[i].ord].ord
|
130
139
|
end
|
131
140
|
(128-bits)
|
132
141
|
end
|
133
|
-
|
134
142
|
end
|
135
143
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
n2 = Nilsimsa::new("abcd","efgh")
|
141
|
-
puts "abcd efgh: #{n2.hexdigest=='14c8118000000000030800000004042004189020001308014088003280000078'}"
|
142
|
-
puts "digest: #{n1 == n2.digest}"
|
143
|
-
n1.update("ijk")
|
144
|
-
puts "ijk: #{n1.hexdigest=='14c811840010000c0328200108040630041890200217582d4098103280000078'}"
|
145
|
-
puts "nilsimsa: #{n1.nilsimsa(n2.digest)==109}"
|
146
|
-
puts
|
144
|
+
begin # load C core - if available
|
145
|
+
#require "#{File.join(File.dirname(__FILE__), '..', 'ext', 'nilsimsa_native')}"
|
146
|
+
rescue LoadError => e
|
147
|
+
# ignore lack of native module
|
147
148
|
end
|
148
149
|
|
149
|
-
if __FILE__ == $0 then
|
150
|
-
if ARGV.size>0 then
|
151
|
-
begin # load C core - if available
|
152
|
-
require 'nilsimsa_native'
|
153
|
-
rescue LoadError => e
|
154
|
-
# ignore lack of native module
|
155
|
-
end
|
156
150
|
|
157
|
-
ARGV.each do |filename|
|
158
|
-
if FileTest::exists?(filename) then
|
159
|
-
n = Nilsimsa::new
|
160
|
-
n.file(filename)
|
161
|
-
puts n.hexdigest+" #{filename}"
|
162
|
-
else
|
163
|
-
puts "error: can't find '#{filename}'"
|
164
|
-
end
|
165
|
-
end
|
166
|
-
else
|
167
|
-
puts 'Running selftest using native ruby version'
|
168
|
-
selftest
|
169
|
-
begin # load C core - if available
|
170
|
-
require './nilsimsa_native'
|
171
|
-
puts 'Running selftest using compiled nilsimsa in current dir'
|
172
|
-
selftest
|
173
|
-
rescue LoadError => e
|
174
|
-
puts "Couldnt run selftest with compiled nilsimsa"
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
data/nilsimsa.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
SPEC = Gem::Specification.new do |spec|
|
2
|
+
# Descriptive and source information for this gem.
|
3
|
+
spec.name = "nilsimsa"
|
4
|
+
spec.version = "1.1.1"
|
5
|
+
spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
|
6
|
+
spec.author = "Jonathan Wilkins"
|
7
|
+
spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
|
8
|
+
spec.has_rdoc = true
|
9
|
+
spec.extra_rdoc_files = ["README.md"]
|
10
|
+
|
11
|
+
spec.files = %w(Gemfile Rakefile README.md nilsimsa.gemspec
|
12
|
+
lib/nilsimsa.rb
|
13
|
+
bin/nilsimsa
|
14
|
+
examples/simple.rb
|
15
|
+
ext/extconf.rb ext/nilsimsa.c)
|
16
|
+
spec.executables = ['nilsimsa']
|
17
|
+
|
18
|
+
# optional native component
|
19
|
+
spec.extensions = ['ext/extconf.rb']
|
20
|
+
end
|
metadata
CHANGED
@@ -1,55 +1,56 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version: 1
|
1
|
+
--- !ruby/object:Gem::Specification
|
4
2
|
name: nilsimsa
|
5
|
-
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.
|
7
|
-
|
8
|
-
summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
|
9
|
-
require_paths:
|
10
|
-
- .
|
11
|
-
email: jwilkins[at]nospam[dot]bitland[dot]net
|
12
|
-
homepage:
|
13
|
-
rubyforge_project:
|
14
|
-
description:
|
15
|
-
autorequire: nilsimsa.rb
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.1
|
5
|
+
prerelease:
|
25
6
|
platform: ruby
|
26
|
-
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
|
-
authors:
|
7
|
+
authors:
|
30
8
|
- Jonathan Wilkins
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-21 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description:
|
15
|
+
email: jwilkins[at]nospam[dot]bitland[dot]net
|
16
|
+
executables:
|
17
|
+
- nilsimsa
|
18
|
+
extensions:
|
19
|
+
- ext/extconf.rb
|
20
|
+
extra_rdoc_files:
|
21
|
+
- README.md
|
22
|
+
files:
|
23
|
+
- Gemfile
|
24
|
+
- Rakefile
|
25
|
+
- README.md
|
26
|
+
- nilsimsa.gemspec
|
27
|
+
- lib/nilsimsa.rb
|
39
28
|
- bin/nilsimsa
|
40
|
-
-
|
29
|
+
- examples/simple.rb
|
41
30
|
- ext/extconf.rb
|
42
|
-
|
43
|
-
|
31
|
+
- ext/nilsimsa.c
|
32
|
+
homepage:
|
33
|
+
licenses: []
|
34
|
+
post_install_message:
|
44
35
|
rdoc_options: []
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
-
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
45
|
+
none: false
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
52
50
|
requirements: []
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 1.8.10
|
53
|
+
signing_key:
|
54
|
+
specification_version: 3
|
55
|
+
summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
|
56
|
+
test_files: []
|
data/gemspec.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
require 'rake'
|
2
|
-
require 'mkmf'
|
3
|
-
|
4
|
-
SPEC = Gem::Specification.new do |spec|
|
5
|
-
# Descriptive and source information for this gem.
|
6
|
-
spec.name = "nilsimsa"
|
7
|
-
spec.version = "1.0.1"
|
8
|
-
spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
|
9
|
-
spec.author = "Jonathan Wilkins"
|
10
|
-
spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
|
11
|
-
spec.has_rdoc = true
|
12
|
-
spec.extra_rdoc_files = ["README"]
|
13
|
-
spec.require_path = "."
|
14
|
-
spec.autorequire = "nilsimsa.rb"
|
15
|
-
|
16
|
-
unfiltered_files = FileList['*', 'examples/*', 'bin/*', 'ext/*']
|
17
|
-
spec.files = unfiltered_files.delete_if do |filename|
|
18
|
-
filename.include?(".gem") || filename.include?("Makefile") ||
|
19
|
-
filename.include?(".so") || filename.include?(".o")
|
20
|
-
end
|
21
|
-
spec.executables = ['nilsimsa']
|
22
|
-
|
23
|
-
# optional native component
|
24
|
-
if cc_command
|
25
|
-
spec.extensions << 'ext/extconf.rb'
|
26
|
-
end
|
27
|
-
|
28
|
-
puts "Building gem w/ "
|
29
|
-
spec.files.each do |f|
|
30
|
-
puts "- #{f}"
|
31
|
-
end
|
32
|
-
|
33
|
-
end
|