nilsimsa 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +14 -0
- data/{README → README.md} +2 -0
- data/Rakefile +19 -0
- data/bin/nilsimsa +0 -1
- data/ext/extconf.rb +7 -0
- data/ext/nilsimsa.c +26 -7
- data/{nilsimsa.rb → lib/nilsimsa.rb} +27 -54
- data/nilsimsa.gemspec +20 -0
- metadata +49 -48
- data/gemspec.rb +0 -33
data/Gemfile
ADDED
data/{README → README.md}
RENAMED
@@ -1,5 +1,7 @@
|
|
1
1
|
nilsimsa
|
2
2
|
--------
|
3
|
+
[Build Status](http://travis-ci.org/jwilkins/nilsimsa)
|
4
|
+
|
3
5
|
Nilsimsa is a distance based hash, which is the opposite of more familiar
|
4
6
|
hashes like MD5. Instead of small changes making a large difference in
|
5
7
|
the resulting hash (to avoid collisions), distance based hashes cause
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler.require
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
|
5
|
+
RSpec::Core::RakeTask.new(:spec)
|
6
|
+
|
7
|
+
desc 'Default: run specs'
|
8
|
+
task :default => :spec
|
9
|
+
|
10
|
+
task :cleanbuild do
|
11
|
+
`rm ext/*.o ext/Makefile`
|
12
|
+
if RUBY_VERSION =~ /^1.9/
|
13
|
+
`cd ext && ruby extconf.rb && make`
|
14
|
+
elsif RUBY_VERSION =~ /^1.8/
|
15
|
+
`cd ext && ruby extconf.rb && make`
|
16
|
+
else
|
17
|
+
puts "Ruby version #{RUBY_VERSION}? Can't help you.. "
|
18
|
+
end
|
19
|
+
end
|
data/bin/nilsimsa
CHANGED
data/ext/extconf.rb
CHANGED
data/ext/nilsimsa.c
CHANGED
@@ -49,6 +49,15 @@
|
|
49
49
|
|
50
50
|
#define tran3(a,b,c,n) (((tran[((a)+(n))&255]^tran[(b)]*((n)+(n)+1))+tran[(c)^tran[n]])&255)
|
51
51
|
|
52
|
+
#ifdef HAVE_RUBY_IO_H
|
53
|
+
#ifndef RSTRING_PTR
|
54
|
+
#define RSTRING_PTR(s) (RSTRING(s))
|
55
|
+
#endif
|
56
|
+
#ifndef RSTRING_LEN
|
57
|
+
#define RSTRING_LEN(s) (RSTRING(s))
|
58
|
+
#endif
|
59
|
+
#else
|
60
|
+
#endif
|
52
61
|
|
53
62
|
struct nsrecord {
|
54
63
|
int acc[256]; /* counts each trigram's hash */
|
@@ -256,10 +265,13 @@ rbns_update(VALUE self, VALUE data) {
|
|
256
265
|
char *chdata;
|
257
266
|
long chdata_len;
|
258
267
|
r = get_nsr( self );
|
268
|
+
VALUE str;
|
259
269
|
|
260
270
|
Check_Type( data, T_STRING );
|
261
|
-
|
262
|
-
|
271
|
+
|
272
|
+
str = StringValue(data);
|
273
|
+
//chdata = rb_str2cstr( data, &chdata_len );
|
274
|
+
nsr_update( r, (RSTRING_PTR(str)), (RSTRING_LEN(str)) );
|
263
275
|
return data;
|
264
276
|
}
|
265
277
|
|
@@ -269,18 +281,25 @@ rbns_nilsimsa(VALUE self, VALUE other) {
|
|
269
281
|
char *d1;
|
270
282
|
char *d2;
|
271
283
|
|
272
|
-
|
273
|
-
|
284
|
+
VALUE str1;
|
285
|
+
VALUE str2;
|
286
|
+
|
287
|
+
//d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
|
288
|
+
str1 = rb_funcall( self, rb_intern( "digest" ), 0 );
|
289
|
+
str1 = StringValue(str1);
|
290
|
+
if(RSTRING_LEN(str1) < NSR_CODE_LEN) {
|
274
291
|
return Qnil;
|
275
292
|
}
|
276
293
|
|
277
294
|
Check_Type( other, T_STRING );
|
278
|
-
d2 = rb_str2cstr( other, &len );
|
279
|
-
|
295
|
+
//d2 = rb_str2cstr( other, &len );
|
296
|
+
str2 = StringValue( other);
|
297
|
+
if (RSTRING_LEN(str2) < NSR_CODE_LEN) {
|
280
298
|
return Qnil;
|
281
299
|
}
|
282
300
|
|
283
|
-
return INT2NUM( nilsimsa( d1, d2 ) );
|
301
|
+
//return INT2NUM( nilsimsa( d1, d2 ) );
|
302
|
+
return INT2NUM( nilsimsa( RSTRING_PTR(str1), RSTRING_PTR(str2)) );
|
284
303
|
}
|
285
304
|
|
286
305
|
VALUE
|
@@ -5,7 +5,6 @@
|
|
5
5
|
# inspired by Digest::Nilsimsa-0.06 from Perl CPAN and
|
6
6
|
# the original C nilsimsa-0.2.4 implementation by cmeclax
|
7
7
|
# http://ixazon.dynip.com/~cmeclax/nilsimsa.html
|
8
|
-
|
9
8
|
class Nilsimsa
|
10
9
|
|
11
10
|
TRAN =
|
@@ -45,16 +44,20 @@ class Nilsimsa
|
|
45
44
|
"\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08"
|
46
45
|
|
47
46
|
def initialize(*data)
|
48
|
-
@threshold=0;
|
49
|
-
@
|
47
|
+
@threshold=0;
|
48
|
+
@count=0
|
49
|
+
@acc = Array::new(256,0)
|
50
50
|
@lastch0=@lastch1=@lastch2=@lastch3= -1
|
51
51
|
|
52
52
|
data.each do |d| update(d) end if data && (data.size>0)
|
53
53
|
end
|
54
54
|
|
55
|
-
def tran3(a,b,c,n)
|
55
|
+
def tran3_orig(a,b,c,n)
|
56
56
|
(((TRAN[(a+n)&255]^TRAN[b]*(n+n+1))+TRAN[(c)^TRAN[n]])&255)
|
57
57
|
end
|
58
|
+
def tran3(a,b,c,n)
|
59
|
+
((((TRAN[(a+n)&255].ord)^(TRAN[b].ord)*(n+n+1))+(TRAN[(c)^(TRAN[n].ord)]).ord)&255)
|
60
|
+
end
|
58
61
|
|
59
62
|
def update(data)
|
60
63
|
data.each_byte do |ch|
|
@@ -83,19 +86,25 @@ class Nilsimsa
|
|
83
86
|
def digest
|
84
87
|
@total=0;
|
85
88
|
case @count
|
86
|
-
when 0..2
|
87
|
-
when 3
|
88
|
-
when 4
|
89
|
-
else
|
90
|
-
@total +=(8*@count)-28
|
89
|
+
when 0..2 then ;
|
90
|
+
when 3 then @total +=1
|
91
|
+
when 4 then @total +=4
|
92
|
+
else @total +=(8*@count)-28
|
91
93
|
end
|
92
94
|
@threshold=@total/256
|
93
95
|
|
94
|
-
@code=
|
95
|
-
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" <<
|
96
|
-
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
|
96
|
+
@code="\x00"*32
|
97
97
|
(0..255).each do |i|
|
98
|
-
|
98
|
+
offset = i>>3
|
99
|
+
cur_val = @code[offset].ord
|
100
|
+
@code[offset] = (cur_val + ( ((@acc[i].ord>@threshold)?(1):(0))<<(i&7) )).chr
|
101
|
+
# cv = @code[i>>3].ord
|
102
|
+
# if @acc[i] > @threshold
|
103
|
+
# #@code[i>>3]+=( (((@acc[i])>@threshold)?(1):(0))<<(i&7) )
|
104
|
+
# @code[cv] = (@code[cv].ord + (1 <<(i&7))).chr
|
105
|
+
# else
|
106
|
+
# @code[cv] = (@code[cv].ord + (0 <<(i&7))).chr
|
107
|
+
# end
|
99
108
|
end
|
100
109
|
|
101
110
|
@code[0..31].reverse
|
@@ -126,52 +135,16 @@ class Nilsimsa
|
|
126
135
|
def nilsimsa(otherdigest)
|
127
136
|
bits=0; myd=digest
|
128
137
|
(0..31).each do |i|
|
129
|
-
bits += POPC[255&myd[i]^otherdigest[i]]
|
138
|
+
bits += POPC[255&myd[i].ord^otherdigest[i].ord].ord
|
130
139
|
end
|
131
140
|
(128-bits)
|
132
141
|
end
|
133
|
-
|
134
142
|
end
|
135
143
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
n2 = Nilsimsa::new("abcd","efgh")
|
141
|
-
puts "abcd efgh: #{n2.hexdigest=='14c8118000000000030800000004042004189020001308014088003280000078'}"
|
142
|
-
puts "digest: #{n1 == n2.digest}"
|
143
|
-
n1.update("ijk")
|
144
|
-
puts "ijk: #{n1.hexdigest=='14c811840010000c0328200108040630041890200217582d4098103280000078'}"
|
145
|
-
puts "nilsimsa: #{n1.nilsimsa(n2.digest)==109}"
|
146
|
-
puts
|
144
|
+
begin # load C core - if available
|
145
|
+
#require "#{File.join(File.dirname(__FILE__), '..', 'ext', 'nilsimsa_native')}"
|
146
|
+
rescue LoadError => e
|
147
|
+
# ignore lack of native module
|
147
148
|
end
|
148
149
|
|
149
|
-
if __FILE__ == $0 then
|
150
|
-
if ARGV.size>0 then
|
151
|
-
begin # load C core - if available
|
152
|
-
require 'nilsimsa_native'
|
153
|
-
rescue LoadError => e
|
154
|
-
# ignore lack of native module
|
155
|
-
end
|
156
150
|
|
157
|
-
ARGV.each do |filename|
|
158
|
-
if FileTest::exists?(filename) then
|
159
|
-
n = Nilsimsa::new
|
160
|
-
n.file(filename)
|
161
|
-
puts n.hexdigest+" #{filename}"
|
162
|
-
else
|
163
|
-
puts "error: can't find '#{filename}'"
|
164
|
-
end
|
165
|
-
end
|
166
|
-
else
|
167
|
-
puts 'Running selftest using native ruby version'
|
168
|
-
selftest
|
169
|
-
begin # load C core - if available
|
170
|
-
require './nilsimsa_native'
|
171
|
-
puts 'Running selftest using compiled nilsimsa in current dir'
|
172
|
-
selftest
|
173
|
-
rescue LoadError => e
|
174
|
-
puts "Couldnt run selftest with compiled nilsimsa"
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
data/nilsimsa.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
SPEC = Gem::Specification.new do |spec|
|
2
|
+
# Descriptive and source information for this gem.
|
3
|
+
spec.name = "nilsimsa"
|
4
|
+
spec.version = "1.1.1"
|
5
|
+
spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
|
6
|
+
spec.author = "Jonathan Wilkins"
|
7
|
+
spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
|
8
|
+
spec.has_rdoc = true
|
9
|
+
spec.extra_rdoc_files = ["README.md"]
|
10
|
+
|
11
|
+
spec.files = %w(Gemfile Rakefile README.md nilsimsa.gemspec
|
12
|
+
lib/nilsimsa.rb
|
13
|
+
bin/nilsimsa
|
14
|
+
examples/simple.rb
|
15
|
+
ext/extconf.rb ext/nilsimsa.c)
|
16
|
+
spec.executables = ['nilsimsa']
|
17
|
+
|
18
|
+
# optional native component
|
19
|
+
spec.extensions = ['ext/extconf.rb']
|
20
|
+
end
|
metadata
CHANGED
@@ -1,55 +1,56 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version: 1
|
1
|
+
--- !ruby/object:Gem::Specification
|
4
2
|
name: nilsimsa
|
5
|
-
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.0.1
|
7
|
-
|
8
|
-
summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
|
9
|
-
require_paths:
|
10
|
-
- .
|
11
|
-
email: jwilkins[at]nospam[dot]bitland[dot]net
|
12
|
-
homepage:
|
13
|
-
rubyforge_project:
|
14
|
-
description:
|
15
|
-
autorequire: nilsimsa.rb
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.1
|
5
|
+
prerelease:
|
25
6
|
platform: ruby
|
26
|
-
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
|
-
authors:
|
7
|
+
authors:
|
30
8
|
- Jonathan Wilkins
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-21 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description:
|
15
|
+
email: jwilkins[at]nospam[dot]bitland[dot]net
|
16
|
+
executables:
|
17
|
+
- nilsimsa
|
18
|
+
extensions:
|
19
|
+
- ext/extconf.rb
|
20
|
+
extra_rdoc_files:
|
21
|
+
- README.md
|
22
|
+
files:
|
23
|
+
- Gemfile
|
24
|
+
- Rakefile
|
25
|
+
- README.md
|
26
|
+
- nilsimsa.gemspec
|
27
|
+
- lib/nilsimsa.rb
|
39
28
|
- bin/nilsimsa
|
40
|
-
-
|
29
|
+
- examples/simple.rb
|
41
30
|
- ext/extconf.rb
|
42
|
-
|
43
|
-
|
31
|
+
- ext/nilsimsa.c
|
32
|
+
homepage:
|
33
|
+
licenses: []
|
34
|
+
post_install_message:
|
44
35
|
rdoc_options: []
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
-
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
45
|
+
none: false
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
52
50
|
requirements: []
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 1.8.10
|
53
|
+
signing_key:
|
54
|
+
specification_version: 3
|
55
|
+
summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
|
56
|
+
test_files: []
|
data/gemspec.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
require 'rake'
|
2
|
-
require 'mkmf'
|
3
|
-
|
4
|
-
SPEC = Gem::Specification.new do |spec|
|
5
|
-
# Descriptive and source information for this gem.
|
6
|
-
spec.name = "nilsimsa"
|
7
|
-
spec.version = "1.0.1"
|
8
|
-
spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
|
9
|
-
spec.author = "Jonathan Wilkins"
|
10
|
-
spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
|
11
|
-
spec.has_rdoc = true
|
12
|
-
spec.extra_rdoc_files = ["README"]
|
13
|
-
spec.require_path = "."
|
14
|
-
spec.autorequire = "nilsimsa.rb"
|
15
|
-
|
16
|
-
unfiltered_files = FileList['*', 'examples/*', 'bin/*', 'ext/*']
|
17
|
-
spec.files = unfiltered_files.delete_if do |filename|
|
18
|
-
filename.include?(".gem") || filename.include?("Makefile") ||
|
19
|
-
filename.include?(".so") || filename.include?(".o")
|
20
|
-
end
|
21
|
-
spec.executables = ['nilsimsa']
|
22
|
-
|
23
|
-
# optional native component
|
24
|
-
if cc_command
|
25
|
-
spec.extensions << 'ext/extconf.rb'
|
26
|
-
end
|
27
|
-
|
28
|
-
puts "Building gem w/ "
|
29
|
-
spec.files.each do |f|
|
30
|
-
puts "- #{f}"
|
31
|
-
end
|
32
|
-
|
33
|
-
end
|