nilsimsa 1.0.1 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ source :rubygems
2
+
3
+ group :development do
4
+ if RUBY_VERSION =~ /^1.9/
5
+ gem "ruby-debug19", :require => "ruby-debug"
6
+ else
7
+ gem 'ruby-debug'
8
+ end
9
+ end
10
+
11
+ group :test do
12
+ gem "rspec"
13
+ gem "rake"
14
+ end
@@ -1,5 +1,7 @@
1
1
  nilsimsa
2
2
  --------
3
+ [![Build Status](https://secure.travis-ci.org/jwilkins/nilsimsa.png)](http://travis-ci.org/jwilkins/nilsimsa)
4
+
3
5
  Nilsimsa is a distance based hash, which is the opposite of more familiar
4
6
  hashes like MD5. Instead of small changes making a large difference in
5
7
  the resulting hash (to avoid collisions), distance based hashes cause
data/Rakefile ADDED
@@ -0,0 +1,19 @@
1
+ require 'bundler'
2
+ Bundler.require
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ desc 'Default: run specs'
8
+ task :default => :spec
9
+
10
+ task :cleanbuild do
11
+ `rm ext/*.o ext/Makefile`
12
+ if RUBY_VERSION =~ /^1.9/
13
+ `cd ext && ruby extconf.rb && make`
14
+ elsif RUBY_VERSION =~ /^1.8/
15
+ `cd ext && ruby extconf.rb && make`
16
+ else
17
+ puts "Ruby version #{RUBY_VERSION}? Can't help you.. "
18
+ end
19
+ end
data/bin/nilsimsa CHANGED
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
- require 'rubygems'
3
2
  require 'nilsimsa'
4
3
 
5
4
  if ARGV.size > 0 then
data/ext/extconf.rb CHANGED
@@ -1,4 +1,11 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'mkmf'
3
+ require 'rbconfig'
4
+
5
+ CONFIG["DLEXT"] = "bundle"
6
+ CONFIG["LDSHARED"] = "$(CC) -shared"
7
+ CONFIG["CCDLFLAGS"] = " -fPIC"
8
+
9
+ have_header('ruby.h') or missing('ruby.h')
3
10
 
4
11
  create_makefile( 'nilsimsa_native' )
data/ext/nilsimsa.c CHANGED
@@ -49,6 +49,15 @@
49
49
 
50
50
  #define tran3(a,b,c,n) (((tran[((a)+(n))&255]^tran[(b)]*((n)+(n)+1))+tran[(c)^tran[n]])&255)
51
51
 
52
+ #ifdef HAVE_RUBY_IO_H
53
+ #ifndef RSTRING_PTR
54
+ #define RSTRING_PTR(s) (RSTRING(s))
55
+ #endif
56
+ #ifndef RSTRING_LEN
57
+ #define RSTRING_LEN(s) (RSTRING(s))
58
+ #endif
59
+ #else
60
+ #endif
52
61
 
53
62
  struct nsrecord {
54
63
  int acc[256]; /* counts each trigram's hash */
@@ -256,10 +265,13 @@ rbns_update(VALUE self, VALUE data) {
256
265
  char *chdata;
257
266
  long chdata_len;
258
267
  r = get_nsr( self );
268
+ VALUE str;
259
269
 
260
270
  Check_Type( data, T_STRING );
261
- chdata = rb_str2cstr( data, &chdata_len );
262
- nsr_update( r, chdata, chdata_len );
271
+
272
+ str = StringValue(data);
273
+ //chdata = rb_str2cstr( data, &chdata_len );
274
+ nsr_update( r, (RSTRING_PTR(str)), (RSTRING_LEN(str)) );
263
275
  return data;
264
276
  }
265
277
 
@@ -269,18 +281,25 @@ rbns_nilsimsa(VALUE self, VALUE other) {
269
281
  char *d1;
270
282
  char *d2;
271
283
 
272
- d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
273
- if (len < NSR_CODE_LEN) {
284
+ VALUE str1;
285
+ VALUE str2;
286
+
287
+ //d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
288
+ str1 = rb_funcall( self, rb_intern( "digest" ), 0 );
289
+ str1 = StringValue(str1);
290
+ if(RSTRING_LEN(str1) < NSR_CODE_LEN) {
274
291
  return Qnil;
275
292
  }
276
293
 
277
294
  Check_Type( other, T_STRING );
278
- d2 = rb_str2cstr( other, &len );
279
- if (len < NSR_CODE_LEN) {
295
+ //d2 = rb_str2cstr( other, &len );
296
+ str2 = StringValue( other);
297
+ if (RSTRING_LEN(str2) < NSR_CODE_LEN) {
280
298
  return Qnil;
281
299
  }
282
300
 
283
- return INT2NUM( nilsimsa( d1, d2 ) );
301
+ //return INT2NUM( nilsimsa( d1, d2 ) );
302
+ return INT2NUM( nilsimsa( RSTRING_PTR(str1), RSTRING_PTR(str2)) );
284
303
  }
285
304
 
286
305
  VALUE
@@ -5,7 +5,6 @@
5
5
  # inspired by Digest::Nilsimsa-0.06 from Perl CPAN and
6
6
  # the original C nilsimsa-0.2.4 implementation by cmeclax
7
7
  # http://ixazon.dynip.com/~cmeclax/nilsimsa.html
8
-
9
8
  class Nilsimsa
10
9
 
11
10
  TRAN =
@@ -45,16 +44,20 @@ class Nilsimsa
45
44
  "\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08"
46
45
 
47
46
  def initialize(*data)
48
- @threshold=0; @count=0
49
- @acc =Array::new(256,0)
47
+ @threshold=0;
48
+ @count=0
49
+ @acc = Array::new(256,0)
50
50
  @lastch0=@lastch1=@lastch2=@lastch3= -1
51
51
 
52
52
  data.each do |d| update(d) end if data && (data.size>0)
53
53
  end
54
54
 
55
- def tran3(a,b,c,n)
55
+ def tran3_orig(a,b,c,n)
56
56
  (((TRAN[(a+n)&255]^TRAN[b]*(n+n+1))+TRAN[(c)^TRAN[n]])&255)
57
57
  end
58
+ def tran3(a,b,c,n)
59
+ ((((TRAN[(a+n)&255].ord)^(TRAN[b].ord)*(n+n+1))+(TRAN[(c)^(TRAN[n].ord)]).ord)&255)
60
+ end
58
61
 
59
62
  def update(data)
60
63
  data.each_byte do |ch|
@@ -83,19 +86,25 @@ class Nilsimsa
83
86
  def digest
84
87
  @total=0;
85
88
  case @count
86
- when 0..2:
87
- when 3 : @total +=1
88
- when 4 : @total +=4
89
- else
90
- @total +=(8*@count)-28
89
+ when 0..2 then ;
90
+ when 3 then @total +=1
91
+ when 4 then @total +=4
92
+ else @total +=(8*@count)-28
91
93
  end
92
94
  @threshold=@total/256
93
95
 
94
- @code=String::new(
95
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" <<
96
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
96
+ @code="\x00"*32
97
97
  (0..255).each do |i|
98
- @code[i>>3]+=( ((@acc[i]>@threshold)?(1):(0))<<(i&7) )
98
+ offset = i>>3
99
+ cur_val = @code[offset].ord
100
+ @code[offset] = (cur_val + ( ((@acc[i].ord>@threshold)?(1):(0))<<(i&7) )).chr
101
+ # cv = @code[i>>3].ord
102
+ # if @acc[i] > @threshold
103
+ # #@code[i>>3]+=( (((@acc[i])>@threshold)?(1):(0))<<(i&7) )
104
+ # @code[cv] = (@code[cv].ord + (1 <<(i&7))).chr
105
+ # else
106
+ # @code[cv] = (@code[cv].ord + (0 <<(i&7))).chr
107
+ # end
99
108
  end
100
109
 
101
110
  @code[0..31].reverse
@@ -126,52 +135,16 @@ class Nilsimsa
126
135
  def nilsimsa(otherdigest)
127
136
  bits=0; myd=digest
128
137
  (0..31).each do |i|
129
- bits += POPC[255&myd[i]^otherdigest[i]]
138
+ bits += POPC[255&myd[i].ord^otherdigest[i].ord].ord
130
139
  end
131
140
  (128-bits)
132
141
  end
133
-
134
142
  end
135
143
 
136
- def selftest
137
- n1 = Nilsimsa::new;
138
- n1.update("abcdefgh")
139
- puts "abcdefgh: #{n1.hexdigest=='14c8118000000000030800000004042004189020001308014088003280000078'}"
140
- n2 = Nilsimsa::new("abcd","efgh")
141
- puts "abcd efgh: #{n2.hexdigest=='14c8118000000000030800000004042004189020001308014088003280000078'}"
142
- puts "digest: #{n1 == n2.digest}"
143
- n1.update("ijk")
144
- puts "ijk: #{n1.hexdigest=='14c811840010000c0328200108040630041890200217582d4098103280000078'}"
145
- puts "nilsimsa: #{n1.nilsimsa(n2.digest)==109}"
146
- puts
144
+ begin # load C core - if available
145
+ #require "#{File.join(File.dirname(__FILE__), '..', 'ext', 'nilsimsa_native')}"
146
+ rescue LoadError => e
147
+ # ignore lack of native module
147
148
  end
148
149
 
149
- if __FILE__ == $0 then
150
- if ARGV.size>0 then
151
- begin # load C core - if available
152
- require 'nilsimsa_native'
153
- rescue LoadError => e
154
- # ignore lack of native module
155
- end
156
150
 
157
- ARGV.each do |filename|
158
- if FileTest::exists?(filename) then
159
- n = Nilsimsa::new
160
- n.file(filename)
161
- puts n.hexdigest+" #{filename}"
162
- else
163
- puts "error: can't find '#{filename}'"
164
- end
165
- end
166
- else
167
- puts 'Running selftest using native ruby version'
168
- selftest
169
- begin # load C core - if available
170
- require './nilsimsa_native'
171
- puts 'Running selftest using compiled nilsimsa in current dir'
172
- selftest
173
- rescue LoadError => e
174
- puts "Couldnt run selftest with compiled nilsimsa"
175
- end
176
- end
177
- end
data/nilsimsa.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ SPEC = Gem::Specification.new do |spec|
2
+ # Descriptive and source information for this gem.
3
+ spec.name = "nilsimsa"
4
+ spec.version = "1.1.1"
5
+ spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
6
+ spec.author = "Jonathan Wilkins"
7
+ spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
8
+ spec.has_rdoc = true
9
+ spec.extra_rdoc_files = ["README.md"]
10
+
11
+ spec.files = %w(Gemfile Rakefile README.md nilsimsa.gemspec
12
+ lib/nilsimsa.rb
13
+ bin/nilsimsa
14
+ examples/simple.rb
15
+ ext/extconf.rb ext/nilsimsa.c)
16
+ spec.executables = ['nilsimsa']
17
+
18
+ # optional native component
19
+ spec.extensions = ['ext/extconf.rb']
20
+ end
metadata CHANGED
@@ -1,55 +1,56 @@
1
- --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.4
3
- specification_version: 1
1
+ --- !ruby/object:Gem::Specification
4
2
  name: nilsimsa
5
- version: !ruby/object:Gem::Version
6
- version: 1.0.1
7
- date: 2007-11-15 00:00:00 -08:00
8
- summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
9
- require_paths:
10
- - .
11
- email: jwilkins[at]nospam[dot]bitland[dot]net
12
- homepage:
13
- rubyforge_project:
14
- description:
15
- autorequire: nilsimsa.rb
16
- default_executable:
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.1
5
+ prerelease:
25
6
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
- authors:
7
+ authors:
30
8
  - Jonathan Wilkins
31
- files:
32
- - README
33
- - examples
34
- - ext
35
- - nilsimsa.rb
36
- - bin
37
- - gemspec.rb
38
- - examples/simple.rb
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-21 00:00:00.000000000Z
13
+ dependencies: []
14
+ description:
15
+ email: jwilkins[at]nospam[dot]bitland[dot]net
16
+ executables:
17
+ - nilsimsa
18
+ extensions:
19
+ - ext/extconf.rb
20
+ extra_rdoc_files:
21
+ - README.md
22
+ files:
23
+ - Gemfile
24
+ - Rakefile
25
+ - README.md
26
+ - nilsimsa.gemspec
27
+ - lib/nilsimsa.rb
39
28
  - bin/nilsimsa
40
- - ext/nilsimsa.c
29
+ - examples/simple.rb
41
30
  - ext/extconf.rb
42
- test_files: []
43
-
31
+ - ext/nilsimsa.c
32
+ homepage:
33
+ licenses: []
34
+ post_install_message:
44
35
  rdoc_options: []
45
-
46
- extra_rdoc_files:
47
- - README
48
- executables:
49
- - nilsimsa
50
- extensions:
51
- - ext/extconf.rb
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ none: false
46
+ requirements:
47
+ - - ! '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
52
50
  requirements: []
53
-
54
- dependencies: []
55
-
51
+ rubyforge_project:
52
+ rubygems_version: 1.8.10
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
56
+ test_files: []
data/gemspec.rb DELETED
@@ -1,33 +0,0 @@
1
- require 'rake'
2
- require 'mkmf'
3
-
4
- SPEC = Gem::Specification.new do |spec|
5
- # Descriptive and source information for this gem.
6
- spec.name = "nilsimsa"
7
- spec.version = "1.0.1"
8
- spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
9
- spec.author = "Jonathan Wilkins"
10
- spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
11
- spec.has_rdoc = true
12
- spec.extra_rdoc_files = ["README"]
13
- spec.require_path = "."
14
- spec.autorequire = "nilsimsa.rb"
15
-
16
- unfiltered_files = FileList['*', 'examples/*', 'bin/*', 'ext/*']
17
- spec.files = unfiltered_files.delete_if do |filename|
18
- filename.include?(".gem") || filename.include?("Makefile") ||
19
- filename.include?(".so") || filename.include?(".o")
20
- end
21
- spec.executables = ['nilsimsa']
22
-
23
- # optional native component
24
- if cc_command
25
- spec.extensions << 'ext/extconf.rb'
26
- end
27
-
28
- puts "Building gem w/ "
29
- spec.files.each do |f|
30
- puts "- #{f}"
31
- end
32
-
33
- end