nilsimsa 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ source :rubygems
2
+
3
+ group :development do
4
+ if RUBY_VERSION =~ /^1.9/
5
+ gem "ruby-debug19", :require => "ruby-debug"
6
+ else
7
+ gem 'ruby-debug'
8
+ end
9
+ end
10
+
11
+ group :test do
12
+ gem "rspec"
13
+ gem "rake"
14
+ end
@@ -1,5 +1,7 @@
1
1
  nilsimsa
2
2
  --------
3
+ [![Build Status](https://secure.travis-ci.org/jwilkins/nilsimsa.png)](http://travis-ci.org/jwilkins/nilsimsa)
4
+
3
5
  Nilsimsa is a distance based hash, which is the opposite of more familiar
4
6
  hashes like MD5. Instead of small changes making a large difference in
5
7
  the resulting hash (to avoid collisions), distance based hashes cause
data/Rakefile ADDED
@@ -0,0 +1,19 @@
1
+ require 'bundler'
2
+ Bundler.require
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ desc 'Default: run specs'
8
+ task :default => :spec
9
+
10
+ task :cleanbuild do
11
+ `rm ext/*.o ext/Makefile`
12
+ if RUBY_VERSION =~ /^1.9/
13
+ `cd ext && ruby extconf.rb && make`
14
+ elsif RUBY_VERSION =~ /^1.8/
15
+ `cd ext && ruby extconf.rb && make`
16
+ else
17
+ puts "Ruby version #{RUBY_VERSION}? Can't help you.. "
18
+ end
19
+ end
data/bin/nilsimsa CHANGED
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
- require 'rubygems'
3
2
  require 'nilsimsa'
4
3
 
5
4
  if ARGV.size > 0 then
data/ext/extconf.rb CHANGED
@@ -1,4 +1,11 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'mkmf'
3
+ require 'rbconfig'
4
+
5
+ CONFIG["DLEXT"] = "bundle"
6
+ CONFIG["LDSHARED"] = "$(CC) -shared"
7
+ CONFIG["CCDLFLAGS"] = " -fPIC"
8
+
9
+ have_header('ruby.h') or missing('ruby.h')
3
10
 
4
11
  create_makefile( 'nilsimsa_native' )
data/ext/nilsimsa.c CHANGED
@@ -49,6 +49,15 @@
49
49
 
50
50
  #define tran3(a,b,c,n) (((tran[((a)+(n))&255]^tran[(b)]*((n)+(n)+1))+tran[(c)^tran[n]])&255)
51
51
 
52
+ #ifdef HAVE_RUBY_IO_H /* NOTE(review): confirm this is the intended guard for the shims below */
53
+ #ifndef RSTRING_PTR
54
+ #define RSTRING_PTR(s) (RSTRING(s)->ptr) /* fallback: character data of the string */
55
+ #endif
56
+ #ifndef RSTRING_LEN
57
+ #define RSTRING_LEN(s) (RSTRING(s)->len) /* fallback: length in bytes */
58
+ #endif
59
+ #else
60
+ #endif
52
61
 
53
62
  struct nsrecord {
54
63
  int acc[256]; /* counts each trigram's hash */
@@ -256,10 +265,13 @@ rbns_update(VALUE self, VALUE data) {
256
265
  char *chdata;
257
266
  long chdata_len;
258
267
  r = get_nsr( self );
268
+ VALUE str;
259
269
 
260
270
  Check_Type( data, T_STRING );
261
- chdata = rb_str2cstr( data, &chdata_len );
262
- nsr_update( r, chdata, chdata_len );
271
+
272
+ str = StringValue(data);
273
+ //chdata = rb_str2cstr( data, &chdata_len );
274
+ nsr_update( r, (RSTRING_PTR(str)), (RSTRING_LEN(str)) );
263
275
  return data;
264
276
  }
265
277
 
@@ -269,18 +281,25 @@ rbns_nilsimsa(VALUE self, VALUE other) {
269
281
  char *d1;
270
282
  char *d2;
271
283
 
272
- d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
273
- if (len < NSR_CODE_LEN) {
284
+ VALUE str1;
285
+ VALUE str2;
286
+
287
+ //d1 = rb_str2cstr( rb_funcall( self, rb_intern( "digest" ), 0 ), &len );
288
+ str1 = rb_funcall( self, rb_intern( "digest" ), 0 );
289
+ str1 = StringValue(str1);
290
+ if(RSTRING_LEN(str1) < NSR_CODE_LEN) {
274
291
  return Qnil;
275
292
  }
276
293
 
277
294
  Check_Type( other, T_STRING );
278
- d2 = rb_str2cstr( other, &len );
279
- if (len < NSR_CODE_LEN) {
295
+ //d2 = rb_str2cstr( other, &len );
296
+ str2 = StringValue( other);
297
+ if (RSTRING_LEN(str2) < NSR_CODE_LEN) {
280
298
  return Qnil;
281
299
  }
282
300
 
283
- return INT2NUM( nilsimsa( d1, d2 ) );
301
+ //return INT2NUM( nilsimsa( d1, d2 ) );
302
+ return INT2NUM( nilsimsa( RSTRING_PTR(str1), RSTRING_PTR(str2)) );
284
303
  }
285
304
 
286
305
  VALUE
@@ -5,7 +5,6 @@
5
5
  # inspired by Digest::Nilsimsa-0.06 from Perl CPAN and
6
6
  # the original C nilsimsa-0.2.4 implementation by cmeclax
7
7
  # http://ixazon.dynip.com/~cmeclax/nilsimsa.html
8
-
9
8
  class Nilsimsa
10
9
 
11
10
  TRAN =
@@ -45,16 +44,20 @@ class Nilsimsa
45
44
  "\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08"
46
45
 
47
46
  def initialize(*data)
48
- @threshold=0; @count=0
49
- @acc =Array::new(256,0)
47
+ @threshold=0;
48
+ @count=0
49
+ @acc = Array::new(256,0)
50
50
  @lastch0=@lastch1=@lastch2=@lastch3= -1
51
51
 
52
52
  data.each do |d| update(d) end if data && (data.size>0)
53
53
  end
54
54
 
55
- def tran3(a,b,c,n)
55
+ def tran3_orig(a,b,c,n)
56
56
  (((TRAN[(a+n)&255]^TRAN[b]*(n+n+1))+TRAN[(c)^TRAN[n]])&255)
57
57
  end
58
+ def tran3(a,b,c,n)
59
+ ((((TRAN[(a+n)&255].ord)^(TRAN[b].ord)*(n+n+1))+(TRAN[(c)^(TRAN[n].ord)]).ord)&255)
60
+ end
58
61
 
59
62
  def update(data)
60
63
  data.each_byte do |ch|
@@ -83,19 +86,25 @@ class Nilsimsa
83
86
  def digest
84
87
  @total=0;
85
88
  case @count
86
- when 0..2:
87
- when 3 : @total +=1
88
- when 4 : @total +=4
89
- else
90
- @total +=(8*@count)-28
89
+ when 0..2 then ;
90
+ when 3 then @total +=1
91
+ when 4 then @total +=4
92
+ else @total +=(8*@count)-28
91
93
  end
92
94
  @threshold=@total/256
93
95
 
94
- @code=String::new(
95
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" <<
96
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
96
+ @code="\x00"*32
97
97
  (0..255).each do |i|
98
- @code[i>>3]+=( ((@acc[i]>@threshold)?(1):(0))<<(i&7) )
98
+ offset = i>>3
99
+ cur_val = @code[offset].ord
100
+ @code[offset] = (cur_val + ( ((@acc[i].ord>@threshold)?(1):(0))<<(i&7) )).chr
101
+ # cv = @code[i>>3].ord
102
+ # if @acc[i] > @threshold
103
+ # #@code[i>>3]+=( (((@acc[i])>@threshold)?(1):(0))<<(i&7) )
104
+ # @code[cv] = (@code[cv].ord + (1 <<(i&7))).chr
105
+ # else
106
+ # @code[cv] = (@code[cv].ord + (0 <<(i&7))).chr
107
+ # end
99
108
  end
100
109
 
101
110
  @code[0..31].reverse
@@ -126,52 +135,16 @@ class Nilsimsa
126
135
  def nilsimsa(otherdigest)
127
136
  bits=0; myd=digest
128
137
  (0..31).each do |i|
129
- bits += POPC[255&myd[i]^otherdigest[i]]
138
+ bits += POPC[255&myd[i].ord^otherdigest[i].ord].ord
130
139
  end
131
140
  (128-bits)
132
141
  end
133
-
134
142
  end
135
143
 
136
- def selftest
137
- n1 = Nilsimsa::new;
138
- n1.update("abcdefgh")
139
- puts "abcdefgh: #{n1.hexdigest=='14c8118000000000030800000004042004189020001308014088003280000078'}"
140
- n2 = Nilsimsa::new("abcd","efgh")
141
- puts "abcd efgh: #{n2.hexdigest=='14c8118000000000030800000004042004189020001308014088003280000078'}"
142
- puts "digest: #{n1 == n2.digest}"
143
- n1.update("ijk")
144
- puts "ijk: #{n1.hexdigest=='14c811840010000c0328200108040630041890200217582d4098103280000078'}"
145
- puts "nilsimsa: #{n1.nilsimsa(n2.digest)==109}"
146
- puts
144
+ begin # load C core - if available
145
+ #require "#{File.join(File.dirname(__FILE__), '..', 'ext', 'nilsimsa_native')}"
146
+ rescue LoadError => e
147
+ # ignore lack of native module
147
148
  end
148
149
 
149
- if __FILE__ == $0 then
150
- if ARGV.size>0 then
151
- begin # load C core - if available
152
- require 'nilsimsa_native'
153
- rescue LoadError => e
154
- # ignore lack of native module
155
- end
156
150
 
157
- ARGV.each do |filename|
158
- if FileTest::exists?(filename) then
159
- n = Nilsimsa::new
160
- n.file(filename)
161
- puts n.hexdigest+" #{filename}"
162
- else
163
- puts "error: can't find '#{filename}'"
164
- end
165
- end
166
- else
167
- puts 'Running selftest using native ruby version'
168
- selftest
169
- begin # load C core - if available
170
- require './nilsimsa_native'
171
- puts 'Running selftest using compiled nilsimsa in current dir'
172
- selftest
173
- rescue LoadError => e
174
- puts "Couldnt run selftest with compiled nilsimsa"
175
- end
176
- end
177
- end
data/nilsimsa.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ SPEC = Gem::Specification.new do |spec|
2
+ # Descriptive and source information for this gem.
3
+ spec.name = "nilsimsa"
4
+ spec.version = "1.1.1"
5
+ spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
6
+ spec.author = "Jonathan Wilkins"
7
+ spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
8
+ spec.has_rdoc = true
9
+ spec.extra_rdoc_files = ["README.md"]
10
+
11
+ spec.files = %w(Gemfile Rakefile README.md nilsimsa.gemspec
12
+ lib/nilsimsa.rb
13
+ bin/nilsimsa
14
+ examples/simple.rb
15
+ ext/extconf.rb ext/nilsimsa.c)
16
+ spec.executables = ['nilsimsa']
17
+
18
+ # optional native component
19
+ spec.extensions = ['ext/extconf.rb']
20
+ end
metadata CHANGED
@@ -1,55 +1,56 @@
1
- --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.4
3
- specification_version: 1
1
+ --- !ruby/object:Gem::Specification
4
2
  name: nilsimsa
5
- version: !ruby/object:Gem::Version
6
- version: 1.0.1
7
- date: 2007-11-15 00:00:00 -08:00
8
- summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
9
- require_paths:
10
- - .
11
- email: jwilkins[at]nospam[dot]bitland[dot]net
12
- homepage:
13
- rubyforge_project:
14
- description:
15
- autorequire: nilsimsa.rb
16
- default_executable:
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.1
5
+ prerelease:
25
6
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
- authors:
7
+ authors:
30
8
  - Jonathan Wilkins
31
- files:
32
- - README
33
- - examples
34
- - ext
35
- - nilsimsa.rb
36
- - bin
37
- - gemspec.rb
38
- - examples/simple.rb
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-21 00:00:00.000000000Z
13
+ dependencies: []
14
+ description:
15
+ email: jwilkins[at]nospam[dot]bitland[dot]net
16
+ executables:
17
+ - nilsimsa
18
+ extensions:
19
+ - ext/extconf.rb
20
+ extra_rdoc_files:
21
+ - README.md
22
+ files:
23
+ - Gemfile
24
+ - Rakefile
25
+ - README.md
26
+ - nilsimsa.gemspec
27
+ - lib/nilsimsa.rb
39
28
  - bin/nilsimsa
40
- - ext/nilsimsa.c
29
+ - examples/simple.rb
41
30
  - ext/extconf.rb
42
- test_files: []
43
-
31
+ - ext/nilsimsa.c
32
+ homepage:
33
+ licenses: []
34
+ post_install_message:
44
35
  rdoc_options: []
45
-
46
- extra_rdoc_files:
47
- - README
48
- executables:
49
- - nilsimsa
50
- extensions:
51
- - ext/extconf.rb
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ none: false
46
+ requirements:
47
+ - - ! '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
52
50
  requirements: []
53
-
54
- dependencies: []
55
-
51
+ rubyforge_project:
52
+ rubygems_version: 1.8.10
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: Computes Nilsimsa values. Nilsimsa is a distance based hash
56
+ test_files: []
data/gemspec.rb DELETED
@@ -1,33 +0,0 @@
1
- require 'rake'
2
- require 'mkmf'
3
-
4
- SPEC = Gem::Specification.new do |spec|
5
- # Descriptive and source information for this gem.
6
- spec.name = "nilsimsa"
7
- spec.version = "1.0.1"
8
- spec.summary = "Computes Nilsimsa values. Nilsimsa is a distance based hash"
9
- spec.author = "Jonathan Wilkins"
10
- spec.email = "jwilkins[at]nospam[dot]bitland[dot]net"
11
- spec.has_rdoc = true
12
- spec.extra_rdoc_files = ["README"]
13
- spec.require_path = "."
14
- spec.autorequire = "nilsimsa.rb"
15
-
16
- unfiltered_files = FileList['*', 'examples/*', 'bin/*', 'ext/*']
17
- spec.files = unfiltered_files.delete_if do |filename|
18
- filename.include?(".gem") || filename.include?("Makefile") ||
19
- filename.include?(".so") || filename.include?(".o")
20
- end
21
- spec.executables = ['nilsimsa']
22
-
23
- # optional native component
24
- if cc_command
25
- spec.extensions << 'ext/extconf.rb'
26
- end
27
-
28
- puts "Building gem w/ "
29
- spec.files.each do |f|
30
- puts "- #{f}"
31
- end
32
-
33
- end