fuzzy-string-match 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,17 +2,23 @@
2
2
 
3
3
  * fuzzy-string-match is a fuzzy string matching library for ruby.
4
4
  * It is fast. ( written in C with RubyInline )
5
- * It suports only Jaro-Winkler distance algorithm.
5
+ * It supports only Jaro-Winkler distance algorithm.
6
6
  * This program was ported by hand from lucene-3.0.2. (Lucene is a Java product)
7
7
  * If you want to add another string distance algorithm, please port it yourself and contact me <kiyoka@sumibi.org>.
8
8
 
9
+ ## The reason why I developed fuzzy-string-match
10
+ * I tried amatch-0.2.5, but it contains some issues.
11
+ 1. Some memory leaks.
12
+ 2. I found it difficult to maintain.
13
+ * So, I decided to create another gem by porting lucene-3.0.x.
14
+
9
15
  ## Installing
10
16
  1. gem install fuzzy-string-match
11
17
 
12
18
  ## Features
13
- * Caluclate Jaro-Winkler distance of two strings.
14
- * Pure ruby version can handle both ascii and UTF8 strings. (and slow)
15
- * Native version can only ascii strings. (and fast)
19
+ * Calculate Jaro-Winkler distance of two strings.
20
+ * Pure ruby version can handle both ASCII and UTF8 strings. (and slow)
21
+ * Native version can only handle ASCII strings. (and fast)
16
22
 
17
23
  ## Sample code
18
24
  * Native version
@@ -51,6 +57,25 @@
51
57
  => 0.8133333333333332
52
58
  </code>
53
59
 
60
+ ## Benchmarks
61
+
62
+ <console>
63
+ $ rake bench
64
+ ruby ./benchmark/vs_amatch.rb
65
+ ---
66
+ --- Each match functions will be called 1Mega times. ---
67
+ ---
68
+ [Amatch]
69
+ user system total real
70
+ 1.160000 0.050000 1.210000 ( 1.218259)
71
+ [this Module (pure)]
72
+ user system total real
73
+ 39.940000 0.160000 40.100000 ( 40.542448)
74
+ [this Module (native)]
75
+ user system total real
76
+ 0.480000 0.000000 0.480000 ( 0.484187)
77
+ </console>
78
+
54
79
  ## Requires
55
80
  - RubyInline
56
81
  - Ruby 1.9.1 or higher
@@ -60,9 +85,9 @@
60
85
  - I ported from java source code of lucene-3.0.2.
61
86
 
62
87
  ## See also
63
- - http://en.wikipedia.org/wiki/Jaro–Winkler_distance
64
- - http://lucene.apache.org/
65
- - http://github.com/naoya/perl-text-jarowinkler
88
+ - <http://en.wikipedia.org/wiki/Jaro–Winkler_distance>
89
+ - <http://lucene.apache.org/>
90
+ - <http://github.com/naoya/perl-text-jarowinkler>
66
91
 
67
92
  ## License
68
93
  - Apache 2.0 LICENSE
@@ -1,5 +1,5 @@
1
1
  #
2
- # Fuzzy String Match
2
+ # Fuzzy String Match
3
3
  #
4
4
  # Copyright 2010 Kiyoka Nishiyama
5
5
  #
@@ -16,7 +16,7 @@
16
16
  # limitations under the License.
17
17
  #
18
18
  module FuzzyStringMatch
19
-
19
+
20
20
  class JaroWinkler
21
21
  def create( type = :pure ) # factory method
22
22
  case type
@@ -34,7 +34,7 @@ module FuzzyStringMatch
34
34
  def getDistance( s1, s2 )
35
35
  a1 = s1.split( // )
36
36
  a2 = s2.split( // )
37
-
37
+
38
38
  if s1.size > s2.size
39
39
  (max,min) = a1,a2
40
40
  else
@@ -50,7 +50,7 @@ module FuzzyStringMatch
50
50
  c1 = min[mi]
51
51
  xi = [mi - range, 0].max
52
52
  xn = [mi + range + 1, max.size].min
53
-
53
+
54
54
  (xi ... xn).each { |i|
55
55
  if (not flags[i]) && ( c1 == max[i] )
56
56
  indexes[mi] = i
@@ -79,7 +79,7 @@ module FuzzyStringMatch
79
79
  si += 1
80
80
  end
81
81
  }
82
-
82
+
83
83
  transpositions = 0
84
84
  (0 ... ms1.size).each { |mi|
85
85
  if ms1[mi] != ms2[mi]
@@ -110,7 +110,8 @@ module FuzzyStringMatch
110
110
  require 'inline'
111
111
  class JaroWinklerNative
112
112
  inline do |builder|
113
- builder.add_compile_flags '-std=c99'
113
+ builder.include '<iostream>'
114
+ builder.add_compile_flags '-x c++', '-lstdc++'
114
115
  builder.c_raw 'int max( int a, int b ) { return ((a)>(b)?(a):(b)); }'
115
116
  builder.c_raw 'int min( int a, int b ) { return ((a)<(b)?(a):(b)); }'
116
117
  builder.c_raw 'double double_min( double a, double b ) { return ((a)<(b)?(a):(b)); }'
@@ -130,7 +131,7 @@ double getDistance( char *s1, char *s2 )
130
131
  _min = s1; _min_length = strlen(s1);
131
132
  }
132
133
  int range = max( _max_length / 2 - 1, 0 );
133
-
134
+
134
135
  int indexes[_min_length];
135
136
  for( int i = 0 ; i < _min_length ; i++ ) {
136
137
  indexes[i] = -1;
@@ -156,7 +157,7 @@ double getDistance( char *s1, char *s2 )
156
157
  char ms1[matches];
157
158
  char ms2[matches];
158
159
  int ms1_length = matches;
159
-
160
+
160
161
  for (int i = 0, si = 0; i < _min_length; i++) {
161
162
  if (indexes[i] != -1) {
162
163
  ms1[si] = _min[i];
@@ -29,13 +29,13 @@ describe FuzzyStringMatch, "when some string distances (Pure) are" do
29
29
  @jarow = FuzzyStringMatch::JaroWinkler.new.create
30
30
  end
31
31
  it "should" do
32
- @jarow.getDistance( "henka", "henkan" ).should be_close( 0.9722, 0.0001 )
32
+ @jarow.getDistance( "henka", "henkan" ).should be_within(0.0001).of(0.9722)
33
33
  @jarow.getDistance( "al", "al" ).should == 1.0
34
- @jarow.getDistance( "martha", "marhta" ).should be_close( 0.9611, 0.0001 )
35
- @jarow.getDistance( "jones", "johnson" ).should be_close( 0.8323, 0.0001 )
36
- @jarow.getDistance( "abcvwxyz", "cabvwxyz" ).should be_close( 0.9583, 0.0001 )
37
- @jarow.getDistance( "dwayne", "duane" ).should be_close( 0.8400, 0.0001 )
38
- @jarow.getDistance( "dixon", "dicksonx" ).should be_close( 0.8133, 0.0001 )
34
+ @jarow.getDistance( "martha", "marhta" ).should be_within(0.0001).of(0.9611)
35
+ @jarow.getDistance( "jones", "johnson" ).should be_within(0.0001).of(0.8323)
36
+ @jarow.getDistance( "abcvwxyz", "cabvwxyz" ).should be_within(0.0001).of(0.9583)
37
+ @jarow.getDistance( "dwayne", "duane" ).should be_within(0.0001).of(0.8400)
38
+ @jarow.getDistance( "dixon", "dicksonx" ).should be_within(0.0001).of(0.8133)
39
39
  @jarow.getDistance( "fvie", "ten" ).should == 0.0
40
40
  lambda {
41
41
  d1 = @jarow.getDistance("zac ephron", "zac efron")
@@ -55,13 +55,13 @@ describe FuzzyStringMatch, "when some string distances (Native) are" do
55
55
  @jarow = FuzzyStringMatch::JaroWinkler.new.create( :native )
56
56
  end
57
57
  it "should" do
58
- @jarow.getDistance( "henka", "henkan" ).should be_close( 0.9722, 0.0001 )
58
+ @jarow.getDistance( "henka", "henkan" ).should be_within(0.0001).of(0.9722)
59
59
  @jarow.getDistance( "al", "al" ).should == 1.0
60
- @jarow.getDistance( "martha", "marhta" ).should be_close( 0.9611, 0.0001 )
61
- @jarow.getDistance( "jones", "johnson" ).should be_close( 0.8323, 0.0001 )
62
- @jarow.getDistance( "abcvwxyz", "cabvwxyz" ).should be_close( 0.9583, 0.0001 )
63
- @jarow.getDistance( "dwayne", "duane" ).should be_close( 0.8400, 0.0001 )
64
- @jarow.getDistance( "dixon", "dicksonx" ).should be_close( 0.8133, 0.0001 )
60
+ @jarow.getDistance( "martha", "marhta" ).should be_within(0.0001).of(0.9611)
61
+ @jarow.getDistance( "jones", "johnson" ).should be_within(0.0001).of(0.8323)
62
+ @jarow.getDistance( "abcvwxyz", "cabvwxyz" ).should be_within(0.0001).of(0.9583)
63
+ @jarow.getDistance( "dwayne", "duane" ).should be_within(0.0001).of(0.8400)
64
+ @jarow.getDistance( "dixon", "dicksonx" ).should be_within(0.0001).of(0.8133)
65
65
  @jarow.getDistance( "fvie", "ten" ).should == 0.0
66
66
  lambda {
67
67
  d1 = @jarow.getDistance("zac ephron", "zac efron")
@@ -87,13 +87,13 @@ describe FuzzyStringMatch, "when some UTF8 string distances (Pure) are" do
87
87
  @jarow = FuzzyStringMatch::JaroWinkler.new.create
88
88
  end
89
89
  it "should" do
90
- @jarow.getDistance( "al", "al" ).should == 1.0
91
- @jarow.getDistance( "martha", "marhta" ).should be_close( 0.9611, 0.0001 )
92
- @jarow.getDistance( "jones", "johnson" ).should be_close( 0.8323, 0.0001 )
93
- @jarow.getDistance( "abcvwxyz", "cabvwxyz" ).should be_close( 0.9583, 0.0001 )
94
- @jarow.getDistance( "dwayne", "duane" ).should be_close( 0.8400, 0.0001 )
95
- @jarow.getDistance( "dixon", "dicksonx" ).should be_close( 0.8133, 0.0001 )
96
- @jarow.getDistance( "fvie", "ten" ).should == 0.0
90
+ @jarow.getDistance( "al", "al" ).should == 1.0
91
+ @jarow.getDistance( "martha", "marhta" ).should be_within(0.0001).of(0.9611)
92
+ @jarow.getDistance( "jones", "johnson" ).should be_within(0.0001).of(0.8323)
93
+ @jarow.getDistance( "abcvwxyz", "cabvwxyz" ).should be_within(0.0001).of(0.9583)
94
+ @jarow.getDistance( "dwayne", "duane" ).should be_within(0.0001).of(0.8400)
95
+ @jarow.getDistance( "dixon", "dicksonx" ).should be_within(0.0001).of(0.8133)
96
+ @jarow.getDistance( "fvie", "ten" ).should == 0.0
97
97
  lambda {
98
98
  d1 = @jarow.getDistance("zac ephron", "zac efron")
99
99
  d2 = @jarow.getDistance("zac ephron", "kai ephron")
@@ -104,26 +104,26 @@ describe FuzzyStringMatch, "when some UTF8 string distances (Pure) are" do
104
104
  d2 = @jarow.getDistance("brittney spears", "brittney startzman")
105
105
  d1 > d2
106
106
  }.should be_true
107
- @jarow.getDistance( "スパゲティー", "スパゲッティー" ).should be_close( 0.9666, 0.0001 )
108
- @jarow.getDistance( "スパゲティー", "スパゲティ" ).should be_close( 0.9722, 0.0001 )
109
- @jarow.getDistance( "スティービー・ワンダー", "スピーディー・ワンダー" ).should be_close( 0.8561, 0.0001 )
110
- @jarow.getDistance( "マイケル・ジャクソン", "ジャイケル・マクソン" ).should be_close( 0.8000, 0.0001 )
111
- @jarow.getDistance( "まつもとゆきひろ", "まつもとひろゆき" ).should be_close( 0.9500, 0.0001 )
112
- @jarow.getDistance( "クライエント", "クライアント" ).should be_close( 0.9222, 0.0001 )
113
- @jarow.getDistance( "サーバー", "サーバ" ).should be_close( 0.9416, 0.0001 )
107
+ @jarow.getDistance( "スパゲティー", "スパゲッティー" ).should be_within(0.0001).of(0.9666)
108
+ @jarow.getDistance( "スパゲティー", "スパゲティ" ).should be_within(0.0001).of(0.9722)
109
+ @jarow.getDistance( "スティービー・ワンダー", "スピーディー・ワンダー" ).should be_within(0.0001).of(0.8561)
110
+ @jarow.getDistance( "マイケル・ジャクソン", "ジャイケル・マクソン" ).should be_within(0.0001).of(0.8000)
111
+ @jarow.getDistance( "まつもとゆきひろ", "まつもとひろゆき" ).should be_within(0.0001).of(0.9500)
112
+ @jarow.getDistance( "クライエント", "クライアント" ).should be_within(0.0001).of(0.9222)
113
+ @jarow.getDistance( "サーバー", "サーバ" ).should be_within(0.0001).of(0.9416)
114
114
  end
115
115
  end
116
116
 
117
117
 
118
118
  describe Amatch, "when use Amatch gem, results are" do
119
119
  it "should" do
120
- amatch_getDistance( "henka", "henkan" ).should be_close( 0.9666, 0.0001 ) ## amatch's result value is different from lucene version.
120
+ amatch_getDistance( "henka", "henkan" ).should be_within(0.0001).of(0.9666) ## amatch's result value is different from lucene version.
121
121
  amatch_getDistance( "al", "al" ).should == 1.0
122
- amatch_getDistance( "martha", "marhta" ).should be_close( 0.9611, 0.0001 )
123
- amatch_getDistance( "jones", "johnson" ).should be_close( 0.8323, 0.0001 )
124
- amatch_getDistance( "abcvwxyz", "cabvwxyz" ).should be_close( 0.9583, 0.0001 )
125
- amatch_getDistance( "dwayne", "duane" ).should be_close( 0.8400, 0.0001 )
126
- amatch_getDistance( "dixon", "dicksonx" ).should be_close( 0.8133, 0.0001 )
122
+ amatch_getDistance( "martha", "marhta" ).should be_within(0.0001).of(0.9611)
123
+ amatch_getDistance( "jones", "johnson" ).should be_within(0.0001).of(0.8323)
124
+ amatch_getDistance( "abcvwxyz", "cabvwxyz" ).should be_within(0.0001).of(0.9583)
125
+ amatch_getDistance( "dwayne", "duane" ).should be_within(0.0001).of(0.8400)
126
+ amatch_getDistance( "dixon", "dicksonx" ).should be_within(0.0001).of(0.8133)
127
127
  amatch_getDistance( "fvie", "ten" ).should == 0.0
128
128
  lambda {
129
129
  d1 = amatch_getDistance("zac ephron", "zac efron")
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy-string-match
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 57
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
8
  - 9
8
- - 0
9
- version: 0.9.0
9
+ - 1
10
+ version: 0.9.1
10
11
  platform: ruby
11
12
  authors:
12
13
  - Kiyoka Nishiyama
@@ -14,8 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-10-13 00:00:00 +09:00
18
- default_executable:
18
+ date: 2011-07-30 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rspec
@@ -25,6 +25,7 @@ dependencies:
25
25
  requirements:
26
26
  - - ">="
27
27
  - !ruby/object:Gem::Version
28
+ hash: 3
28
29
  segments:
29
30
  - 0
30
31
  version: "0"
@@ -38,6 +39,7 @@ dependencies:
38
39
  requirements:
39
40
  - - ">="
40
41
  - !ruby/object:Gem::Version
42
+ hash: 3
41
43
  segments:
42
44
  - 0
43
45
  version: "0"
@@ -51,6 +53,7 @@ dependencies:
51
53
  requirements:
52
54
  - - ">="
53
55
  - !ruby/object:Gem::Version
56
+ hash: 43
54
57
  segments:
55
58
  - 3
56
59
  - 8
@@ -73,13 +76,12 @@ files:
73
76
  - benchmark/vs_amatch.rb
74
77
  - lib/fuzzystringmatch.rb
75
78
  - test/fuzzystringmatch_spec.rb
76
- has_rdoc: true
77
79
  homepage: http://github.com/kiyoka/fuzzy-string-match
78
80
  licenses: []
79
81
 
80
82
  post_install_message:
81
- rdoc_options:
82
- - --charset=UTF-8
83
+ rdoc_options: []
84
+
83
85
  require_paths:
84
86
  - lib
85
87
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -87,6 +89,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
87
89
  requirements:
88
90
  - - ">="
89
91
  - !ruby/object:Gem::Version
92
+ hash: 49
90
93
  segments:
91
94
  - 1
92
95
  - 9
@@ -97,15 +100,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
100
  requirements:
98
101
  - - ">="
99
102
  - !ruby/object:Gem::Version
103
+ hash: 3
100
104
  segments:
101
105
  - 0
102
106
  version: "0"
103
107
  requirements: []
104
108
 
105
109
  rubyforge_project:
106
- rubygems_version: 1.3.7
110
+ rubygems_version: 1.7.2
107
111
  signing_key:
108
112
  specification_version: 3
109
113
  summary: fuzzy string matching library
110
- test_files:
111
- - test/fuzzystringmatch_spec.rb
114
+ test_files: []
115
+