string_score 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.mdown +6 -6
- data/lib/string_score.rb +7 -137
- data/lib/string_score/ext/string.rb +5 -0
- data/lib/string_score/scorer.rb +151 -0
- data/lib/string_score/version.rb +1 -1
- data/spec/string_score_spec.rb +49 -3
- metadata +4 -18
data/LICENSE
CHANGED
@@ -25,4 +25,4 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
25
25
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
26
26
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
27
27
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
28
|
-
|
28
|
+
|
data/README.mdown
CHANGED
@@ -15,20 +15,20 @@ Tested under Ruby 1.9.2 and Ruby 1.8.7.
|
|
15
15
|
Usage
|
16
16
|
---
|
17
17
|
|
18
|
-
gem install string_score
|
19
|
-
|
18
|
+
$ gem install string_score
|
19
|
+
|
20
20
|
scorer = StringScore.new("string to test against")
|
21
21
|
scorer.score("string to test")
|
22
|
+
scorer.sort_by_score(['strings', 'to', 'sort'])
|
22
23
|
|
23
24
|
Or, include it into String for a convenience method:
|
24
25
|
|
25
|
-
|
26
|
-
String.send(:include, StringScore)
|
27
|
-
|
28
|
-
# and then
|
26
|
+
require 'string_score/ext/string'
|
29
27
|
|
30
28
|
"Hello World".score("hello")
|
31
29
|
"Hello World".score("Whirl", 0.5) # or for fuzzy
|
30
|
+
|
31
|
+
"Hello World".sort_by_score(["xyz", "hello"]) #=> ["hello", "xyz"]
|
32
32
|
|
33
33
|
Copyright
|
34
34
|
---
|
data/lib/string_score.rb
CHANGED
@@ -3,15 +3,12 @@ module StringScore
|
|
3
3
|
class InternalError < RuntimeError; end
|
4
4
|
class ArgumentError < RuntimeError; end
|
5
5
|
|
6
|
-
NON_STRING_MSG = "Supply a string or an object with can be coerced to a string."
|
7
|
-
NON_STRING_ERROR_MESSAGE = /undefined method `to_s'/
|
8
|
-
|
9
6
|
def score(target_string, fuzziness = 0.0)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
7
|
+
StringScore.new(self, fuzziness).score(target_string)
|
8
|
+
end
|
9
|
+
|
10
|
+
def sort_by_score(target, fuzziness = 0.0, &block)
|
11
|
+
StringScore.new(self, fuzziness).sort_by_score(target)
|
15
12
|
end
|
16
13
|
|
17
14
|
# proxy to Scorer to simplify calling API
|
@@ -19,133 +16,6 @@ module StringScore
|
|
19
16
|
StringScore::Scorer.new(base_string.to_s, fuzziness)
|
20
17
|
end
|
21
18
|
|
22
|
-
class Scorer
|
23
|
-
|
24
|
-
attr_accessor :base_string, :default_fuziness
|
25
|
-
|
26
|
-
def initialize(string, fuzziness=0.0)
|
27
|
-
with_error_handling do
|
28
|
-
@default_fuziness = fuzziness
|
29
|
-
@base_string = string.to_s
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def score(target_string, fuzziness=nil)
|
34
|
-
with_error_handling do
|
35
|
-
return 1 if (base_string == target_string.to_s)
|
36
|
-
partial_score(target_string, (fuzziness || default_fuziness))
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def partial_score(target, fuzziness)
|
41
|
-
|
42
|
-
string = base_string.dup # clean copy for chomping
|
43
|
-
over_all_index = 0
|
44
|
-
|
45
|
-
cumulative_score = 0.0
|
46
|
-
|
47
|
-
target_length = target.length.to_f
|
48
|
-
string_length = string.length.to_f
|
49
|
-
|
50
|
-
start_of_string_bonus = false
|
51
|
-
|
52
|
-
abbreviation_score = 0
|
53
|
-
fuzzies = 1.0
|
54
|
-
final_score = 0.0
|
55
|
-
|
56
|
-
target_index = 0
|
57
|
-
|
58
|
-
target.each_char do |c|
|
59
|
-
over_all_index = (base_string.length - string.length)
|
60
|
-
|
61
|
-
character_score = 0.0
|
62
|
-
|
63
|
-
index_c_lowercase = string.index(c.downcase)
|
64
|
-
index_c_uppercase = string.index(c.upcase)
|
65
|
-
|
66
|
-
current_index = [index_c_lowercase, index_c_uppercase].compact.min
|
67
|
-
over_all_index += current_index.to_i
|
68
|
-
|
69
|
-
if ! current_index
|
70
|
-
|
71
|
-
if fuzziness > 0.0
|
72
|
-
fuzzies += (1 - fuzziness)
|
73
|
-
target_index += 1
|
74
|
-
next
|
75
|
-
else
|
76
|
-
return 0 # abort on any mismatch
|
77
|
-
end
|
78
|
-
|
79
|
-
end
|
80
|
-
|
81
|
-
if string.slice(current_index, 1) == c
|
82
|
-
character_score = 0.2 # exact case match
|
83
|
-
else
|
84
|
-
character_score = 0.1 # character but not case match
|
85
|
-
end
|
86
|
-
|
87
|
-
# consecutive bonus
|
88
|
-
if current_index == 0
|
89
|
-
character_score += 0.6
|
90
|
-
if target_index == 0
|
91
|
-
start_of_string_bonus = true
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
# acronym bonus
|
96
|
-
previous_index = over_all_index - 1
|
97
|
-
if base_string.slice(previous_index, 1) == ' '
|
98
|
-
character_score += 0.8
|
99
|
-
end
|
100
|
-
|
101
|
-
cumulative_score += character_score
|
102
|
-
target_index += 1
|
103
|
-
string = string[(current_index + 1), (string.length - 1)]
|
104
|
-
end
|
105
|
-
|
106
|
-
matched_score = cumulative_score.to_f / string_length.to_f
|
107
|
-
|
108
|
-
with_long_string_bonus = (((matched_score * (target_length.to_f / string_length.to_f)) + matched_score) / 2)
|
109
|
-
|
110
|
-
final_score = with_long_string_bonus / fuzzies
|
111
|
-
|
112
|
-
if start_of_string_bonus
|
113
|
-
if final_score + 0.15 < 1.0
|
114
|
-
final_score += 0.15
|
115
|
-
elsif final_score + 0.15 >= 1.0
|
116
|
-
final_score = 1.0
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
if final_score > 1.0 || final_score < 0.0
|
121
|
-
raise StringScore::InternalError, "Out of range score: '#{final_score}'"
|
122
|
-
end
|
123
|
-
|
124
|
-
final_score
|
125
|
-
end
|
126
|
-
|
127
|
-
def with_error_handling
|
128
|
-
|
129
|
-
yield
|
130
|
-
|
131
|
-
rescue StringScore::InternalError, StringScore::ArgumentError
|
132
|
-
|
133
|
-
raise # allow nesting of #with_error_handling
|
134
|
-
|
135
|
-
rescue NoMethodError => e
|
136
|
-
|
137
|
-
if e.message =~ NON_STRING_MSG
|
138
|
-
raise StringScore::ArgumentError, NON_STRING_MSG
|
139
|
-
else
|
140
|
-
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
141
|
-
end
|
142
|
-
|
143
|
-
rescue => e
|
144
|
-
|
145
|
-
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
146
|
-
|
147
|
-
end
|
148
|
-
|
149
|
-
end
|
150
|
-
|
151
19
|
end
|
20
|
+
|
21
|
+
require 'string_score/scorer'
|
@@ -0,0 +1,151 @@
|
|
1
|
+
class StringScore::Scorer
|
2
|
+
|
3
|
+
attr_accessor :base_string, :default_fuziness
|
4
|
+
|
5
|
+
NON_STRING_MSG = "Supply a string or an object with can be coerced to a string."
|
6
|
+
NON_STRING_ERROR_MESSAGE = /undefined method `to_s'/
|
7
|
+
|
8
|
+
def initialize(string, fuzziness=0.0)
|
9
|
+
with_error_handling do
|
10
|
+
@default_fuziness = fuzziness
|
11
|
+
@base_string = string.to_s
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def score(target, fuzziness=nil, &block)
|
16
|
+
if target.kind_of?(String)
|
17
|
+
with_error_handling do
|
18
|
+
return 1 if (base_string == target.to_s)
|
19
|
+
partial_score(target, (fuzziness || default_fuziness))
|
20
|
+
end
|
21
|
+
else
|
22
|
+
raise StringScore::ArgumentError, "Must supply a string."
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def sort_by_score(target_iterator, fuzziness=nil, &block)
|
27
|
+
if target_iterator.respond_to?(:each)
|
28
|
+
scored = target_iterator.map do |target|
|
29
|
+
to_match = if block_given?
|
30
|
+
block.call(target)
|
31
|
+
else
|
32
|
+
target
|
33
|
+
end
|
34
|
+
[score(to_match, fuzziness), target]
|
35
|
+
end
|
36
|
+
scored.sort_by{ |t| t[0] }.reverse.map{ |t| t[1] }
|
37
|
+
else
|
38
|
+
raise StringScore::ArgumentError, "Must supply an iterable object with strings."
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def partial_score(target, fuzziness)
|
43
|
+
|
44
|
+
string = base_string.dup # clean copy for chomping
|
45
|
+
over_all_index = 0
|
46
|
+
|
47
|
+
cumulative_score = 0.0
|
48
|
+
|
49
|
+
target_length = target.length.to_f
|
50
|
+
string_length = string.length.to_f
|
51
|
+
|
52
|
+
start_of_string_bonus = false
|
53
|
+
|
54
|
+
abbreviation_score = 0
|
55
|
+
fuzzies = 1.0
|
56
|
+
final_score = 0.0
|
57
|
+
|
58
|
+
target_index = 0
|
59
|
+
|
60
|
+
target.each_char do |c|
|
61
|
+
over_all_index = (base_string.length - string.length)
|
62
|
+
|
63
|
+
character_score = 0.0
|
64
|
+
|
65
|
+
index_c_lowercase = string.index(c.downcase)
|
66
|
+
index_c_uppercase = string.index(c.upcase)
|
67
|
+
|
68
|
+
current_index = [index_c_lowercase, index_c_uppercase].compact.min
|
69
|
+
over_all_index += current_index.to_i
|
70
|
+
|
71
|
+
if ! current_index
|
72
|
+
|
73
|
+
if fuzziness > 0.0
|
74
|
+
fuzzies += (1 - fuzziness)
|
75
|
+
target_index += 1
|
76
|
+
next
|
77
|
+
else
|
78
|
+
return 0 # abort on any mismatch
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
if string.slice(current_index, 1) == c
|
84
|
+
character_score = 0.2 # exact case match
|
85
|
+
else
|
86
|
+
character_score = 0.1 # character but not case match
|
87
|
+
end
|
88
|
+
|
89
|
+
# consecutive bonus
|
90
|
+
if current_index == 0
|
91
|
+
character_score += 0.6
|
92
|
+
if target_index == 0
|
93
|
+
start_of_string_bonus = true
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# acronym bonus
|
98
|
+
previous_index = over_all_index - 1
|
99
|
+
if base_string.slice(previous_index, 1) == ' '
|
100
|
+
character_score += 0.8
|
101
|
+
end
|
102
|
+
|
103
|
+
cumulative_score += character_score
|
104
|
+
target_index += 1
|
105
|
+
string = string[(current_index + 1), (string.length - 1)]
|
106
|
+
end
|
107
|
+
|
108
|
+
matched_score = cumulative_score.to_f / string_length.to_f
|
109
|
+
|
110
|
+
with_long_string_bonus = (((matched_score * (target_length.to_f / string_length.to_f)) + matched_score) / 2)
|
111
|
+
|
112
|
+
final_score = with_long_string_bonus / fuzzies
|
113
|
+
|
114
|
+
if start_of_string_bonus
|
115
|
+
if final_score + 0.15 < 1.0
|
116
|
+
final_score += 0.15
|
117
|
+
elsif final_score + 0.15 >= 1.0
|
118
|
+
final_score = 1.0
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
if final_score > 1.0 || final_score < 0.0
|
123
|
+
raise StringScore::InternalError, "Out of range score: '#{final_score}'"
|
124
|
+
end
|
125
|
+
|
126
|
+
final_score
|
127
|
+
end
|
128
|
+
|
129
|
+
def with_error_handling
|
130
|
+
|
131
|
+
yield
|
132
|
+
|
133
|
+
rescue StringScore::InternalError, StringScore::ArgumentError
|
134
|
+
|
135
|
+
raise # allow nesting of #with_error_handling
|
136
|
+
|
137
|
+
rescue NoMethodError => e
|
138
|
+
|
139
|
+
if e.message =~ NON_STRING_ERROR_MESSAGE
|
140
|
+
raise StringScore::ArgumentError, NON_STRING_MSG
|
141
|
+
else
|
142
|
+
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
143
|
+
end
|
144
|
+
|
145
|
+
rescue => e
|
146
|
+
|
147
|
+
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
148
|
+
|
149
|
+
end
|
150
|
+
|
151
|
+
end
|
data/lib/string_score/version.rb
CHANGED
data/spec/string_score_spec.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'string_score'
|
2
|
-
|
3
|
-
String.send(:include, StringScore)
|
2
|
+
require 'string_score/ext/string'
|
4
3
|
|
5
4
|
RSpec::Matchers.define :be_greater_than do |expected|
|
6
5
|
match do |actual|
|
@@ -18,7 +17,7 @@ describe StringScore do
|
|
18
17
|
|
19
18
|
subject { StringScore.new('Hello World') }
|
20
19
|
|
21
|
-
it "provides a method directly on a string instance" do
|
20
|
+
it "provides a method directly on a string instance for scoring" do
|
22
21
|
"foobar".score('foo').should == StringScore.new("foobar").score('foo')
|
23
22
|
end
|
24
23
|
|
@@ -110,4 +109,51 @@ describe StringScore do
|
|
110
109
|
subject.score('Hz', 0.9).should be_greater_than(subject.score('Hz', 0.5))
|
111
110
|
end
|
112
111
|
|
112
|
+
it "accepts an array of values to score" do
|
113
|
+
subject.sort_by_score(["Hello"]).should be_kind_of(Array)
|
114
|
+
end
|
115
|
+
|
116
|
+
it "provides a method directly on a string instance for scoring" do
|
117
|
+
target_list = ['foo', 'foobar', 'xyz']
|
118
|
+
"foobar".sort_by_score(target_list).should == StringScore.new("foobar").sort_by_score(target_list)
|
119
|
+
end
|
120
|
+
|
121
|
+
it "sorts a passed array by score, highest to lowest" do
|
122
|
+
subject.sort_by_score(["xyz", "Hello", "hello"]).should == ["Hello", "hello", "xyz"]
|
123
|
+
end
|
124
|
+
|
125
|
+
it "sorts equal matches alphabetically" do
|
126
|
+
subject.score("ell").should == subject.score("llo")
|
127
|
+
subject.sort_by_score(["llo", "ell"]).should == ["ell", "llo"]
|
128
|
+
end
|
129
|
+
|
130
|
+
it "passes fuzziness checks through with arrays" do
|
131
|
+
subject.sort_by_score(["hey", "abc"]).should == ['abc', 'hey']
|
132
|
+
subject.sort_by_score(["hey", "abc"], 0.5).should == ['hey', 'abc']
|
133
|
+
end
|
134
|
+
|
135
|
+
it "accepts a block to provide a scoring target for non-string lists" do
|
136
|
+
nested_list = [
|
137
|
+
[nil, "there"],
|
138
|
+
[nil, "hello"],
|
139
|
+
[nil, "World"]
|
140
|
+
]
|
141
|
+
|
142
|
+
expected_sort = [
|
143
|
+
[nil, "hello"],
|
144
|
+
[nil, "World"],
|
145
|
+
[nil, "there"]
|
146
|
+
]
|
147
|
+
|
148
|
+
subject.sort_by_score(nested_list){|t| t[1] }.should == expected_sort
|
149
|
+
end
|
150
|
+
|
151
|
+
it "raises an argument error if passed an invalid score target" do
|
152
|
+
expect { subject.score(nil) }.to raise_error(StringScore::ArgumentError)
|
153
|
+
end
|
154
|
+
|
155
|
+
it "raises an argument error if passed an invalid sort target" do
|
156
|
+
expect { subject.sort_by_score(nil) }.to raise_error(StringScore::ArgumentError)
|
157
|
+
end
|
158
|
+
|
113
159
|
end
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_score
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 23
|
5
4
|
prerelease:
|
6
|
-
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 0
|
10
|
-
version: 1.0.0
|
5
|
+
version: 1.1.0
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- Jim Lindley
|
@@ -15,7 +10,7 @@ autorequire:
|
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date: 2011-03-
|
13
|
+
date: 2011-03-16 00:00:00 -07:00
|
19
14
|
default_executable:
|
20
15
|
dependencies:
|
21
16
|
- !ruby/object:Gem::Dependency
|
@@ -26,11 +21,6 @@ dependencies:
|
|
26
21
|
requirements:
|
27
22
|
- - ~>
|
28
23
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 27
|
30
|
-
segments:
|
31
|
-
- 2
|
32
|
-
- 5
|
33
|
-
- 0
|
34
24
|
version: 2.5.0
|
35
25
|
type: :development
|
36
26
|
version_requirements: *id001
|
@@ -50,6 +40,8 @@ files:
|
|
50
40
|
- README.mdown
|
51
41
|
- Rakefile
|
52
42
|
- lib/string_score.rb
|
43
|
+
- lib/string_score/ext/string.rb
|
44
|
+
- lib/string_score/scorer.rb
|
53
45
|
- lib/string_score/version.rb
|
54
46
|
- spec/string_score_spec.rb
|
55
47
|
- string_score.gemspec
|
@@ -67,18 +59,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
59
|
requirements:
|
68
60
|
- - ">="
|
69
61
|
- !ruby/object:Gem::Version
|
70
|
-
hash: 3
|
71
|
-
segments:
|
72
|
-
- 0
|
73
62
|
version: "0"
|
74
63
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
64
|
none: false
|
76
65
|
requirements:
|
77
66
|
- - ">="
|
78
67
|
- !ruby/object:Gem::Version
|
79
|
-
hash: 3
|
80
|
-
segments:
|
81
|
-
- 0
|
82
68
|
version: "0"
|
83
69
|
requirements: []
|
84
70
|
|