string_score 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/README.mdown +6 -6
- data/lib/string_score.rb +7 -137
- data/lib/string_score/ext/string.rb +5 -0
- data/lib/string_score/scorer.rb +151 -0
- data/lib/string_score/version.rb +1 -1
- data/spec/string_score_spec.rb +49 -3
- metadata +4 -18
data/LICENSE
CHANGED
@@ -25,4 +25,4 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
25
25
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
26
26
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
27
27
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
28
|
-
|
28
|
+
|
data/README.mdown
CHANGED
@@ -15,20 +15,20 @@ Tested under Ruby 1.9.2 and Ruby 1.8.7.
|
|
15
15
|
Usage
|
16
16
|
---
|
17
17
|
|
18
|
-
gem install string_score
|
19
|
-
|
18
|
+
$ gem install string_score
|
19
|
+
|
20
20
|
scorer = StringScore.new("string to test against")
|
21
21
|
scorer.score("string to test")
|
22
|
+
scorer.sort_by_score(['strings', 'to', 'sort'])
|
22
23
|
|
23
24
|
Or, include it into String for a convenience method:
|
24
25
|
|
25
|
-
|
26
|
-
String.send(:include, StringScore)
|
27
|
-
|
28
|
-
# and then
|
26
|
+
require 'string_score/ext/string'
|
29
27
|
|
30
28
|
"Hello World".score("hello")
|
31
29
|
"Hello World".score("Whirl", 0.5) # or for fuzzy
|
30
|
+
|
31
|
+
"Hello World".sort_by_score(["xyz", "hello"]) #=> ["hello", "xyz"]
|
32
32
|
|
33
33
|
Copyright
|
34
34
|
---
|
data/lib/string_score.rb
CHANGED
@@ -3,15 +3,12 @@ module StringScore
|
|
3
3
|
class InternalError < RuntimeError; end
|
4
4
|
class ArgumentError < RuntimeError; end
|
5
5
|
|
6
|
-
NON_STRING_MSG = "Supply a string or an object with can be coerced to a string."
|
7
|
-
NON_STRING_ERROR_MESSAGE = /undefined method `to_s'/
|
8
|
-
|
9
6
|
def score(target_string, fuzziness = 0.0)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
7
|
+
StringScore.new(self, fuzziness).score(target_string)
|
8
|
+
end
|
9
|
+
|
10
|
+
def sort_by_score(target, fuzziness = 0.0, &block)
|
11
|
+
StringScore.new(self, fuzziness).sort_by_score(target)
|
15
12
|
end
|
16
13
|
|
17
14
|
# proxy to Scorer to simplify calling API
|
@@ -19,133 +16,6 @@ module StringScore
|
|
19
16
|
StringScore::Scorer.new(base_string.to_s, fuzziness)
|
20
17
|
end
|
21
18
|
|
22
|
-
class Scorer
|
23
|
-
|
24
|
-
attr_accessor :base_string, :default_fuziness
|
25
|
-
|
26
|
-
def initialize(string, fuzziness=0.0)
|
27
|
-
with_error_handling do
|
28
|
-
@default_fuziness = fuzziness
|
29
|
-
@base_string = string.to_s
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def score(target_string, fuzziness=nil)
|
34
|
-
with_error_handling do
|
35
|
-
return 1 if (base_string == target_string.to_s)
|
36
|
-
partial_score(target_string, (fuzziness || default_fuziness))
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def partial_score(target, fuzziness)
|
41
|
-
|
42
|
-
string = base_string.dup # clean copy for chomping
|
43
|
-
over_all_index = 0
|
44
|
-
|
45
|
-
cumulative_score = 0.0
|
46
|
-
|
47
|
-
target_length = target.length.to_f
|
48
|
-
string_length = string.length.to_f
|
49
|
-
|
50
|
-
start_of_string_bonus = false
|
51
|
-
|
52
|
-
abbreviation_score = 0
|
53
|
-
fuzzies = 1.0
|
54
|
-
final_score = 0.0
|
55
|
-
|
56
|
-
target_index = 0
|
57
|
-
|
58
|
-
target.each_char do |c|
|
59
|
-
over_all_index = (base_string.length - string.length)
|
60
|
-
|
61
|
-
character_score = 0.0
|
62
|
-
|
63
|
-
index_c_lowercase = string.index(c.downcase)
|
64
|
-
index_c_uppercase = string.index(c.upcase)
|
65
|
-
|
66
|
-
current_index = [index_c_lowercase, index_c_uppercase].compact.min
|
67
|
-
over_all_index += current_index.to_i
|
68
|
-
|
69
|
-
if ! current_index
|
70
|
-
|
71
|
-
if fuzziness > 0.0
|
72
|
-
fuzzies += (1 - fuzziness)
|
73
|
-
target_index += 1
|
74
|
-
next
|
75
|
-
else
|
76
|
-
return 0 # abort on any mismatch
|
77
|
-
end
|
78
|
-
|
79
|
-
end
|
80
|
-
|
81
|
-
if string.slice(current_index, 1) == c
|
82
|
-
character_score = 0.2 # exact case match
|
83
|
-
else
|
84
|
-
character_score = 0.1 # character but not case match
|
85
|
-
end
|
86
|
-
|
87
|
-
# consecutive bonus
|
88
|
-
if current_index == 0
|
89
|
-
character_score += 0.6
|
90
|
-
if target_index == 0
|
91
|
-
start_of_string_bonus = true
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
# acronym bonus
|
96
|
-
previous_index = over_all_index - 1
|
97
|
-
if base_string.slice(previous_index, 1) == ' '
|
98
|
-
character_score += 0.8
|
99
|
-
end
|
100
|
-
|
101
|
-
cumulative_score += character_score
|
102
|
-
target_index += 1
|
103
|
-
string = string[(current_index + 1), (string.length - 1)]
|
104
|
-
end
|
105
|
-
|
106
|
-
matched_score = cumulative_score.to_f / string_length.to_f
|
107
|
-
|
108
|
-
with_long_string_bonus = (((matched_score * (target_length.to_f / string_length.to_f)) + matched_score) / 2)
|
109
|
-
|
110
|
-
final_score = with_long_string_bonus / fuzzies
|
111
|
-
|
112
|
-
if start_of_string_bonus
|
113
|
-
if final_score + 0.15 < 1.0
|
114
|
-
final_score += 0.15
|
115
|
-
elsif final_score + 0.15 >= 1.0
|
116
|
-
final_score = 1.0
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
if final_score > 1.0 || final_score < 0.0
|
121
|
-
raise StringScore::InternalError, "Out of range score: '#{final_score}'"
|
122
|
-
end
|
123
|
-
|
124
|
-
final_score
|
125
|
-
end
|
126
|
-
|
127
|
-
def with_error_handling
|
128
|
-
|
129
|
-
yield
|
130
|
-
|
131
|
-
rescue StringScore::InternalError, StringScore::ArgumentError
|
132
|
-
|
133
|
-
raise # allow nesting of #with_error_handling
|
134
|
-
|
135
|
-
rescue NoMethodError => e
|
136
|
-
|
137
|
-
if e.message =~ NON_STRING_MSG
|
138
|
-
raise StringScore::ArgumentError, NON_STRING_MSG
|
139
|
-
else
|
140
|
-
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
141
|
-
end
|
142
|
-
|
143
|
-
rescue => e
|
144
|
-
|
145
|
-
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
146
|
-
|
147
|
-
end
|
148
|
-
|
149
|
-
end
|
150
|
-
|
151
19
|
end
|
20
|
+
|
21
|
+
require 'string_score/scorer'
|
@@ -0,0 +1,151 @@
|
|
1
|
+
class StringScore::Scorer
|
2
|
+
|
3
|
+
attr_accessor :base_string, :default_fuziness
|
4
|
+
|
5
|
+
NON_STRING_MSG = "Supply a string or an object with can be coerced to a string."
|
6
|
+
NON_STRING_ERROR_MESSAGE = /undefined method `to_s'/
|
7
|
+
|
8
|
+
def initialize(string, fuzziness=0.0)
|
9
|
+
with_error_handling do
|
10
|
+
@default_fuziness = fuzziness
|
11
|
+
@base_string = string.to_s
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def score(target, fuzziness=nil, &block)
|
16
|
+
if target.kind_of?(String)
|
17
|
+
with_error_handling do
|
18
|
+
return 1 if (base_string == target.to_s)
|
19
|
+
partial_score(target, (fuzziness || default_fuziness))
|
20
|
+
end
|
21
|
+
else
|
22
|
+
raise StringScore::ArgumentError, "Must supply a string."
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def sort_by_score(target_iterator, fuzziness=nil, &block)
|
27
|
+
if target_iterator.respond_to?(:each)
|
28
|
+
scored = target_iterator.map do |target|
|
29
|
+
to_match = if block_given?
|
30
|
+
block.call(target)
|
31
|
+
else
|
32
|
+
target
|
33
|
+
end
|
34
|
+
[score(to_match, fuzziness), target]
|
35
|
+
end
|
36
|
+
scored.sort_by{ |t| t[0] }.reverse.map{ |t| t[1] }
|
37
|
+
else
|
38
|
+
raise StringScore::ArgumentError, "Must supply an iterable object with strings."
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def partial_score(target, fuzziness)
|
43
|
+
|
44
|
+
string = base_string.dup # clean copy for chomping
|
45
|
+
over_all_index = 0
|
46
|
+
|
47
|
+
cumulative_score = 0.0
|
48
|
+
|
49
|
+
target_length = target.length.to_f
|
50
|
+
string_length = string.length.to_f
|
51
|
+
|
52
|
+
start_of_string_bonus = false
|
53
|
+
|
54
|
+
abbreviation_score = 0
|
55
|
+
fuzzies = 1.0
|
56
|
+
final_score = 0.0
|
57
|
+
|
58
|
+
target_index = 0
|
59
|
+
|
60
|
+
target.each_char do |c|
|
61
|
+
over_all_index = (base_string.length - string.length)
|
62
|
+
|
63
|
+
character_score = 0.0
|
64
|
+
|
65
|
+
index_c_lowercase = string.index(c.downcase)
|
66
|
+
index_c_uppercase = string.index(c.upcase)
|
67
|
+
|
68
|
+
current_index = [index_c_lowercase, index_c_uppercase].compact.min
|
69
|
+
over_all_index += current_index.to_i
|
70
|
+
|
71
|
+
if ! current_index
|
72
|
+
|
73
|
+
if fuzziness > 0.0
|
74
|
+
fuzzies += (1 - fuzziness)
|
75
|
+
target_index += 1
|
76
|
+
next
|
77
|
+
else
|
78
|
+
return 0 # abort on any mismatch
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
if string.slice(current_index, 1) == c
|
84
|
+
character_score = 0.2 # exact case match
|
85
|
+
else
|
86
|
+
character_score = 0.1 # character but not case match
|
87
|
+
end
|
88
|
+
|
89
|
+
# consecutive bonus
|
90
|
+
if current_index == 0
|
91
|
+
character_score += 0.6
|
92
|
+
if target_index == 0
|
93
|
+
start_of_string_bonus = true
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# acronym bonus
|
98
|
+
previous_index = over_all_index - 1
|
99
|
+
if base_string.slice(previous_index, 1) == ' '
|
100
|
+
character_score += 0.8
|
101
|
+
end
|
102
|
+
|
103
|
+
cumulative_score += character_score
|
104
|
+
target_index += 1
|
105
|
+
string = string[(current_index + 1), (string.length - 1)]
|
106
|
+
end
|
107
|
+
|
108
|
+
matched_score = cumulative_score.to_f / string_length.to_f
|
109
|
+
|
110
|
+
with_long_string_bonus = (((matched_score * (target_length.to_f / string_length.to_f)) + matched_score) / 2)
|
111
|
+
|
112
|
+
final_score = with_long_string_bonus / fuzzies
|
113
|
+
|
114
|
+
if start_of_string_bonus
|
115
|
+
if final_score + 0.15 < 1.0
|
116
|
+
final_score += 0.15
|
117
|
+
elsif final_score + 0.15 >= 1.0
|
118
|
+
final_score = 1.0
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
if final_score > 1.0 || final_score < 0.0
|
123
|
+
raise StringScore::InternalError, "Out of range score: '#{final_score}'"
|
124
|
+
end
|
125
|
+
|
126
|
+
final_score
|
127
|
+
end
|
128
|
+
|
129
|
+
def with_error_handling
|
130
|
+
|
131
|
+
yield
|
132
|
+
|
133
|
+
rescue StringScore::InternalError, StringScore::ArgumentError
|
134
|
+
|
135
|
+
raise # allow nesting of #with_error_handling
|
136
|
+
|
137
|
+
rescue NoMethodError => e
|
138
|
+
|
139
|
+
if e.message =~ NON_STRING_ERROR_MESSAGE
|
140
|
+
raise StringScore::ArgumentError, NON_STRING_MSG
|
141
|
+
else
|
142
|
+
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
143
|
+
end
|
144
|
+
|
145
|
+
rescue => e
|
146
|
+
|
147
|
+
raise StringScore::InternalError, "#{e.class}: #{e.message}"
|
148
|
+
|
149
|
+
end
|
150
|
+
|
151
|
+
end
|
data/lib/string_score/version.rb
CHANGED
data/spec/string_score_spec.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'string_score'
|
2
|
-
|
3
|
-
String.send(:include, StringScore)
|
2
|
+
require 'string_score/ext/string'
|
4
3
|
|
5
4
|
RSpec::Matchers.define :be_greater_than do |expected|
|
6
5
|
match do |actual|
|
@@ -18,7 +17,7 @@ describe StringScore do
|
|
18
17
|
|
19
18
|
subject { StringScore.new('Hello World') }
|
20
19
|
|
21
|
-
it "provides a method directly on a string instance" do
|
20
|
+
it "provides a method directly on a string instance for scoring" do
|
22
21
|
"foobar".score('foo').should == StringScore.new("foobar").score('foo')
|
23
22
|
end
|
24
23
|
|
@@ -110,4 +109,51 @@ describe StringScore do
|
|
110
109
|
subject.score('Hz', 0.9).should be_greater_than(subject.score('Hz', 0.5))
|
111
110
|
end
|
112
111
|
|
112
|
+
it "accepts an array of values to score" do
|
113
|
+
subject.sort_by_score(["Hello"]).should be_kind_of(Array)
|
114
|
+
end
|
115
|
+
|
116
|
+
it "provides a method directly on a string instance for scoring" do
|
117
|
+
target_list = ['foo', 'foobar', 'xyz']
|
118
|
+
"foobar".sort_by_score(target_list).should == StringScore.new("foobar").sort_by_score(target_list)
|
119
|
+
end
|
120
|
+
|
121
|
+
it "sorts a passed array by score, highest to lowest" do
|
122
|
+
subject.sort_by_score(["xyz", "Hello", "hello"]).should == ["Hello", "hello", "xyz"]
|
123
|
+
end
|
124
|
+
|
125
|
+
it "sorts equal matches alphabetically" do
|
126
|
+
subject.score("ell").should == subject.score("llo")
|
127
|
+
subject.sort_by_score(["llo", "ell"]).should == ["ell", "llo"]
|
128
|
+
end
|
129
|
+
|
130
|
+
it "passes fuzziness checks through with arrays" do
|
131
|
+
subject.sort_by_score(["hey", "abc"]).should == ['abc', 'hey']
|
132
|
+
subject.sort_by_score(["hey", "abc"], 0.5).should == ['hey', 'abc']
|
133
|
+
end
|
134
|
+
|
135
|
+
it "accepts a block to provide a scoring target for non-string lists" do
|
136
|
+
nested_list = [
|
137
|
+
[nil, "there"],
|
138
|
+
[nil, "hello"],
|
139
|
+
[nil, "World"]
|
140
|
+
]
|
141
|
+
|
142
|
+
expected_sort = [
|
143
|
+
[nil, "hello"],
|
144
|
+
[nil, "World"],
|
145
|
+
[nil, "there"]
|
146
|
+
]
|
147
|
+
|
148
|
+
subject.sort_by_score(nested_list){|t| t[1] }.should == expected_sort
|
149
|
+
end
|
150
|
+
|
151
|
+
it "raises an argument error if passed an invalid score target" do
|
152
|
+
expect { subject.score(nil) }.to raise_error(StringScore::ArgumentError)
|
153
|
+
end
|
154
|
+
|
155
|
+
it "raises an argument error if passed an invalid sort target" do
|
156
|
+
expect { subject.sort_by_score(nil) }.to raise_error(StringScore::ArgumentError)
|
157
|
+
end
|
158
|
+
|
113
159
|
end
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_score
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 23
|
5
4
|
prerelease:
|
6
|
-
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 0
|
10
|
-
version: 1.0.0
|
5
|
+
version: 1.1.0
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- Jim Lindley
|
@@ -15,7 +10,7 @@ autorequire:
|
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date: 2011-03-
|
13
|
+
date: 2011-03-16 00:00:00 -07:00
|
19
14
|
default_executable:
|
20
15
|
dependencies:
|
21
16
|
- !ruby/object:Gem::Dependency
|
@@ -26,11 +21,6 @@ dependencies:
|
|
26
21
|
requirements:
|
27
22
|
- - ~>
|
28
23
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 27
|
30
|
-
segments:
|
31
|
-
- 2
|
32
|
-
- 5
|
33
|
-
- 0
|
34
24
|
version: 2.5.0
|
35
25
|
type: :development
|
36
26
|
version_requirements: *id001
|
@@ -50,6 +40,8 @@ files:
|
|
50
40
|
- README.mdown
|
51
41
|
- Rakefile
|
52
42
|
- lib/string_score.rb
|
43
|
+
- lib/string_score/ext/string.rb
|
44
|
+
- lib/string_score/scorer.rb
|
53
45
|
- lib/string_score/version.rb
|
54
46
|
- spec/string_score_spec.rb
|
55
47
|
- string_score.gemspec
|
@@ -67,18 +59,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
59
|
requirements:
|
68
60
|
- - ">="
|
69
61
|
- !ruby/object:Gem::Version
|
70
|
-
hash: 3
|
71
|
-
segments:
|
72
|
-
- 0
|
73
62
|
version: "0"
|
74
63
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
64
|
none: false
|
76
65
|
requirements:
|
77
66
|
- - ">="
|
78
67
|
- !ruby/object:Gem::Version
|
79
|
-
hash: 3
|
80
|
-
segments:
|
81
|
-
- 0
|
82
68
|
version: "0"
|
83
69
|
requirements: []
|
84
70
|
|