string_score 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in string_score.gemspec
4
+ gemspec
data/README.mdown ADDED
@@ -0,0 +1,5 @@
1
+ String Score (Ruby)
2
+ ===
3
+
4
+ Port of https://github.com/joshaven/string_score from js to ruby.
5
+
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ task :default => :spec
7
+
@@ -0,0 +1,3 @@
1
# Namespace for the string_score gem; this file only carries the version.
module StringScore
  # Gem version, frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,150 @@
1
# Fuzzy string scoring: rates how closely a target (typically an abbreviation
# or partial input) matches a base string.
#
# Usable either through the +StringScore.new+ factory, which returns a
# +StringScore::Scorer+, or as a mixin that adds +#score+ to any object
# responding to +to_s+.
module StringScore

  # Raised when scoring fails for a reason other than bad input.
  class InternalError < RuntimeError; end

  # Raised when an argument cannot be coerced to a string.
  # Note: inside this namespace it shadows ::ArgumentError.
  class ArgumentError < RuntimeError; end

  NON_STRING_MSG = "Supply a string or an object which can be coerced to a string."

  # Matches the NoMethodError message produced when +to_s+ is missing.
  # Accepts both the historical backtick quoting and the plain-quote form
  # used by newer Ruby releases.
  NON_STRING_ERROR_MESSAGE = /undefined method [`']to_s'/

  # Mixin entry point: scores +target_string+ against the receiver.
  #
  # A scorer is built on every call. Nothing is cached on the receiver, so
  # the requested fuzziness always applies and frozen receivers work.
  #
  # @param target_string [#to_s] text to look for in the receiver
  # @param fuzziness [Float] 0.0 aborts on any unmatched character; larger
  #   values tolerate unmatched characters
  # @return [Numeric] 1 for an exact match, 0 for no match, otherwise a
  #   Float between 0.0 and 1.0
  def score(target_string, fuzziness = 0.0)
    StringScore.new(self, fuzziness).score(target_string)
  end

  # Proxy to Scorer to simplify the calling API.
  #
  # Coercion of +base_string+ is left to Scorer#initialize so that failures
  # are reported as StringScore::ArgumentError.
  #
  # @param base_string [#to_s] string that targets are scored against
  # @param fuzziness [Float] default fuzziness for the scorer
  # @return [StringScore::Scorer]
  def self.new(base_string, fuzziness = 0.0)
    StringScore::Scorer.new(base_string, fuzziness)
  end

  # Holds a base string and scores targets against it.
  class Scorer

    # NOTE: the "fuziness" spelling is part of the public interface.
    attr_accessor :base_string, :default_fuziness

    # @param string [#to_s] base string
    # @param fuzziness [Float] fuzziness used when #score is given none
    # @raise [StringScore::ArgumentError] if +string+ has no +to_s+
    def initialize(string, fuzziness = 0.0)
      with_error_handling do
        @default_fuziness = fuzziness
        @base_string = string.to_s
      end
    end

    # Scores +target_string+ against the base string.
    #
    # @param target_string [#to_s] coerced with +to_s+ before scoring
    # @param fuzziness [Float, nil] overrides the default when given
    # @return [Numeric] 1 for an exact match, otherwise see #partial_score
    # @raise [StringScore::ArgumentError] if the target has no +to_s+
    # @raise [StringScore::InternalError] on any other failure
    def score(target_string, fuzziness = nil)
      with_error_handling do
        target = target_string.to_s
        if base_string == target
          1
        else
          partial_score(target, fuzziness || default_fuziness)
        end
      end
    end

    # Scores a non-identical target by walking its characters through the
    # base string in order.
    #
    # @param target [#to_s] text to look for
    # @param fuzziness [Float] 0.0 aborts on the first unmatched character
    # @return [Numeric] 0 when nothing can match, otherwise a Float in 0.0..1.0
    # @raise [StringScore::InternalError] if the computed score is out of range
    def partial_score(target, fuzziness)
      target = target.to_s

      # Nothing can match an empty base; this also avoids dividing by zero.
      return 0 if base_string.empty?

      remaining = base_string.dup # unmatched tail, chomped as we go
      string_length = base_string.length.to_f
      target_length = target.length.to_f

      cumulative_score = 0.0
      fuzzies = 1.0
      start_of_string_bonus = false
      target_index = 0

      target.each_char do |c|
        consumed = base_string.length - remaining.length

        # Earliest occurrence in either case.
        current_index = [remaining.index(c.downcase), remaining.index(c.upcase)].compact.min

        unless current_index
          return 0 unless fuzziness > 0.0 # abort on any mismatch

          fuzzies += (1 - fuzziness)
          target_index += 1
          next
        end

        over_all_index = consumed + current_index

        # Exact-case match is worth more than a case-insensitive one.
        character_score = remaining[current_index] == c ? 0.2 : 0.1

        # Consecutive bonus: the match directly follows the previous one.
        if current_index == 0
          character_score += 0.6
          start_of_string_bonus = true if target_index == 0
        end

        # Acronym bonus: the match follows a space. The index guard stops
        # position 0 from wrapping around to the last character.
        if over_all_index > 0 && base_string[over_all_index - 1] == ' '
          character_score += 0.8
        end

        cumulative_score += character_score
        target_index += 1
        remaining = remaining[(current_index + 1)..-1]
      end

      matched_score = cumulative_score / string_length

      # Average the raw score with one weighted by how much of the base
      # string the target covers.
      with_long_string_bonus = ((matched_score * (target_length / string_length)) + matched_score) / 2

      final_score = with_long_string_bonus / fuzzies

      final_score = [final_score + 0.15, 1.0].min if start_of_string_bonus

      if final_score > 1.0 || final_score < 0.0
        raise StringScore::InternalError, "Out of range score: '#{final_score}'"
      end

      final_score
    end

    # Runs the block, translating failures into the gem's own error classes.
    #
    # @raise [StringScore::ArgumentError] when +to_s+ is missing on an argument
    # @raise [StringScore::InternalError] for any other StandardError
    def with_error_handling
      yield
    rescue StringScore::InternalError, StringScore::ArgumentError
      raise # allow nesting of #with_error_handling
    rescue NoMethodError => e
      if e.message =~ NON_STRING_ERROR_MESSAGE
        raise StringScore::ArgumentError, NON_STRING_MSG
      end

      raise StringScore::InternalError, "#{e.class}: #{e.message}"
    rescue => e
      raise StringScore::InternalError, "#{e.class}: #{e.message}"
    end

  end

end
@@ -0,0 +1,119 @@
1
+ require 'string_score'
2
+
3
+ String.send(:include, StringScore)
4
+
5
+ RSpec::Matchers.define :be_greater_than do |expected|
6
+ match do |actual|
7
+ expected < actual
8
+ end
9
+ end
10
+
11
+ RSpec::Matchers.define :be_less_than do |expected|
12
+ match do |actual|
13
+ expected > actual
14
+ end
15
+ end
16
+
17
+ RSpec::Matchers.define :be_about do |expected|
18
+ match do |actual|
19
+ expected.to_f.round(5) == actual.to_f.round(5) # equal once both sides are rounded to 5 decimal places
20
+ end
21
+ end
22
+
23
+ describe StringScore do
24
+
25
+ subject { StringScore.new('Hello World') }
26
+
27
+ it "provides a method directly on a string instance" do
28
+ "foobar".score('foo').should == StringScore.new("foobar").score('foo')
29
+ end
30
+
31
+ it "scores at 1 for exact match" do
32
+ subject.score('Hello World').should == 1
33
+ end
34
+
35
+ # probably rare but need to handle just in case
36
+ it "allows updates of string to match against" do
37
+ string_to_match = "Hello World"
38
+ string_to_match.score('Hello World').should == 1
39
+ string_to_match.gsub!(/\w/, 'X')
40
+ string_to_match.score('Hello World').should_not == 1
41
+ end
42
+
43
+ it "scores 0 for non-matches (character not in string)" do
44
+ subject.score("hellx").should == 0
45
+ subject.score("hello_world").should == 0
46
+ end
47
+
48
+ it "matches sequentially" do
49
+ subject.score('WH').should == 0
50
+ end
51
+
52
+ it "prefers same-case matches" do
53
+ subject.score('hello').should be_less_than(subject.score('Hello'))
54
+ end
55
+
56
+ it "scores higher on closers matchs" do
57
+ subject.score('H').should be_less_than(subject.score('He'))
58
+ end
59
+
60
+ it "will match despite wrong case" do
61
+ subject.score("hello").should be_greater_than(0)
62
+ end
63
+
64
+ it "scores progressively higher weighting on more matches" do
65
+ subject.score("e").should be_less_than(subject.score("h"))
66
+ subject.score("h").should be_less_than(subject.score("he"))
67
+ subject.score("hel").should be_less_than(subject.score("hell"))
68
+ subject.score("hell").should be_less_than(subject.score("hello"))
69
+ subject.score("hello").should be_less_than(subject.score("helloworld"))
70
+ subject.score("helloworl").should be_less_than(subject.score("hello worl"))
71
+ subject.score("hello worl").should be_less_than(subject.score("hello world"))
72
+ end
73
+
74
+ it "provides a consecutive letter bonus" do
75
+ subject.score('Hel').should be_greater_than(subject.score('Hld'))
76
+ end
77
+
78
+ it "gives an acronym bonus" do
79
+ subject.score('HW').should be_greater_than(subject.score('Ho'))
80
+ 'yet another Hello World'.score('yaHW').should be_greater_than('Hello World'.score('yet another'))
81
+ "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("Hill"))
82
+
83
+ # I think these pass in error in the js version, will check
84
+ # "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("hills"))
85
+ # "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("hillsd"))
86
+ # "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("illsda"))
87
+ end
88
+
89
+ it "gives a bonus for matching the start of the string" do
90
+ "Hillsdale".score("hi").should be_greater_than("Chippewa".score("hi"))
91
+ "hello world".score("h").should be_greater_than("hello world".score("w"))
92
+ end
93
+
94
+ it "gives proper string weights" do
95
+ "Research Resources North".score('res').should be_greater_than("Mary Conces".score('res'))
96
+ "Research Resources North".score('res').should be_greater_than("Bonnie Strathern - Southwest Michigan Title Search".score('res'))
97
+ end
98
+
99
+ it "gives start of string bonuses" do
100
+ "Mary Large".score('mar').should be_greater_than("Large Mary".score('mar'))
101
+ "Silly Mary Large".score('mar').should be_about("Silly Large Mary".score('mar'))
102
+ end
103
+
104
+
105
+ it "can fuzzily match strings" do
106
+ subject.score('Hz').should == 0
107
+ subject.score('Hz', 0.5).should be_less_than(subject.score('H', 0.5))
108
+ end
109
+
110
+ it "should be tuned well" do
111
+ "hello world".score("hello worl", 0.5).should be_greater_than("hello world".score("hello wor1", 0.5))
112
+ 'Hello World'.score('jello',0.5).should be_greater_than(0)
113
+ end
114
+
115
+ it "should have varying degrees of fuziness" do
116
+ subject.score('Hz', 0.9).should be_greater_than(subject.score('Hz', 0.5))
117
+ end
118
+
119
+ end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "string_score/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "string_score"
7
+ s.version = StringScore::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Jim Lindley"]
10
+ s.email = ["jim@jimlindley.com"]
11
+ s.homepage = ""
12
+ s.summary = %q{Score how close a string is to another string.}
13
+ s.description = %q{Port of https://github.com/joshaven/string_score from js to ruby.}
14
+
15
+ s.rubyforge_project = "string_score"
16
+
17
+ s.files = `git ls-files`.split("\n")
18
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
+ s.require_paths = ["lib"]
21
+
22
+
23
+ s.add_development_dependency "rspec", '~>2.5.0'
24
+
25
+ end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: string_score
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Jim Lindley
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-03-15 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rspec
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 2.5.0
25
+ type: :development
26
+ version_requirements: *id001
27
+ description: Port of https://github.com/joshaven/string_score from js to ruby.
28
+ email:
29
+ - jim@jimlindley.com
30
+ executables: []
31
+
32
+ extensions: []
33
+
34
+ extra_rdoc_files: []
35
+
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - README.mdown
40
+ - Rakefile
41
+ - lib/string_score.rb
42
+ - lib/string_score/version.rb
43
+ - spec/string_score_spec.rb
44
+ - string_score.gemspec
45
+ has_rdoc: true
46
+ homepage: ""
47
+ licenses: []
48
+
49
+ post_install_message:
50
+ rdoc_options: []
51
+
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ requirements: []
67
+
68
+ rubyforge_project: string_score
69
+ rubygems_version: 1.6.0
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Score how close a string is to another string.
73
+ test_files:
74
+ - spec/string_score_spec.rb