string_score 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
# Fetch gems over TLS; the plain-http endpoint allows downloads to be
# tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in string_score.gemspec
gemspec
data/README.mdown ADDED
@@ -0,0 +1,5 @@
1
+ String Score (Ruby)
2
+ ===
3
+
4
+ Port of https://github.com/joshaven/string_score from js to ruby.
5
+
data/Rakefile ADDED
@@ -0,0 +1,7 @@
require 'bundler'
require 'rspec/core/rake_task'

# Register the Rake tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# Define the :spec task and make it what a bare `rake` invocation runs.
RSpec::Core::RakeTask.new(:spec)
task(:default => [:spec])
@@ -0,0 +1,3 @@
module StringScore
  # Release version of the gem; read by string_score.gemspec.
  # Frozen so the constant cannot be mutated in place at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,150 @@
# Fuzzy string scoring: rates how closely a target string matches a base
# string, from 0 (no match) up to 1 (exact match).
#
# Use StringScore.new(base) to obtain a Scorer, or include this module into a
# string-like class to call #score directly on its instances.
module StringScore

  # Raised when scoring fails for an unexpected reason.
  class InternalError < RuntimeError; end

  # Raised when an argument cannot be coerced with #to_s.
  # Note: inside this module the bare name ArgumentError resolves to this
  # class rather than ::ArgumentError.
  class ArgumentError < RuntimeError; end

  NON_STRING_MSG = "Supply a string or an object with can be coerced to a string.".freeze
  # Matches both the older (`to_s') and newer ('to_s') NoMethodError wording.
  NON_STRING_ERROR_MESSAGE = /undefined method [`']to_s'/

  # Scores +target_string+ against the receiver (mixin entry point).
  #
  # @param target_string [#to_s] candidate to compare with the receiver
  # @param fuzziness [Numeric] 0.0 aborts on the first unmatched character;
  #   values above 0.0 tolerate unmatched characters at a score penalty
  # @return [Numeric] 1 for an exact match, 0 for no match, otherwise a Float
  # @raise [StringScore::ArgumentError] if the target cannot be coerced to a string
  # @raise [StringScore::InternalError] on any other failure while scoring
  def score(target_string, fuzziness = 0.0)
    # Frozen receivers cannot hold the memoized scorer, so score without caching.
    return StringScore.new(self, fuzziness).score(target_string, fuzziness) if frozen?

    @string_scorer = nil if @string_scorer && @string_scorer.base_string != self
    @string_scorer ||= StringScore.new(self, fuzziness)
    # Always forward the fuzziness of *this* call; the memoized scorer would
    # otherwise keep using whatever value the first call happened to pass.
    @string_scorer.score(target_string, fuzziness)
  end

  # Proxy to Scorer to simplify the calling API.
  #
  # @param base_string [#to_s] string that targets are scored against
  # @param fuzziness [Numeric] default fuzziness for the returned scorer
  # @return [StringScore::Scorer]
  # @raise [StringScore::ArgumentError] if +base_string+ cannot be coerced
  def self.new(base_string, fuzziness = 0.0)
    # Coercion happens inside Scorer#initialize so failures are reported
    # through the gem's own error classes.
    StringScore::Scorer.new(base_string, fuzziness)
  end

  # Holds a base string and scores target strings against it.
  class Scorer

    attr_accessor :base_string, :default_fuziness

    # Correctly spelled aliases; the original misspelled accessors remain.
    alias_method :default_fuzziness, :default_fuziness
    alias_method :default_fuzziness=, :default_fuziness=

    # @param string [#to_s] base string to score against
    # @param fuzziness [Numeric] fuzziness used when #score is given none
    def initialize(string, fuzziness = 0.0)
      with_error_handling do
        @default_fuziness = fuzziness
        @base_string = string.to_s
      end
    end

    # Scores +target_string+ against the base string.
    #
    # @param target_string [#to_s] candidate string
    # @param fuzziness [Numeric, nil] overrides the scorer default when given
    # @return [Numeric] 1 for an exact match, 0 for no match, otherwise a Float
    # @raise [StringScore::ArgumentError] if the target cannot be coerced
    # @raise [StringScore::InternalError] on any other failure
    def score(target_string, fuzziness = nil)
      with_error_handling do
        # Coerce once so the comparison and the scoring see the same value.
        target = target_string.to_s
        return 1 if base_string == target
        partial_score(target, fuzziness || default_fuziness)
      end
    end

    # Computes the score for a target that is not identical to the base string.
    #
    # Target characters are matched left to right, case-insensitively; every
    # match consumes the base string up to and including the matched character.
    #
    # @param target [#to_s] candidate string
    # @param fuzziness [Numeric] 0.0 means any unmatched character yields 0
    # @return [Numeric] 0 when a character is unmatched and fuzziness is 0.0,
    #   otherwise a Float between 0.0 and 1.0
    # @raise [StringScore::InternalError] if the computed score is out of range
    def partial_score(target, fuzziness)
      target = target.to_s
      base_length = base_string.length.to_f
      # Nothing can match an empty base; also avoids a 0.0 / 0.0 (NaN) result.
      return 0.0 if base_length.zero?

      remaining = base_string   # unconsumed tail of the base string
      cumulative_score = 0.0
      fuzzies = 1.0             # divisor that grows with each unmatched character
      start_of_string_bonus = false
      target_index = 0

      target.each_char do |char|
        consumed = base_string.length - remaining.length
        index = [remaining.index(char.downcase), remaining.index(char.upcase)].compact.min

        unless index
          return 0 unless fuzziness > 0.0 # abort on any mismatch
          fuzzies += (1 - fuzziness)
          target_index += 1
          next
        end

        absolute_index = consumed + index

        # exact case match scores higher than a case-insensitive match
        character_score = remaining[index] == char ? 0.2 : 0.1

        # consecutive bonus: the match directly follows the previous one
        if index == 0
          character_score += 0.6
          start_of_string_bonus = true if target_index == 0
        end

        # acronym bonus: the match follows a space. Guard against index 0,
        # where [-1] would wrap around to the last character of the string.
        if absolute_index > 0 && base_string[absolute_index - 1] == ' '
          character_score += 0.8
        end

        cumulative_score += character_score
        target_index += 1
        remaining = remaining[(index + 1)..-1]
      end

      matched_score = cumulative_score / base_length
      length_ratio = target.length.to_f / base_length
      with_long_string_bonus = ((matched_score * length_ratio) + matched_score) / 2
      final_score = with_long_string_bonus / fuzzies

      # The start-of-string bonus is capped so it never pushes the score past 1.0.
      final_score = [final_score + 0.15, 1.0].min if start_of_string_bonus

      if final_score > 1.0 || final_score < 0.0
        raise StringScore::InternalError, "Out of range score: '#{final_score}'"
      end

      final_score
    end

    # Runs the block, translating failures into the gem's error classes.
    #
    # @raise [StringScore::ArgumentError] when a value does not respond to #to_s
    # @raise [StringScore::InternalError] for any other StandardError
    def with_error_handling
      yield
    rescue StringScore::InternalError, StringScore::ArgumentError
      raise # allow nesting of #with_error_handling
    rescue NoMethodError => e
      # Prefer the missing method's name; the message regexp is a fallback.
      if e.name == :to_s || e.message =~ NON_STRING_ERROR_MESSAGE
        raise StringScore::ArgumentError, NON_STRING_MSG
      else
        raise StringScore::InternalError, "#{e.class}: #{e.message}"
      end
    rescue => e
      raise StringScore::InternalError, "#{e.class}: #{e.message}"
    end

  end

end
@@ -0,0 +1,119 @@
1
+ require 'string_score'
2
+
3
+ String.send(:include, StringScore) # mix #score into String so examples can call it on string instances
4
+
5
+ RSpec::Matchers.define :be_greater_than do |expected| # passes when actual > expected
6
+ match do |actual|
7
+ expected < actual
8
+ end
9
+ end
10
+
11
+ RSpec::Matchers.define :be_less_than do |expected| # passes when actual < expected
12
+ match do |actual|
13
+ expected > actual
14
+ end
15
+ end
16
+
17
+ RSpec::Matchers.define :be_about do |expected| # approximate equality for float scores
18
+ match do |actual|
19
+ expected.to_f.round(5) == actual.to_f.round(5) # equal when both agree after rounding to 5 decimal places
20
+ end
21
+ end
22
+
23
+ describe StringScore do
24
+
25
+ subject { StringScore.new('Hello World') }
26
+
27
+ it "provides a method directly on a string instance" do
28
+ "foobar".score('foo').should == StringScore.new("foobar").score('foo')
29
+ end
30
+
31
+ it "scores at 1 for exact match" do
32
+ subject.score('Hello World').should == 1
33
+ end
34
+
35
+ # probably rare, but the string may be mutated between calls, so handle it just in case
36
+ it "allows updates of string to match against" do
37
+ string_to_match = "Hello World"
38
+ string_to_match.score('Hello World').should == 1
39
+ string_to_match.gsub!(/\w/, 'X')
40
+ string_to_match.score('Hello World').should_not == 1
41
+ end
42
+
43
+ it "scores 0 for non-matches (character not in string)" do
44
+ subject.score("hellx").should == 0
45
+ subject.score("hello_world").should == 0
46
+ end
47
+
48
+ it "matches sequentially" do
49
+ subject.score('WH').should == 0
50
+ end
51
+
52
+ it "prefers same-case matches" do
53
+ subject.score('hello').should be_less_than(subject.score('Hello'))
54
+ end
55
+
56
+ it "scores higher on closers matchs" do
57
+ subject.score('H').should be_less_than(subject.score('He'))
58
+ end
59
+
60
+ it "will match despite wrong case" do
61
+ subject.score("hello").should be_greater_than(0)
62
+ end
63
+
64
+ it "scores progressively higher weighting on more matches" do
65
+ subject.score("e").should be_less_than(subject.score("h"))
66
+ subject.score("h").should be_less_than(subject.score("he"))
67
+ subject.score("hel").should be_less_than(subject.score("hell"))
68
+ subject.score("hell").should be_less_than(subject.score("hello"))
69
+ subject.score("hello").should be_less_than(subject.score("helloworld"))
70
+ subject.score("helloworl").should be_less_than(subject.score("hello worl"))
71
+ subject.score("hello worl").should be_less_than(subject.score("hello world"))
72
+ end
73
+
74
+ it "provides a consecutive letter bonus" do
75
+ subject.score('Hel').should be_greater_than(subject.score('Hld'))
76
+ end
77
+
78
+ it "gives an acronym bonus" do
79
+ subject.score('HW').should be_greater_than(subject.score('Ho'))
80
+ 'yet another Hello World'.score('yaHW').should be_greater_than('Hello World'.score('yet another'))
81
+ "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("Hill"))
82
+
83
+ # Disabled: the author suspects these pass only by mistake in the JS version -- TODO verify
84
+ # "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("hills"))
85
+ # "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("hillsd"))
86
+ # "Hillsdale Michigan".score("HiMi").should be_greater_than("Hillsdale Michigan".score("illsda"))
87
+ end
88
+
89
+ it "gives a bonus for matching the start of the string" do
90
+ "Hillsdale".score("hi").should be_greater_than("Chippewa".score("hi"))
91
+ "hello world".score("h").should be_greater_than("hello world".score("w"))
92
+ end
93
+
94
+ it "gives proper string weights" do
95
+ "Research Resources North".score('res').should be_greater_than("Mary Conces".score('res'))
96
+ "Research Resources North".score('res').should be_greater_than("Bonnie Strathern - Southwest Michigan Title Search".score('res'))
97
+ end
98
+
99
+ it "gives start of string bonuses" do
100
+ "Mary Large".score('mar').should be_greater_than("Large Mary".score('mar'))
101
+ "Silly Mary Large".score('mar').should be_about("Silly Large Mary".score('mar'))
102
+ end
103
+
104
+
105
+ it "can fuzzily match strings" do
106
+ subject.score('Hz').should == 0
107
+ subject.score('Hz', 0.5).should be_less_than(subject.score('H', 0.5))
108
+ end
109
+
110
+ it "should be tuned well" do
111
+ "hello world".score("hello worl", 0.5).should be_greater_than("hello world".score("hello wor1", 0.5))
112
+ 'Hello World'.score('jello',0.5).should be_greater_than(0)
113
+ end
114
+
115
+ it "should have varying degrees of fuziness" do
116
+ subject.score('Hz', 0.9).should be_greater_than(subject.score('Hz', 0.5))
117
+ end
118
+
119
+ end
@@ -0,0 +1,25 @@
# -*- encoding: utf-8 -*-
# Gem specification for string_score.

# Put this gem's lib/ directory on the load path so the version file resolves.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)
require "string_score/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "string_score"
  spec.version  = StringScore::VERSION
  spec.platform = Gem::Platform::RUBY

  # Authorship and descriptive metadata
  spec.authors     = ["Jim Lindley"]
  spec.email       = ["jim@jimlindley.com"]
  spec.homepage    = ""
  spec.summary     = "Score how close a string is to another string."
  spec.description = "Port of https://github.com/joshaven/string_score from js to ruby."

  spec.rubyforge_project = "string_score"

  # Packaged files are taken from whatever git tracks in the working tree.
  tracked_files = `git ls-files`
  tracked_tests = `git ls-files -- {test,spec,features}/*`
  tracked_bins  = `git ls-files -- bin/*`

  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  spec.executables   = tracked_bins.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency("rspec", '~>2.5.0')
end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: string_score
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Jim Lindley
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-03-15 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rspec
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 2.5.0
25
+ type: :development
26
+ version_requirements: *id001
27
+ description: Port of https://github.com/joshaven/string_score from js to ruby.
28
+ email:
29
+ - jim@jimlindley.com
30
+ executables: []
31
+
32
+ extensions: []
33
+
34
+ extra_rdoc_files: []
35
+
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - README.mdown
40
+ - Rakefile
41
+ - lib/string_score.rb
42
+ - lib/string_score/version.rb
43
+ - spec/string_score_spec.rb
44
+ - string_score.gemspec
45
+ has_rdoc: true
46
+ homepage: ""
47
+ licenses: []
48
+
49
+ post_install_message:
50
+ rdoc_options: []
51
+
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ requirements: []
67
+
68
+ rubyforge_project: string_score
69
+ rubygems_version: 1.6.0
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Score how close a string is to another string.
73
+ test_files:
74
+ - spec/string_score_spec.rb