fuzzy_string 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a09b2a05f14d665be1a6d1443ea8e8ba798e9c59
4
+ data.tar.gz: cb100eb7b5fa588455476e238a94df4595a14070
5
+ SHA512:
6
+ metadata.gz: b129d6428e036ac065817ce60047b3d8e804cae6e5e8c23afdb4810cc43cff0427f3feebbcaa7ea089c31c08951aeda968b272dfb18af3309d206e6289f4231a
7
+ data.tar.gz: e88c3bc9cc796194c90e36193aeae75322b39e8e3abffdbd7b578899d75c1e997d9871083d07e38a595e1b8ec4d69361f74e7a9a8146b25b7c2209e70b8ddfb6
data/.gitignore ADDED
@@ -0,0 +1,52 @@
1
+ # See https://help.github.com/articles/ignoring-files for more about ignoring files.
2
+ #
3
+ # If you find yourself ignoring temporary files generated by your text editor
4
+ # or operating system, you probably want to add a global ignore instead:
5
+ # git config --global core.excludesfile '~/.gitignore_global'
6
+ # Ignore all test uploads
7
+ /public/uploads/*
8
+ # Ignore bundler config.
9
+ /.bundle
10
+
11
+ # Ignore the default SQLite database.
12
+ /db/*.sqlite3
13
+ /db/*.sqlite3-journal
14
+
15
+ # Ignore all logfiles and tempfiles.
16
+ /log/*.log
17
+ /tmp
18
+ .transcon.log
19
+
20
+ .DS_Store
21
+ Thumbs.db
22
+ .*.swp
23
+ .*.swo
24
+ .bundle
25
+ *~
26
+ \#*
27
+ .\#*
28
+ .redcar
29
+ *.rbc
30
+ doc-pak
31
+ description-pak
32
+ /pkg/
33
+
34
+ *.sassc
35
+ .sass-cache
36
+ capybara-*.html
37
+ .rspec
38
+ /public/system
39
+ /coverage/
40
+ /spec/tmp
41
+ **.orig
42
+ rerun.txt
43
+ pickle-email-*.html
44
+
45
+ ## Environment normalisation:
46
+ /vendor/bundle
47
+
48
+ # these should all be checked in to normalise the environment:
49
+ # Gemfile.lock, .ruby-version, .ruby-gemset
50
+
51
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
52
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fuzzy_string.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fuzzy_string (0.0.2)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ansi (1.5.0)
10
+ awesome_print (1.6.1)
11
+ builder (3.2.2)
12
+ minitest (5.6.1)
13
+ minitest-reporters (1.0.14)
14
+ ansi
15
+ builder
16
+ minitest (>= 5.0)
17
+ ruby-progressbar
18
+ rake (10.4.2)
19
+ ruby-progressbar (1.7.5)
20
+
21
+ PLATFORMS
22
+ ruby
23
+
24
+ DEPENDENCIES
25
+ awesome_print
26
+ bundler (~> 1.7)
27
+ fuzzy_string!
28
+ minitest
29
+ minitest-reporters (>= 1.0.1)
30
+ rake (~> 10.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Chris Moody
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # FuzzyString
2
+
3
+ A ranking system for strings. The rank by itself is arbitrary and only has context when compared to other ranks.
4
+ The base score is the Levenshtein distance, which is modified by other basic matching criteria.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'fuzzy_string'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install fuzzy_string
21
+
22
+ ## Usage
23
+
24
+ To use as a ranking system (the 'rank' a string has based on another):
25
+
26
+ ```ruby
27
+ $ 'test case 1' ^ 'test case' #=> 2.5
28
+ $ 'test case 1' ^ 'test' #=> 8.75
29
+ $ 'tesla roadster' ^ 'test' #=> 18.5
30
+ ```
31
+
32
+ To access the Levenshtein distance:
33
+
34
+ ```ruby
35
+ $ 'Test case' - 'case' #=> 5
36
+ ```
37
+
38
+ ## Contributing
39
+
40
+ 1. Fork it ( https://github.com/transcon/fuzzy_string/fork )
41
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
42
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
43
+ 4. Push to the branch (`git push origin my-new-feature`)
44
+ 5. Create a new Pull Request
45
+
46
+ Author
47
+ -------
48
+
49
+ * Chris Moody
50
+
51
+ License
52
+ -------
53
+
54
+ This is free software released under the MIT License; see LICENSE.txt for the full text.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
# Register a Rake test task: put the test/ directory on the load path and
# pick up every file in it whose name ends in _test.rb.
Rake::TestTask.new do |task|
  task.libs.push('test')
  task.pattern = "test/*_test.rb"
end
8
+
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fuzzy_string/version'
5
+
6
# Package definition for the fuzzy_string gem.
Gem::Specification.new do |spec|
  spec.name          = "fuzzy_string"
  spec.version       = FuzzyString::VERSION
  spec.authors       = ["Chris Moody"]
  spec.email         = ["cmoody@transcon.com"]
  spec.summary       = %q{Relative ranking system for strings.}
  # Adjacent literals keep the three-line layout without pulling source
  # indentation into the published description.
  spec.description   = "A ranking system for strings. The rank by itself is arbitrary and only has context\n" \
                       "when compared to other ranks. The base score is the Levenshtein distance which is\n" \
                       "modified by other basic matching criteria."
  spec.homepage      = "https://github.com/transcon/fuzzy_string"
  spec.license       = "MIT"

  # Ship exactly what git tracks; entries are NUL-separated because of -z.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency 'minitest'
  spec.add_development_dependency 'awesome_print'
  spec.add_development_dependency 'minitest-reporters', '>= 1.0.1'
end
@@ -0,0 +1,21 @@
1
module FuzzyString
  # Scores how well a query string (+second+) matches a candidate (+first+).
  # Lower is better; the value is only meaningful relative to other scores.
  #
  # The score is the Levenshtein distance between the two strings plus
  # penalties for the candidate text that surrounds an in-order,
  # case-insensitive match of the query's characters:
  #
  # * text before the match costs 0.5 per character,
  # * text after the match costs 0.25 per character,
  # * text between matched characters costs 1 per character,
  # * a bonus of -1 applies when the text before the match ends in a space.
  class AdjustedScore
    # Convenience wrapper: builds an instance and returns its rank.
    def self.rank(first, second)
      new(first, second).rank
    end

    # first  - the candidate string being scored.
    # second - the query string it is scored against.
    def initialize(first, second)
      @first = first
      @second = second
    end

    # Returns 0 for identical strings, the other string's length when either
    # side is empty, and the adjusted distance otherwise.
    def rank
      return 0 if @first == @second
      return @second.length if @first.empty?
      return @first.length if @second.empty?

      adjusted_levenschtein_distance
    end

    # Returns the penalty-adjusted distance described on the class.
    # (The method name keeps its historical spelling for compatibility.)
    def adjusted_levenschtein_distance
      pieces = @first.split(subsequence_pattern)

      # split drops trailing empty fields, so a candidate that is matched in
      # full (e.g. "Mona" against "mona") yields []; to_s turns the resulting
      # nils into empty strings instead of raising.
      lead = pieces.shift.to_s
      tail = pieces.pop.to_s
      gaps = pieces.join

      score = lead.end_with?(' ') ? -1 : 0
      score += lead.length * 0.5 + tail.length * 0.25 + gaps.length
      score + Levenshtein.distance(@first, @second)
    end

    private

    # Case-insensitive regex matching the query's characters in order, with a
    # lazy capture between each pair. Every character is escaped so regex
    # metacharacters in the query are matched literally.
    def subsequence_pattern
      source = @second.chars.map { |char| Regexp.escape(char) }.join('(.*?)')
      Regexp.new(source, Regexp::IGNORECASE)
    end
  end
end
@@ -0,0 +1,19 @@
1
module FuzzyString
  # Case-aware Levenshtein edit distance between two strings.
  #
  # Insertions and deletions cost 1. A substitution costs 0 for identical
  # characters, CASE_MISMATCH_COST for characters that differ only by case,
  # and 1 otherwise, so the result can be a Float.
  class Levenshtein
    # Cost of substituting a character for its other-case counterpart.
    CASE_MISMATCH_COST = 0.75

    # Convenience wrapper: builds an instance and returns its distance.
    def self.distance(first, second)
      new(first, second).distance
    end

    def initialize(first, second)
      @first = first
      @second = second
    end

    # Returns the edit distance from @first to @second, keeping only one row
    # of the dynamic-programming table alive at a time.
    def distance
      row = (0..@second.length).to_a
      @first.length.times { |i| row = new_row(row, i) }
      row[@second.length]
    end

    private

    # Builds the table row for @first[i] from the previous row +old+.
    def new_row(old, i)
      row = [i + 1]
      @second.length.times do |j|
        insertion    = row[j] + 1
        deletion     = old[j + 1] + 1
        substitution = old[j] + substitution_cost(i, j)
        row[j + 1] = [insertion, deletion, substitution].min
      end
      row
    end

    # Cost of replacing @first[i] with @second[j].
    def substitution_cost(i, j)
      from = @first[i]
      to = @second[j]
      return 0 if from == to

      from.downcase == to.downcase ? CASE_MISMATCH_COST : 1
    end
  end
end
@@ -0,0 +1,3 @@
1
module FuzzyString
  # Release version of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,9 @@
1
+ require "fuzzy_string/version"
2
+ require "fuzzy_string/levenshtein"
3
+ require "fuzzy_string/adjusted_score"
4
+
5
# Mixin providing two fuzzy-comparison operators to whatever includes it.
module FuzzyString
  # Rank of +other+ against the receiver, delegated to AdjustedScore.rank.
  def ^(other)
    AdjustedScore.rank(self, other)
  end

  # Distance between the receiver and +other+, delegated to
  # Levenshtein.distance.
  def -(other)
    Levenshtein.distance(self, other)
  end
end

# Expose both operators on every String.
String.include(FuzzyString)
@@ -0,0 +1,36 @@
1
+ require 'minitest/autorun'
2
+ require 'fuzzy_string'
3
+ require 'minitest/reporters'
4
+ Minitest::Reporters.use! Minitest::Reporters::SpecReporter.new
5
+
6
# Exercises the String#- (Levenshtein distance) and String#^ (adjusted rank)
# operators. Inherits from Minitest::Test, the current base class, rather
# than the legacy MiniTest::Unit::TestCase compatibility alias.
class FuzzyStringTest < Minitest::Test
  def test_levenshtein_distance_calculates_known_quantities
    assert_equal 3, 'kitten' - 'sitting'
    assert_equal 3, 'Saturday' - 'Sunday'
    assert_equal 3, 'abc' - ''
    assert_equal 0, 'string' - 'string'
    assert_equal 2, 'Gumbo' - 'Gambol'
  end

  # The ranking tests only compare ranks with each other; assert_operator
  # reports both operands on failure instead of a bare "expected truthy".

  def test_penalizes_gaps_between_string
    assert_operator 'ZZmonaZ' ^ 'mona', :<, 'mZoZnZa' ^ 'mona'
  end

  def test_penalizes_distance_from_beginning
    assert_operator 'ZmonaZ' ^ 'mona', :<, 'ZZmona' ^ 'mona'
  end

  def test_penalizes_excess_tailing_characters
    assert_operator 'ZZmonaZ' ^ 'mona', :<, 'ZZmonaZZ' ^ 'mona'
  end

  def test_penalizes_string_not_being_the_start_of_a_word
    assert_operator 'ZZ mona' ^ 'mona', :<, 'Z Zmona' ^ 'mona'
  end

  def test_penalizes_unmatched_case
    assert_operator 'ZZmonaZ' ^ 'mona', :<, 'ZZMonaZ' ^ 'mona'
  end
end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy_string
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Chris Moody
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: awesome_print
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest-reporters
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: 1.0.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.1
83
+ description: |-
84
+ A ranking system for strings. The rank by itself is arbitrary and only has context
85
+ when compared to other ranks. The base score is the levenschtein distance which is
86
+ modified by other basic matching criteria.
87
+ email:
88
+ - cmoody@transcon.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - Gemfile
95
+ - Gemfile.lock
96
+ - LICENSE.txt
97
+ - README.md
98
+ - Rakefile
99
+ - fuzzy_string.gemspec
100
+ - lib/fuzzy_string.rb
101
+ - lib/fuzzy_string/adjusted_score.rb
102
+ - lib/fuzzy_string/levenshtein.rb
103
+ - lib/fuzzy_string/version.rb
104
+ - test/fuzzy_string_test.rb
105
+ homepage: https://github.com/transcon/fuzzy_string
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.4.3
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Relative ranking system for strings.
129
+ test_files:
130
+ - test/fuzzy_string_test.rb