fuzzy_string 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a09b2a05f14d665be1a6d1443ea8e8ba798e9c59
4
+ data.tar.gz: cb100eb7b5fa588455476e238a94df4595a14070
5
+ SHA512:
6
+ metadata.gz: b129d6428e036ac065817ce60047b3d8e804cae6e5e8c23afdb4810cc43cff0427f3feebbcaa7ea089c31c08951aeda968b272dfb18af3309d206e6289f4231a
7
+ data.tar.gz: e88c3bc9cc796194c90e36193aeae75322b39e8e3abffdbd7b578899d75c1e997d9871083d07e38a595e1b8ec4d69361f74e7a9a8146b25b7c2209e70b8ddfb6
data/.gitignore ADDED
@@ -0,0 +1,52 @@
1
+ # See https://help.github.com/articles/ignoring-files for more about ignoring files.
2
+ #
3
+ # If you find yourself ignoring temporary files generated by your text editor
4
+ # or operating system, you probably want to add a global ignore instead:
5
+ # git config --global core.excludesfile '~/.gitignore_global'
6
+ # Ignore all test uploads
7
+ /public/uploads/*
8
+ # Ignore bundler config.
9
+ /.bundle
10
+
11
+ # Ignore the default SQLite database.
12
+ /db/*.sqlite3
13
+ /db/*.sqlite3-journal
14
+
15
+ # Ignore all logfiles and tempfiles.
16
+ /log/*.log
17
+ /tmp
18
+ .transcon.log
19
+
20
+ .DS_Store
21
+ Thumbs.db
22
+ .*.swp
23
+ .*.swo
24
+ .bundle
25
+ *~
26
+ \#*
27
+ .\#*
28
+ .redcar
29
+ *.rbc
30
+ doc-pak
31
+ description-pak
32
+ /pkg/
33
+
34
+ *.sassc
35
+ .sass-cache
36
+ capybara-*.html
37
+ .rspec
38
+ /public/system
39
+ /coverage/
40
+ /spec/tmp
41
+ **.orig
42
+ rerun.txt
43
+ pickle-email-*.html
44
+
45
+ ## Environment normalisation:
46
+ /vendor/bundle
47
+
48
+ # these should all be checked in to normalise the environment:
49
+ # Gemfile.lock, .ruby-version, .ruby-gemset
50
+
51
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
52
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fuzzy_string.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fuzzy_string (0.0.2)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ansi (1.5.0)
10
+ awesome_print (1.6.1)
11
+ builder (3.2.2)
12
+ minitest (5.6.1)
13
+ minitest-reporters (1.0.14)
14
+ ansi
15
+ builder
16
+ minitest (>= 5.0)
17
+ ruby-progressbar
18
+ rake (10.4.2)
19
+ ruby-progressbar (1.7.5)
20
+
21
+ PLATFORMS
22
+ ruby
23
+
24
+ DEPENDENCIES
25
+ awesome_print
26
+ bundler (~> 1.7)
27
+ fuzzy_string!
28
+ minitest
29
+ minitest-reporters (>= 1.0.1)
30
+ rake (~> 10.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Chris Moody
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # FuzzyString
2
+
3
+ A ranking system for strings. The rank by itself is arbitrary and only has context when compared to other ranks.
4
+ The base score is the Levenshtein distance, which is then adjusted by other basic matching criteria.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'fuzzy_string'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install fuzzy_string
21
+
22
+ ## Usage
23
+
24
+ To use as a ranking system (the 'rank' a string has based on another):
25
+
26
+ ```ruby
27
+ 'test case 1' ^ 'test case' #=> 2.5
28
+ 'test case 1' ^ 'test' #=> 8.75
29
+ 'tesla roadster' ^ 'test' #=> 18.5
30
+ ```
31
+
32
+ To access the levenshtein distance:
33
+
34
+ ```ruby
35
+ 'Test case' - 'case' #=> 5
36
+ ```
37
+
38
+ ## Contributing
39
+
40
+ 1. Fork it ( https://github.com/transcon/fuzzy_string/fork )
41
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
42
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
43
+ 4. Push to the branch (`git push origin my-new-feature`)
44
+ 5. Create a new Pull Request
45
+
46
+ Author
47
+ -------
48
+
49
+ * Chris Moody
50
+
51
+ License
52
+ -------
53
+
54
+ This is free software released under the MIT License. See LICENSE.txt for the full text.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
# Registers the test task. The `test` directory is added to the load path and
# every file matching test/*_test.rb is picked up as a test file.
Rake::TestTask.new do |test_task|
  test_task.libs.push('test')
  test_task.pattern = "test/*_test.rb"
end
8
+
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fuzzy_string/version'
5
+
6
# Gem specification for fuzzy_string.
#
# The version is read from FuzzyString::VERSION, and the packaged file list is
# taken from the files tracked by git in the current checkout.
Gem::Specification.new do |spec|
  spec.name          = "fuzzy_string"
  spec.version       = FuzzyString::VERSION
  spec.authors       = ["Chris Moody"]
  spec.email         = ["cmoody@transcon.com"]
  spec.summary       = %q{Relative ranking system for strings.}
  # Built from adjacent literals so the published description is one clean
  # paragraph, with no line breaks inherited from the source layout.
  spec.description   = "A ranking system for strings. The rank by itself is arbitrary and only has context " \
                       "when compared to other ranks. The base score is the Levenshtein distance which is " \
                       "modified by other basic matching criteria."
  spec.homepage      = "https://github.com/transcon/fuzzy_string"
  spec.license       = "MIT"

  # NUL-separated output keeps file names containing whitespace intact.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency 'minitest'
  spec.add_development_dependency 'awesome_print'
  spec.add_development_dependency 'minitest-reporters', '>= 1.0.1'
end
@@ -0,0 +1,21 @@
1
module FuzzyString
  # Ranks how closely a candidate string (+first+) matches a query (+second+).
  #
  # The rank is the case-aware Levenshtein distance plus positional
  # adjustments. A rank is only meaningful relative to other ranks; lower
  # means a closer match.
  class AdjustedScore
    # Convenience wrapper around +new(first, second).rank+.
    #
    # @param first [String] the candidate string being ranked
    # @param second [String] the query it is ranked against
    # @return [Numeric] the rank (lower is closer)
    def self.rank(first, second)
      new(first, second).rank
    end

    # @param first [String] the candidate string being ranked
    # @param second [String] the query it is ranked against
    def initialize(first, second)
      @first = first
      @second = second
    end

    # @return [Numeric] 0 for identical strings, the other string's length
    #   when either side is empty, otherwise the adjusted distance
    def rank
      return 0 if @first == @second
      return @second.length if @first.empty?
      return @first.length if @second.empty?

      adjusted_levenschtein_distance
    end

    # Levenshtein distance plus positional penalties, derived from splitting
    # the candidate around a case-insensitive, in-order match of the query:
    #
    # * text before the match costs 0.5 per character
    # * text after the match costs 0.25 per character
    # * text between matched characters costs 1 per character
    # * a match preceded by a space (start of a word) earns a bonus of -1
    #
    # The misspelled method name is kept because it is public.
    #
    # NOTE(review): String#split drops trailing empty pieces, so when the match
    # ends exactly at the end of the candidate the last gap is charged at the
    # 0.25 "trailing" rate. Left unchanged to keep existing ranks stable.
    #
    # @return [Numeric]
    def adjusted_levenschtein_distance
      pieces = @first.split(subsequence_pattern)
      # pieces is empty when the whole candidate matches with no gaps, e.g. a
      # case-only difference, so the leading piece must be nil-safe.
      score = pieces.first.to_s.end_with?(' ') ? -1 : 0
      leading = pieces.shift
      trailing = pieces.pop
      score += cost(leading, 0.5) + cost(trailing, 0.25) + cost(pieces.join, 1)
      score + FuzzyString::Levenshtein.distance(@first, @second)
    end

    private

    # Regexp matching the query's characters in order, capturing whatever lies
    # between consecutive characters. Each character is escaped so that regexp
    # metacharacters in the query are matched literally.
    def subsequence_pattern
      escaped = @second.chars.map { |char| Regexp.escape(char) }
      Regexp.new(escaped.join('(.*?)'), Regexp::IGNORECASE)
    end

    # Cost of an unmatched piece; a missing (nil) piece costs nothing.
    def cost(piece, multiplier)
      piece.to_s.length * multiplier
    end
  end
end
@@ -0,0 +1,19 @@
1
module FuzzyString
  # Case-aware Levenshtein edit distance between two strings.
  #
  # Insertions, deletions and substitutions cost 1. Substituting a character
  # for the same letter in a different case costs only 0.75, so the result is
  # a Float whenever such a substitution is on the cheapest path.
  class Levenshtein
    # Cost of substituting a character for its other-case counterpart.
    CASE_ONLY_COST = 0.75

    # Convenience wrapper around +new(first, second).distance+.
    #
    # @param first [String]
    # @param second [String]
    # @return [Numeric] the edit distance
    def self.distance(first, second)
      new(first, second).distance
    end

    # @param first [String]
    # @param second [String]
    def initialize(first, second)
      @first = first
      @second = second
    end

    # Computes the distance row by row, keeping only the previous row.
    #
    # @return [Numeric] the edit distance from +first+ to +second+
    def distance
      # Split once up front rather than indexing the strings inside the loop.
      source = @first.chars
      target = @second.chars

      row = (0..target.length).to_a
      source.each_with_index do |source_char, index|
        row = next_row(row, index, source_char, target)
      end
      row.last
    end

    private

    # Builds the row for the source character at +index+ from the previous row.
    def next_row(previous, index, source_char, target)
      row = [index + 1]
      target.each_with_index do |target_char, j|
        row << [
          row[j] + 1,                                                # insertion
          previous[j + 1] + 1,                                       # deletion
          previous[j] + substitution_cost(source_char, target_char)  # substitution
        ].min
      end
      row
    end

    # 0 for identical characters, CASE_ONLY_COST when they differ only by
    # case, 1 otherwise.
    def substitution_cost(source_char, target_char)
      return 0 if source_char == target_char

      source_char.downcase == target_char.downcase ? CASE_ONLY_COST : 1
    end
  end
end
@@ -0,0 +1,3 @@
1
module FuzzyString
  # Gem version string, read by the gemspec. Frozen so it cannot be mutated
  # in place at runtime.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,9 @@
1
+ require "fuzzy_string/version"
2
+ require "fuzzy_string/levenshtein"
3
+ require "fuzzy_string/adjusted_score"
4
+
5
# Operator mixin providing fuzzy comparisons between strings.
module FuzzyString
  # Adjusted rank of the receiver against +other+, as computed by
  # AdjustedScore.rank.
  def ^(other)
    FuzzyString::AdjustedScore.rank(self, other)
  end

  # Distance between the receiver and +other+, as computed by
  # Levenshtein.distance.
  def -(other)
    FuzzyString::Levenshtein.distance(self, other)
  end
end

# Make both operators available on every String.
class String
  include FuzzyString
end
@@ -0,0 +1,36 @@
1
+ require 'minitest/autorun'
2
+ require 'fuzzy_string'
3
+ require 'minitest/reporters'
4
+ Minitest::Reporters.use! Minitest::Reporters::SpecReporter.new
5
+
6
# Behavioural tests for the String#- (distance) and String#^ (rank) operators.
#
# Inherits from Minitest::Test, the Minitest 5 base class, rather than the
# deprecated MiniTest::Unit::TestCase compatibility shim.
class FuzzyStringTest < Minitest::Test
  def test_levenshtein_distance_calculates_known_quantities
    assert_equal 3, 'kitten' - 'sitting'
    assert_equal 3, 'Saturday' - 'Sunday'
    assert_equal 3, 'abc' - ''
    assert_equal 0, 'string' - 'string'
    assert_equal 2, 'Gumbo' - 'Gambol'
  end

  # The ranking tests compare two ranks; assert_operator reports both values
  # on failure instead of a bare "expected truthy".
  def test_penalizes_gaps_between_string
    assert_operator('ZZmonaZ' ^ 'mona', :<, 'mZoZnZa' ^ 'mona')
  end

  def test_penalizes_distance_from_beginning
    assert_operator('ZmonaZ' ^ 'mona', :<, 'ZZmona' ^ 'mona')
  end

  def test_penalizes_excess_tailing_characters
    assert_operator('ZZmonaZ' ^ 'mona', :<, 'ZZmonaZZ' ^ 'mona')
  end

  def test_penalizes_string_not_being_the_start_of_a_word
    assert_operator('ZZ mona' ^ 'mona', :<, 'Z Zmona' ^ 'mona')
  end

  def test_penalizes_unmatched_case
    assert_operator('ZZmonaZ' ^ 'mona', :<, 'ZZMonaZ' ^ 'mona')
  end
end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy_string
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Chris Moody
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: awesome_print
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest-reporters
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: 1.0.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.1
83
+ description: |-
84
+ A ranking system for strings. The rank by itself is arbitrary and only has context
85
+ when compared to other ranks. The base score is the levenschtein distance which is
86
+ modified by other basic matching criteria.
87
+ email:
88
+ - cmoody@transcon.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - Gemfile
95
+ - Gemfile.lock
96
+ - LICENSE.txt
97
+ - README.md
98
+ - Rakefile
99
+ - fuzzy_string.gemspec
100
+ - lib/fuzzy_string.rb
101
+ - lib/fuzzy_string/adjusted_score.rb
102
+ - lib/fuzzy_string/levenshtein.rb
103
+ - lib/fuzzy_string/version.rb
104
+ - test/fuzzy_string_test.rb
105
+ homepage: https://github.com/transcon/fuzzy_string
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.4.3
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Relative ranking system for strings.
129
+ test_files:
130
+ - test/fuzzy_string_test.rb