fuzzy_string 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +52 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +22 -0
- data/README.md +54 -0
- data/Rakefile +8 -0
- data/fuzzy_string.gemspec +28 -0
- data/lib/fuzzy_string/adjusted_score.rb +21 -0
- data/lib/fuzzy_string/levenshtein.rb +19 -0
- data/lib/fuzzy_string/version.rb +3 -0
- data/lib/fuzzy_string.rb +9 -0
- data/test/fuzzy_string_test.rb +36 -0
- metadata +130 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a09b2a05f14d665be1a6d1443ea8e8ba798e9c59
|
4
|
+
data.tar.gz: cb100eb7b5fa588455476e238a94df4595a14070
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b129d6428e036ac065817ce60047b3d8e804cae6e5e8c23afdb4810cc43cff0427f3feebbcaa7ea089c31c08951aeda968b272dfb18af3309d206e6289f4231a
|
7
|
+
data.tar.gz: e88c3bc9cc796194c90e36193aeae75322b39e8e3abffdbd7b578899d75c1e997d9871083d07e38a595e1b8ec4d69361f74e7a9a8146b25b7c2209e70b8ddfb6
|
data/.gitignore
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# See https://help.github.com/articles/ignoring-files for more about ignoring files.
|
2
|
+
#
|
3
|
+
# If you find yourself ignoring temporary files generated by your text editor
|
4
|
+
# or operating system, you probably want to add a global ignore instead:
|
5
|
+
# git config --global core.excludesfile '~/.gitignore_global'
|
6
|
+
# Ignore all test uploads
|
7
|
+
/public/uploads/*
|
8
|
+
# Ignore bundler config.
|
9
|
+
/.bundle
|
10
|
+
|
11
|
+
# Ignore the default SQLite database.
|
12
|
+
/db/*.sqlite3
|
13
|
+
/db/*.sqlite3-journal
|
14
|
+
|
15
|
+
# Ignore all logfiles and tempfiles.
|
16
|
+
/log/*.log
|
17
|
+
/tmp
|
18
|
+
.transcon.log
|
19
|
+
|
20
|
+
.DS_Store
|
21
|
+
Thumbs.db
|
22
|
+
.*.swp
|
23
|
+
.*.swo
|
24
|
+
.bundle
|
25
|
+
*~
|
26
|
+
\#*
|
27
|
+
.\#*
|
28
|
+
.redcar
|
29
|
+
*.rbc
|
30
|
+
doc-pak
|
31
|
+
description-pak
|
32
|
+
/pkg/
|
33
|
+
|
34
|
+
*.sassc
|
35
|
+
.sass-cache
|
36
|
+
capybara-*.html
|
37
|
+
.rspec
|
38
|
+
/public/system
|
39
|
+
/coverage/
|
40
|
+
/spec/tmp
|
41
|
+
**.orig
|
42
|
+
rerun.txt
|
43
|
+
pickle-email-*.html
|
44
|
+
|
45
|
+
## Environment normalisation:
|
46
|
+
/vendor/bundle
|
47
|
+
|
48
|
+
# these should all be checked in to normalise the environment:
|
49
|
+
# Gemfile.lock, .ruby-version, .ruby-gemset
|
50
|
+
|
51
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
52
|
+
.rvmrc
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fuzzy_string (0.0.2)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ansi (1.5.0)
|
10
|
+
awesome_print (1.6.1)
|
11
|
+
builder (3.2.2)
|
12
|
+
minitest (5.6.1)
|
13
|
+
minitest-reporters (1.0.14)
|
14
|
+
ansi
|
15
|
+
builder
|
16
|
+
minitest (>= 5.0)
|
17
|
+
ruby-progressbar
|
18
|
+
rake (10.4.2)
|
19
|
+
ruby-progressbar (1.7.5)
|
20
|
+
|
21
|
+
PLATFORMS
|
22
|
+
ruby
|
23
|
+
|
24
|
+
DEPENDENCIES
|
25
|
+
awesome_print
|
26
|
+
bundler (~> 1.7)
|
27
|
+
fuzzy_string!
|
28
|
+
minitest
|
29
|
+
minitest-reporters (>= 1.0.1)
|
30
|
+
rake (~> 10.0)
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Chris Moody
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# FuzzyString
|
2
|
+
|
3
|
+
A ranking system for strings. The rank by itself is arbitrary and only has context when compared to other ranks.
|
4
|
+
The base score is the Levenshtein distance, which is modified by other basic matching criteria.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'fuzzy_string'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install fuzzy_string
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
To use as a ranking system (the 'rank' a string has based on another):
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
$ 'test case 1' ^ 'test case' #=> 2.5
|
28
|
+
$ 'test case 1' ^ 'test' #=> 8.75
|
29
|
+
$ 'tesla roadster' ^ 'test' #=> 18.5
|
30
|
+
```
|
31
|
+
|
32
|
+
To access the levenshtein distance:
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
$ 'Test case' - 'case' #=> 5
|
36
|
+
```
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
1. Fork it ( https://github.com/[my-github-username]/fuzzy_string/fork )
|
41
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
42
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
43
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
44
|
+
5. Create a new Pull Request
|
45
|
+
|
46
|
+
Author
|
47
|
+
-------
|
48
|
+
|
49
|
+
* Chris Moody
|
50
|
+
|
51
|
+
License
|
52
|
+
-------
|
53
|
+
|
54
|
+
This is free software released under the MIT License; see LICENSE.txt for the full terms.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'fuzzy_string/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "fuzzy_string"
|
8
|
+
spec.version = FuzzyString::VERSION
|
9
|
+
spec.authors = ["Chris Moody"]
|
10
|
+
spec.email = ["cmoody@transcon.com"]
|
11
|
+
spec.summary = %q{Relative ranking system for strings.}
|
12
|
+
spec.description = %q{A ranking system for strings. The rank by itself is arbitrary and only has context
|
13
|
+
when compared to other ranks. The base score is the levenschtein distance which is
|
14
|
+
modified by other basic matching criteria.}
|
15
|
+
spec.homepage = "https://github.com/transcon/fuzzy_string"
|
16
|
+
spec.license = "MIT"
|
17
|
+
|
18
|
+
spec.files = `git ls-files -z`.split("\x0")
|
19
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
20
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
21
|
+
spec.require_paths = ["lib"]
|
22
|
+
|
23
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
24
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
25
|
+
spec.add_development_dependency 'minitest'
|
26
|
+
spec.add_development_dependency 'awesome_print'
|
27
|
+
spec.add_development_dependency 'minitest-reporters', '>= 1.0.1'
|
28
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class FuzzyString::AdjustedScore
|
2
|
+
def self.rank(first,second) new(first,second).rank end
|
3
|
+
def initialize(first,second)
|
4
|
+
@first = first
|
5
|
+
@second = second
|
6
|
+
end
|
7
|
+
def rank
|
8
|
+
return 0 if (@first == @second)
|
9
|
+
return @second.length if (@first.length == 0)
|
10
|
+
return @first.length if (@second.length == 0)
|
11
|
+
adjusted_levenschtein_distance
|
12
|
+
end
|
13
|
+
def adjusted_levenschtein_distance
|
14
|
+
pieces = @first.split(/#{@second.chars.to_a.join('(.*?)')}/i)
|
15
|
+
score = pieces[0][-1] == ' ' ? -1 : 0
|
16
|
+
score += cost(pieces.shift,0.5) + cost(pieces.pop,0.25) + cost(pieces.join,1)
|
17
|
+
score += FuzzyString::Levenshtein.distance(@first,@second)
|
18
|
+
end
|
19
|
+
private
|
20
|
+
def cost(piece,multiplier) piece.length * multiplier rescue(0) end
|
21
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class FuzzyString::Levenshtein
|
2
|
+
def self.distance(first,second) new(first,second).distance end
|
3
|
+
def initialize(first,second)
|
4
|
+
@first = first
|
5
|
+
@second = second
|
6
|
+
end
|
7
|
+
def distance
|
8
|
+
v0 = (0..@second.length).map{|a| a}
|
9
|
+
@first.length.times {|i| v0 = new_row(v0,i) }
|
10
|
+
return v0[@second.length]
|
11
|
+
end
|
12
|
+
private
|
13
|
+
def new_row(old,i)
|
14
|
+
row = [i + 1]
|
15
|
+
@second.length.times {|j| row[j + 1] = [row[j] + 1, old[j + 1] + 1, old[j] + cost(old,i,j)].min}
|
16
|
+
return row
|
17
|
+
end
|
18
|
+
def cost(old,i,j) cost = @first[i] == @second[j] ? 0 : @first[i].downcase == @second[j].downcase ? 0.75 : 1 end
|
19
|
+
end
|
data/lib/fuzzy_string.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
require "fuzzy_string/version"
|
2
|
+
require "fuzzy_string/levenshtein"
|
3
|
+
require "fuzzy_string/adjusted_score"
|
4
|
+
|
5
|
+
module FuzzyString
|
6
|
+
def ^(other) AdjustedScore.rank(self,other) end
|
7
|
+
def -(other) Levenshtein.distance(self,other) end
|
8
|
+
end
|
9
|
+
String.include(FuzzyString)
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'fuzzy_string'
|
3
|
+
require 'minitest/reporters'
|
4
|
+
Minitest::Reporters.use! Minitest::Reporters::SpecReporter.new
|
5
|
+
|
6
|
+
class FuzzyStringTest < MiniTest::Unit::TestCase
|
7
|
+
|
8
|
+
def test_levenshtein_distance_calculates_known_quantities
|
9
|
+
assert_equal 3, 'kitten' - 'sitting'
|
10
|
+
assert_equal 3, 'Saturday' - 'Sunday'
|
11
|
+
assert_equal 3, 'abc' - ''
|
12
|
+
assert_equal 0, 'string' - 'string'
|
13
|
+
assert_equal 2, 'Gumbo' - 'Gambol'
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_penalizes_gaps_between_string
|
17
|
+
assert 'ZZmonaZ'^'mona' < 'mZoZnZa'^'mona'
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_penalizes_distance_from_beginning
|
21
|
+
assert 'ZmonaZ'^'mona' < 'ZZmona'^'mona'
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_penalizes_excess_tailing_characters
|
25
|
+
assert 'ZZmonaZ'^'mona' < 'ZZmonaZZ' ^ 'mona'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_penalizes_string_not_being_the_start_of_a_word
|
29
|
+
assert 'ZZ mona' ^ 'mona' < 'Z Zmona' ^ 'mona'
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_penalizes_unmatched_case
|
33
|
+
assert 'ZZmonaZ'^'mona' < 'ZZMonaZ'^'mona'
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
metadata
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fuzzy_string
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Moody
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: awesome_print
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: minitest-reporters
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.0.1
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.0.1
|
83
|
+
description: |-
|
84
|
+
A ranking system for strings. The rank by itself is arbitrary and only has context
|
85
|
+
when compared to other ranks. The base score is the levenschtein distance which is
|
86
|
+
modified by other basic matching criteria.
|
87
|
+
email:
|
88
|
+
- cmoody@transcon.com
|
89
|
+
executables: []
|
90
|
+
extensions: []
|
91
|
+
extra_rdoc_files: []
|
92
|
+
files:
|
93
|
+
- ".gitignore"
|
94
|
+
- Gemfile
|
95
|
+
- Gemfile.lock
|
96
|
+
- LICENSE.txt
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- fuzzy_string.gemspec
|
100
|
+
- lib/fuzzy_string.rb
|
101
|
+
- lib/fuzzy_string/adjusted_score.rb
|
102
|
+
- lib/fuzzy_string/levenshtein.rb
|
103
|
+
- lib/fuzzy_string/version.rb
|
104
|
+
- test/fuzzy_string_test.rb
|
105
|
+
homepage: https://github.com/transcon/fuzzy_string
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
metadata: {}
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 2.4.3
|
126
|
+
signing_key:
|
127
|
+
specification_version: 4
|
128
|
+
summary: Relative ranking system for strings.
|
129
|
+
test_files:
|
130
|
+
- test/fuzzy_string_test.rb
|