string-similarity 1.1.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd97efdcd76434ae400e6382b55e54d44ce003d8
4
- data.tar.gz: 1ca9a5eb0075b86d30afd03669226425161d510f
3
+ metadata.gz: 308c3664b419f777c0492b103cb9901e108455c0
4
+ data.tar.gz: 5e2af01712dc0a08c37b8dd4cbc1f60a5883cb38
5
5
  SHA512:
6
- metadata.gz: d904adf7b09fc53dadee2e47f3111f4bcce52d4cabfad7c1b7fee542d9af5c65298cbf7fa1be95d729dfb2672b21994c6f7e28ad0498a5f147311f7dc3e2418f
7
- data.tar.gz: d849740e7fb49897439baf66f06f3cafd4677f01c6530bb212bb9b406c5c8c974716b3af562fb72c8bcffeaa1602a4d880bed35b5f5289381e01dd57200b313c
6
+ metadata.gz: a739214fa67e112e179e9b744e9e8afa4c728d963a0d9ef70bbe3cbbe8abdc8485eef391670963ed050ed723e849ce59bddd672c543c8b12a8c330544800f09e
7
+ data.tar.gz: f6c7b317034c2b9c324cdbda88e33fac62e592663676735168017ef3c8ab23f247fe21a4e3289f202271bcbcb639f50c917cddcd47a26cee7e4ec68177235596
data/.travis.yml CHANGED
@@ -1,6 +1,10 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.0.0-p647
4
- - 2.1.7
5
- - 2.2.3
3
+ - ruby-head
4
+ - 2.3.0
5
+ - 2.2.4
6
+ - 2.1.8
7
+ matrix:
8
+ allow_failures:
9
+ - rvm: ruby-head
6
10
  before_install: gem install bundler -v 1.10.6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ **2.0.0** (2016-02-19)
2
+
3
+ * removed: core extensions on `String`
4
+ * added: refinements for `String` (see README!)
5
+
6
+
1
7
  **1.1.1** (2016-02-19)
2
8
 
3
- * added: `require 'string-similarity'` now works aswell.
9
+ * added: `require 'string-similarity'` now works as well.
data/README.md CHANGED
@@ -43,26 +43,39 @@ String::Similarity.cosine 'mine', 'thyne'
43
43
  String::Similarity.cosine 'foo', 'foo'
44
44
  # => 1.0
45
45
 
46
- # or call on a string directly
47
- 'string'.cosine_similarity_to 'strong'
48
- # => 0.8333333333333335
49
-
50
46
 
51
47
  # Same for Levenshtein:
52
48
  String::Similarity.levenshtein_distance('kitten', 'sitting') # or ...
53
- 'kitten'.levenshtein_distance_to('sitting')
54
49
  # => 3
55
50
  String::Similarity.levenshtein('foo', 'far') # or ...
51
+ # => 0.5
52
+ ```
53
+
54
+ If you want, you can use [Refinements](http://ruby-doc.org/core-2.3.0/doc/syntax/refinements_rdoc.html) to add the functionality to the `String` class:
55
+
56
+ ```ruby
57
+ using String::SimilarityRefinements
58
+
59
+ 'string'.cosine_similarity_to 'strong'
60
+ # => 0.8333333333333335
61
+
62
+ 'kitten'.levenshtein_distance_to('sitting')
63
+ # => 3
64
+
56
65
  'far'.levenshtein_similarity_to('foo')
57
66
  # => 0.5
58
67
  ```
59
68
 
69
+ (See this free [Ruby Tapas episode](http://www.rubytapas.com/episodes/250-Refinements) if you are not familiar with Refinements.)
70
+
60
71
  ## Development
61
72
 
62
73
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
63
74
 
64
75
  To install this gem onto your local machine, run `bundle exec rake install`.
65
76
 
77
+ This project uses [Semantic Versioning](http://semver.org/).
78
+
66
79
  ## Contributing
67
80
 
68
81
  1. Fork it ( https://github.com/mhutter/string-similarity/fork )
@@ -1,142 +1,122 @@
1
1
  require 'string/similarity/version'
2
-
3
- # For convenience, String is extended by a couple of helper methods
4
- class String
5
- # Returns the cosine similarity to +other+
6
- # @see String::Similarity#cosine
7
- def cosine_similarity_to(other)
8
- String::Similarity.cosine(self, other)
9
- end
10
-
11
- # Returns the Levenshtein distance to +other+
12
- # @see String::Similarity.levenshtein_distance
13
- def levenshtein_distance_to(other)
14
- String::Similarity.levenshtein_distance(self, other)
2
+ require 'string/similarity_refinements'
3
+
4
+ # +String::Similarity+ provides various methods for
5
+ # calculating string distances.
6
+ module String::Similarity
7
+ # Calculate the {https://en.wikipedia.org/wiki/Cosine_similarity
8
+ # Cosine similarity} of two strings.
9
+ #
10
+ # For an explanation of the Cosine similarity of two strings read
11
+ # {http://stackoverflow.com/a/1750187/405454 this excellent SO answer}.
12
+ #
13
+ # @param str1 [String] first string
14
+ # @param str2 [String] second string
15
+ # @return [Float] cosine similarity of the two arguments.
16
+ # - +1.0+ if the strings are identical
17
+ # - +0.0+ if the strings are completely different
18
+ # - +0.0+ if one of the strings is empty
19
+ def self.cosine(str1, str2)
20
+ return 1.0 if str1 == str2
21
+ return 0.0 if str1.empty? || str2.empty?
22
+
23
+ # convert both texts to vectors
24
+ v1 = vector(str1)
25
+ v2 = vector(str2)
26
+
27
+ # calculate the dot product
28
+ dot_product = dot(v1, v2)
29
+
30
+ # calculate the magnitude
31
+ magnitude = mag(v1.values) * mag(v2.values)
32
+ dot_product / magnitude
15
33
  end
16
34
 
17
- # Returns the Levenshtein similarity to +other+
18
- # @see String::Similarity.levenshtein
19
- def levenshtein_similarity_to(other)
20
- String::Similarity.levenshtein(self, other)
35
+ # Calculate the Levenshtein similarity for two strings.
36
+ #
37
+ # This is basically the reciprocal of the levenshtein_distance, i.e.
38
+ # 1 / levenshtein_distance(str1, str2)
39
+ #
40
+ # @param str1 [String] first string
41
+ # @param str2 [String] second string
42
+ # @return [Float] levenshtein similarity of the two arguments.
43
+ # - +1.0+ if the strings are identical
44
+ # - +0.0+ if one of the strings is empty
45
+ # @see #levenshtein_distance
46
+ def self.levenshtein(str1, str2)
47
+ return 1.0 if str1.eql?(str2)
48
+ return 0.0 if str1.empty? || str2.empty?
49
+ 1.0 / levenshtein_distance(str1, str2)
21
50
  end
22
51
 
23
- # +String::Similarity+ provides various methods for
24
- # calculating string distances.
25
- module Similarity
26
- # Calcuate the {https://en.wikipedia.org/wiki/Cosine_similarity
27
- # Cosine similarity} of two strings.
28
- #
29
- # For an explanation of the Cosine similarity of two strings read
30
- # {http://stackoverflow.com/a/1750187/405454 this excellent SO answer}.
31
- #
32
- # @param str1 [String] first string
33
- # @param str2 [String] second string
34
- # @return [Float] cosine similarity of the two arguments.
35
- # - +1.0+ if the strings are identical
36
- # - +0.0+ if the strings are completely different
37
- # - +0.0+ if one of the strings is empty
38
- def self.cosine(str1, str2)
39
- return 1.0 if str1 == str2
40
- return 0.0 if str1.empty? || str2.empty?
41
-
42
- # convert both texts to vectors
43
- v1 = vector(str1)
44
- v2 = vector(str2)
45
-
46
- # calculate the dot product
47
- dot_product = dot(v1, v2)
48
-
49
- # calculate the magnitude
50
- magnitude = mag(v1.values) * mag(v2.values)
51
- dot_product / magnitude
52
- end
53
-
54
- # Calculate the Levenshtein similarity for two strings.
55
- #
56
- # This is basically the inversion of the levenshtein_distance, i.e.
57
- # 1 / levenshtein_distance(str1, str2)
58
- #
59
- # @param str1 [String] first string
60
- # @param str2 [String] second string
61
- # @return [Float] levenshtein similarity of the two arguments.
62
- # - +1.0+ if the strings are identical
63
- # - +0.0+ if one of the strings is empty
64
- # @see #levenshtein_distance
65
- def self.levenshtein(str1, str2)
66
- return 1.0 if str1.eql?(str2)
67
- return 0.0 if str1.empty? || str2.empty?
68
- 1.0 / levenshtein_distance(str1, str2)
69
- end
70
-
71
- # Calculate the {https://en.wikipedia.org/wiki/Levenshtein_distance
72
- # Levenshtein distance} of two strings.
73
- #
74
- # @param str1 [String] first string
75
- # @param str2 [String] second string
76
- # @return [Fixnum] edit distance between the two strings
77
- # - +0+ if the strings are identical
78
- def self.levenshtein_distance(str1, str2)
79
- # base cases
80
- result = base_case?(str1, str2)
81
- return result if result
82
-
83
- # Initialize cost-matrix rows
84
- previous = (0..str2.length).to_a
85
- current = []
86
-
87
- (0...str1.length).each do |i|
88
- # first element is always the edit distance from an empty string.
89
- current[0] = i + 1
90
- (0...str2.length).each do |j|
91
- current[j + 1] = [
92
- # insertion
93
- current[j] + 1,
94
- # deletion
95
- previous[j + 1] + 1,
96
- # substitution or no operation
97
- previous[j] + (str1[i].eql?(str2[j]) ? 0 : 1)
98
- ].min
99
- end
100
- previous = current.dup
52
+ # Calculate the {https://en.wikipedia.org/wiki/Levenshtein_distance
53
+ # Levenshtein distance} of two strings.
54
+ #
55
+ # @param str1 [String] first string
56
+ # @param str2 [String] second string
57
+ # @return [Fixnum] edit distance between the two strings
58
+ # - +0+ if the strings are identical
59
+ def self.levenshtein_distance(str1, str2)
60
+ # base cases
61
+ result = base_case?(str1, str2)
62
+ return result if result
63
+
64
+ # Initialize cost-matrix rows
65
+ previous = (0..str2.length).to_a
66
+ current = []
67
+
68
+ (0...str1.length).each do |i|
69
+ # first element is always the edit distance from an empty string.
70
+ current[0] = i + 1
71
+ (0...str2.length).each do |j|
72
+ current[j + 1] = [
73
+ # insertion
74
+ current[j] + 1,
75
+ # deletion
76
+ previous[j + 1] + 1,
77
+ # substitution or no operation
78
+ previous[j] + (str1[i].eql?(str2[j]) ? 0 : 1)
79
+ ].min
101
80
  end
102
-
103
- current[str2.length]
81
+ previous = current.dup
104
82
  end
105
83
 
106
- private
84
+ current[str2.length]
85
+ end
107
86
 
108
- def self.base_case?(str1, str2)
109
- return 0 if str1.eql?(str2)
110
- return str2.length if str1.empty?
111
- return str1.length if str2.empty?
112
- false
113
- end
87
+ private
114
88
 
115
- # create a vector from +str+
116
- #
117
- # @example
118
- # v1 = vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
119
- # v1["x"] # => 0
120
- def self.vector(str)
121
- v = Hash.new(0)
122
- str.each_char { |c| v[c] += 1 }
123
- v
124
- end
89
+ def self.base_case?(str1, str2)
90
+ return 0 if str1.eql?(str2)
91
+ return str2.length if str1.empty?
92
+ return str1.length if str2.empty?
93
+ false
94
+ end
125
95
 
126
- # calculate the dot product of +vector1+ and +vector2+
127
- def self.dot(vector1, vector2)
128
- product = 0
129
- vector1.each do |k, v|
130
- product += v * vector2[k]
131
- end
132
- product
133
- end
96
+ # create a vector from +str+
97
+ #
98
+ # @example
99
+ # v1 = vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
100
+ # v1["x"] # => 0
101
+ def self.vector(str)
102
+ v = Hash.new(0)
103
+ str.each_char { |c| v[c] += 1 }
104
+ v
105
+ end
134
106
 
135
- # calculate the magnitude for +vector+
136
- def self.mag(vector)
137
- # calculate the sum of squares
138
- sq = vector.inject(0) { |a, e| a + e**2 }
139
- Math.sqrt(sq)
107
+ # calculate the dot product of +vector1+ and +vector2+
108
+ def self.dot(vector1, vector2)
109
+ product = 0
110
+ vector1.each do |k, v|
111
+ product += v * vector2[k]
140
112
  end
113
+ product
114
+ end
115
+
116
+ # calculate the magnitude for +vector+
117
+ def self.mag(vector)
118
+ # calculate the sum of squares
119
+ sq = vector.inject(0) { |a, e| a + e**2 }
120
+ Math.sqrt(sq)
141
121
  end
142
122
  end
@@ -1,6 +1,6 @@
1
1
  class String
2
2
  module Similarity
3
3
  # Gem version
4
- VERSION = '1.1.1'
4
+ VERSION = '2.0.0'
5
5
  end
6
6
  end
@@ -0,0 +1,22 @@
1
+ # provide refinements for the String class
2
+ module String::SimilarityRefinements
3
+ refine String do
4
+ # Returns the cosine similarity to +other+
5
+ # @see String::Similarity#cosine
6
+ def cosine_similarity_to(other)
7
+ String::Similarity.cosine(self, other)
8
+ end
9
+
10
+ # Returns the Levenshtein distance to +other+
11
+ # @see String::Similarity.levenshtein_distance
12
+ def levenshtein_distance_to(other)
13
+ String::Similarity.levenshtein_distance(self, other)
14
+ end
15
+
16
+ # Returns the Levenshtein similarity to +other+
17
+ # @see String::Similarity.levenshtein
18
+ def levenshtein_similarity_to(other)
19
+ String::Similarity.levenshtein(self, other)
20
+ end
21
+ end
22
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Hutter
@@ -100,6 +100,7 @@ files:
100
100
  - lib/string-similarity.rb
101
101
  - lib/string/similarity.rb
102
102
  - lib/string/similarity/version.rb
103
+ - lib/string/similarity_refinements.rb
103
104
  - string-similarity.gemspec
104
105
  homepage: https://github.com/mhutter/string-similarity
105
106
  licenses: