string-similarity 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b89926f7398486de02d94f5f1d8742834a95d54f
4
- data.tar.gz: 0493548d9494e4855b648551848da77fbef6945d
3
+ metadata.gz: cd97efdcd76434ae400e6382b55e54d44ce003d8
4
+ data.tar.gz: 1ca9a5eb0075b86d30afd03669226425161d510f
5
5
  SHA512:
6
- metadata.gz: 8af14a673396cf5b7b20df9e279621e4301fb5c8c52f14b1a8aae3b096d1ad5a480ce6e4cc64c0b5ba6bef1c0fe72f6b530f8a10aef21bfe1c7f932d00007e3a
7
- data.tar.gz: a89455cf5807fb41749d4f4b86e4642e50cf1ff8620d278e8e3b43c2abf6cd3454e740a81337e1fa7d7d3c76704a17b271a1cd61558362d37e109ca01835be6b
6
+ metadata.gz: d904adf7b09fc53dadee2e47f3111f4bcce52d4cabfad7c1b7fee542d9af5c65298cbf7fa1be95d729dfb2672b21994c6f7e28ad0498a5f147311f7dc3e2418f
7
+ data.tar.gz: d849740e7fb49897439baf66f06f3cafd4677f01c6530bb212bb9b406c5c8c974716b3af562fb72c8bcffeaa1602a4d880bed35b5f5289381e01dd57200b313c
data/.rubocop.yml ADDED
@@ -0,0 +1,3 @@
1
+ AllCops:
2
+ Exclude:
3
+ - lib/string/similarity/version.rb
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ **1.1.1** (2016-02-19)
2
+
3
+ * added: `require 'string-similarity'` now works as well.
data/README.md CHANGED
@@ -10,9 +10,9 @@ Library for calculating the similarity of two strings.
10
10
 
11
11
  ## State
12
12
 
13
- - Cosine: **done**
14
- - Hamming: _todo_
15
- - Levenshtein: _todo_
13
+ - [x] Cosine
14
+ - [ ] Hamming
15
+ - [x] Levenshtein
16
16
 
17
17
  ## Installation
18
18
 
@@ -33,7 +33,7 @@ Or install it yourself as:
33
33
  ## Usage
34
34
 
35
35
  ```ruby
36
- require 'string-similarity'
36
+ require 'string/similarity'
37
37
 
38
38
  # Call the methods on the module
39
39
  String::Similarity.cosine 'foo', 'bar'
@@ -0,0 +1 @@
1
+ # require 'string/similarity'
@@ -1,5 +1,6 @@
1
1
  require 'string/similarity/version'
2
2
 
3
+ # For convenience, String is extended by a couple of helper methods
3
4
  class String
4
5
  # Returns the cosine similarity to +other+
5
6
  # @see String::Similarity#cosine
@@ -21,7 +22,7 @@ class String
21
22
 
22
23
  # +String::Similarity+ provides various methods for
23
24
  # calculating string distances.
24
- module Similarity extend self
25
+ module Similarity
25
26
  # Calculate the {https://en.wikipedia.org/wiki/Cosine_similarity
26
27
  # Cosine similarity} of two strings.
27
28
  #
@@ -34,12 +35,13 @@ class String
34
35
  # - +1.0+ if the strings are identical
35
36
  # - +0.0+ if the strings are completely different
36
37
  # - +0.0+ if one of the strings is empty
37
- def cosine(str1, str2)
38
+ def self.cosine(str1, str2)
38
39
  return 1.0 if str1 == str2
39
40
  return 0.0 if str1.empty? || str2.empty?
40
41
 
41
42
  # convert both texts to vectors
42
- v1, v2 = vector(str1), vector(str2)
43
+ v1 = vector(str1)
44
+ v2 = vector(str2)
43
45
 
44
46
  # calculate the dot product
45
47
  dot_product = dot(v1, v2)
@@ -60,7 +62,7 @@ class String
60
62
  # - +1.0+ if the strings are identical
61
63
  # - +0.0+ if one of the strings is empty
62
64
  # @see #levenshtein_distance
63
- def levenshtein(str1, str2)
65
+ def self.levenshtein(str1, str2)
64
66
  return 1.0 if str1.eql?(str2)
65
67
  return 0.0 if str1.empty? || str2.empty?
66
68
  1.0 / levenshtein_distance(str1, str2)
@@ -73,11 +75,10 @@ class String
73
75
  # @param str2 [String] second string
74
76
  # @return [Fixnum] edit distance between the two strings
75
77
  # - +0+ if the strings are identical
76
- def levenshtein_distance(str1, str2)
78
+ def self.levenshtein_distance(str1, str2)
77
79
  # base cases
78
- return 0 if str1.eql?(str2)
79
- return str2.length if str1.empty?
80
- return str1.length if str2.empty?
80
+ result = base_case?(str1, str2)
81
+ return result if result
81
82
 
82
83
  # Initialize cost-matrix rows
83
84
  previous = (0..str2.length).to_a
@@ -87,11 +88,11 @@ class String
87
88
  # first element is always the edit distance from an empty string.
88
89
  current[0] = i + 1
89
90
  (0...str2.length).each do |j|
90
- current[j+1] = [
91
+ current[j + 1] = [
91
92
  # insertion
92
93
  current[j] + 1,
93
94
  # deletion
94
- previous[j+1] + 1,
95
+ previous[j + 1] + 1,
95
96
  # substitution or no operation
96
97
  previous[j] + (str1[i].eql?(str2[j]) ? 0 : 1)
97
98
  ].min
@@ -104,19 +105,26 @@ class String
104
105
 
105
106
  private
106
107
 
108
+ def self.base_case?(str1, str2)
109
+ return 0 if str1.eql?(str2)
110
+ return str2.length if str1.empty?
111
+ return str1.length if str2.empty?
112
+ false
113
+ end
114
+
107
115
  # create a vector from +str+
108
116
  #
109
117
  # @example
110
118
  # v1 = vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
111
119
  # v1["x"] # => 0
112
- def vector(str)
120
+ def self.vector(str)
113
121
  v = Hash.new(0)
114
122
  str.each_char { |c| v[c] += 1 }
115
123
  v
116
124
  end
117
125
 
118
126
  # calculate the dot product of +vector1+ and +vector2+
119
- def dot(vector1, vector2)
127
+ def self.dot(vector1, vector2)
120
128
  product = 0
121
129
  vector1.each do |k, v|
122
130
  product += v * vector2[k]
@@ -125,9 +133,9 @@ class String
125
133
  end
126
134
 
127
135
  # calculate the magnitude for +vector+
128
- def mag(vector)
136
+ def self.mag(vector)
129
137
  # calculate the sum of squares
130
- sq = vector.inject(0) { |s, n| s + n**2 }
138
+ sq = vector.inject(0) { |a, e| a + e**2 }
131
139
  Math.sqrt(sq)
132
140
  end
133
141
  end
@@ -1,6 +1,6 @@
1
1
  class String
2
2
  module Similarity
3
3
  # Gem version
4
- VERSION = '1.1.0'
4
+ VERSION = '1.1.1'
5
5
  end
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Hutter
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-09-07 00:00:00.000000000 Z
11
+ date: 2016-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -87,7 +87,9 @@ extra_rdoc_files: []
87
87
  files:
88
88
  - ".gitignore"
89
89
  - ".rspec"
90
+ - ".rubocop.yml"
90
91
  - ".travis.yml"
92
+ - CHANGELOG.md
91
93
  - Gemfile
92
94
  - Guardfile
93
95
  - LICENSE.txt
@@ -95,6 +97,7 @@ files:
95
97
  - Rakefile
96
98
  - bin/console
97
99
  - bin/setup
100
+ - lib/string-similarity.rb
98
101
  - lib/string/similarity.rb
99
102
  - lib/string/similarity/version.rb
100
103
  - string-similarity.gemspec
@@ -118,9 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
121
  version: '0'
119
122
  requirements: []
120
123
  rubyforge_project:
121
- rubygems_version: 2.4.5.1
124
+ rubygems_version: 2.5.1
122
125
  signing_key:
123
126
  specification_version: 4
124
127
  summary: Various methods for calculating string similarities.
125
128
  test_files: []
126
- has_rdoc: