string-similarity 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b89926f7398486de02d94f5f1d8742834a95d54f
4
- data.tar.gz: 0493548d9494e4855b648551848da77fbef6945d
3
+ metadata.gz: cd97efdcd76434ae400e6382b55e54d44ce003d8
4
+ data.tar.gz: 1ca9a5eb0075b86d30afd03669226425161d510f
5
5
  SHA512:
6
- metadata.gz: 8af14a673396cf5b7b20df9e279621e4301fb5c8c52f14b1a8aae3b096d1ad5a480ce6e4cc64c0b5ba6bef1c0fe72f6b530f8a10aef21bfe1c7f932d00007e3a
7
- data.tar.gz: a89455cf5807fb41749d4f4b86e4642e50cf1ff8620d278e8e3b43c2abf6cd3454e740a81337e1fa7d7d3c76704a17b271a1cd61558362d37e109ca01835be6b
6
+ metadata.gz: d904adf7b09fc53dadee2e47f3111f4bcce52d4cabfad7c1b7fee542d9af5c65298cbf7fa1be95d729dfb2672b21994c6f7e28ad0498a5f147311f7dc3e2418f
7
+ data.tar.gz: d849740e7fb49897439baf66f06f3cafd4677f01c6530bb212bb9b406c5c8c974716b3af562fb72c8bcffeaa1602a4d880bed35b5f5289381e01dd57200b313c
data/.rubocop.yml ADDED
@@ -0,0 +1,3 @@
1
+ AllCops:
2
+ Exclude:
3
+ - lib/string/similarity/version.rb
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ **1.1.1** (2016-02-19)
2
+
3
+ * added: `require 'string-similarity'` now works as well.
data/README.md CHANGED
@@ -10,9 +10,9 @@ Library for calculating the similarity of two strings.
10
10
 
11
11
  ## State
12
12
 
13
- - Cosine: **done**
14
- - Hamming: _todo_
15
- - Levenshtein: _todo_
13
+ - [x] Cosine
14
+ - [ ] Hamming
15
+ - [x] Levenshtein
16
16
 
17
17
  ## Installation
18
18
 
@@ -33,7 +33,7 @@ Or install it yourself as:
33
33
  ## Usage
34
34
 
35
35
  ```ruby
36
- require 'string-similarity'
36
+ require 'string/similarity'
37
37
 
38
38
  # Call the methods on the module
39
39
  String::Similarity.cosine 'foo', 'bar'
@@ -0,0 +1 @@
1
+ # require 'string/similarity'
@@ -1,5 +1,6 @@
1
1
  require 'string/similarity/version'
2
2
 
3
+ # For convenience, String is extended by a couple of helper methods
3
4
  class String
4
5
  # Returns the cosine similarity to +other+
5
6
  # @see String::Similarity#cosine
@@ -21,7 +22,7 @@ class String
21
22
 
22
23
  # +String::Similarity+ provides various methods for
23
24
  # calculating string distances.
24
- module Similarity extend self
25
+ module Similarity
25
26
  # Calculate the {https://en.wikipedia.org/wiki/Cosine_similarity
26
27
  # Cosine similarity} of two strings.
27
28
  #
@@ -34,12 +35,13 @@ class String
34
35
  # - +1.0+ if the strings are identical
35
36
  # - +0.0+ if the strings are completely different
36
37
  # - +0.0+ if one of the strings is empty
37
- def cosine(str1, str2)
38
+ def self.cosine(str1, str2)
38
39
  return 1.0 if str1 == str2
39
40
  return 0.0 if str1.empty? || str2.empty?
40
41
 
41
42
  # convert both texts to vectors
42
- v1, v2 = vector(str1), vector(str2)
43
+ v1 = vector(str1)
44
+ v2 = vector(str2)
43
45
 
44
46
  # calculate the dot product
45
47
  dot_product = dot(v1, v2)
@@ -60,7 +62,7 @@ class String
60
62
  # - +1.0+ if the strings are identical
61
63
  # - +0.0+ if one of the strings is empty
62
64
  # @see #levenshtein_distance
63
- def levenshtein(str1, str2)
65
+ def self.levenshtein(str1, str2)
64
66
  return 1.0 if str1.eql?(str2)
65
67
  return 0.0 if str1.empty? || str2.empty?
66
68
  1.0 / levenshtein_distance(str1, str2)
@@ -73,11 +75,10 @@ class String
73
75
  # @param str2 [String] second string
74
76
  # @return [Fixnum] edit distance between the two strings
75
77
  # - +0+ if the strings are identical
76
- def levenshtein_distance(str1, str2)
78
+ def self.levenshtein_distance(str1, str2)
77
79
  # base cases
78
- return 0 if str1.eql?(str2)
79
- return str2.length if str1.empty?
80
- return str1.length if str2.empty?
80
+ result = base_case?(str1, str2)
81
+ return result if result
81
82
 
82
83
  # Initialize cost-matrix rows
83
84
  previous = (0..str2.length).to_a
@@ -87,11 +88,11 @@ class String
87
88
  # first element is always the edit distance from an empty string.
88
89
  current[0] = i + 1
89
90
  (0...str2.length).each do |j|
90
- current[j+1] = [
91
+ current[j + 1] = [
91
92
  # insertion
92
93
  current[j] + 1,
93
94
  # deletion
94
- previous[j+1] + 1,
95
+ previous[j + 1] + 1,
95
96
  # substitution or no operation
96
97
  previous[j] + (str1[i].eql?(str2[j]) ? 0 : 1)
97
98
  ].min
@@ -104,19 +105,26 @@ class String
104
105
 
105
106
  private
106
107
 
108
+ def self.base_case?(str1, str2)
109
+ return 0 if str1.eql?(str2)
110
+ return str2.length if str1.empty?
111
+ return str1.length if str2.empty?
112
+ false
113
+ end
114
+
107
115
  # create a vector from +str+
108
116
  #
109
117
  # @example
110
118
  # v1 = vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
111
119
  # v1["x"] # => 0
112
- def vector(str)
120
+ def self.vector(str)
113
121
  v = Hash.new(0)
114
122
  str.each_char { |c| v[c] += 1 }
115
123
  v
116
124
  end
117
125
 
118
126
  # calculate the dot product of +vector1+ and +vector2+
119
- def dot(vector1, vector2)
127
+ def self.dot(vector1, vector2)
120
128
  product = 0
121
129
  vector1.each do |k, v|
122
130
  product += v * vector2[k]
@@ -125,9 +133,9 @@ class String
125
133
  end
126
134
 
127
135
  # calculate the magnitude for +vector+
128
- def mag(vector)
136
+ def self.mag(vector)
129
137
  # calculate the sum of squares
130
- sq = vector.inject(0) { |s, n| s + n**2 }
138
+ sq = vector.inject(0) { |a, e| a + e**2 }
131
139
  Math.sqrt(sq)
132
140
  end
133
141
  end
@@ -1,6 +1,6 @@
1
1
  class String
2
2
  module Similarity
3
3
  # Gem version
4
- VERSION = '1.1.0'
4
+ VERSION = '1.1.1'
5
5
  end
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Hutter
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-09-07 00:00:00.000000000 Z
11
+ date: 2016-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -87,7 +87,9 @@ extra_rdoc_files: []
87
87
  files:
88
88
  - ".gitignore"
89
89
  - ".rspec"
90
+ - ".rubocop.yml"
90
91
  - ".travis.yml"
92
+ - CHANGELOG.md
91
93
  - Gemfile
92
94
  - Guardfile
93
95
  - LICENSE.txt
@@ -95,6 +97,7 @@ files:
95
97
  - Rakefile
96
98
  - bin/console
97
99
  - bin/setup
100
+ - lib/string-similarity.rb
98
101
  - lib/string/similarity.rb
99
102
  - lib/string/similarity/version.rb
100
103
  - string-similarity.gemspec
@@ -118,9 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
121
  version: '0'
119
122
  requirements: []
120
123
  rubyforge_project:
121
- rubygems_version: 2.4.5.1
124
+ rubygems_version: 2.5.1
122
125
  signing_key:
123
126
  specification_version: 4
124
127
  summary: Various methods for calculating string similarities.
125
128
  test_files: []
126
- has_rdoc: