string-similarity 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a0e99a22e06043cfc985e7cda581c9d5b2747ada
4
+ data.tar.gz: dfc533af4d4b9fc4d138d0505193b51cc934098f
5
+ SHA512:
6
+ metadata.gz: 84343e34af35640b92bb708fa3d87d96b35aa1d56892dd33ce1f8843edf0439a349a106b924f500b659d7a87d530116a70d9fbef87165502d83fda58a30c9807
7
+ data.tar.gz: a458bf9bec226ec89549c7ae7041ee1476e8ba2c2ecac92bc71a89e14f16d7350ee5e00bce3fa206d541613ca620ed373356e6fa399ba34ec60bbe45a8f393e6
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /spec/examples.txt
10
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0-p647
4
+ - 2.1.7
5
+ - 2.2.3
6
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in string-similarity.gemspec
4
+ gemspec
5
+
6
+ gem 'codeclimate-test-reporter', group: :test, require: nil
data/Guardfile ADDED
@@ -0,0 +1,5 @@
1
+ guard :rspec, cmd: 'bundle exec rspec' do
2
+ watch(%r{^spec/(.*)\/?(.*)_spec\.rb$})
3
+ watch(%r{^lib/(.*/)?([^/]+)\.rb$}) { |m| "spec/#{m[1]}#{m[2]}_spec.rb" }
4
+ watch(%r{^spec/spec_helper\.rb$}) { 'spec' }
5
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Manuel Hutter
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,71 @@
1
+ # String::Similarity
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/string-similarity.svg)](http://badge.fury.io/rb/string-similarity)
4
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](http://rubydoc.org/gems/string-similarity/frames)
5
+ [![Build Status](https://travis-ci.org/mhutter/string-similarity.svg)](https://travis-ci.org/mhutter/string-similarity)
6
+ [![Code Climate](https://codeclimate.com/github/mhutter/string-similarity/badges/gpa.svg)](https://codeclimate.com/github/mhutter/string-similarity)
7
+ [![Test Coverage](https://codeclimate.com/github/mhutter/string-similarity/badges/coverage.svg)](https://codeclimate.com/github/mhutter/string-similarity/coverage)
8
+
9
+ Library for calculating the similarity of two strings.
10
+
11
+ ## State
12
+
13
+ - Cosine: **done**
14
+ - Hamming: _todo_
15
+ - Levenshtein: _todo_
16
+
17
+ ## Installation
18
+
19
+ Add this line to your application's Gemfile:
20
+
21
+ ```ruby
22
+ gem 'string-similarity'
23
+ ```
24
+
25
+ And then execute:
26
+
27
+ $ bundle
28
+
29
+ Or install it yourself as:
30
+
31
+ $ gem install string-similarity
32
+
33
+ ## Usage
34
+
35
+ ```ruby
36
+ require 'string-similarity'
37
+
38
+ # Call the methods on the module
39
+ String::Similarity.cosine 'foo', 'bar'
40
+ # => 0.0
41
+ String::Similarity.cosine 'mine', 'thyne'
42
+ # => 0.4472135954999579
43
+ String::Similarity.cosine 'foo', 'foo'
44
+ # => 1.0
45
+
46
+ # or call on a string directly
47
+ 'string'.cosine_similarity_to 'strong'
48
+ # => 0.8333333333333335
49
+ ```
50
+
51
+ ## Development
52
+
53
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` (or just `rake`) to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
54
+
55
+ To install this gem onto your local machine, run `bundle exec rake install`.
56
+
57
+ ## Contributing
58
+
59
+ 1. Fork it ( https://github.com/mhutter/string-similarity/fork )
60
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
61
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
62
+ 4. Push to the branch (`git push origin my-new-feature`)
63
+ 5. Create a new Pull Request
64
+
65
+
66
+ Bug reports and pull requests are welcome on GitHub at https://github.com/mhutter/string-similarity.
67
+
68
+
69
+ ## License
70
+
71
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+ RSpec::Core::RakeTask.new(:spec)
4
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bundler/setup'
3
+ require 'string/similarity'
4
+
5
+ require 'pry'
6
+ Pry.start
data/bin/setup ADDED
@@ -0,0 +1,5 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
@@ -0,0 +1,67 @@
1
+ require 'string/similarity/version'
2
+
class String
  # Convenience wrapper that compares the receiver with +other+ using
  # {String::Similarity.cosine}.
  #
  # @param other [String] the string to compare the receiver with
  # @return [Float] cosine similarity of the receiver and +other+
  def cosine_similarity_to(other)
    String::Similarity.cosine(self, other)
  end

  # +String::Similarity+ provides various methods for
  # calculating string similarities.
  module Similarity
    extend self

    # Calculate the
    # {https://en.wikipedia.org/wiki/Cosine_similarity Cosine similarity}
    # of two strings. Each string is treated as a vector of per-character
    # occurrence counts, so character order is ignored.
    #
    # @param str1 [String] first string
    # @param str2 [String] second string
    # @return [Float] cosine similarity of the two arguments, never above 1.0.
    #   - +1.0+ if the strings are identical (this includes two empty
    #     strings) or consist of the same characters in a different order
    #   - +0.0+ if the strings have no character in common
    #   - +0.0+ if exactly one of the strings is empty
    def cosine(str1, str2)
      return 1.0 if str1 == str2
      return 0.0 if str1.empty? || str2.empty?

      # convert both texts to character-count vectors
      v1 = vector(str1)
      v2 = vector(str2)

      # Multiply the exact integer sums of squares first and take a single
      # square root. Multiplying two separately rounded roots can undershoot
      # the true magnitude (sqrt(3) * sqrt(3) == 2.9999999999999996), which
      # made the result exceed 1.0 for inputs such as 'abc' and 'cab'.
      magnitude = Math.sqrt(sum_of_squares(v1) * sum_of_squares(v2))

      # Integer / Float yields a Float
      dot(v1, v2) / magnitude
    end

    private

    # create a vector of character counts from +str+
    #
    # @example
    #   vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
    def vector(str)
      # default of 0 lets #dot look up characters missing from one string
      str.each_char.with_object(Hash.new(0)) { |char, counts| counts[char] += 1 }
    end

    # calculate the dot product of +vector1+ and +vector2+
    def dot(vector1, vector2)
      vector1.inject(0) { |sum, (char, count)| sum + count * vector2[char] }
    end

    # calculate the sum of the squared components of +vector+ (the squared
    # magnitude), kept as an exact Integer
    def sum_of_squares(vector)
      vector.values.inject(0) { |sum, n| sum + n * n }
    end
  end
end
@@ -0,0 +1,5 @@
class String
  module Similarity
    # Version of the string-similarity gem. Frozen so the shared constant
    # cannot be mutated in place by code that reads it.
    VERSION = '1.0.1'.freeze
  end
end
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'string/similarity/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'string-similarity'
8
+ spec.version = String::Similarity::VERSION
9
+ spec.authors = ['Manuel Hutter']
10
+ spec.email = ['manuel@hutter.io']
11
+
12
+ spec.summary = %q{Various methods for calculating string similarities.}
13
+ spec.description = <<-EOT
14
+ This gem provides some methods for calculating similarities of two strings.
15
+
16
+ Currently implemented:
17
+ - Cosine similarity
18
+
19
+ Planned:
20
+ - Hamming similarity
21
+ - Levenshtein similarity
22
+ EOT
23
+ spec.homepage = 'https://github.com/mhutter/string-similarity'
24
+ spec.license = 'MIT'
25
+
26
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
27
+ spec.bindir = 'exe'
28
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
29
+ spec.require_paths = ['lib']
30
+
31
+ spec.add_development_dependency 'bundler', '~> 1.10'
32
+ spec.add_development_dependency 'rake', '~> 10.0'
33
+ spec.add_development_dependency 'rspec'
34
+ spec.add_development_dependency 'pry'
35
+ end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: string-similarity
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Hutter
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: |
70
+ This gem provides some methods for calculating similarities of two strings.
71
+
72
+ Currently implemented:
73
+ - Cosine similarity
74
+
75
+ Planned:
76
+ - Hamming similarity
77
+ - Levenshtein similarity
78
+ email:
79
+ - manuel@hutter.io
80
+ executables: []
81
+ extensions: []
82
+ extra_rdoc_files: []
83
+ files:
84
+ - ".gitignore"
85
+ - ".rspec"
86
+ - ".travis.yml"
87
+ - Gemfile
88
+ - Guardfile
89
+ - LICENSE.txt
90
+ - README.md
91
+ - Rakefile
92
+ - bin/console
93
+ - bin/setup
94
+ - lib/string/similarity.rb
95
+ - lib/string/similarity/version.rb
96
+ - string-similarity.gemspec
97
+ homepage: https://github.com/mhutter/string-similarity
98
+ licenses:
99
+ - MIT
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 2.4.5.1
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: Various methods for calculating string similarities.
121
+ test_files: []
122
+ has_rdoc: