similar 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
1
+ rvm use ruby-1.9.3-p194@similar --create
2
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in similar.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Scott Barr
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # Similar
2
+
3
+ Similar is a library of functions to assist in determining the similarity
4
+ of arbitrary sets of data.
5
+
6
+ At the moment only the calculation of Pearson correlation scores is implemented.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'similar'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install similar
21
+
22
+ ## Examples
23
+
24
+ a = [1, 2, 3]
25
+ b = [1, 2, 3]
26
+ Similar.pearson_score(a, b)
27
+ => 1.0
28
+
29
+ a = [1, 2, 3]
30
+ b = [1, 2, 5.0]
31
+ Similar.pearson_score(a, b)
32
+ => 0.9607689228305226
33
+
34
+ ## Contributing
35
+
36
+ 1. Fork it
37
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
38
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
39
+ 4. Push to the branch (`git push origin my-new-feature`)
40
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,3 @@
1
module Similar
  # Version of the gem; read by similar.gemspec when building the package.
  # Frozen so the constant cannot be mutated in place at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/similar.rb ADDED
@@ -0,0 +1,46 @@
1
+ require "similar/version"
2
+
3
module Similar

  # Calculate the Pearson correlation score for the values in two Arrays.
  #
  # Each Array must contain the same number of numeric elements; the values
  # are paired by index.
  #
  # a - Array of numbers.
  # b - Array of numbers, the same length as a.
  #
  # Returns 0 when both Arrays are empty or when the score is undefined
  # because at least one Array has no variance, 1.0 when the Arrays are
  # equal, and otherwise a Float between -1.0 and 1.0.
  #
  # Raises ArgumentError if the Arrays differ in length.
  def self.pearson_score(a, b)
    n = a.length

    # Validate before the empty check so that ([], [1]) is reported as a
    # length mismatch instead of silently scoring 0.
    raise ArgumentError, "Arrays not of equal length" if n != b.length

    # There is nothing to compare.
    return 0 unless n > 0

    # Equal Arrays are treated as perfectly correlated. For Arrays that vary
    # the formula below gives the same answer; for constant Arrays (e.g.
    # [1, 1] and [1, 1]) the denominator is zero and the formula would yield
    # 0, so this shortcut reports the extremely high similarity instead.
    return 1.0 if a == b

    # Divide by a Float count: with Integer inputs, dividing by the Integer
    # length truncates and produces a wrong score.
    count = n.to_f

    # sum of the values
    sum_1 = a.inject(0) { |sum, c| sum + c }
    sum_2 = b.inject(0) { |sum, c| sum + c }

    # sum of the squares
    sum_1_sq = a.inject(0) { |sum, c| sum + c ** 2 }
    sum_2_sq = b.inject(0) { |sum, c| sum + c ** 2 }

    # sum of the products of the paired values
    prod_sum = a.zip(b).inject(0) { |sum, (x, y)| sum + x * y }

    # calculate the Pearson score
    num = prod_sum - (sum_1 * sum_2 / count)
    spread = (sum_1_sq - (sum_1 ** 2) / count) * (sum_2_sq - (sum_2 ** 2) / count)

    # Zero spread means at least one Array is constant and the score is
    # undefined. Floating point rounding can push the product slightly below
    # zero, which would make Math.sqrt raise, so treat that as zero as well.
    return 0 if spread <= 0

    num / Math.sqrt(spread)
  end

end
data/similar.gemspec ADDED
@@ -0,0 +1,31 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for the "similar" gem.

# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'similar/version'

Gem::Specification.new do |gem|
  gem.name          = "similar"
  gem.version       = Similar::VERSION
  gem.authors       = ["Scott Barr"]
  gem.email         = ["scottjbarr@gmail.com"]
  gem.description   = <<-EOF
    Similar is a library of functions to assist in determining the similarity
    of arbitrary sets of data.
  EOF
  gem.summary       = %q{Determine similarity of arbitrary sets of data}
  gem.homepage      = "http://scottjbarr.com"
  # The project ships LICENSE.txt with the MIT license text.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rspec-core'
  gem.add_development_dependency 'rspec-expectations'
  gem.add_development_dependency 'rspec-mocks'
  gem.add_development_dependency 'autotest'
  gem.add_development_dependency 'autotest-fsevent'
  gem.add_development_dependency 'autotest-growl'
  gem.add_development_dependency 'builder'
end
@@ -0,0 +1,26 @@
1
# encoding: UTF-8
require 'spec_helper'

# Specs for Similar.pearson_score. Written with the expect syntax, because
# the older `should` syntax relies on monkey-patching every object and is
# deprecated in RSpec 3.
describe Similar do

  it "should not calculate pearson score when nothing to compare" do
    score = Similar.pearson_score([], [])
    expect(score).to eq(0)
  end

  it "should calculate pearson score" do
    a = [2.5, 3.5, 3.0, 3.5, 3.0, 2.5]
    b = [3.0, 3.5, 1.5, 5.0, 3.0, 3.5]

    score = Similar.pearson_score(a, b)
    expect(score).to be_within(0.00001).of(0.39605901719066977)
  end

  it "should score identical arrays as 1.0" do
    score = Similar.pearson_score([1, 2, 3], [1, 2, 3])
    expect(score).to eq(1.0)
  end

  it "should raise an exception when calculating pearson score with different array lengths" do
    a = [2.5, 3.5]
    b = [3.0]

    expect { Similar.pearson_score(a, b) }.to raise_exception(ArgumentError, "Arrays not of equal length")
  end

end
@@ -0,0 +1,3 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'similar'
metadata ADDED
@@ -0,0 +1,188 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: similar
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Scott Barr
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec-core
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec-expectations
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec-mocks
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: autotest
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: autotest-fsevent
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: autotest-growl
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: builder
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ description: ! " Similar is a library of functions to assist in determining the
143
+ similarity\n if arbitrary sets of data.\n"
144
+ email:
145
+ - scottjbarr@gmail.com
146
+ executables: []
147
+ extensions: []
148
+ extra_rdoc_files: []
149
+ files:
150
+ - .gitignore
151
+ - .rspec
152
+ - .rvmrc
153
+ - Gemfile
154
+ - LICENSE.txt
155
+ - README.md
156
+ - Rakefile
157
+ - lib/similar.rb
158
+ - lib/similar/version.rb
159
+ - similar.gemspec
160
+ - spec/similar_spec.rb
161
+ - spec/spec_helper.rb
162
+ homepage: http://scottjbarr.com
163
+ licenses: []
164
+ post_install_message:
165
+ rdoc_options: []
166
+ require_paths:
167
+ - lib
168
+ required_ruby_version: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ required_rubygems_version: !ruby/object:Gem::Requirement
175
+ none: false
176
+ requirements:
177
+ - - ! '>='
178
+ - !ruby/object:Gem::Version
179
+ version: '0'
180
+ requirements: []
181
+ rubyforge_project:
182
+ rubygems_version: 1.8.24
183
+ signing_key:
184
+ specification_version: 3
185
+ summary: Determine similarity of arbitrary sets of data
186
+ test_files:
187
+ - spec/similar_spec.rb
188
+ - spec/spec_helper.rb