similar 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
1
+ rvm use ruby-1.9.3-p194@similar --create
2
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in similar.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Scott Barr
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # Similar
2
+
3
+ Similar is a library of functions to assist in determining the similarity
4
+ of arbitrary sets of data.
5
+
6
+ At the moment only the calculation of Pearson scores is implemented.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'similar'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install similar
21
+
22
+ ## Examples
23
+
24
+ a = [1, 2, 3]
25
+ b = [1, 2, 3]
26
+ Similar.pearson_score(a, b)
27
+ => 1.0
28
+
29
+ a = [1, 2, 3]
30
+ b = [1, 2, 5.0]
31
+ Similar.pearson_score(a, b)
32
+ => 0.9607689228305226
33
+
34
+ ## Contributing
35
+
36
+ 1. Fork it
37
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
38
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
39
+ 4. Push to the branch (`git push origin my-new-feature`)
40
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,3 @@
1
module Similar
  # Release version of the gem; read by similar.gemspec as Similar::VERSION.
  # Frozen so the shared constant cannot be mutated in place at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/similar.rb ADDED
@@ -0,0 +1,46 @@
1
+ require "similar/version"
2
+
3
module Similar

  # Calculate the Pearson score for the values in two Arrays.
  #
  # a - Array of Numerics.
  # b - Array of Numerics; must contain the same number of elements as a.
  #
  # Returns 0 when a is empty or when either Array has no variance (the
  # score is undefined there), 1.0 when the Arrays are equal, and otherwise
  # a Float score.
  #
  # Raises ArgumentError when a is non-empty and the lengths differ.
  def self.pearson_score(a, b)
    n = a.length

    # There is nothing to compare.
    return 0 unless n > 0

    raise ArgumentError, "Arrays not of equal length" if n != b.length

    # Equal Arrays are reported as perfectly similar without running the
    # calculation. This also covers equal constant Arrays (e.g. [2, 2]),
    # for which the formula below has a zero denominator and would return 0.
    return 1.0 if a == b

    # sum of the values
    sum_1 = a.inject(0) { |sum, c| sum + c }
    sum_2 = b.inject(0) { |sum, c| sum + c }

    # sum of the squares
    sum_1_sq = a.inject(0) { |sum, c| sum + c ** 2 }
    sum_2_sq = b.inject(0) { |sum, c| sum + c ** 2 }

    # sum of the products
    prod_sum = a.zip(b).inject(0) { |sum, (x, y)| sum + x * y }

    # Divide by a Float count: with Integer inputs, dividing by the Integer
    # length would truncate the intermediate terms and skew the score.
    count = n.to_f

    num   = prod_sum - (sum_1 * sum_2 / count)
    var_1 = sum_1_sq - (sum_1 ** 2) / count
    var_2 = sum_2_sq - (sum_2 ** 2) / count

    # A non-positive term means that Array has no spread. Rounding can leave
    # it slightly below zero, which would make Math.sqrt raise.
    return 0 if var_1 <= 0 || var_2 <= 0

    den = Math.sqrt(var_1 * var_2)

    return 0 if den == 0

    num / den
  end

end
data/similar.gemspec ADDED
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'similar/version'
5
+
6
# Packaging metadata for the "similar" gem.
Gem::Specification.new do |gem|
  gem.name          = "similar"
  gem.version       = Similar::VERSION
  gem.authors       = ["Scott Barr"]
  gem.email         = ["scottjbarr@gmail.com"]
  gem.description   = <<-EOF
    Similar is a library of functions to assist in determining the similarity
    of arbitrary sets of data.
  EOF
  gem.summary       = %q{Determine similarity of arbitrary sets of data}
  gem.homepage      = "http://scottjbarr.com"
  # The project ships an MIT LICENSE.txt; declare it so the built gem's
  # license metadata is not left empty.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only tooling; the gem declares no runtime dependencies.
  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rspec-core'
  gem.add_development_dependency 'rspec-expectations'
  gem.add_development_dependency 'rspec-mocks'
  gem.add_development_dependency 'autotest'
  gem.add_development_dependency 'autotest-fsevent'
  gem.add_development_dependency 'autotest-growl'
  gem.add_development_dependency 'builder'
end
@@ -0,0 +1,26 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe Similar do # Specs for Similar.pearson_score
5
+
6
+   it "should not calculate pearson score when nothing to compare" do
7
+     score = Similar.pearson_score([], [])
8
+     score.should == 0 # empty input short-circuits to 0
9
+   end
10
+
11
+   it "should calculate pearson score" do
12
+     a = [2.5, 3.5, 3.0, 3.5, 3.0, 2.5]
13
+     b = [3.0, 3.5, 1.5, 5.0, 3.0, 3.5]
14
+
15
+     score = Similar.pearson_score(a, b)
16
+     score.should be_within(0.00001).of(0.39605901719066977) # Float result, so compare with a tolerance
17
+   end
18
+
19
+   it "should raise an exception when calculating pearson score with different array lengths" do
20
+     a = [2.5, 3.5]
21
+     b = [3.0]
22
+
23
+     lambda { Similar.pearson_score(a, b) }.should raise_exception(ArgumentError, "Arrays not of equal length") # both the class and the message are checked
24
+   end
25
+
26
+ end
@@ -0,0 +1,3 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'similar'
metadata ADDED
@@ -0,0 +1,188 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: similar
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Scott Barr
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec-core
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec-expectations
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec-mocks
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: autotest
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: autotest-fsevent
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: autotest-growl
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: builder
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ description: ! " Similar is a library of functions to assist in determining the
143
+ similarity\n if arbitrary sets of data.\n"
144
+ email:
145
+ - scottjbarr@gmail.com
146
+ executables: []
147
+ extensions: []
148
+ extra_rdoc_files: []
149
+ files:
150
+ - .gitignore
151
+ - .rspec
152
+ - .rvmrc
153
+ - Gemfile
154
+ - LICENSE.txt
155
+ - README.md
156
+ - Rakefile
157
+ - lib/similar.rb
158
+ - lib/similar/version.rb
159
+ - similar.gemspec
160
+ - spec/similar_spec.rb
161
+ - spec/spec_helper.rb
162
+ homepage: http://scottjbarr.com
163
+ licenses: []
164
+ post_install_message:
165
+ rdoc_options: []
166
+ require_paths:
167
+ - lib
168
+ required_ruby_version: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ required_rubygems_version: !ruby/object:Gem::Requirement
175
+ none: false
176
+ requirements:
177
+ - - ! '>='
178
+ - !ruby/object:Gem::Version
179
+ version: '0'
180
+ requirements: []
181
+ rubyforge_project:
182
+ rubygems_version: 1.8.24
183
+ signing_key:
184
+ specification_version: 3
185
+ summary: Determine similarity of arbitrary sets of data
186
+ test_files:
187
+ - spec/similar_spec.rb
188
+ - spec/spec_helper.rb