array_metrics 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ pkg
data/Gemfile ADDED
@@ -0,0 +1 @@
1
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Ricardo Panaggio
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,45 @@
1
+ = array_metrics
2
+
3
+ A bundle of metrics for Arrays.
4
+
5
+ == Installation
6
+
7
+ gem install array_metrics
8
+
9
+ == Usage
10
+
11
+ ArrayMetrics (will) have a lot of distance and similarity metrics implemented
12
+ inside it.
13
+
14
+ For example, to calculate the Pearson Correlation between two Arrays
15
+ <tt>arr1</tt> and <tt>arr2</tt>:
16
+
17
+ ArrayMetrics.pearson_correlation(arr1, arr2)
18
+
19
+ == Maintainer
20
+
21
+ * Ricardo Panaggio - http://panaggio.net
22
+
23
+ == License
24
+
25
+ (The MIT License)
26
+
27
+ Permission is hereby granted, free of charge, to any person obtaining
28
+ a copy of this software and associated documentation files (the
29
+ 'Software'), to deal in the Software without restriction, including
30
+ without limitation the rights to use, copy, modify, merge, publish,
31
+ distribute, sublicense, and/or sell copies of the Software, and to
32
+ permit persons to whom the Software is furnished to do so, subject to
33
+ the following conditions:
34
+
35
+ The above copyright notice and this permission notice shall be
36
+ included in all copies or substantial portions of the Software.
37
+
38
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
39
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
40
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
41
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
42
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
43
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
44
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
45
+
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
# Rake tasks for validating, building, installing and cleaning the gem.
require 'fileutils'

# Locate and load the gem specification from the current directory.
# Gem::Specification.load is the RubyGems API for reading a .gemspec file; it
# is used here instead of a bare eval of the file contents.
gemspec_path = Dir["*.gemspec"].first
abort "No .gemspec file found in #{Dir.pwd}" unless gemspec_path

gemspec = Gem::Specification.load(gemspec_path)
abort "Could not load #{gemspec_path}" unless gemspec

# Name of the archive that `gem build` writes (name-version.gem).
gem_file = gemspec.file_name

desc "Validate the gemspec"
task :gemspec do
  gemspec.validate
end

desc "Build gem locally"
task :build => :gemspec do
  # sh (unlike system) fails the task when the command exits non-zero, so a
  # broken build does not go on to move a missing file.
  sh "gem build #{gemspec_path}"
  FileUtils.mkdir_p "pkg"
  FileUtils.mv gem_file, "pkg"
end

desc "Install gem locally"
task :install => :build do
  # Pass the full file name, including the .gem extension, so RubyGems
  # installs the freshly built local archive.
  sh "gem install #{File.join('pkg', gem_file)}"
end

desc "Clean automatically generated files"
task :clean do
  FileUtils.rm_rf "pkg"
end

# Defines the default `spec` task that runs the RSpec suite.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new
@@ -0,0 +1,17 @@
1
# coding: UTF-8

# Gem specification for array_metrics.
Gem::Specification.new do |s|
  s.name         = "array_metrics"
  s.version      = "0.0.1"
  s.platform     = Gem::Platform::RUBY
  s.authors      = ["Ricardo Panaggio"]
  s.email        = ["panaggio.ricardo@gmail.com"]
  s.homepage     = "http://github.com/panaggio/array_metrics"
  s.summary      = "A bundle of metrics for Arrays"
  s.description  = "A bundle (not that much by now) of metrics for Arrays"
  # The project ships the MIT license text (see LICENSE and README.rdoc).
  s.licenses     = ["MIT"]
  # The packaged files are whatever git tracks, so the gem must be built from
  # inside a git checkout.
  s.files        = `git ls-files`.split("\n")
  s.test_files   = `git ls-files spec`.split("\n")
  s.require_path = 'lib'
  # RSpec is only needed to run the specs, not to use the library, so it is a
  # development dependency rather than a runtime one.
  s.add_development_dependency "rspec", "~> 2.0"
end
@@ -0,0 +1,12 @@
1
# Not-a-number sentinel, produced by the float division 0.0 / 0.
NaN = 0.0 / 0

# Numeric helpers added to the core Array class.
class Array
  # Arithmetic mean of the elements, as a Float.
  # An empty array yields 0.0.
  def mean
    return 0.0 if empty?

    total = 0.0
    each { |value| total += value }
    total.to_f / length
  end

  # Unary minus: returns a new array with every element negated.
  # nil elements are passed through unchanged.
  def -@
    map { |item| item.nil? ? item : -item }
  end
end
@@ -0,0 +1,47 @@
1
module ArrayMetrics
  # Calculates the Constrained Pearson Correlation between Arrays x and y.
  # See Shardanand and Maes 1995 for more details.
  #
  # Positions where either array holds nil are ignored. Deviations are
  # measured from m for both arrays; when m is nil (the default) each array's
  # own mean over the remaining positions is used instead, which gives the
  # ordinary Pearson correlation. The mean is computed with Array#mean, the
  # extension this library defines in array_metrics/common.rb.
  #
  # x, y - Arrays of numbers (or nil) of the same size.
  # m    - value to measure deviations from, or nil to use the means.
  #
  # Returns a Float. The result is NaN when no position has a value in both
  # arrays, or when either array has no deviation from its reference value
  # (the denominator is zero).
  # Raises RuntimeError ("array sizes don't match") if the sizes differ.
  #
  # Shardanand, U. and Maes, P. Social information filtering: algorithms for
  # automating "word of mouth". In Proceedings of the SIGCHI conference on
  # Human factors in computing systems (CHI '95). ACM Press/Addison-Wesley
  # Publishing Co., New York, NY, USA, 210-217.
  # DOI=10.1145/223904.223931 http://dx.doi.org/10.1145/223904.223931
  def self.constraint_pearson_correlation(x, y, m = nil)
    raise "array sizes don't match" if x.size != y.size

    # Keep only the positions where both arrays have a value.
    pairs = x.zip(y).reject { |xi, yi| xi.nil? || yi.nil? }

    # 0.0 / 0.0 is NaN; computed here so the method does not rely on a
    # constant defined in another file.
    return 0.0 / 0.0 if pairs.empty?

    if m.nil?
      cx, cy = pairs.transpose
      x_ref = cx.mean
      y_ref = cy.mean
    else
      x_ref = y_ref = m
    end

    # s accumulates the co-deviation; sx and sy the squared deviations.
    s = sx = sy = 0.0
    pairs.each do |xi, yi|
      px = xi - x_ref
      py = yi - y_ref
      s  += px * py
      sx += px * px
      sy += py * py
    end

    # Float division: a zero denominator yields NaN (0.0/0.0) rather than
    # raising.
    s / Math.sqrt(sx * sy)
  end

  # Calculates the Pearson Correlation between the Arrays x and y.
  # See https://secure.wikimedia.org/wikipedia/en/wiki/Pearson_correlation
  # for more details.
  #
  # Same contract as constraint_pearson_correlation with deviations taken
  # from each array's own mean.
  def self.pearson_correlation(x, y)
    constraint_pearson_correlation(x, y, nil)
  end
end
@@ -0,0 +1,2 @@
1
# Entry point of the library: loads the Array extensions and the metric
# implementations.
#
# The paths are resolved relative to this file, so loading works regardless of
# the current working directory and without "." being in the load path.
require File.expand_path("array_metrics/common.rb", File.dirname(__FILE__))
require File.expand_path("array_metrics/pearson_correlation.rb", File.dirname(__FILE__))
@@ -0,0 +1,29 @@
1
require File.join(File.dirname(__FILE__), "../spec_helper.rb")

# Specs for the Array extensions: Array#mean and unary minus (Array#-@).
describe Array do
  describe "#mean" do
    it "should return 0 when the array is empty" do
      [].mean.should == 0.0
    end

    it "should calculate the mean value correctly" do
      [1,2,3].mean.should == 2.0
      [1,2,3,4,5].mean.should == 3.0
      # Non-integer values: compare against an independently computed mean,
      # within a tolerance because the result is a Float.
      arr = [0.5,1.3,4.0/3,1.7/3.12]
      arr.mean.should be_within(0.0001).of(arr.inject(0.0){|s,e| s+e}/arr.size)
    end
  end

  describe "#-@" do
    it "should return the same array when there are only nil's" do
      arr = [nil,nil,nil,nil,nil]
      (-arr).should == arr
    end

    it "should return inverted numeric values" do
      (-[1,2,3,-1,-2,-3]).should == [-1,-2,-3,1,2,3]
      # nil entries are kept in place while the numbers around them are negated.
      (-[1.0,nil,0.0,-1.0]).should == [-1.0,nil,0.0,1.0]
    end
  end
end
29
+
@@ -0,0 +1,194 @@
1
require File.join(File.dirname(__FILE__), "../spec_helper.rb")

# Specs for ArrayMetrics.pearson_correlation and
# ArrayMetrics.constraint_pearson_correlation.
describe ArrayMetrics do
  # Arrays correlated against themselves (expect 1) and against their
  # negation (expect -1). The second one contains a nil on purpose.
  arrays = [[1,2,3], [1,0,-1,nil,1000]]
  # Pairs that are perfectly negatively correlated.
  opposite_pairs = [
    [[1,2,3],[3,2,1]], [[1,2,3],[7,5,3]],
    [[1,10,1,10,1],[-3,-30,-3,-30,-3]] ]
  # Pairs whose Pearson correlation is zero.
  non_correlated_pairs = [ [[1,2,3],[1,2,1]] ]
  # Each row is:
  #   [arr1, arr2, expected pearson_correlation,
  #    m, expected constraint_pearson_correlation using that m]
  generic_cases = [
    [[1,2,3], [1,2,6],  5.0/Math.sqrt(2*14),     2, 5.0/Math.sqrt(2*17)],
    [[1,2,3], [1,6,-1], -2.0/Math.sqrt(2*26),    2, -2.0/Math.sqrt(2*26)],
    [[1,2,6], [1,6,-1], -11.0/Math.sqrt(14*26),  3, -11.0/Math.sqrt(14*29)] ]

  describe "#pearson_correlation" do
    it "should return NaN when arrays have size less than or equal to one" do
      result = ArrayMetrics.pearson_correlation([],[])
      result.should be_nan

      result = ArrayMetrics.pearson_correlation([5],[1])
      result.should be_nan
    end

    it "should return NaN when arrays have size less than or equal to one, even after removing nils" do
      result = ArrayMetrics.pearson_correlation([nil],[nil])
      result.should be_nan

      result = ArrayMetrics.pearson_correlation([nil,nil],[nil,nil])
      result.should be_nan

      result = ArrayMetrics.pearson_correlation([nil,1],[nil,5])
      result.should be_nan
    end

    it "should return NaN when one of the arrays is homogeneous" do
      result = ArrayMetrics.pearson_correlation([1,2,3],[1,1,1])
      result.should be_nan
    end

    it "should raise an error when array sizes don't match" do
      lambda do
        ArrayMetrics.pearson_correlation([1,2],[1])
      end.should raise_error(RuntimeError, "array sizes don't match")

      lambda do
        ArrayMetrics.pearson_correlation([1],[1,2])
      end.should raise_error(RuntimeError, "array sizes don't match")
    end

    it "should return 1 when arrays are the same" do
      arrays.each do |arr|
        result = ArrayMetrics.pearson_correlation(arr,arr)
        result.should == 1.0
      end
    end

    it "should return -1 when arrays are the opposite from each other" do
      arrays.each do |arr|
        # -arr relies on the Array#-@ extension, which leaves nils in place.
        result = ArrayMetrics.pearson_correlation(arr,-arr)
        result.should == -1.0
      end

      opposite_pairs.each do |arr1, arr2|
        result = ArrayMetrics.pearson_correlation(arr1,arr2)
        result.should == -1.0
      end
    end

    it "should return 0 if arrays are not correlated" do
      non_correlated_pairs.each do |arr1, arr2|
        result = ArrayMetrics.pearson_correlation(arr1,arr2)
        result.should == 0.0
      end
    end

    it "should calculate generic cases correctly" do
      generic_cases.each do |arr1, arr2, response, _, _|
        result = ArrayMetrics.pearson_correlation(arr1,arr2)
        result.should be_within(0.0001).of(response)
      end
    end

    it "should return the same value if arrays are switched (commutative property)" do
      generic_cases.each do |arr1, arr2, response, _, _|
        result = ArrayMetrics.pearson_correlation(arr2,arr1)
        result.should be_within(0.0001).of(response)
      end
    end

    it "should return the same value if a constant is added to the arrays" do
      generic_cases.each do |arr1, arr2, response, _, _|
        result = ArrayMetrics.pearson_correlation(
          arr1.map{|e| e+1},arr2.map{|e| e+1})
        result.should be_within(0.0001).of(response)
      end
    end

    it "should use only positions that are not nil in both arrays" do
      r1 = ArrayMetrics.pearson_correlation([1,2,nil,5,nil,4], [1,nil,3,4,nil,3])
      r2 = ArrayMetrics.pearson_correlation([1,5,4], [1,4,3])
      r1.should be_within(0.0001).of(r2)
    end
  end

  describe "#constraint_pearson_correlation" do
    it "should return NaN when arrays have size equal to zero" do
      result = ArrayMetrics.constraint_pearson_correlation([],[],3)
      result.should be_nan
    end

    it "should return only 1, -1 or NaN when arrays have size equal to one" do
      # Values on opposite sides of m.
      result = ArrayMetrics.constraint_pearson_correlation([5],[1],3)
      result.should == -1.0

      # Values on the same side of m.
      result = ArrayMetrics.constraint_pearson_correlation([1],[2],3)
      result.should == 1.0

      # Values equal to m: no deviation, so the result is undefined.
      result = ArrayMetrics.constraint_pearson_correlation([2],[2],2)
      result.should be_nan
    end

    it "should return NaN when arrays have size equal to zero after removing nils" do
      result = ArrayMetrics.constraint_pearson_correlation([nil],[nil],2)
      result.should be_nan

      result = ArrayMetrics.constraint_pearson_correlation([nil,nil],[nil,nil],3)
      result.should be_nan
    end

    it "should return NaN when one of the arrays is homogeneous and its mean equals to m" do
      result = ArrayMetrics.constraint_pearson_correlation([1,2,3],[1,1,1],1)
      result.should be_nan
    end

    it "should raise an error when array sizes don't match" do
      lambda do
        ArrayMetrics.constraint_pearson_correlation([1,2],[1],3)
      end.should raise_error(RuntimeError, "array sizes don't match")

      lambda do
        ArrayMetrics.constraint_pearson_correlation([1],[1,2],3)
      end.should raise_error(RuntimeError, "array sizes don't match")
    end

    it "should return 1 when arrays are the same, even with different m's" do
      arrays.each do |arr|
        (0..10).to_a.each do |m|
          result = ArrayMetrics.constraint_pearson_correlation(arr,arr,m)
          result.should == 1.0
        end
      end
    end

    it "should return -1 when arrays are the opposite from each other, using m as the mean" do
      result = ArrayMetrics.constraint_pearson_correlation([1,2,3],[3,2,1],2)
      result.should == -1.0
    end

    it "should return 0 if arrays are not correlated" do
      result = ArrayMetrics.constraint_pearson_correlation([1,2,3],[1,2,1],2)
      result.should == 0.0
    end

    it "should calculate generic cases correctly" do
      generic_cases.each do |arr1, arr2, _, m, response|
        result = ArrayMetrics.constraint_pearson_correlation(arr1,arr2,m)
        result.should be_within(0.0001).of(response)
      end
    end

    it "should return the same value if arrays are switched (commutative property)" do
      generic_cases.each do |arr1, arr2, _, m, response|
        result = ArrayMetrics.constraint_pearson_correlation(arr2,arr1,m)
        result.should be_within(0.0001).of(response)
      end
    end

    it "should return the same value if a constant is added to the arrays" do
      generic_cases.each do |arr1, arr2, _, m, response|
        # m is shifted by the same constant as the arrays.
        result = ArrayMetrics.constraint_pearson_correlation(
          arr1.map{|e| e+1},arr2.map{|e| e+1},m+1)
        result.should be_within(0.0001).of(response)
      end
    end

    it "should use only positions that are not nil in both arrays" do
      r1 = ArrayMetrics.constraint_pearson_correlation(
        [1,2,nil,5,nil,4], [1,nil,3,4,nil,3],3)
      r2 = ArrayMetrics.constraint_pearson_correlation(
        [1,5,4], [1,4,3],3)
      r1.should be_within(0.0001).of(r2)
    end
  end
end
194
+
@@ -0,0 +1,3 @@
1
+ require "rubygems"
2
+ require "rspec"
3
+ require File.join(File.dirname(__FILE__), "../lib/array_metrics.rb" )
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: array_metrics
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Ricardo Panaggio
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-01-11 00:00:00 -02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 2
32
+ - 0
33
+ version: "2.0"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ description: A bundle (not that much by now) of metrics for Arrays
37
+ email:
38
+ - panaggio.ricardo@gmail.com
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ files:
46
+ - .gitignore
47
+ - Gemfile
48
+ - LICENSE
49
+ - README.rdoc
50
+ - Rakefile
51
+ - array_metrics.gemspec
52
+ - lib/array_metrics.rb
53
+ - lib/array_metrics/common.rb
54
+ - lib/array_metrics/pearson_correlation.rb
55
+ - spec/array_metrics/common_spec.rb
56
+ - spec/array_metrics/pearson_correlation_spec.rb
57
+ - spec/spec_helper.rb
58
+ has_rdoc: true
59
+ homepage: http://github.com/panaggio/array_metrics
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options: []
64
+
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ hash: 3
82
+ segments:
83
+ - 0
84
+ version: "0"
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.4.1
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: A bundle of metrics for Arrays
92
+ test_files:
93
+ - spec/array_metrics/common_spec.rb
94
+ - spec/array_metrics/pearson_correlation_spec.rb
95
+ - spec/spec_helper.rb