array_metrics 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ pkg
data/Gemfile ADDED
@@ -0,0 +1 @@
1
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Ricardo Panaggio
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,45 @@
1
+ = array_metrics
2
+
3
+ A bundle of metrics for Arrays.
4
+
5
+ == Installation
6
+
7
+ gem install array_metrics
8
+
9
+ == Usage
10
+
11
+ ArrayMetrics has (or will have) many distance and similarity metrics
12
+ implemented inside it.
13
+
14
+ For example, to calculate the Pearson Correlation between two Arrays
15
+ <tt>arr1</tt> and <tt>arr2</tt>:
16
+
17
+ ArrayMetrics.pearson_correlation(arr1, arr2)
18
+
19
+ == Maintainer
20
+
21
+ * Ricardo Panaggio - http://panaggio.net
22
+
23
+ == License
24
+
25
+ (The MIT License)
26
+
27
+ Permission is hereby granted, free of charge, to any person obtaining
28
+ a copy of this software and associated documentation files (the
29
+ 'Software'), to deal in the Software without restriction, including
30
+ without limitation the rights to use, copy, modify, merge, publish,
31
+ distribute, sublicense, and/or sell copies of the Software, and to
32
+ permit persons to whom the Software is furnished to do so, subject to
33
+ the following conditions:
34
+
35
+ The above copyright notice and this permission notice shall be
36
+ included in all copies or substantial portions of the Software.
37
+
38
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
39
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
40
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
41
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
42
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
43
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
44
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
45
+
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ gemspec = eval(File.read(Dir["*.gemspec"].first))
2
+
3
+ desc "Validate the gemspec"
4
+ task :gemspec do
5
+ gemspec.validate
6
+ end
7
+
8
+ desc "Build gem locally"
9
+ task :build => :gemspec do
10
+ system "gem build #{gemspec.name}.gemspec"
11
+ FileUtils.mkdir_p "pkg"
12
+ FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", "pkg"
13
+ end
14
+
15
+ desc "Install gem locally"
16
+ task :install => :build do
17
+ system "gem install pkg/#{gemspec.name}-#{gemspec.version}"
18
+ end
19
+
20
+ desc "Clean automatically generated files"
21
+ task :clean do
22
+ FileUtils.rm_rf "pkg"
23
+ end
24
+
25
+ require 'rspec/core/rake_task'
26
+ RSpec::Core::RakeTask.new
@@ -0,0 +1,17 @@
1
+ # coding: UTF-8
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = "array_metrics"
5
+ s.version = "0.0.1"
6
+ s.platform = Gem::Platform::RUBY
7
+ s.authors = ["Ricardo Panaggio"]
8
+ s.email = ["panaggio.ricardo@gmail.com"]
9
+ s.homepage = "http://github.com/panaggio/array_metrics"
10
+ s.summary = "A bundle of metrics for Arrays"
11
+ s.description = "A bundle (not that much by now) of metrics for Arrays"
12
+ s.files = `git ls-files`.split("\n")
13
+ s.test_files = `git ls-files spec`.split("\n")
14
+ s.has_rdoc = true
15
+ s.require_path = 'lib'
16
+ s.add_runtime_dependency "rspec", "~> 2.0"
17
+ end
@@ -0,0 +1,12 @@
1
+ NaN = 0.0/0
2
+
3
+ class Array
4
+ def mean
5
+ return 0.0 if size==0
6
+ self.inject(0.0){|sum,e| sum+e}.to_f/self.size
7
+ end
8
+
9
+ def -@
10
+ self.map{ |e| if e.nil? then e else -e end }
11
+ end
12
+ end
@@ -0,0 +1,47 @@
1
+ module ArrayMetrics
2
+ # Calculates the Constrained Pearson Correlation between Arrays x and y,
3
+ # centered on m (array means if m is nil). See Shardanand and Maes 1995.
4
+ #
5
+ # Shardanand, U. and Maes, P. Social information filtering: algorithms for
6
+ # automating "word of mouth". In Proceedings of the SIGCHI conference on
7
+ # Human factors in computing systems (CHI '95). ACM Press/Addison-Wesley
8
+ # Publishing Co., New York, NY, USA, 210-217.
9
+ # DOI=10.1145/223904.223931 http://dx.doi.org/10.1145/223904.223931
10
+ def self.constraint_pearson_correlation(x, y, m)
11
+ raise "array sizes don't match" if x.size != y.size
12
+
13
+ cx, cy = [], []
14
+ x.each_with_index do |xi, i|
15
+ next if xi.nil? or y[i].nil?
16
+ cx << xi
17
+ cy << y[i]
18
+ end
19
+
20
+ return NaN if cx.size == 0
21
+
22
+ s = sx = sy = 0.0
23
+ if m.nil?
24
+ cx_ = cx.mean
25
+ cy_ = cy.mean
26
+ else
27
+ cx_ = cy_ = m
28
+ end
29
+
30
+ cx.each_with_index do |cxi,i|
31
+ px = cxi-cx_
32
+ py = cy[i]-cy_
33
+ s += px*py
34
+ sx += px*px
35
+ sy += py*py
36
+ end
37
+
38
+ s/Math.sqrt(sx*sy)
39
+ end
40
+
41
+ # Calculates the Pearson Correlation between the Arrays x and y.
42
+ # See https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
43
+ # for more details.
44
+ def self.pearson_correlation(x,y)
45
+ self.constraint_pearson_correlation(x,y,nil)
46
+ end
47
+ end
@@ -0,0 +1,2 @@
1
+ require "lib/array_metrics/common.rb"
2
+ require "lib/array_metrics/pearson_correlation.rb"
@@ -0,0 +1,29 @@
1
+ require File.join(File.dirname(__FILE__), "../spec_helper.rb")
2
+
3
+ describe Array do
4
+ describe "#mean" do
5
+ it "should return 0 when the array is empty" do
6
+ [].mean.should == 0.0
7
+ end
8
+
9
+ it "should calculate then mean value correctly" do
10
+ [1,2,3].mean.should == 2.0
11
+ [1,2,3,4,5].mean.should == 3.0
12
+ arr = [0.5,1.3,4.0/3,1.7/3.12]
13
+ arr.mean.should be_within(0.0001).of(arr.inject(0.0){|s,e| s+e}/arr.size)
14
+ end
15
+ end
16
+
17
+ describe "#-@" do
18
+ it "should return the same array when there are only nil's" do
19
+ arr = [nil,nil,nil,nil,nil]
20
+ (-arr).should == arr
21
+ end
22
+
23
+ it "should return inverted numeric values" do
24
+ (-[1,2,3,-1,-2,-3]).should == [-1,-2,-3,1,2,3]
25
+ (-[1.0,nil,0.0,-1.0]).should == [-1.0,nil,0.0,1.0]
26
+ end
27
+ end
28
+ end
29
+
@@ -0,0 +1,194 @@
1
+ require File.join(File.dirname(__FILE__), "../spec_helper.rb")
2
+
3
+ describe ArrayMetrics do
4
+ arrays = [[1,2,3], [1,0,-1,nil,1000]]
5
+ opposite_pairs = [
6
+ [[1,2,3],[3,2,1]], [[1,2,3],[7,5,3]],
7
+ [[1,10,1,10,1],[-3,-30,-3,-30,-3]] ]
8
+ non_correlated_pairs = [ [[1,2,3],[1,2,1]] ]
9
+ generic_cases = [
10
+ [[1,2,3], [1,2,6], 5.0/Math.sqrt(2*14), 2, 5.0/Math.sqrt(2*17)],
11
+ [[1,2,3], [1,6,-1], -2.0/Math.sqrt(2*26), 2, -2.0/Math.sqrt(2*26)],
12
+ [[1,2,6], [1,6,-1], -11.0/Math.sqrt(14*26),3, -11.0/Math.sqrt(14*29)] ]
13
+
14
+ describe "#pearson_correlation" do
15
+ it "should return NaN when arrays have size less or equal to one" do
16
+ result = ArrayMetrics.pearson_correlation([],[])
17
+ result.should be_nan
18
+
19
+ result = ArrayMetrics.pearson_correlation([5],[1])
20
+ result.should be_nan
21
+ end
22
+
23
+ it "should return NaN when arrays have size less or equal to one, even after removing nils" do
24
+ result = ArrayMetrics.pearson_correlation([nil],[nil])
25
+ result.should be_nan
26
+
27
+ result = ArrayMetrics.pearson_correlation([nil,nil],[nil,nil])
28
+ result.should be_nan
29
+
30
+ result = ArrayMetrics.pearson_correlation([nil,1],[nil,5])
31
+ result.should be_nan
32
+ end
33
+
34
+ it "should return NaN when one of the arrays it homogeneous" do
35
+ result = ArrayMetrics.pearson_correlation([1,2,3],[1,1,1])
36
+ result.should be_nan
37
+ end
38
+
39
+ it "should raise an error when arrays size don't match" do
40
+ lambda do
41
+ ArrayMetrics.pearson_correlation([1,2],[1])
42
+ end.should raise_error(RuntimeError, "array sizes don't match")
43
+
44
+ lambda do
45
+ ArrayMetrics.pearson_correlation([1],[1,2])
46
+ end.should raise_error(RuntimeError, "array sizes don't match")
47
+ end
48
+
49
+ it "should return 1 when arrays are the same" do
50
+ arrays.each do |arr|
51
+ result = ArrayMetrics.pearson_correlation(arr,arr)
52
+ result.should == 1.0
53
+ end
54
+ end
55
+
56
+ it "should return -1 when arrays are the opposite from each other" do
57
+ arrays.each do |arr|
58
+ result = ArrayMetrics.pearson_correlation(arr,-arr)
59
+ result.should == -1.0
60
+ end
61
+
62
+ opposite_pairs.each do |arr1, arr2|
63
+ result = ArrayMetrics.pearson_correlation(arr1,arr2)
64
+ result.should == -1.0
65
+ end
66
+ end
67
+
68
+ it "should return 0 if arrays are not correlated" do
69
+ non_correlated_pairs.each do |arr1, arr2|
70
+ result = ArrayMetrics.pearson_correlation(arr1,arr2)
71
+ result.should == 0.0
72
+ end
73
+ end
74
+
75
+ it "should calculate generic cases correctly" do
76
+ generic_cases.each do |arr1, arr2, response, _, _|
77
+ result = ArrayMetrics.pearson_correlation(arr1,arr2)
78
+ result.should be_within(0.0001).of(response)
79
+ end
80
+ end
81
+
82
+ it "should return the same value if arrays are switched (commutative property)" do
83
+ generic_cases.each do |arr1, arr2, response, _, _|
84
+ result = ArrayMetrics.pearson_correlation(arr2,arr1)
85
+ result.should be_within(0.0001).of(response)
86
+ end
87
+ end
88
+
89
+ it "should return the same value if a constant is added to the arrays" do
90
+ generic_cases.each do |arr1, arr2, response, _, _|
91
+ result = ArrayMetrics.pearson_correlation(
92
+ arr1.map{|e| e+1},arr2.map{|e| e+1})
93
+ result.should be_within(0.0001).of(response)
94
+ end
95
+ end
96
+
97
+ it "should use only positions that are both not nil in both arrays" do
98
+ r1 = ArrayMetrics.pearson_correlation([1,2,nil,5,nil,4], [1,nil,3,4,nil,3])
99
+ r2 = ArrayMetrics.pearson_correlation([1,5,4], [1,4,3])
100
+ r1.should be_within(0.0001).of(r2)
101
+ end
102
+ end
103
+
104
+ describe "#constraint_pearson_correlation" do
105
+ it "should return NaN when arrays have size equal to zero" do
106
+ result = ArrayMetrics.constraint_pearson_correlation([],[],3)
107
+ result.should be_nan
108
+ end
109
+
110
+ it "should return only 1, -1 or NaN when arrays have size equal to one" do
111
+ result = ArrayMetrics.constraint_pearson_correlation([5],[1],3)
112
+ result.should == -1.0
113
+
114
+ result = ArrayMetrics.constraint_pearson_correlation([1],[2],3)
115
+ result.should == 1.0
116
+
117
+ result = ArrayMetrics.constraint_pearson_correlation([2],[2],2)
118
+ result.should be_nan
119
+ end
120
+
121
+ it "should return NaN when arrays have size equal to one, even after removing nils" do
122
+ result = ArrayMetrics.constraint_pearson_correlation([nil],[nil],2)
123
+ result.should be_nan
124
+
125
+ result = ArrayMetrics.constraint_pearson_correlation([nil,nil],[nil,nil],3)
126
+ result.should be_nan
127
+ end
128
+
129
+ it "should return NaN when one of the arrays is homogeneous and its mean equals to m" do
130
+ result = ArrayMetrics.constraint_pearson_correlation([1,2,3],[1,1,1],1)
131
+ result.should be_nan
132
+ end
133
+
134
+ it "should raise an error when arrays size don't match" do
135
+ lambda do
136
+ ArrayMetrics.constraint_pearson_correlation([1,2],[1],3)
137
+ end.should raise_error(RuntimeError, "array sizes don't match")
138
+
139
+ lambda do
140
+ ArrayMetrics.constraint_pearson_correlation([1],[1,2],3)
141
+ end.should raise_error(RuntimeError, "array sizes don't match")
142
+ end
143
+
144
+ it "should return 1 when arrays are the same, even with different m's" do
145
+ arrays.each do |arr|
146
+ (0..10).to_a.each do |m|
147
+ result = ArrayMetrics.constraint_pearson_correlation(arr,arr,m)
148
+ result.should == 1.0
149
+ end
150
+ end
151
+ end
152
+
153
+ it "should return -1 when arrays are the opposite from each, using m as the mean" do
154
+ result = ArrayMetrics.constraint_pearson_correlation([1,2,3],[3,2,1],2)
155
+ result.should == -1.0
156
+ end
157
+
158
+ it "should return 0 if arrays are not correlated" do
159
+ result = ArrayMetrics.constraint_pearson_correlation([1,2,3],[1,2,1],2)
160
+ result.should == 0.0
161
+ end
162
+
163
+ it "should calculate generic cases correctly" do
164
+ generic_cases.each do |arr1, arr2, _, m, response|
165
+ result = ArrayMetrics.constraint_pearson_correlation(arr1,arr2,m)
166
+ result.should be_within(0.0001).of(response)
167
+ end
168
+ end
169
+
170
+ it "should return the same value if arrays are switched (commutative property)" do
171
+ generic_cases.each do |arr1, arr2, _, m, response|
172
+ result = ArrayMetrics.constraint_pearson_correlation(arr2,arr1,m)
173
+ result.should be_within(0.0001).of(response)
174
+ end
175
+ end
176
+
177
+ it "should return the same value if a constant is added to the arrays" do
178
+ generic_cases.each do |arr1, arr2, _, m, response|
179
+ result = ArrayMetrics.constraint_pearson_correlation(
180
+ arr1.map{|e| e+1},arr2.map{|e| e+1},m+1)
181
+ result.should be_within(0.0001).of(response)
182
+ end
183
+ end
184
+
185
+ it "should use only positions that are both not nil in both arrays" do
186
+ r1 = ArrayMetrics.constraint_pearson_correlation(
187
+ [1,2,nil,5,nil,4], [1,nil,3,4,nil,3],3)
188
+ r2 = ArrayMetrics.constraint_pearson_correlation(
189
+ [1,5,4], [1,4,3],3)
190
+ r1.should be_within(0.0001).of(r2)
191
+ end
192
+ end
193
+ end
194
+
@@ -0,0 +1,3 @@
1
+ require "rubygems"
2
+ require "rspec"
3
+ require File.join(File.dirname(__FILE__), "../lib/array_metrics.rb" )
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: array_metrics
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Ricardo Panaggio
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-01-11 00:00:00 -02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 2
32
+ - 0
33
+ version: "2.0"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ description: A bundle (not that much by now) of metrics for Arrays
37
+ email:
38
+ - panaggio.ricardo@gmail.com
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ files:
46
+ - .gitignore
47
+ - Gemfile
48
+ - LICENSE
49
+ - README.rdoc
50
+ - Rakefile
51
+ - array_metrics.gemspec
52
+ - lib/array_metrics.rb
53
+ - lib/array_metrics/common.rb
54
+ - lib/array_metrics/pearson_correlation.rb
55
+ - spec/array_metrics/common_spec.rb
56
+ - spec/array_metrics/pearson_correlation_spec.rb
57
+ - spec/spec_helper.rb
58
+ has_rdoc: true
59
+ homepage: http://github.com/panaggio/array_metrics
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options: []
64
+
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ hash: 3
82
+ segments:
83
+ - 0
84
+ version: "0"
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.4.1
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: A bundle of metrics for Arrays
92
+ test_files:
93
+ - spec/array_metrics/common_spec.rb
94
+ - spec/array_metrics/pearson_correlation_spec.rb
95
+ - spec/spec_helper.rb