pearson 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. data/.ruby-version +1 -0
  3. data/Gemfile +5 -0
  4. data/LICENSE.txt +22 -0
  5. data/lib/pearson.rb +108 -0
  6. metadata +61 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e0bbce86ff859e971101cf43995637d7d56d07f6
4
+ data.tar.gz: 72e1811067f3d044213e55d1336d74a2c1163606
5
+ SHA512:
6
+ metadata.gz: b4f9b2327f220781a24f3ab96f0087c1a7819b33a6fa866137d05773e932b83448bda278a8c9e202894f6a72c842386864a7ab10e62661bcf1b1eaf62643b7ed
7
+ data.tar.gz: d75ebd9be509a4aba48b2eb9d77574dc37aa46545c49cead8414f1b34cc187428a44d8068c525ad683f5e12b3604cfa5ee14df759dd08afce410165cc1419184
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.0.0-p247
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Use the TLS endpoint so gems are not fetched over plain HTTP.
source 'https://rubygems.org'

# Dependencies installed only for the :development group.
group :development do
  gem 'rspec', '>= 2.14.1'
end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Alfonso Jiménez
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/lib/pearson.rb ADDED
@@ -0,0 +1,108 @@
1
module Pearson
  class << self
    # Calculates the Pearson correlation coefficient between two entities,
    # using only the items that both of them have scored.
    #
    # @param scores [Hash] entity => { item => numeric score }
    # @param entity1 [Object] key of the first entity in +scores+
    # @param entity2 [Object] key of the second entity in +scores+
    #
    # @return [Float] the coefficient; 0.0 when the entities share no items
    #   or when either entity's shared scores have no variance
    def coefficient(scores, entity1, entity2)
      common = shared_items(scores, entity1, entity2)
      return 0.0 if common.empty?

      # Float count so that none of the divisions below truncate when the
      # scores are Integers.
      n = common.length.to_f

      sum1 = sum2 = sum1_sq = sum2_sq = psum = 0

      common.each_key do |item|
        a = scores[entity1][item]
        b = scores[entity2][item]

        sum1 += a
        sum2 += b
        sum1_sq += a**2
        sum2_sq += b**2
        psum += a * b
      end

      num = psum - (sum1 * sum2 / n)
      var1 = sum1_sq - (sum1**2 / n)
      var2 = sum2_sq - (sum2**2 / n)

      # A non-positive term means no variance (or a tiny negative value
      # caused by floating point rounding); the coefficient is undefined
      # there, so report "no correlation" instead of dividing by zero or
      # taking the root of a negative number.
      return 0.0 unless var1 > 0 && var2 > 0

      num / Math.sqrt(var1 * var2)
    end

    # Returns the closest entities to a given entity. Closeness is the
    # Pearson correlation coefficient between the two entities.
    #
    # @param scores [Hash] entity => { item => numeric score }
    # @param entity [Object] key of the reference entity in +scores+
    # @param opts [Hash] options; +:limit+ caps the result size (default 3)
    #
    # @return [Array] [entity, coefficient] pairs, highest coefficient first
    def closest_entities(scores, entity, opts = {})
      similarities = {}

      scores.each_key do |other|
        next if entity == other

        similarities[other] = coefficient(scores, entity, other)
      end

      sort_desc(similarities, opts)
    end

    # Returns the best recommended items for a given entity. Only items the
    # entity has not scored (or has scored 0) are considered, and each one is
    # rated by the similarity-weighted average of the scores given by
    # positively correlated entities.
    #
    # @param scores [Hash] entity => { item => numeric score }
    # @param person [Object] key of the entity to recommend items for
    # @param opts [Hash] options; +:limit+ caps the result size (default 3)
    #
    # @return [Array] [item, score] pairs, highest score first
    # @raise [PersonNotFound] when +scores+ has no entry for +person+
    def recommendations(scores, person, opts = {})
      own_scores = scores[person]
      raise PersonNotFound unless own_scores

      totals = Hash.new(0)
      similarity_sums = Hash.new(0)

      scores.each do |other, other_scores|
        next if other == person

        similarity = coefficient(scores, person, other)

        # Uncorrelated or negatively correlated entities contribute nothing.
        next if similarity <= 0

        other_scores.each do |item, score|
          next if own_scores.key?(item) && own_scores[item] != 0

          totals[item] += score * similarity
          similarity_sums[item] += similarity
        end
      end

      weighted = {}
      totals.each { |item, total| weighted[item] = total / similarity_sums[item] }

      sort_desc(weighted, opts)
    end

    private

    # Returns a hash whose keys are the items scored by both entities.
    #
    # @param scores [Hash] entity => { item => numeric score }
    # @param entity1 [Object] key of the first entity in +scores+
    # @param entity2 [Object] key of the second entity in +scores+
    #
    # @return [Hash] common item => 1
    def shared_items(scores, entity1, entity2)
      common = scores[entity1].keys & scores[entity2].keys

      Hash[common.map { |item| [item, 1] }]
    end

    # Orders key/value results by value, highest first, and keeps the top
    # entries.
    #
    # @param results [Hash] key => numeric value
    # @param opts [Hash] options; +:limit+ caps the result size (default 3)
    #
    # @return [Array] [key, value] pairs; empty when the limit is 0 or less
    def sort_desc(results, opts = {})
      limit = opts[:limit] || 3

      results.sort_by { |_key, value| value }.reverse.first([limit, 0].max)
    end
  end

  class PersonNotFound < StandardError; end
end
108
+
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pearson
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Alfonso Jiménez
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.14.0
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.14.0
27
+ description: Pearson correlation coefficient calculator
28
+ email:
29
+ - yo@alfonsojimenez.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - Gemfile
35
+ - LICENSE.txt
36
+ - .ruby-version
37
+ - lib/pearson.rb
38
+ homepage: https://github.com/alfonsojimenez/pearson
39
+ licenses: []
40
+ metadata: {}
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubyforge_project:
57
+ rubygems_version: 2.0.3
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: Pearson correlation coefficient calculator
61
+ test_files: []