pearson 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. data/.ruby-version +1 -0
  3. data/Gemfile +5 -0
  4. data/LICENSE.txt +22 -0
  5. data/lib/pearson.rb +108 -0
  6. metadata +61 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e0bbce86ff859e971101cf43995637d7d56d07f6
4
+ data.tar.gz: 72e1811067f3d044213e55d1336d74a2c1163606
5
+ SHA512:
6
+ metadata.gz: b4f9b2327f220781a24f3ab96f0087c1a7819b33a6fa866137d05773e932b83448bda278a8c9e202894f6a72c842386864a7ab10e62661bcf1b1eaf62643b7ed
7
+ data.tar.gz: d75ebd9be509a4aba48b2eb9d77574dc37aa46545c49cead8414f1b34cc187428a44d8068c525ad683f5e12b3604cfa5ee14df759dd08afce410165cc1419184
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.0.0-p247
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Fetch gems over TLS; the plain-http source is subject to tampering in
# transit.
source 'https://rubygems.org'

# Dependencies installed only for the :development group.
group :development do
  gem 'rspec', '>= 2.14.1'
end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Alfonso Jiménez
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/lib/pearson.rb ADDED
@@ -0,0 +1,108 @@
1
# Pearson correlation helpers for entity => { item => score } hashes.
module Pearson
  class << self
    # Calculates the pearson correlation coefficient between
    # two entities, using only the items both entities have scored.
    #
    # All divisions are done in floating point, so Integer scores are not
    # truncated.
    #
    # @param scores [Hash] Hash containing entity-item scores
    # @param entity1 [String] First entity
    # @param entity2 [String] Second entity
    #
    # @return [Float] Coefficient; 0.0 when the entities share no items or
    #   when either entity's shared scores have no variance
    def coefficient(scores, entity1, entity2)
      items = shared_items(scores, entity1, entity2)
      return 0.0 if items.empty?

      # Float count so that every `/ n` below is a floating point division.
      n = items.length.to_f
      ratings1 = scores[entity1]
      ratings2 = scores[entity2]

      sum1 = sum2 = sum1_sq = sum2_sq = psum = 0

      items.each do |item|
        x = ratings1[item]
        y = ratings2[item]

        sum1 += x
        sum2 += y
        sum1_sq += x**2
        sum2_sq += y**2
        psum += x * y
      end

      num = psum - (sum1 * sum2 / n)
      den_sq = (sum1_sq - (sum1**2) / n) * (sum2_sq - (sum2**2) / n)

      # A zero product means no variance. A slightly negative one can only
      # come from rounding, and would turn `** 0.5` into a Complex result.
      return 0.0 unless den_sq > 0

      num / Math.sqrt(den_sq)
    end

    # Returns the closest entities from a given entity. The distance
    # between entities is based on the Pearson correlation coefficient
    #
    # @param scores [Hash] Hash containing entity-item scores
    # @param entity [String] Entity
    # @param opts [Hash] Options (:limit, defaults to 3)
    #
    # @return [Array] Top matches as [entity, coefficient] pairs, highest
    #   coefficient first
    def closest_entities(scores, entity, opts = {})
      similarities = {}

      scores.each_key do |other|
        next if entity == other

        similarities[other] = coefficient(scores, entity, other)
      end

      sort_desc(similarities, opts)
    end

    # Returns the best recommended items for a given entity. Only items the
    # entity has not scored (or has scored 0) are considered, and only
    # entities with a positive correlation contribute.
    #
    # @param scores [Hash] Hash containing entity-item scores
    # @param person [String] Entity
    # @param opts [Hash] Options (:limit, defaults to 3)
    #
    # @return [Array] Top matches [item, score]
    # @raise [PersonNotFound] if person has no entry in scores
    def recommendations(scores, person, opts = {})
      own = scores[person]
      fail PersonNotFound unless own

      totals = Hash.new(0)
      similarity_sums = Hash.new(0)

      scores.each do |other, ratings|
        next if other == person

        similarity = coefficient(scores, person, other)
        next if similarity <= 0

        ratings.each do |item, score|
          next unless !own.key?(item) || own[item] == 0

          totals[item] += score * similarity
          similarity_sums[item] += similarity
        end
      end

      # Similarity-weighted average of the scores given by the other entities.
      predicted = {}
      totals.each { |item, total| predicted[item] = total / similarity_sums[item] }

      sort_desc(predicted, opts)
    end

    private

    # Returns the items scored by both entities
    #
    # @param scores [Hash] Hash containing entity-item scores
    # @param entity1 [String] Entity
    # @param entity2 [String] Entity
    #
    # @return [Array] Common items
    def shared_items(scores, entity1, entity2)
      scores[entity1].keys & scores[entity2].keys
    end

    # Sorts key-value results by value, highest first, and keeps the top ones
    #
    # @param results [Hash] Values keyed by entity or item
    # @param opts [Hash] Options (:limit, defaults to 3)
    #
    # @return [Array] At most :limit [key, value] pairs
    # @raise [ArgumentError] if :limit is negative
    def sort_desc(results, opts = {})
      limit = opts[:limit] || 3

      results.sort_by { |_key, value| value }.reverse.first(limit)
    end
  end

  class PersonNotFound < StandardError; end
end
108
+
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pearson
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Alfonso Jiménez
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.14.0
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.14.0
27
+ description: Pearson correlation coefficient calculator
28
+ email:
29
+ - yo@alfonsojimenez.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - Gemfile
35
+ - LICENSE.txt
36
+ - .ruby-version
37
+ - lib/pearson.rb
38
+ homepage: https://github.com/alfonsojimenez/pearson
39
+ licenses: []
40
+ metadata: {}
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubyforge_project:
57
+ rubygems_version: 2.0.3
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: Pearson correlation coefficient calculator
61
+ test_files: []