tyler-collaborative_filter 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +686 -0
- data/README.textile +109 -0
- data/Rakefile +1 -0
- data/lib/boosters/simple_booster.rb +137 -0
- data/lib/collaborative_filter.rb +43 -0
- data/lib/collaborative_filter/config.rb +58 -0
- data/lib/collaborative_filter/content_booster.rb +51 -0
- data/lib/collaborative_filter/data_set.rb +75 -0
- data/lib/collaborative_filter/output.rb +13 -0
- data/lib/collaborative_filter/output/mysql_adapter.rb +42 -0
- data/lib/collaborative_filter/output/yaml_adapter.rb +13 -0
- data/lib/correlators/simple_svd.rb +55 -0
- data/lib/recommenders/simplest_recommender.rb +92 -0
- metadata +66 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
class CollaborativeFilter
  # Registry and dispatcher for output adapters.
  #
  # Adapter classes announce themselves with Output.register(name, klass).
  # Output.store then looks the adapter up by options[:type] and instantiates
  # it with (options[:options], recommendations).
  class Output
    # Instantiates the adapter registered under options[:type].
    #
    # options         - Hash with :type (the registered adapter name) and
    #                   :options (passed through untouched to the adapter;
    #                   may be nil when the caller supplied none).
    # recommendations - passed through untouched to the adapter.
    #
    # Returns the new adapter instance.
    # Raises ArgumentError when no adapter is registered under that type
    # (previously this surfaced as a NameError / NoMethodError on nil).
    def self.store(options, recommendations)
      type = options[:type]
      adapter = adapters[type]
      unless adapter
        raise ArgumentError,
              "unknown output type #{type.inspect} (registered: #{adapters.keys.inspect})"
      end
      adapter.new(options[:options], recommendations)
    end

    # Registers class_name as the adapter to use for the given name.
    # A later registration under the same name replaces the earlier one.
    #
    # Returns class_name.
    def self.register(name, class_name)
      adapters[name] = class_name
    end

    # Lazily initialised name => adapter class table, so store is safe to
    # call before any adapter has been registered.
    def self.adapters
      @@adapters ||= {}
    end
    private_class_method :adapters
  end
end
|
13
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
class CollaborativeFilter
  class Output
    # Output adapter that writes recommendations into a SQL table through
    # ActiveRecord::Base.connection. Registered under the :sql type.
    #
    # Options:
    #   :table_name - table to insert into (interpolated into the statement
    #                 as-is, so it must come from trusted configuration).
    #   :mapping    - optional Hash of data field => column name overriding the
    #                 defaults below; mapping a field to nil/false drops it.
    class SqlAdapter
      CollaborativeFilter::Output.register :sql, self

      # Issues one multi-row INSERT per user that has at least one
      # recommendation.
      #
      # recommendations - enumerable yielding (user_id, recs) pairs, where
      #                   recs is a list of (item_id, score) pairs.
      def initialize(options, recommendations)
        setup_mapping options[:mapping] || {}
        recommendations.each do |user_id, recs|
          next if recs.empty?
          ActiveRecord::Base.connection.execute \
            "INSERT INTO #{options[:table_name]} (#{@mapping_values.join(',')}) VALUES #{records_to_sql(user_id, recs)}"
        end
      end

      # Builds the field => column mapping and caches the parallel key and
      # column lists used to build the INSERT statement.
      def setup_mapping(config_mapping)
        @mapping = { :user_id   => :user_id,
                     :user_type => nil,
                     :item_id   => :item_id,
                     :item_type => :item_type,
                     :score     => :score }
        @mapping.merge!(config_mapping)
        # Fields mapped to nil/false are not written at all.
        @mapping.delete_if { |_field, column| !column }
        pairs = @mapping.to_a
        @mapping_keys   = pairs.map { |pair| pair.first }
        @mapping_values = pairs.map { |pair| pair.last }
      end

      # Renders the VALUES tuples for one user's recommendations.
      #
      # A user_id or item_id given as an Array is split into [id, type].
      # Every value is escaped with the connection's quote_string before being
      # wrapped in single quotes, so ids containing quotes can neither break
      # nor inject into the statement.
      #
      # Returns a String such as "('1','Title','4.5'),('2','Title','4.3')".
      def records_to_sql(user_id, recs)
        connection = ActiveRecord::Base.connection
        recs.map { |item_id, score|
          data = { :user_id => user_id, :item_id => item_id, :score => score }
          data[:user_id], data[:user_type] = data[:user_id] if data[:user_id].is_a?(Array)
          data[:item_id], data[:item_type] = data[:item_id] if data[:item_id].is_a?(Array)

          tuple = @mapping_keys.map { |key| "'#{connection.quote_string(data[key].to_s)}'" }
          '(' + tuple.join(',') + ')'
        }.join(',')
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class CollaborativeFilter
  class Output
    # Output adapter that dumps the recommendations to a YAML file.
    # Registered under the :yaml type.
    #
    # Options:
    #   :filename - path to write; defaults to 'recommendations.yml'.
    class YamlAdapter
      CollaborativeFilter::Output.register :yaml, self

      # Serialises recommendations with #to_yaml and writes them to the
      # target file, truncating any existing content.
      #
      # options may be nil (Output.store passes options[:options] through
      # unchanged, which is nil when the caller gave none); the default
      # filename is used in that case instead of raising NoMethodError.
      def initialize(options, recommendations)
        # Required here rather than at load time, so yaml is only pulled in
        # when this adapter is actually used.
        require 'yaml'
        settings = options || {}
        filename = settings[:filename] || 'recommendations.yml'
        File.open(filename, 'w') { |f| f << recommendations.to_yaml }
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# A correlator helps us find users who are similar to each other. There are
|
2
|
+
# a crapton of ways to accomplish this. In this case we're using a
|
3
|
+
# singular-value-decomposition algorithm. In essence, we decompose the matrix
|
4
|
+
# of user-item nodes (where nodes are rating, purchases, etc) into two matrices
|
5
|
+
# U and V, and their singular values S. We take the first two columns of
|
6
|
+
# V-transpose and plot them in 2-dimensional space as if the corresponding
|
7
|
+
# entries in the columns were X and Y coordinates. This will clump the users
|
8
|
+
# into groups. A simple, and moderately accurate, way to find those groups
|
9
|
+
# is to find the cosine similarities of the different users.
|
10
|
+
#
|
11
|
+
# So the correlator takes a sparse matrix, a users array, an items array, and
|
12
|
+
# options. It outputs a hash that looks like...
|
13
|
+
#
|
14
|
+
# { user_idx => [[sim_user_idx, cos_sim], [sim_user_idx, cos_sim], ...] }
|
15
|
+
class CollaborativeFilter
  # Correlator based on a singular value decomposition of the input matrix.
  # Each user is placed at the 2-D point given by the first two columns of
  # V-transpose, and users are compared by the cosine of the angle between
  # their points.
  class SimpleSvd
    # matrix  - object responding to #svd, whose second result responds to
    #           #transpose and #col(i).to_a.
    # users   - unused here.
    # items   - unused here.
    # options - Hash; :cosine_similarity is the minimum similarity to keep,
    #           :max_similar_users optionally caps the list kept per user.
    #
    # Returns { user_idx => [[similar_user_idx, similarity], ...] } with each
    # list ordered from most to least similar.
    def run(matrix,users,items,options)
      _u, v, _s = matrix.svd

      # Coordinates come from the transpose of the V matrix.
      v_t = v.transpose
      xs = v_t.col(0).to_a
      ys = v_t.col(1).to_a

      # Vector lengths, computed once and reused as the cosine denominator.
      norms = xs.zip(ys).map { |x, y| Math.sqrt((x * x) + (y * y)) }

      total = 0
      similar_users = {}

      # Every user is compared against every other user: O(n^2).
      xs.each_with_index do |x1, user_idx|
        y1 = ys[user_idx]
        matches = []

        xs.each_with_index do |x2, target_idx|
          next if target_idx == user_idx
          y2 = ys[target_idx]

          # cosine similarity = dot product / product of the two lengths
          sim = ((x1 * x2) + (y1 * y2)) / (norms[user_idx] * norms[target_idx])

          matches << [target_idx, sim] if sim >= options[:cosine_similarity]
        end

        limit  = options[:max_similar_users] || matches.size
        ranked = matches.sort_by { |pair| pair.last }.reverse
        kept   = ranked[0, limit]

        total += kept.size
        similar_users[user_idx] = kept
      end

      CollaborativeFilter.log " Average sims per user: #{total.to_f / similar_users.size}"
      similar_users
    end
  end
end
|
55
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
class CollaborativeFilter
  # Given any number of data sets with similarities populated, recommend items
  # for users. Each similar user's score for an item is weighted by how far
  # that user's cosine similarity lies above the data set's similarity
  # threshold.
  #
  # Example:
  #   Threshold is set to 0.9. This particular similarity is 0.95.
  #     1.0  - 0.9 = 0.1
  #     0.95 - 0.9 = 0.05
  #     0.05 / 0.1 = 0.5 = 50%
  #   So the 0.95 similarity carries a weight of 50%.
  #
  # The purpose of this of course, is for the case when you are similar to
  # multiple users who have rated a certain item differently. If you are highly
  # correlated to Bob, and slightly correlated to Joe... and Bob rated X as 5
  # stars, and Joe rated X as 2 stars... Bob's rating should carry more weight
  # in determining your recommendation.
  #
  # Sim hashes look like: { (user index) => [[(similar user index), (closeness)], ...] }
  #
  # Input:
  #   Enumerable of (name, data set) pairs, e.g. a Hash of name => data set.
  #   Each data set responds to #options, #similarities, #users, #items and
  #   #m (where m.col(user index).to_a gives that user's score per item).
  #
  # Output:
  #   Array in the form:
  #     [ [ (user id), [ [ (item id), (score) ], ... ] ], ... ]
  class SimplestRecommender
    # options - Hash:
    #   :threshold    - minimum averaged score to recommend. Defaults to 4.2;
    #                   the default is written back into the options hash.
    #   :max_per_user - optional cap on recommendations per user; all
    #                   qualifying items are returned when it is absent.
    def run(datasets, options)
      options[:threshold] ||= 4.2

      collected = datasets.inject({}) do |ratings, (_name, ds)|
        floor = ds.options[:cosine_similarity]
        span  = 1.0 - floor

        ds.similarities.each do |user_idx, sim_list|
          user_ratings = (ratings[ds.users[user_idx]] ||= {})

          # Constant-time lookup of the items this user has already scored.
          already_rated = {}
          generate_blacklist(user_idx, ds).each { |idx| already_rated[idx] = true }

          sim_list.each do |sim_idx, similarity|
            # Fraction of the way from the threshold up to a perfect 1.0.
            # With a threshold of 1.0 every kept user is a perfect match.
            weight = span > 0 ? (similarity - floor) / span : 1.0

            # walk the similar user's item scores
            ds.m.col(sim_idx).to_a.each_with_index do |score, item_idx|
              next if score == 0 || already_rated[item_idx]

              # need to use the item_id instead of idx so the content booster
              # can find its own index of it.
              item_id = ds.items[item_idx]
              (user_ratings[item_id] ||= []) << [score, weight]
            end
          end
        end
        ratings
      end

      collected.map do |user_id, item_ratings|
        scored = item_ratings.map { |item_id, pairs| [item_id, weighted_average(pairs)] }
        kept   = scored.select { |_item_id, score| score >= options[:threshold] }
        ranked = kept.sort { |(_k1, v1), (_k2, v2)| v2 <=> v1 }
        [user_id, ranked[0, options[:max_per_user] || ranked.size]]
      end
    end

    # Returns the item indexes the given user already has a non-zero score
    # for; those items are never recommended back to that user.
    def generate_blacklist(user_idx,ds)
      ratings = ds.m.col(user_idx).to_a
      ds.items.each_index.select { |idx| ratings[idx] != 0 }
    end

    # We don't want to recommend things that people have already rated,
    # purchased, or subscribed to. Returns one list of already-scored item ids
    # per user, in ds.users order.
    # Not used at the moment. (An earlier draft also sketched adding each
    # customer's subscriptions and ordered products to the list.)
    def generate_blacklists(ds)
      ds.users.each_index.map do |user_idx|
        rated = []
        ds.m.col(user_idx).to_a.each_with_index { |r, i| rated << ds.items[i] if r != 0 }
        rated
      end
    end

    private

    # Weighted mean of [score, weight] pairs. Falls back to the plain mean
    # when the weights sum to zero (every contributing user sat exactly on the
    # similarity threshold), so the result is always a finite score.
    def weighted_average(pairs)
      weighted_sum = 0.0
      weight_sum = 0.0
      pairs.each do |score, weight|
        weighted_sum += score * weight
        weight_sum += weight
      end
      return weighted_sum / weight_sum if weight_sum > 0

      pairs.inject(0.0) { |sum, (score, _weight)| sum + score } / pairs.size
    end
  end
end
|
92
|
+
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tyler-collaborative_filter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tyler McMullen
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-10-09 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A Collaborative Filtering framework in Ruby.
|
17
|
+
email: tbmcmullen@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README.textile
|
26
|
+
- LICENSE
|
27
|
+
- Rakefile
|
28
|
+
- lib/boosters/simple_booster.rb
|
29
|
+
- lib/collaborative_filter/config.rb
|
30
|
+
- lib/collaborative_filter/content_booster.rb
|
31
|
+
- lib/collaborative_filter/data_set.rb
|
32
|
+
- lib/collaborative_filter/output
|
33
|
+
- lib/collaborative_filter/output/mysql_adapter.rb
|
34
|
+
- lib/collaborative_filter/output/yaml_adapter.rb
|
35
|
+
- lib/collaborative_filter/output.rb
|
36
|
+
- lib/collaborative_filter.rb
|
37
|
+
- lib/correlators/simple_svd.rb
|
38
|
+
- lib/recommenders/simplest_recommender.rb
|
39
|
+
has_rdoc: false
|
40
|
+
homepage: http://github.com/tyler/collaborative_filter
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.2.0
|
62
|
+
signing_key:
|
63
|
+
specification_version: 2
|
64
|
+
summary: A Collaborative Filtering framework in Ruby.
|
65
|
+
test_files: []
|
66
|
+
|