tyler-collaborative_filter 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +686 -0
- data/README.textile +109 -0
- data/Rakefile +1 -0
- data/lib/boosters/simple_booster.rb +137 -0
- data/lib/collaborative_filter.rb +43 -0
- data/lib/collaborative_filter/config.rb +58 -0
- data/lib/collaborative_filter/content_booster.rb +51 -0
- data/lib/collaborative_filter/data_set.rb +75 -0
- data/lib/collaborative_filter/output.rb +13 -0
- data/lib/collaborative_filter/output/mysql_adapter.rb +42 -0
- data/lib/collaborative_filter/output/yaml_adapter.rb +13 -0
- data/lib/correlators/simple_svd.rb +55 -0
- data/lib/recommenders/simplest_recommender.rb +92 -0
- metadata +66 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
class CollaborativeFilter
  # Registry and dispatcher for output adapters.
  #
  # Adapter classes announce themselves with Output.register; Output.store
  # then looks the adapter up by the :type key and instantiates it with the
  # adapter-specific options and the recommendations to persist.
  class Output
    # Instantiates the adapter registered under options[:type].
    #
    # options         - Hash with :type (the name an adapter registered
    #                   under) and :options (passed through to the adapter
    #                   untouched; may be nil).
    # recommendations - the recommendations handed to the adapter as-is.
    #
    # Returns the new adapter instance.
    # Raises ArgumentError when no adapter is registered under that type
    # (previously this surfaced as a NoMethodError on nil).
    def self.store(options, recommendations)
      @@adapters ||= {}
      adapter = @@adapters[options[:type]]
      unless adapter
        raise ArgumentError,
              "unknown output adapter #{options[:type].inspect} " \
              "(registered: #{@@adapters.keys.inspect})"
      end

      adapter.new(options[:options], recommendations)
    end

    # Registers an adapter class under the given name, replacing any adapter
    # previously registered under the same name.
    #
    # name       - key later matched against options[:type] in Output.store.
    # class_name - the adapter class; it must accept (options, recommendations)
    #              in its constructor.
    #
    # Returns the registered class.
    def self.register(name, class_name)
      @@adapters ||= {}
      @@adapters[name] = class_name
    end
  end
end
|
13
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
class CollaborativeFilter
  class Output
    # Output adapter that writes recommendations into a SQL table through
    # ActiveRecord::Base.connection, one multi-row INSERT per user.
    #
    # Options:
    #   :table_name - table to insert into (interpolated into the SQL as-is,
    #                 so it must come from trusted configuration).
    #   :mapping    - optional Hash of data field => column name overriding
    #                 the defaults below; a nil/false column drops the field.
    class SqlAdapter
      CollaborativeFilter::Output.register :sql, self

      # Performs the inserts immediately on construction.
      #
      # options         - Hash, see the class comment.
      # recommendations - enumerable of (user_id, recs) pairs where recs is
      #                   an enumerable of (item_id, score) pairs. Users with
      #                   no recs are skipped.
      def initialize(options, recommendations)
        setup_mapping options[:mapping] || {}
        recommendations.each do |user_id, recs|
          next if recs.empty?
          ActiveRecord::Base.connection.execute \
            "INSERT INTO #{options[:table_name]} (#{@mapping_values.join(',')}) VALUES #{records_to_sql(user_id, recs)}"
        end
      end

      # Builds the field => column mapping and caches its keys and values in
      # matching order (@mapping_keys / @mapping_values).
      #
      # config_mapping - Hash merged over the defaults. Fields mapped to nil
      #                  or false are left out of the INSERT; :user_type is
      #                  left out by default.
      def setup_mapping(config_mapping)
        defaults = { :user_id   => :user_id,
                     :user_type => nil,
                     :item_id   => :item_id,
                     :item_type => :item_type,
                     :score     => :score }
        # reject builds a new hash instead of deleting from the one being
        # iterated.
        @mapping = defaults.merge(config_mapping).reject { |_field, column| !column }
        pairs = @mapping.to_a
        @mapping_keys = pairs.map { |pair| pair.first }
        @mapping_values = pairs.map { |pair| pair.last }
      end

      # Renders one user's recommendations as the VALUES part of an INSERT:
      # "(v1,v2,...),(v1,v2,...)".
      #
      # user_id - a scalar id, or an Array whose first two elements are taken
      #           as id and type.
      # recs    - enumerable of (item_id, score); item_id may likewise be an
      #           Array of id and type.
      #
      # Every value is rendered as a quoted string, as before, but is now
      # escaped by the connection so embedded quotes cannot break out of the
      # literal.
      def records_to_sql(user_id, recs)
        connection = ActiveRecord::Base.connection
        recs.map { |item_id, score|
          data = {}
          data[:user_id] = user_id
          data[:user_id], data[:user_type] = data[:user_id] if data[:user_id].is_a?(Array)
          data[:item_id] = item_id
          data[:item_id], data[:item_type] = data[:item_id] if data[:item_id].is_a?(Array)
          data[:score] = score

          # to_s keeps the previous "everything is a string literal" output
          # (nil becomes ''); quote adds the escaping.
          '(' + @mapping_keys.map { |key| connection.quote(data[key].to_s) }.join(',') + ')'
        }.join(',')
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class CollaborativeFilter
  class Output
    # Output adapter that dumps the recommendations to a YAML file.
    #
    # Options:
    #   :filename - path of the file to (over)write; defaults to
    #               'recommendations.yml' in the current directory.
    class YamlAdapter
      CollaborativeFilter::Output.register :yaml, self

      # Writes the file immediately on construction.
      #
      # options         - Hash of adapter options, or nil. Output.store
      #                   forwards options[:options] as-is, so nil arrives
      #                   here whenever the caller supplies no adapter
      #                   options; that now selects the default filename
      #                   instead of raising.
      # recommendations - any object responding to to_yaml.
      def initialize(options, recommendations)
        # Loaded lazily so YAML is only required when this adapter is used.
        require 'yaml'
        settings = options || {}
        filename = settings[:filename] || 'recommendations.yml'
        File.open(filename, 'w') { |file| file << recommendations.to_yaml }
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# A correlator helps us find users who are similar to each other. There are
# many ways to accomplish this. In this case we're using a
# singular-value-decomposition algorithm. In essence, we decompose the matrix
# of user-item nodes (where nodes are ratings, purchases, etc) into two
# matrices U and V, and their singular values S. We take the first two columns
# of V-transpose and plot them in 2-dimensional space as if the corresponding
# entries in the columns were X and Y coordinates. This will clump the users
# into groups. A simple, and moderately accurate, way to find those groups
# is to find the cosine similarities of the different users.
#
# So the correlator takes a sparse matrix, a users array, an items array, and
# options. It outputs a hash keyed by user *index* (position in the X/Y
# coordinate lists), most similar first:
#
#   { user_idx => [[sim_user_idx, cos_sim], [sim_user_idx, cos_sim], ...] }
class CollaborativeFilter
  class SimpleSvd
    # Computes, for every user, the other users whose cosine similarity meets
    # the configured threshold.
    #
    # matrix  - object responding to #svd and returning [u, v, s]; only v is
    #           used, via v.transpose.col(0) and v.transpose.col(1).
    # users   - unused here; kept for the correlator interface.
    # items   - unused here; kept for the correlator interface.
    # options - Hash with:
    #           :cosine_similarity - minimum similarity for a pair to be kept.
    #           :max_similar_users - optional cap on entries per user.
    #
    # Returns the hash described in the header comment. Also logs the average
    # number of similar users per user through CollaborativeFilter.log.
    def run(matrix,users,items,options)
      _u, v, _s = matrix.svd

      # we use the transpose of the V matrix (transposed once, read twice)
      v_transposed = v.transpose
      xs = v_transposed.col(0).to_a
      ys = v_transposed.col(1).to_a

      # precompute the vector lengths used by the cos. sim function
      norms = []
      xs.each_index { |i| norms << Math.sqrt((xs[i] * xs[i]) + (ys[i] * ys[i])) }

      threshold = options[:cosine_similarity]
      sims_by_user = Array.new(xs.size) { [] }

      # Cosine similarity is symmetric, so each unordered pair is computed
      # once and recorded for both users. Every user's list still ends up in
      # ascending partner-index order, exactly as a full n*n scan would
      # produce it.
      xs.each_index do |user_idx|
        x1, y1 = xs[user_idx], ys[user_idx]
        ((user_idx + 1)...xs.size).each do |target_idx|
          x2, y2 = xs[target_idx], ys[target_idx]

          # A zero-length vector yields NaN, which fails the comparison below
          # and is therefore skipped.
          sim = ((x1 * x2) + (y1 * y2)) / (norms[user_idx] * norms[target_idx])
          next unless sim >= threshold

          sims_by_user[user_idx] << [target_idx, sim]
          sims_by_user[target_idx] << [user_idx, sim]
        end
      end

      similar_users = {}
      qty = 0
      sims_by_user.each_with_index do |sims, user_idx|
        top = sims.sort_by(&:last).reverse[0, (options[:max_similar_users] || sims.size)]
        qty += top.size
        similar_users[user_idx] = top
      end

      # Avoid logging NaN when there are no users at all.
      average = similar_users.empty? ? 0.0 : qty.to_f / similar_users.size
      CollaborativeFilter.log " Average sims per user: #{average}"
      similar_users
    end
  end
end
|
55
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
class CollaborativeFilter
  # Given any number of data sets with similarities populated, recommend Items
  # for Users. Each similar user's rating is weighted by how far that user's
  # cosine similarity lies above the data set's cosine similarity threshold.
  #
  # Example:
  #   Threshold is set to 0.9. This particular recommendation is 0.95.
  #   1.0 - 0.9 = 0.1
  #   0.95 - 0.9 = 0.05
  #   0.05 / 0.1 = 0.5 = 50%
  #   So the 0.95 rec would be worth 50%.
  #
  # (The division by 1.0 - threshold is a constant factor per data set and
  # cancels out of a weighted average, so the code uses the raw margin
  # similarity - threshold as the weight.)
  #
  # The purpose of this of course, is for the case when you are similar to
  # multiple users who have rated a certain item differently. If you are
  # highly correlated to Bob, and slightly correlated to Joe... and Bob rated
  # X as 5 stars, and Joe rated X as 2 stars... Bob's rating should carry more
  # weight in determining your recommendation.
  #
  # Sim hashes look like: { (user index) => [[(user index),(closeness)], ...] }
  #
  # Input:
  #   Enumerable of (name, DataSet) pairs, with #similarities populated
  #   (presumably a Hash of name => DataSet; confirm against the caller).
  #
  # Output:
  #   Array in the form:
  #     [ [ (user id), [ [ (item id), (score) ], ... ] ], ... ]
  class SimplestRecommender
    # Builds the recommendations.
    #
    # datasets - see "Input" above. Each data set must respond to #options
    #            (with :cosine_similarity), #similarities, #users, #items and
    #            #m (a matrix whose col(user_idx).to_a lists that user's
    #            per-item scores, 0 meaning "no rating").
    # options  - Hash with:
    #            :threshold    - minimum averaged score for an item to be
    #                            recommended. Defaults to 4.2; the default is
    #                            written back into the hash, as before.
    #            :max_per_user - optional cap on recommendations per user;
    #                            nil means no cap.
    #
    # Returns the array described under "Output", each user's items sorted by
    # descending score.
    def run(datasets, options)
      options[:threshold] ||= 4.2

      collect_votes(datasets).map { |user_id, votes_by_item|
        scored = votes_by_item.map { |item_id, votes| [item_id, weighted_average(votes)] }
        kept = scored.select { |_item_id, score| score >= options[:threshold] }
        ranked = kept.sort_by { |_item_id, score| -score }
        [user_id, ranked[0, options[:max_per_user] || ranked.size]]
      }
    end

    # Lists the item indexes the given user already has a non-zero entry for,
    # so they are not recommended again.
    #
    # Returns an Array of item indexes.
    def generate_blacklist(user_idx,ds)
      blacklist = []
      ratings = ds.m.col(user_idx).to_a
      ds.items.each_index { |idx| blacklist << idx if ratings[idx] != 0 }
      blacklist
    end

    # We don't want to recommend things that people have already rated,
    # purchased, or subscribed to.
    # Not used at the moment.
    #
    # Returns one Array of item ids per user, in ds.users order, holding the
    # items that user has a non-zero entry for.
    def generate_blacklists(ds)
      blacklists = []
      ds.users.each_with_index do |user_id, user_idx|
        blacklist = []
        # Blacklist what the user HAS rated (non-zero), matching
        # generate_blacklist; the previous `r == 0` selected the opposite set.
        ds.m.col(user_idx).to_a.each_with_index { |r,i| blacklist << ds.items[i] if r != 0 }

        # Disabled application-specific additions, kept for reference:
        #user = Customer.find(user_id)

        #user.subscription_list &&
        #  user.subscription_list.subscriptions.each { |sub| blacklist << [sub.subscribable_id, sub.subscribable_type] }

        #user.orders.map(&:line_items).flatten.each do |li|
        #  blacklist << [li.product_id, li.product_type]
        #  blacklist << [li.product.title_id, 'Title'] if li.product.respond_to?(:title)
        #end
        blacklists << blacklist
      end
      blacklists
    end

    private

    # Gathers, per user id and item id, the [score, weight] pairs contributed
    # by that user's similar users across all data sets.
    #
    # Returns { user_id => { item_id => [[score, weight], ...] } }. Every user
    # present in a similarities hash gets an entry, even if it stays empty.
    def collect_votes(datasets)
      datasets.inject({}) { |ratings,(_name,ds)|
        floor = ds.options[:cosine_similarity]
        ds.similarities.each do |user_idx,sim_list|
          user_votes = (ratings[ds.users[user_idx]] ||= {})

          # Hash lookup instead of Array#include? inside the inner loop.
          blocked = {}
          generate_blacklist(user_idx,ds).each { |idx| blocked[idx] = true }

          sim_list.each do |sim_idx,similarity|
            weight = similarity - floor
            # grab the list of the similar user's item ratings
            ds.m.col(sim_idx).to_a.each_with_index do |score,item_idx|
              next if score == 0 || blocked[item_idx]

              # need to use the item_id instead of idx so the content booster
              # can find its own index of it.
              (user_votes[ds.items[item_idx]] ||= []) << [score, weight]
            end
          end
        end
        ratings
      }
    end

    # Weighted mean of the scores: sum(score * weight) / sum(weight).
    # Falls back to the plain mean when the weights do not add up to a
    # positive number (e.g. every similarity sits exactly on the threshold),
    # so the result never becomes NaN or Infinity.
    def weighted_average(votes)
      weighted_sum = 0.0
      weight_sum = 0.0
      plain_sum = 0.0
      votes.each do |score, weight|
        weighted_sum += score * weight
        weight_sum += weight
        plain_sum += score
      end
      weight_sum > 0 ? weighted_sum / weight_sum : plain_sum / votes.size
    end
  end
end
|
92
|
+
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tyler-collaborative_filter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tyler McMullen
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-10-09 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A Collaborative Filtering framework in Ruby.
|
17
|
+
email: tbmcmullen@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README.textile
|
26
|
+
- LICENSE
|
27
|
+
- Rakefile
|
28
|
+
- lib/boosters/simple_booster.rb
|
29
|
+
- lib/collaborative_filter/config.rb
|
30
|
+
- lib/collaborative_filter/content_booster.rb
|
31
|
+
- lib/collaborative_filter/data_set.rb
|
32
|
+
- lib/collaborative_filter/output
|
33
|
+
- lib/collaborative_filter/output/mysql_adapter.rb
|
34
|
+
- lib/collaborative_filter/output/yaml_adapter.rb
|
35
|
+
- lib/collaborative_filter/output.rb
|
36
|
+
- lib/collaborative_filter.rb
|
37
|
+
- lib/correlators/simple_svd.rb
|
38
|
+
- lib/recommenders/simplest_recommender.rb
|
39
|
+
has_rdoc: false
|
40
|
+
homepage: http://github.com/tyler/collaborative_filter
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.2.0
|
62
|
+
signing_key:
|
63
|
+
specification_version: 2
|
64
|
+
summary: A Collaborative Filtering framework in Ruby.
|
65
|
+
test_files: []
|
66
|
+
|