tyler-collaborative_filter 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,13 @@
1
class CollaborativeFilter
  # Registry and dispatcher for output adapters.
  #
  # Adapter classes register themselves under a name with .register; .store
  # then looks the adapter up by options[:type] and instantiates it. The
  # adapters in this project do all of their work inside #initialize, so
  # instantiating one is what actually writes the recommendations.
  class Output
    # Hands the recommendations to the adapter registered under options[:type].
    #
    # options         - Hash with :type (the registered adapter name) and
    #                   :options (passed through untouched to the adapter).
    # recommendations - passed through untouched to the adapter.
    #
    # Returns the new adapter instance.
    # Raises ArgumentError if no adapter is registered under options[:type].
    def self.store(options, recommendations)
      adapter = adapters[options[:type]]
      unless adapter
        raise ArgumentError,
              "Unknown output adapter #{options[:type].inspect}; " \
              "registered adapters: #{adapters.keys.inspect}"
      end
      adapter.new(options[:options], recommendations)
    end

    # Registers an adapter class under a name. Registering the same name
    # again replaces the earlier entry.
    #
    # name       - key later matched against options[:type] in .store.
    # class_name - the adapter class; must accept (options, recommendations)
    #              in its constructor.
    #
    # Returns class_name.
    def self.register(name, class_name)
      adapters[name] = class_name
    end

    # The name => adapter class registry. Created on first use so that .store
    # does not blow up with a NameError when nothing has registered yet.
    def self.adapters
      @@adapters ||= {}
    end
    private_class_method :adapters
  end
end
13
+
@@ -0,0 +1,42 @@
1
class CollaborativeFilter
  class Output
    # Output adapter that writes recommendations into a SQL table through
    # ActiveRecord::Base.connection. All of the work happens in #initialize.
    class SqlAdapter
      CollaborativeFilter::Output.register :sql, self

      # Issues one multi-row INSERT per user that has recommendations.
      #
      # options         - Hash with :table_name (target table, interpolated
      #                   as-is, so it must come from trusted configuration)
      #                   and an optional :mapping (see #setup_mapping).
      # recommendations - enumerable of (user_id, recs) pairs, where recs is
      #                   an enumerable of (item_id, score) pairs. Users with
      #                   empty recs are skipped.
      def initialize(options, recommendations)
        setup_mapping options[:mapping] || {}
        connection = ActiveRecord::Base.connection
        columns = @mapping_values.join(',')

        recommendations.each do |user_id, recs|
          next if recs.empty?
          connection.execute \
            "INSERT INTO #{options[:table_name]} (#{columns}) VALUES #{records_to_sql(user_id, recs)}"
        end
      end

      # Builds the field => column mapping used for the INSERT.
      #
      # config_mapping overrides the defaults below; mapping a field to nil
      # (or false) drops that field from the INSERT entirely. :user_type is
      # dropped by default.
      #
      # Sets @mapping, @mapping_keys (data fields) and @mapping_values
      # (column names), the latter two in matching order.
      def setup_mapping(config_mapping)
        @mapping = { :user_id   => :user_id,
                     :user_type => nil,
                     :item_id   => :item_id,
                     :item_type => :item_type,
                     :score     => :score }
        @mapping.merge!(config_mapping)
        @mapping.delete_if { |_field, column| !column }

        pairs = @mapping.to_a
        @mapping_keys   = pairs.map(&:first)
        @mapping_values = pairs.map(&:last)
      end

      # Renders the VALUES list for one user's recommendations.
      #
      # user_id - a plain id, or an [id, type] Array for polymorphic users.
      # recs    - enumerable of (item_id, score) pairs; item_id may likewise
      #           be an [id, type] Array.
      #
      # Returns a String such as "('1','9','Book','4.5'),('1','3','Book','4.3')".
      def records_to_sql(user_id, recs)
        recs.map { |item_id, score|
          data = {}
          data[:user_id] = user_id
          data[:user_id], data[:user_type] = data[:user_id] if data[:user_id].is_a?(Array)
          data[:item_id] = item_id
          data[:item_id], data[:item_type] = data[:item_id] if data[:item_id].is_a?(Array)
          data[:score] = score

          '(' + @mapping_keys.map { |key| quote_value(data[key]) }.join(',') + ')'
        }.join(',')
      end

      private

      # Quotes a value as a SQL string literal using the connection's own
      # escaping. The value is stringified first so that a missing field is
      # still written as '' (not NULL), exactly as the unescaped version did.
      def quote_value(value)
        ActiveRecord::Base.connection.quote(value.to_s)
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
class CollaborativeFilter
  class Output
    # Output adapter that dumps the recommendations to a YAML file. All of the
    # work happens in #initialize.
    class YamlAdapter
      CollaborativeFilter::Output.register :yaml, self

      # Serialises recommendations with #to_yaml and writes them to disk,
      # overwriting any existing file.
      #
      # options         - Hash with an optional :filename; defaults to
      #                   'recommendations.yml'. May be nil, which is what
      #                   Output.store passes when no adapter options are
      #                   configured.
      # recommendations - any object responding to #to_yaml.
      def initialize(options, recommendations)
        require 'yaml' # loaded lazily so YAML is only required when this adapter is used
        options ||= {}
        filename = options[:filename] || 'recommendations.yml'
        File.open(filename, 'w') { |f| f << recommendations.to_yaml }
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
# A correlator helps us find users who are similar to each other. There are
# a crapton of ways to accomplish this. In this case we're using a
# singular-value-decomposition algorithm. In essence, we decompose the matrix
# of user-item nodes (where nodes are rating, purchases, etc) into two matrices
# U and V, and their singular values S. We take the first two columns of
# V-transpose and plot them in 2-dimensional space as if the corresponding
# entries in the columns were X and Y coordinates. This will clump the users
# into groups. A simple, and moderately accurate, way to find those groups
# is to find the cosine similarities of the different users.
#
# So the correlator takes a sparse matrix, a users array, an items array, and
# options. It outputs a hash keyed by user *index* (position in the coordinate
# columns, not user id) that looks like...
#
#   { user_idx => [[sim_user_idx_1, cos_sim], [sim_user_idx_2, cos_sim], ...] }
#
# with each list sorted by descending cosine similarity.
class CollaborativeFilter
  class SimpleSvd
    # Finds, for every user, the other users whose cosine similarity is at
    # least options[:cosine_similarity].
    #
    # matrix  - object responding to #svd.
    #           NOTE(review): assumes #svd returns [u, v, s] in that order and
    #           that v.transpose.col(n) responds to #to_a - confirm against
    #           the matrix library in use.
    # users   - unused here; kept for the correlator interface.
    # items   - unused here; kept for the correlator interface.
    # options - Hash with :cosine_similarity (minimum similarity to keep) and
    #           optional :max_similar_users (cap per user; unlimited if nil).
    #
    # Returns the hash described in the header comment above.
    def run(matrix,users,items,options)
      _u, v, _s = matrix.svd

      # we use the transpose of the V matrix; transpose once, read both columns
      vt = v.transpose
      xs = vt.col(0).to_a
      ys = vt.col(1).to_a

      # precompute the vector lengths used by the cos. sim function. thanks pete!
      lengths = xs.zip(ys).map { |x, y| Math.sqrt((x * x) + (y * y)) }

      threshold = options[:cosine_similarity]
      total = 0
      similar_users = {}

      # compute the similarities between each user and each other user.
      # currently this is O(n^2)...
      xs.each_index do |user_idx|
        x1, y1 = xs[user_idx], ys[user_idx]
        sims = []

        xs.each_index do |target_idx|
          next if user_idx == target_idx

          # a user sitting at the origin has no direction, so cosine
          # similarity is undefined; skip instead of dividing by zero
          denominator = lengths[user_idx] * lengths[target_idx]
          next if denominator == 0

          x2, y2 = xs[target_idx], ys[target_idx]
          sim = ((x1 * x2) + (y1 * y2)) / denominator

          sims << [target_idx, sim] if sim >= threshold
        end

        # best matches first, capped at :max_similar_users when given
        top = sims.sort_by(&:last).reverse[0, (options[:max_similar_users] || sims.size)]
        total += top.size
        similar_users[user_idx] = top
      end

      # avoid logging NaN when there are no users at all
      average = similar_users.empty? ? 0.0 : total.to_f / similar_users.size
      CollaborativeFilter.log " Average sims per user: #{average}"
      similar_users
    end
  end
end
55
+
@@ -0,0 +1,92 @@
1
class CollaborativeFilter
  # Given any number of similarity hashes of a particular form recommend Items
  # for Users. Weights according to cosine similarity of the recommendation and
  # the cosine similarity threshold.
  #
  # Example:
  #   Threshold is set to 0.9. This particular recommendation is 0.95.
  #     1.0  - 0.9 = 0.1
  #     0.95 - 0.9 = 0.05
  #     0.05 / 0.1 = 0.5 = 50%
  #   So the 0.95 rec would be worth 50%.
  #
  # The purpose of this of course, is for the case when you are similar to multiple
  # users who have rated a certain item differently. If you are highly correlated to
  # Bob, and slightly correlated to Joe... and Bob rated X as 5 stars, and Joe rated
  # X as 2 stars... Bob's rating should carry more weight in determining your
  # recommendation. The predicted score is therefore the weighted average
  #   sum(score * weight) / sum(weight)
  #
  # Sim hashes look like: { (user index) => [[(similar user index), (closeness)], ...] }
  #
  # Input:
  #   Collection whose elements destructure as (name, DataSet) - e.g. a Hash of
  #   name => DataSet - with #similarities populated
  #
  # Output:
  #   Array in the form:
  #     [ [ (user id), [ [ (item id), (score) ], ... ] ], ... ]
  class SimplestRecommender
    # Produces the recommendations described in the class comment.
    #
    # datasets - see "Input" above. Each dataset must respond to #options
    #            (with :cosine_similarity), #similarities, #users, #items and
    #            #m (whose #col(idx).to_a yields one user's per-item scores).
    # options  - Hash with :threshold (minimum predicted score to recommend;
    #            set to 4.2 on the passed hash when missing) and optional
    #            :max_per_user (cap per user; unlimited when nil).
    #
    # Returns the Array described under "Output" above, each user's items
    # sorted by descending predicted score.
    def run(datasets, options)
      options[:threshold] ||= 4.2

      collected = datasets.inject({}) { |ratings,(name,ds)|
        collect_ratings(ratings, ds)
      }

      collected.map { |user_id, item_ratings|
        [user_id, rank_items(item_ratings, options)]
      }
    end

    # Returns the item indexes the given user already has a non-zero score
    # for, so they are not recommended back to them.
    def generate_blacklist(user_idx,ds)
      blacklist = []
      ratings = ds.m.col(user_idx).to_a
      ds.items.each_index { |idx| blacklist << idx if ratings[idx] != 0 }
      blacklist
    end

    # We don't want to recommend things that people have already rated, purchased, or subscribed to.
    # Not used at the moment
    #
    # Returns one Array of item ids per user, in ds.users order, holding the
    # items that user already has a non-zero score for.
    def generate_blacklists(ds)
      blacklists = []
      ds.users.each_with_index do |user_id, user_idx|
        blacklist = []
        # non-zero score == already rated, which is what we want to exclude
        ds.m.col(user_idx).to_a.each_with_index { |r,i| blacklist << ds.items[i] if r != 0 }

        #user = Customer.find(user_id)

        #user.subscription_list &&
        #  user.subscription_list.subscriptions.each { |sub| blacklist << [sub.subscribable_id, sub.subscribable_type] }

        #user.orders.map(&:line_items).flatten.each do |li|
        #  blacklist << [li.product_id, li.product_type]
        #  blacklist << [li.product.title_id, 'Title'] if li.product.respond_to?(:title)
        #end
        blacklists << blacklist
      end
      blacklists
    end

    private

    # Adds one dataset's [score, weight] pairs to ratings, which has the form
    # { user_id => { item_id => [[score, weight], ...] } }. Returns ratings.
    def collect_ratings(ratings, ds)
      threshold = ds.options[:cosine_similarity]
      span = 1.0 - threshold

      ds.similarities.each do |user_idx,sim_list|
        user_ratings = (ratings[ds.users[user_idx]] ||= {})

        # hash lookup instead of Array#include? inside the innermost loop
        blacklisted = {}
        generate_blacklist(user_idx,ds).each { |item_idx| blacklisted[item_idx] = true }

        sim_list.each do |sim_idx,similarity|
          # how far above the threshold this neighbour is, as a fraction of
          # the available range; with a threshold of 1.0 there is no range,
          # so every neighbour counts equally
          weight = span > 0 ? (similarity - threshold) / span : 1.0

          # grab the list of the similar users' item ratings
          ds.m.col(sim_idx).to_a.each_with_index do |score,item_idx|
            next if score == 0 || blacklisted[item_idx]

            # need to use the item_id instead of idx so the content booster can find
            # its own index of it.
            item_id = ds.items[item_idx]

            (user_ratings[item_id] ||= []) << [score, weight]
          end
        end
      end
      ratings
    end

    # Turns { item_id => [[score, weight], ...] } into a list of
    # [item_id, predicted_score] pairs at or above options[:threshold],
    # best first, capped at options[:max_per_user] when given.
    def rank_items(item_ratings, options)
      predicted = []
      item_ratings.each do |item_id, weighted_scores|
        weighted_sum, weight_sum = weighted_scores.inject([0.0, 0.0]) { |sums,(score,weight)|
          [sums.first + (score * weight), sums.last + weight]
        }
        # every neighbour sat exactly on the threshold: no usable weight
        next unless weight_sum > 0
        predicted << [item_id, weighted_sum / weight_sum]
      end

      kept = predicted.select { |_item, score|
        score >= options[:threshold]
      }.sort { |(_i1,s1),(_i2,s2)| s2 <=> s1 }

      kept[0, options[:max_per_user] || kept.size]
    end
  end
end
92
+
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tyler-collaborative_filter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Tyler McMullen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-09 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A Collaborative Filtering framework in Ruby.
17
+ email: tbmcmullen@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README.textile
26
+ - LICENSE
27
+ - Rakefile
28
+ - lib/boosters/simple_booster.rb
29
+ - lib/collaborative_filter/config.rb
30
+ - lib/collaborative_filter/content_booster.rb
31
+ - lib/collaborative_filter/data_set.rb
32
+ - lib/collaborative_filter/output
33
+ - lib/collaborative_filter/output/mysql_adapter.rb
34
+ - lib/collaborative_filter/output/yaml_adapter.rb
35
+ - lib/collaborative_filter/output.rb
36
+ - lib/collaborative_filter.rb
37
+ - lib/correlators/simple_svd.rb
38
+ - lib/recommenders/simplest_recommender.rb
39
+ has_rdoc: false
40
+ homepage: http://github.com/tyler/collaborative_filter
41
+ post_install_message:
42
+ rdoc_options: []
43
+
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ requirements: []
59
+
60
+ rubyforge_project:
61
+ rubygems_version: 1.2.0
62
+ signing_key:
63
+ specification_version: 2
64
+ summary: A Collaborative Filtering framework in Ruby.
65
+ test_files: []
66
+