tyler-collaborative_filter 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
class CollaborativeFilter
  # Registry and dispatcher for output adapters.
  #
  # Adapters register themselves under a name (see .register); .store looks
  # the adapter up by options[:type] and instantiates it with the adapter
  # specific options and the recommendations to persist.
  class Output
    # Instantiates the adapter registered under options[:type].
    #
    # options         - Hash with :type (registered adapter name) and
    #                   :options (passed through untouched to the adapter).
    # recommendations - the recommendations, passed through to the adapter.
    #
    # Returns the new adapter instance.
    # Raises ArgumentError when no adapter is registered under options[:type].
    def self.store(options, recommendations)
      adapter = adapters[options[:type]]
      unless adapter
        raise ArgumentError,
              "unknown output adapter #{options[:type].inspect} " \
              "(registered: #{adapters.keys.inspect})"
      end
      adapter.new(options[:options], recommendations)
    end

    # Registers an adapter class under +name+ so .store can find it.
    #
    # Returns the registered class.
    def self.register(name, class_name)
      adapters[name] = class_name
    end

    # Lazily initialised name => adapter class registry. Kept in the same
    # class variable as before so previously registered adapters are shared.
    def self.adapters
      @@adapters ||= {}
    end
    private_class_method :adapters
  end
end
13
+
@@ -0,0 +1,42 @@
class CollaborativeFilter
  class Output
    # Output adapter that writes recommendations to a SQL table through
    # ActiveRecord's current connection, issuing one multi-row INSERT per user.
    #
    # Options read by this adapter:
    #   :table_name - table to insert into (interpolated verbatim, so it must
    #                 come from trusted configuration).
    #   :mapping    - optional Hash of field => column name merged over the
    #                 defaults in #setup_mapping; a nil/false column name
    #                 drops that field from the INSERT.
    class SqlAdapter
      CollaborativeFilter::Output.register :sql, self

      # Inserts every non-empty recommendation list.
      #
      # recommendations - enumerable yielding (user_id, recs) pairs, where
      #                   recs is a list of [item_id, score] pairs.
      def initialize(options, recommendations)
        setup_mapping options[:mapping] || {}
        # The column list is the same for every statement; build it once.
        columns = @mapping_values.join(',')
        recommendations.each do |user_id, recs|
          next if recs.empty?
          ActiveRecord::Base.connection.execute \
            "INSERT INTO #{options[:table_name]} (#{columns}) VALUES #{records_to_sql(user_id, recs)}"
        end
      end

      # Builds the field => column mapping and the parallel @mapping_keys /
      # @mapping_values arrays used to render the INSERT.
      #
      # :user_type is unmapped by default; callers opt in through
      # config_mapping.
      def setup_mapping(config_mapping)
        defaults = { :user_id   => :user_id,
                     :user_type => nil,
                     :item_id   => :item_id,
                     :item_type => :item_type,
                     :score     => :score }
        # Drop unmapped fields without mutating a hash while iterating it.
        @mapping = defaults.merge(config_mapping).reject { |_field, column| !column }
        # Derive both arrays from one snapshot so positions always line up.
        pairs = @mapping.to_a
        @mapping_keys   = pairs.map { |pair| pair.first }
        @mapping_values = pairs.map { |pair| pair.last }
      end

      # Renders the VALUES tuples for one user's recommendations.
      #
      # user_id - scalar id, or an [id, type] pair.
      # recs    - list of [item_id, score]; item_id may be an [id, type] pair.
      #
      # Returns a String such as "('1','','7','Title','4.5'),(...)". Every
      # value is rendered as a quoted string literal, escaped by the
      # connection so embedded quotes cannot break or alter the statement.
      def records_to_sql(user_id, recs)
        connection = ActiveRecord::Base.connection
        uid, utype = user_id.is_a?(Array) ? user_id : [user_id, nil]

        recs.map { |item_id, score|
          iid, itype = item_id.is_a?(Array) ? item_id : [item_id, nil]
          data = { :user_id   => uid,
                   :user_type => utype,
                   :item_id   => iid,
                   :item_type => itype,
                   :score     => score }

          # to_s keeps the historical "everything is a string literal" output
          # (nil becomes '') while quote() adds the escaping.
          '(' + @mapping_keys.map { |key| connection.quote(data[key].to_s) }.join(',') + ')'
        }.join(',')
      end
    end
  end
end
@@ -0,0 +1,13 @@
class CollaborativeFilter
  class Output
    # Output adapter that dumps the recommendations to a YAML file.
    #
    # Options read by this adapter:
    #   :filename - path of the file to write; defaults to
    #               'recommendations.yml' in the current directory.
    class YamlAdapter
      CollaborativeFilter::Output.register :yaml, self

      # Serialises +recommendations+ with #to_yaml and writes the result to
      # the target file, truncating any existing content.
      def initialize(options, recommendations)
        # Loaded lazily so YAML is only required when this adapter is used.
        require 'yaml'

        target = options[:filename] || 'recommendations.yml'
        File.open(target, 'w') do |io|
          io << recommendations.to_yaml
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
# A correlator finds users who are similar to each other. There are many ways
# to do that; this one uses a singular-value decomposition. The matrix of
# user-item nodes (ratings, purchases, etc) is decomposed into two matrices
# U and V plus the singular values S. The first two columns of V-transpose
# are then treated as X and Y coordinates, which places every user in
# 2-dimensional space and clumps similar users together. A simple, and
# moderately accurate, way to find those groups is to compare users by the
# cosine similarity of their coordinates.
#
# The correlator takes a sparse matrix, a users array, an items array, and
# options. It returns a hash keyed by user index:
#
#   { user_idx => [[similar_user_idx, cos_sim], ...] }
#
# Each list is ordered from most to least similar.
class CollaborativeFilter
  class SimpleSvd
    # matrix  - object responding to #svd; the second element of the result
    #           is used as V.
    # users   - not used by this correlator.
    # items   - not used by this correlator.
    # options - Hash with :cosine_similarity (minimum similarity to keep a
    #           pair) and optional :max_similar_users (cap per user).
    def run(matrix,users,items,options)
      _u, v, _s = matrix.svd

      # The coordinates come from the transpose of the V matrix.
      xs = v.transpose.col(0).to_a
      ys = v.transpose.col(1).to_a

      # Vector lengths are the denominator terms of the cosine similarity;
      # compute each one once instead of once per pair.
      norms = []
      xs.each_index { |i| norms << Math.sqrt((xs[i] * xs[i]) + (ys[i] * ys[i])) }

      total = 0
      neighbours = {}

      # Compare every user against every other user: O(n^2).
      xs.each_index do |user_idx|
        ux = xs[user_idx]
        uy = ys[user_idx]
        candidates = []

        xs.each_index do |target_idx|
          next if target_idx == user_idx

          tx = xs[target_idx]
          ty = ys[target_idx]
          cosine = ((ux * tx) + (uy * ty)) / (norms[user_idx] * norms[target_idx])

          candidates << [target_idx, cosine] if cosine >= options[:cosine_similarity]
        end

        # Most similar first, optionally capped.
        limit = options[:max_similar_users] || candidates.size
        top = candidates.sort_by { |pair| pair.last }.reverse[0, limit]

        total += top.size
        neighbours[user_idx] = top
      end

      CollaborativeFilter.log " Average sims per user: #{total.to_f / neighbours.size}"
      neighbours
    end
  end
end
55
+
@@ -0,0 +1,92 @@
class CollaborativeFilter
  # Given any number of similarity hashes of a particular form, recommend
  # Items for Users. Each similar user's rating is weighted by how far that
  # user's cosine similarity lies above the cosine similarity threshold.
  #
  # Example:
  #   Threshold is set to 0.9. This particular recommendation is 0.95.
  #     1.0  - 0.9 = 0.1
  #     0.95 - 0.9 = 0.05
  #     0.05 / 0.1 = 0.5 = 50%
  #   So the 0.95 rec would be worth 50%.
  #
  # The purpose of this, of course, is the case where you are similar to
  # multiple users who have rated a certain item differently. If you are
  # highly correlated to Bob, and slightly correlated to Joe... and Bob rated
  # X as 5 stars, and Joe rated X as 2 stars... Bob's rating should carry more
  # weight in determining your recommendation.
  #
  # Sim hashes look like: { (user index) => [[(similar user index), (closeness)], ...] }
  #
  # Input:
  #   Enumerable yielding (name, DataSet) pairs (e.g. a Hash of name =>
  #   DataSet), each DataSet with #similarities populated. A DataSet must
  #   also respond to #options, #users, #items and #m, where m.col(user_idx)
  #   lists that user's ratings by item index (0 meaning "not rated").
  #
  # Output:
  #   Array in the form:
  #     [ [ (user id), [ [ (item id), (score) ], ... ] ], ... ]
  class SimplestRecommender
    # options - Hash with:
    #   :threshold    - minimum weighted score to recommend an item; defaults
    #                   to 4.2 (the default is written back into options, as
    #                   it always has been).
    #   :max_per_user - optional cap on recommendations per user; no cap
    #                   when absent.
    def run(datasets, options)
      options[:threshold] ||= 4.2

      collect_weighted_ratings(datasets).map { |user_id, by_item|
        scored = by_item.map { |item_id, entries| [item_id, weighted_average(entries)] }
        scored = scored.select { |_item_id, average| average >= options[:threshold] }
        scored = scored.sort_by { |_item_id, average| -average }
        [user_id, scored.first(options[:max_per_user] || scored.size)]
      }
    end

    # Returns the item indexes the user has already rated (non-zero entry in
    # the user's column of ds.m); these are never recommended back.
    def generate_blacklist(user_idx,ds)
      blacklist = []
      ratings = ds.m.col(user_idx).to_a
      ds.items.each_index { |idx| blacklist << idx if ratings[idx] != 0 }
      blacklist
    end

    # We don't want to recommend things that people have already rated, purchased, or subscribed to.
    # Not used at the moment
    #
    # Returns one list of already-rated item ids per user, in ds.users order.
    def generate_blacklists(ds)
      blacklists = []
      ds.users.each_with_index do |user_id, user_idx|
        blacklist = []
        # Non-zero means the user has rated the item, so it is blacklisted.
        ds.m.col(user_idx).to_a.each_with_index { |r,i| blacklist << ds.items[i] if r != 0 }

        #user = Customer.find(user_id)

        #user.subscription_list &&
        #  user.subscription_list.subscriptions.each { |sub| blacklist << [sub.subscribable_id, sub.subscribable_type] }

        #user.orders.map(&:line_items).flatten.each do |li|
        #  blacklist << [li.product_id, li.product_type]
        #  blacklist << [li.product.title_id, 'Title'] if li.product.respond_to?(:title)
        #end
        blacklists << blacklist
      end
      blacklists
    end

    private

    # Gathers, per user id and item id, every [score, weight] pair
    # contributed by the user's similar users across all datasets.
    #
    # Returns { user_id => { item_id => [[score, weight], ...] } }.
    def collect_weighted_ratings(datasets)
      datasets.inject({}) do |ratings, (_name, ds)|
        threshold = ds.options[:cosine_similarity]
        span = 1.0 - threshold

        ds.similarities.each do |user_idx, sim_list|
          user_ratings = (ratings[ds.users[user_idx]] ||= {})

          # Hash lookup instead of Array#include? inside the inner loop.
          already_rated = {}
          generate_blacklist(user_idx, ds).each { |idx| already_rated[idx] = true }

          sim_list.each do |sim_idx, similarity|
            # Share of the distance between the threshold and a perfect
            # match. With a threshold of 1.0 there is no distance to share,
            # so every match counts fully.
            weight = span > 0 ? (similarity - threshold) / span : 1.0

            # Walk the similar user's item ratings.
            ds.m.col(sim_idx).to_a.each_with_index do |score, item_idx|
              next if score == 0 || already_rated[item_idx]

              # Keyed by item id rather than index so the content booster
              # can find its own index of it.
              (user_ratings[ds.items[item_idx]] ||= []) << [score, weight]
            end
          end
        end
        ratings
      end
    end

    # Weighted mean of [score, weight] pairs. Falls back to the plain mean
    # when the weights carry no information (they sum to zero or less, e.g.
    # every similar user sits exactly on the threshold).
    def weighted_average(entries)
      score_sum, weight_sum = entries.inject([0.0, 0.0]) { |(scores, weights), (score, weight)|
        [scores + (score * weight), weights + weight]
      }
      return score_sum / weight_sum if weight_sum > 0

      entries.inject(0.0) { |sum, (score, _weight)| sum + score } / entries.size
    end
  end
end
92
+
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tyler-collaborative_filter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Tyler McMullen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-09 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A Collaborative Filtering framework in Ruby.
17
+ email: tbmcmullen@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README.textile
26
+ - LICENSE
27
+ - Rakefile
28
+ - lib/boosters/simple_booster.rb
29
+ - lib/collaborative_filter/config.rb
30
+ - lib/collaborative_filter/content_booster.rb
31
+ - lib/collaborative_filter/data_set.rb
32
+ - lib/collaborative_filter/output
33
+ - lib/collaborative_filter/output/mysql_adapter.rb
34
+ - lib/collaborative_filter/output/yaml_adapter.rb
35
+ - lib/collaborative_filter/output.rb
36
+ - lib/collaborative_filter.rb
37
+ - lib/correlators/simple_svd.rb
38
+ - lib/recommenders/simplest_recommender.rb
39
+ has_rdoc: false
40
+ homepage: http://github.com/tyler/collaborative_filter
41
+ post_install_message:
42
+ rdoc_options: []
43
+
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ requirements: []
59
+
60
+ rubyforge_project:
61
+ rubygems_version: 1.2.0
62
+ signing_key:
63
+ specification_version: 2
64
+ summary: A Collaborative Filtering framework in Ruby.
65
+ test_files: []
66
+