acts_as_recommendable 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NTViNzc1YWExZjJkZTY4YTIxYzRlMGM5YmU2ZjEwMGNmNDI2ZGM4OQ==
5
+ data.tar.gz: !binary |-
6
+ MmIwNzJhMGIxYzNkN2Q1ZGU3YjJkMzIxNDc3OTdlYmI1OGE5YmVjYQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MDc3NmE1Mzk5OWQ0ZmRhYTJiNzViOTg4ZTc1NTY2NTY3ZTE3MjRkM2I4ODFh
10
+ NDEwMWU5ZDIzMWVhNzIzZTUwYWFiODY5YzI5OWUzOTM3OTY0NGY5OWMwYjNl
11
+ MzY4M2YwMGRkZDUzMzk3OTJmZTUwZjZlYWI0ODI0NTQ2YWU4NzE=
12
+ data.tar.gz: !binary |-
13
+ ZTE2YTY2MTRiMGNjYWJjOWE2ZDY2OTc5YWRkMTQ1ZjIzMDEwMjgwMGM4NjI4
14
+ MThhZmU5ZTQ0ZWVjOTQ2NGQwYmU2NmM4MDQxNWQ1YWMyNmZjMDFjNzU0NjUy
15
+ ZGU4MDRlNjA2MzBlMTMwMjAyNWMzNDY1Y2JjZTFhMmE2ZDQ1YWE=
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Made by Many Ltd
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,123 @@
1
+ ActsAsRecommendable
2
+ ===================
3
+
4
+ ActsAsRecommendable is a plugin for Rails that simplifies collaborative filtering
5
+
6
+ The plugin provides a mechanism for finding loose associations between users and items, which can tell you:
7
+ * Given a user, return other similar users based on what items they have all bought/bookmarked/rated/etc
8
+ * Given a user, return recommended items based on the items bought/bookmarked/rated/etc by that user and the items bought/bookmarked/rated/etc by other users.
9
+
10
+ The plugin calculations can be made online and offline and stored using the rails cache (such as memcache) for online retrieval. Online retrieval of recommendations uses item-based collaborative filtering using the offline items similarity matrix stored in the cache. This can give up-to-date results with a much lower processing overhead.
11
+
12
+ Much thanks to Toby Segaran and his excellent book Programming Collective Intelligence (http://oreilly.com/catalog/9780596529321/).
13
+
14
+ Features
15
+ ========
16
+
17
+ Use join rating scores
18
+ Using arbitrary calculated scores
19
+ Similar Items
20
+ Recommended Users
21
+ Cached dataset
22
+
23
+ Current Release
24
+ ===============
25
+
26
+ v0.1 should be considered early alpha and not ready for production applications.
27
+
28
+ Lots of performance optimisations still to be done.
29
+
30
+ Example
31
+ =======
32
+
33
+ class Book < ActiveRecord::Base
34
+ has_many :user_books
35
+ has_many :users, :through => :user_books
36
+ end
37
+
38
+ class UserBook < ActiveRecord::Base
39
+ belongs_to :book
40
+ belongs_to :user
41
+ end
42
+
43
+ class User < ActiveRecord::Base
44
+ has_many :user_books
45
+ has_many :books, :through => :user_books
46
+ acts_as_recommendable :books, :through => :user_books
47
+ end
48
+
49
+ user = User.find(:first)
50
+ user.similar_users #=> [...]
51
+ user.recommended_books #=> [...]
52
+
53
+ book = Book.find(:first)
54
+ book.similar_books #=> [...]
55
+
56
+ Example 2
57
+ =========
58
+
59
+ class Movie < ActiveRecord::Base
60
+ has_many :user_movies
61
+ has_many :users, :through => :user_movies
62
+ end
63
+
64
+ class UserMovie < ActiveRecord::Base
65
+ belongs_to :movie
66
+ belongs_to :user
67
+ end
68
+
69
+ class User < ActiveRecord::Base
70
+ has_many :user_movies
71
+ has_many :movies, :through => :user_movies
72
+ acts_as_recommendable :movies, :through => :user_movies, :score => :score
73
+ # 'score' is an attribute on the user_movies table
74
+ end
75
+
76
+ user = User.find(:first)
77
+ user.similar_users #=> [...]
78
+ user.recommended_movies #=> [...]
79
+
80
+ Example 3
81
+ =========
82
+
83
+ class Book < ActiveRecord::Base
84
+ has_many :user_books
85
+ has_many :users, :through => :user_books
86
+ end
87
+
88
+ class UserBook < ActiveRecord::Base
89
+ belongs_to :book
90
+ belongs_to :user
91
+ end
92
+
93
+ class User < ActiveRecord::Base
94
+ has_many :user_books
95
+ has_many :books, :through => :user_books
96
+ acts_as_recommendable :books, :through => :user_books, :use_dataset => true
97
+ # Uses cached dataset
98
+ end
99
+
100
+ user = User.find(:first)
101
+ user.recommended_books #=> [...]
102
+
103
+ # The example above uses a cached dataset.
104
+ # You need to generate a cached dataset every so often (depending on how much your content changes)
105
+ # You can do that by calling the rake task recommendations:build, you should run this with a cron job every so often.
106
+
107
+
108
+ # If you only want to use the dataset in production put this in production.rb:
109
+ User.aar_options[:use_dataset] = true
110
+
111
+ # Note:
112
+ # user.similar_users doesn't use the dataset
113
+ #
114
+ # The advantage of using a dataset is that you don't need to load all the users & items into
115
+ # memory (which you do normally). The disadvantage is that you won't get as accurate results.
116
+ #
117
+
118
+ Contact
119
+ =======
120
+ alex@madebymany.co.uk
121
+
122
+
123
+ Copyright (c) 2008 Made by Many Ltd, released under the MIT license
@@ -0,0 +1,17 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ require 'bundler/gem_tasks'
6
+
7
# The gem packaging tasks (build, install, release) are already registered by
# the `require 'bundler/gem_tasks'` line above, so
# Bundler::GemHelper.install_tasks is not called a second time here: doing so
# would attach every action to those tasks twice.

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the acts_as_recommendable plugin.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
# Gem specification for acts_as_recommendable: identity metadata, the
# supported Ruby version, dependency constraints, and the file manifest
# (taken from whatever `git ls-files` reports in the working directory).
Gem::Specification.new do |s|
  # Identity
  s.name        = "acts_as_recommendable"
  s.version     = '0.0.2'
  s.license     = "MIT"
  s.authors     = ["macman"]
  s.email       = "info@eribium.org"
  s.homepage    = "https://github.com/maccman/acts_as_recommendable"
  s.summary     = "Recommendation engine for Rails"
  s.description = "A recommendation engine for Rails."

  s.required_ruby_version = '~> 1.9.3'

  # Runtime dependencies
  s.add_dependency 'ruby-progressbar', '~> 0'
  s.add_dependency 'RubyInline', '~> 3.12', '>= 3.12.3'
  s.add_runtime_dependency 'rails', '~> 3.2', '>= 3.2.0'

  # Development-only dependencies
  s.add_development_dependency 'rake', '~> 0'
  s.add_development_dependency 'sdoc', '~> 0'
  s.add_development_dependency 'minitest', '~> 0'
  s.add_development_dependency 'sqlite3', '~> 0'
  s.add_development_dependency 'ruby-debug-completion', '~> 0'

  # Manifest: one path per line of git output.
  s.files       = `git ls-files`.split("\n")
  s.test_files  = `git ls-files -- {test,features}/*`.split("\n")
  bin_paths     = `git ls-files -- bin/*`.split("\n")
  s.executables = bin_paths.map { |path| File.basename(path) }
  s.require_paths = ["lib"]
end
data/init.rb ADDED
@@ -0,0 +1,14 @@
1
# Plugin entry point: loads the plugin library and the cache fix, then tries
# to switch on the RubyInline-backed optimizations.
plugin_root = File.dirname(__FILE__)
require plugin_root + '/lib/acts_as_recommendable'
require plugin_root + '/lib/cache_fix'

# Fix RubyInline's permission problem: RubyInline doesn't like directories
# with group write permissions (like /tmp), so it is pointed at
# tmp/rubyinline under the application root instead. Rails.root is used when
# it is available and non-nil, the RAILS_ROOT constant otherwise.
app_root =
  if Rails.respond_to?(:root) && !Rails.root.nil?
    Rails.root
  else
    RAILS_ROOT
  end
ENV['INLINEDIR'] = File.join(app_root, 'tmp', 'rubyinline')

# A LoadError from either require is deliberately ignored.
begin
  require 'inline'
  require plugin_root + '/lib/optimizations'
rescue LoadError
end
@@ -0,0 +1,365 @@
1
+ require File.dirname(__FILE__) + '/cache_fix'
2
+ require File.dirname(__FILE__) + '/optimizations'
3
+
4
+ # ActsAsRecommendable
5
+ module MadeByMany
6
+ module ActsAsRecommendable
7
# Hook invoked when this module is included somewhere: extends the including
# class (or module) with ActsMethods.
def self.included(base)
  base.send(:extend, ActsMethods)
end
10
+
11
+ module ActsMethods
12
# Send an array of ids to ActiveRecord without fear that some of them don't
# exist (or that the array itself is missing).
#
# ids - Array of primary key values; may be nil or empty.
#
# Returns the result of `where`: with the always-false condition "1 = 0"
# when ids is nil/empty, otherwise with a hash condition on the primary key.
def find_some_without_failing(ids)
  return where("1 = 0") if !ids || ids.empty?

  # The ids are passed through untouched: hash conditions are quoted by
  # ActiveRecord itself, so quoting them here first would escape string
  # keys twice.
  where(table_name.to_sym => { primary_key.to_sym => ids })
end
19
+
20
# Declares that this model can be compared and given recommendations through
# the has_many :through association +on+.
#
# on      - name of a has_many :through association on this model.
# options - Hash merged over these defaults:
#           :algorithm     - Logic method used to score similarity (:sim_pearson)
#           :use_dataset   - read rankings from the cached dataset (false)
#           :split_dataset - one cache entry per item instead of one for all (true)
#           :limit         - maximum number of results (10)
#           :min_score     - results must score strictly above this (0.0)
#           :score         - optional attribute on the join record holding a rating
#
# Defines similar_<on> on the associated class, and similar_<models>,
# recommended_<items> and aar_items_with_scores on this class. The merged
# options are stored in the aar_options class attribute.
#
# Raises RuntimeError when +on+ is not an association of this model or when
# that association has no :through reflection.
def acts_as_recommendable(on, options = {})
  defaults = {
    :algorithm     => :sim_pearson,
    :use_dataset   => false,
    :split_dataset => true,
    :limit         => 10,
    :min_score     => 0.0
  }
  options = defaults.merge(options)

  # Reflect on the specified association to derive the extra details we need.
  # Each reflection is checked before it is dereferenced, so a bad declaration
  # fails with the messages below rather than with a NoMethodError on nil.
  options[:on] = on
  assoc = reflections[on.to_sym]
  raise "No association specified to recommend." if assoc.nil?

  through_assoc = assoc.through_reflection
  raise "The #{on} association does not have a :through association" unless through_assoc

  options[:through] = through_assoc.name

  options[:on_singular] ||= assoc.class_name.underscore
  options[:on_class]    ||= assoc.klass

  options[:class] = self

  options[:through_singular] ||= through_assoc.class_name.downcase
  options[:through_class]    ||= through_assoc.klass

  class_attribute :aar_options
  self.aar_options = options

  # e.g. Book#similar_books
  options[:on_class].class_eval do
    define_method "similar_#{options[:on]}" do
      Logic.similar_items(self, options)
    end
  end

  # e.g. User#similar_users
  define_method "similar_#{options[:class].name.underscore.pluralize}" do
    Logic.similar_users(self, options)
  end

  # e.g. User#recommended_books; :use_dataset is read at call time so it can
  # be switched on after the declaration.
  define_method "recommended_#{options[:on_class].name.underscore.pluralize}" do
    if aar_options[:use_dataset]
      Logic.dataset_recommended(self, options)
    else
      Logic.recommended(self, options)
    end
  end

  # Memoized Hash of item id => item for this record's join rows; each item
  # is given an aar_score accessor holding its score.
  define_method "aar_items_with_scores" do
    @aar_items_with_scores ||= begin
      scored_items = __send__(aar_options[:through]).map do |join_record|
        item = join_record.__send__(aar_options[:on_singular])
        next unless item

        score = 1.0
        if aar_options[:score]
          score = join_record.__send__(aar_options[:score]).to_f
          # Missing, zero or negative ratings fall back to 1.0.
          score = 1.0 if score <= 0
        end

        def item.aar_score; @aar_score; end
        def item.aar_score=(d); @aar_score = d; end
        item.aar_score = score
        item
      end

      scored_items.compact.inject({}) { |h, item| h[item.id] = item; h }
    end
  end
end
89
+ end
90
+
91
+ module Logic
92
+
93
# Builds the user-by-item preference matrix.
#
# options - Hash; reads :on_class (item model), :class (user model) and
#           :on (association handed to `includes`).
#
# Returns [item_ids, prefs]: item_ids is every id plucked from
# options[:on_class]; prefs maps user id => { item id => score } for the
# items present in that user's aar_items_with_scores.
def self.matrix(options)
  item_ids = options[:on_class].pluck(:id)
  prefs = {}

  options[:class].includes(options[:on]).each do |user|
    row = (prefs[user.id] ||= {})
    item_ids.each do |item_id|
      scored = user.aar_items_with_scores[item_id]
      row[item_id] = scored.aar_score if scored
    end
  end

  [item_ids, prefs]
end
108
+
109
# Builds the item-by-user preference matrix (the transpose of `matrix`).
#
# options - Hash; reads :on_class (item model), :class (user model) and
#           :on (association handed to `includes`).
#
# Returns [user_ids, prefs]: user_ids is the id of every loaded user; prefs
# maps item id => { user id => score }, with an (possibly empty) entry for
# every id plucked from options[:on_class].
def self.inverted_matrix(options)
  item_ids = options[:on_class].pluck(:id)
  users = options[:class].includes(options[:on])

  prefs = item_ids.inject({}) do |table, item_id|
    column = (table[item_id] ||= {})
    users.each do |user|
      scored = user.aar_items_with_scores[item_id]
      column[user.id] = scored.aar_score if scored
    end
    table
  end

  [users.map(&:id), prefs]
end
124
+
125
# Euclidean-distance similarity between two rows of a preference matrix.
#
# prefs   - Hash of row key => { column key => score }.
# items   - Array of column keys to compare over; a missing score counts
#           as 0.0.
# person1 - key of the first row in prefs.
# person2 - key of the second row in prefs.
#
# Returns 0 when items is empty; otherwise the Float
# 1 / (1 + sum of squared differences), so identical rows score 1.0.
def self.sim_distance(prefs, items, person1, person2)
  return 0 if items.length == 0

  row1 = prefs[person1]
  row2 = prefs[person2]

  sum_of_squares = items.inject(0.0) do |sum, item|
    sum + ((row1[item] || 0.0) - (row2[item] || 0.0)) ** 2
  end

  # Float numerator: with Integer scores, 1 / (1 + n) would truncate to 0.
  1.0 / (1 + sum_of_squares)
end
138
+
139
# Pearson correlation between two rows of a preference matrix.
#
# prefs   - Hash of row key => { column key => score }.
# items   - Array of column keys to compare over; a missing score counts
#           as 0.0.
# person1 - key of the first row in prefs.
# person2 - key of the second row in prefs.
#
# Returns 0 when items is empty or either row has no variance over items;
# otherwise the correlation coefficient as a Float.
def self.sim_pearson(prefs, items, person1, person2)
  n = items.length
  return 0 if n == 0

  row1 = prefs[person1]
  row2 = prefs[person2]
  sum1 = sum2 = sum1_sq = sum2_sq = product_sum = 0.0

  items.each do |item|
    score1 = row1[item] || 0.0
    score2 = row2[item] || 0.0
    sum1 += score1
    sum2 += score2
    sum1_sq += score1 ** 2
    sum2_sq += score2 ** 2
    product_sum += score2 * score1
  end

  num = product_sum - ((sum1 * sum2) / n)
  spread1 = sum1_sq - (sum1 ** 2) / n
  spread2 = sum2_sq - (sum2 ** 2) / n

  # Both spreads are mathematically >= 0, but rounding can leave a constant
  # row a hair below zero; Math.sqrt would then raise Math::DomainError.
  # A row without variance has no defined correlation, so report 0.
  return 0 if spread1 <= 0 || spread2 <= 0

  num / Math.sqrt(spread1 * spread2)
end
163
+
164
# Ranks the other users by similarity to +user+.
#
# user    - record whose id identifies its row in the preference matrix.
# options - Hash; reads :algorithm, :min_score, :limit and :class (plus
#           whatever `matrix` reads).
#
# Returns an Array of user records, best match first, each responding to
# similar_score. Only scores strictly above :min_score are kept, and at most
# :limit rankings are looked up.
def self.similar_users(user, options)
  items, prefs = matrix(options)

  rankings = []
  prefs.each_key do |other_id|
    next if other_id == user.id
    rankings << [__send__(options[:algorithm], prefs, items, user.id, other_id), other_id]
  end

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # Load every ranked record in one query, then index by id so the ranking
  # order can be restored.
  ranking_ids = rankings.map { |_, id| id }
  records = options[:class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.map do |score, id|
    record = records_by_id[id]
    # The lookup tolerates missing ids; skip them rather than defining
    # singleton methods on nil and returning nil entries.
    next unless record

    def record.similar_score; @similar_score; end
    def record.similar_score=(d); @similar_score = d; end
    record.similar_score = score
    record
  end

  ranked.compact
end
189
+
190
# Ranks the other items by similarity to +item+.
#
# item    - record whose id identifies it in the matrix / cache.
# options - Hash; reads :use_dataset, :split_dataset, :on, :algorithm,
#           :min_score, :limit and :on_class.
#
# With :use_dataset the [score, item_id] rankings are read from Rails.cache
# (key "aar_<on>_<item id>" when :split_dataset, otherwise the entry for the
# item inside "aar_<on>_dataset"); without it they are computed from
# `inverted_matrix`.
#
# Returns an Array of item records, best match first, each responding to
# similar_score; [] when no rankings are available.
def self.similar_items(item, options)
  if options[:use_dataset]
    if options[:split_dataset]
      rankings = Rails.cache.read("aar_#{options[:on]}_#{item.id}")
    else
      cached_dataset = Rails.cache.read("aar_#{options[:on]}_dataset")
      logger.warn 'ActsRecommendable has an empty dataset - rebuild it' unless cached_dataset
      rankings = cached_dataset && cached_dataset[item.id]
    end
  else
    users, prefs = inverted_matrix(options)
    rankings = []
    prefs.each_key do |other_id|
      next if other_id == item.id
      rankings << [__send__(options[:algorithm], prefs, users, item.id, other_id), other_id]
    end
  end
  return [] unless rankings

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # Load every ranked record in one query, then index by id so the ranking
  # order can be restored.
  ranking_ids = rankings.map { |_, id| id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.map do |score, id|
    record = records_by_id[id]
    # Rankings (cached ones especially) can name items that no longer exist;
    # skip them rather than defining singleton methods on nil.
    next unless record

    def record.similar_score; @similar_score; end
    def record.similar_score=(d); @similar_score = d; end
    record.similar_score = score
    record
  end

  ranked.compact
end
226
+
227
# Recommends items for +user+ with user-based collaborative filtering: every
# item the user has not scored is rated by the similarity-weighted average
# of the scores other users gave it.
#
# user    - record whose id identifies its row in the preference matrix.
# options - Hash; reads :algorithm, :min_score, :limit and :on_class (plus
#           whatever `matrix` reads).
#
# Returns an Array of item records, best first, each responding to
# recommendation_score; [] when the user has no row in the matrix.
def self.recommended(user, options)
  items, prefs = matrix(options)
  user_id = user.id

  own_ratings = prefs[user_id]
  # Without a row for this user there is nothing to compare against.
  return [] unless own_ratings

  totals = Hash.new(0)
  sim_sums = Hash.new(0)

  prefs.each_key do |other|
    # don't compare me to myself
    next if other == user_id

    sim = __send__(options[:algorithm], prefs, items, user_id, other)

    # ignore scores of zero or lower
    next if sim <= 0

    prefs[other].each do |item, rating|
      # only items this user has not scored (or scored as 0)
      next unless !own_ratings.include?(item) || own_ratings[item] == 0

      totals[item] += rating * sim   # similarity * score
      sim_sums[item] += sim          # sum of similarities
    end
  end

  # Normalize each total by the similarity mass behind it.
  rankings = totals.map { |item, total| [total / sim_sums[item], item] }

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # So we can do everything in one SQL query
  ranking_ids = rankings.map { |_, id| id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.map do |score, id|
    record = records_by_id[id]
    # The lookup tolerates missing ids; skip them rather than defining
    # singleton methods on nil and returning nil entries.
    next unless record

    def record.recommendation_score; @recommendation_score; end
    def record.recommendation_score=(d); @recommendation_score = d; end
    record.recommendation_score = score
    record
  end

  ranked.compact
end
280
+
281
# Computes, for every item, its similarity to every other item and hands the
# result to the caller's block.
#
# options - Hash; reads :algorithm (and whatever `inverted_matrix` reads
#           when no matrix is given).
# matrix  - optional precomputed [users, prefs] pair in the shape returned
#           by `inverted_matrix`.
#
# Yields each item key together with an Array of [score, other_item] pairs
# sorted from highest to lowest score. Nothing is yielded without a block.
def self.generate_dataset(options, matrix = nil)
  users, prefs = matrix || inverted_matrix(options)

  prefs.keys.each do |item|
    others = prefs.keys.reject { |other| other == item }
    scores = others.map do |other|
      [__send__(options[:algorithm], prefs, users, item, other), other]
    end
    scores = scores.sort_by { |score, _| score }.reverse
    yield(item, scores) if block_given?
  end
end
293
+
294
# Recommends items for +user+ from the cached item-similarity dataset
# (item-based collaborative filtering).
#
# user    - record responding to aar_items_with_scores.
# options - Hash; reads :split_dataset, :on, :score, :min_score, :limit and
#           :on_class.
#
# For each item the user already has, the cached [similarity, item_id] pairs
# are read from Rails.cache (key "aar_<on>_<item id>" when :split_dataset,
# otherwise the item's entry inside "aar_<on>_dataset") and accumulated for
# the items the user does not have yet.
#
# Returns an Array of item records, best first, each responding to
# recommendation_score.
def self.dataset_recommended(user, options)
  scores = {}
  total_sim = {}
  items = user.aar_items_with_scores
  item_ids = items.values.map(&:id)

  unless options[:split_dataset]
    cached_dataset = Rails.cache.read("aar_#{options[:on]}_dataset")
    logger.warn 'ActsRecommendable has an empty dataset - rebuild it' unless cached_dataset
  end

  item_ids.each do |item_id|
    ratings =
      if options[:split_dataset]
        Rails.cache.read("aar_#{options[:on]}_#{item_id}")
      else
        cached_dataset && cached_dataset[item_id]
      end
    next unless ratings

    ratings.each do |similarity, item2_id|
      # Ignore if this user has already rated this item
      next if item_ids.include?(item2_id)

      scores[item2_id] ||= 0
      total_sim[item2_id] ||= 0
      if options[:score]
        # Weighted sum of rating times similarity
        scores[item2_id] += similarity * items[item_id].aar_score
        # Sum of all the similarities
        total_sim[item2_id] += similarity
      else
        scores[item2_id] += similarity
        total_sim[item2_id] += 1.0
      end
    end
  end

  # Divide each total score by total weighting to get an average
  rankings = []
  scores.each do |item, score|
    next unless score > 0.0

    weight = total_sim[item]
    # Opposite-signed similarities can cancel out; dividing by a zero weight
    # would rank the item with an Infinity/NaN score.
    next if weight == 0

    rankings << [score / weight, item]
  end

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # So we can do everything in one SQL query
  ranking_ids = rankings.map { |_, id| id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.map do |score, id|
    record = records_by_id[id]
    # A cached dataset can name items deleted since it was built; skip them
    # rather than defining singleton methods on nil.
    next unless record

    def record.recommendation_score; @recommendation_score; end
    def record.recommendation_score=(d); @recommendation_score = d; end
    record.recommendation_score = score
    record
  end

  ranked.compact
end
355
+
356
# Logger used for the dataset warnings.
#
# Returns Rails.logger when Rails exposes it; otherwise falls back to the
# legacy RAILS_DEFAULT_LOGGER constant, which newer Rails versions no longer
# define.
def self.logger
  return Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

  RAILS_DEFAULT_LOGGER
end
359
+
360
+ end
361
+
362
+ end
363
+ end
364
+
365
+ require 'railtie'