acts_as_recommendable 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NTViNzc1YWExZjJkZTY4YTIxYzRlMGM5YmU2ZjEwMGNmNDI2ZGM4OQ==
5
+ data.tar.gz: !binary |-
6
+ MmIwNzJhMGIxYzNkN2Q1ZGU3YjJkMzIxNDc3OTdlYmI1OGE5YmVjYQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MDc3NmE1Mzk5OWQ0ZmRhYTJiNzViOTg4ZTc1NTY2NTY3ZTE3MjRkM2I4ODFh
10
+ NDEwMWU5ZDIzMWVhNzIzZTUwYWFiODY5YzI5OWUzOTM3OTY0NGY5OWMwYjNl
11
+ MzY4M2YwMGRkZDUzMzk3OTJmZTUwZjZlYWI0ODI0NTQ2YWU4NzE=
12
+ data.tar.gz: !binary |-
13
+ ZTE2YTY2MTRiMGNjYWJjOWE2ZDY2OTc5YWRkMTQ1ZjIzMDEwMjgwMGM4NjI4
14
+ MThhZmU5ZTQ0ZWVjOTQ2NGQwYmU2NmM4MDQxNWQ1YWMyNmZjMDFjNzU0NjUy
15
+ ZGU4MDRlNjA2MzBlMTMwMjAyNWMzNDY1Y2JjZTFhMmE2ZDQ1YWE=
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Made by Many Ltd
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,123 @@
1
+ ActsAsRecommendable
2
+ ===================
3
+
4
+ ActsAsRecommendable is a plugin for Rails that simplifies collaborative filtering
5
+
6
+ The plugin provides a mechanism for finding loose associations between users and items, which lets it:
7
+ * Given a user, return other similar users based on what items they have all bought/bookmarked/rated/etc
8
+ * Given a user, return recommended items based on the items bought/bookmarked/rated/etc by that user and the items bought/bookmarked/rated/etc by other users.
9
+
10
+ The plugin calculations can be made online and offline and stored using the rails cache (such as memcache) for online retrieval. Online retrieval of recommendations uses item-based collaborative filtering using the offline items similarity matrix stored in the cache. This can give up-to-date results with a much lower processing overhead.
11
+
12
+ Much thanks to Toby Segaran and his excellent book Programming Collective Intelligence (http://oreilly.com/catalog/9780596529321/).
13
+
14
+ Features
15
+ ========
16
+
17
+ Use join rating scores
18
+ Use arbitrary calculated scores
19
+ Similar Items
20
+ Recommended Users
21
+ Cached dataset
22
+
23
+ Current Release
24
+ ===============
25
+
26
+ v0.1 should be considered early alpha and not ready for production applications.
27
+
28
+ Lots of performance optimisations still to be done.
29
+
30
+ Example
31
+ =======
32
+
33
+ class Book < ActiveRecord::Base
34
+ has_many :user_books
35
+ has_many :users, :through => :user_books
36
+ end
37
+
38
+ class UserBook < ActiveRecord::Base
39
+ belongs_to :book
40
+ belongs_to :user
41
+ end
42
+
43
+ class User < ActiveRecord::Base
44
+ has_many :user_books
45
+ has_many :books, :through => :user_books
46
+ acts_as_recommendable :books, :through => :user_books
47
+ end
48
+
49
+ user = User.find(:first)
50
+ user.similar_users #=> [...]
51
+ user.recommended_books #=> [...]
52
+
53
+ book = Book.find(:first)
54
+ book.similar_books #=> [...]
55
+
56
+ Example 2
57
+ =========
58
+
59
+ class Movie < ActiveRecord::Base
60
+ has_many :user_movies
61
+ has_many :users, :through => :user_movies
62
+ end
63
+
64
+ class UserMovie < ActiveRecord::Base
65
+ belongs_to :movie
66
+ belongs_to :user
67
+ end
68
+
69
+ class User < ActiveRecord::Base
70
+ has_many :user_movies
71
+ has_many :movies, :through => :user_movies
72
+ acts_as_recommendable :movies, :through => :user_movies, :score => :score
73
+ # 'score' is an attribute on the user_movies table
74
+ end
75
+
76
+ user = User.find(:first)
77
+ user.similar_users #=> [...]
78
+ user.recommended_movies #=> [...]
79
+
80
+ Example 3
81
+ =========
82
+
83
+ class Book < ActiveRecord::Base
84
+ has_many :user_books
85
+ has_many :users, :through => :user_books
86
+ end
87
+
88
+ class UserBook < ActiveRecord::Base
89
+ belongs_to :book
90
+ belongs_to :user
91
+ end
92
+
93
+ class User < ActiveRecord::Base
94
+ has_many :user_books
95
+ has_many :books, :through => :user_books
96
+ acts_as_recommendable :books, :through => :user_books, :use_dataset => true
97
+ # Uses cached dataset
98
+ end
99
+
100
+ user = User.find(:first)
101
+ user.recommended_books #=> [...]
102
+
103
+ # The example above uses a cached dataset.
104
+ # You need to generate a cached dataset every so often (depending on how much your content changes)
105
+ # You can do that by calling the rake task recommendations:build; you should run this with a cron job every so often.
106
+
107
+
108
+ # If you only want to use the dataset in production put this in production.rb:
109
+ User.aar_options[:use_dataset] = true
110
+
111
+ # Note:
112
+ # user.similar_users doesn't use the dataset
113
+ #
114
+ # The advantage of using a dataset is that you don't need to load all the users & items into
115
+ # memory (which you do normally). The disadvantage is that you won't get as accurate results.
116
+ #
117
+
118
+ Contact
119
+ =======
120
+ alex@madebymany.co.uk
121
+
122
+
123
+ Copyright (c) 2008 Made by Many Ltd, released under the MIT license
@@ -0,0 +1,17 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ require 'bundler/gem_tasks'
6
+
7
# The gem packaging tasks are already registered by the
# `require 'bundler/gem_tasks'` line at the top of this file. Calling
# Bundler::GemHelper.install_tasks again here would register every task
# action a second time, so the explicit call has been dropped.

desc 'Default: run unit tests.'
task :default => :test

# Runs every test/**/*_test.rb file with lib/ on the load path.
desc 'Test the acts_as_recommendable plugin.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
@@ -0,0 +1,29 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for acts_as_recommendable.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))

Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name        = "acts_as_recommendable"
  spec.version     = '0.0.2'
  spec.authors     = ["macman"]
  spec.email       = 'info@eribium.org'
  spec.homepage    = 'https://github.com/maccman/acts_as_recommendable'
  spec.summary     = 'Recommendation engine for Rails'
  spec.description = 'A recommendation engine for Rails.'
  spec.license     = "MIT"

  spec.required_ruby_version = '~> 1.9.3'

  # Runtime dependencies.
  spec.add_dependency 'ruby-progressbar', '~> 0'
  spec.add_dependency 'RubyInline', '~> 3.12', '>= 3.12.3'
  spec.add_runtime_dependency 'rails', '~> 3.2', '>= 3.2.0'

  # Development-only dependencies.
  spec.add_development_dependency 'rake', '~> 0'
  spec.add_development_dependency 'sdoc', '~> 0'
  spec.add_development_dependency 'minitest', '~> 0'
  spec.add_development_dependency 'sqlite3', '~> 0'
  spec.add_development_dependency 'ruby-debug-completion', '~> 0'

  # File lists are taken from whatever git currently tracks.
  tracked_files = `git ls-files`
  tracked_tests = `git ls-files -- {test,features}/*`
  tracked_bins  = `git ls-files -- bin/*`

  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  spec.executables   = tracked_bins.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
data/init.rb ADDED
@@ -0,0 +1,14 @@
1
# Plugin entry point: loads the plugin and its cache patch, then tries to
# switch on the optional RubyInline-backed optimizations.
plugin_lib = File.dirname(__FILE__) + '/lib'
require plugin_lib + '/acts_as_recommendable'
require plugin_lib + '/cache_fix'

# Fix RubyInline's permission problem: RubyInline doesn't like directories
# with group write permissions (like /tmp), so INLINEDIR is pointed at
# tmp/rubyinline under the application root instead.
app_root =
  if Rails.respond_to?(:root) && !Rails.root.nil?
    Rails.root
  else
    RAILS_ROOT
  end
ENV['INLINEDIR'] = File.join(app_root, 'tmp', 'rubyinline')

begin
  require 'inline'
  require plugin_lib + '/optimizations'
rescue LoadError
  # Best effort: a failed load is ignored and the plugin carries on without it.
end
@@ -0,0 +1,365 @@
1
+ require File.dirname(__FILE__) + '/cache_fix'
2
+ require File.dirname(__FILE__) + '/optimizations'
3
+
4
+ # ActsAsRecommended
5
+ module MadeByMany
6
+ module ActsAsRecommendable
7
# Inclusion hook: extends the including class (+base+) with ActsMethods.
def self.included(base)
  base.extend ActsMethods
end
10
+
11
+ module ActsMethods
12
# Send an array of ids to ActiveRecord without fear that some elements
# don't exist.
#
# ids - an Array of primary key values; may be nil or empty.
#
# Returns the result of +where+: a condition that matches nothing
# ("1 = 0") when +ids+ is nil/false or empty, otherwise a hash condition on
# this model's primary key restricted to +ids+.
def find_some_without_failing(ids)
  return where("1 = 0") if !ids || ids.empty?

  # Hand the raw values to the hash condition, which does its own quoting.
  # Pre-quoting them with quote_value would escape them a second time and
  # break lookups on non-numeric primary keys.
  where(table_name.to_sym => { primary_key.to_sym => ids })
end
19
+
20
# Declares that this model can receive recommendations for the +on+
# association, which must be a has_many :through association.
#
# on      - name of the association to recommend (e.g. :books).
# options - Hash overriding the defaults below. :score may name an attribute
#           on the join record to use as the rating. Any :through value
#           passed in is replaced by the name of the reflected through
#           association.
#
# Defines on the calling class: similar_<class plural>,
# recommended_<item plural>, aar_items_with_scores and the aar_options class
# attribute. Defines similar_<on> on the associated item class.
#
# Raises RuntimeError when +on+ is not a known association or has no
# :through association.
def acts_as_recommendable(on, options = {})
  defaults = {
    :algorithm     => :sim_pearson,
    :use_dataset   => false,
    :split_dataset => true,
    :limit         => 10,
    :min_score     => 0.0
  }

  options = defaults.merge(options)

  # Reflect on the specified association to derive the extra details we need.
  # Each reflection is validated before it is dereferenced, so a bad
  # association name produces the descriptive error rather than a
  # NoMethodError on nil.
  options[:on] = on
  assoc = self.reflections[on.to_sym]
  raise "No association specified to recommend." if assoc.nil?

  through_assoc = assoc.through_reflection
  raise "The #{on} association does not have a :through association" unless through_assoc

  options[:through] = through_assoc.name

  on_class_name = assoc.class_name
  options[:on_singular] ||= on_class_name.underscore
  options[:on_class]    ||= assoc.klass

  options[:class] = self

  options[:through_singular] ||= through_assoc.class_name.downcase
  options[:through_class]    ||= through_assoc.klass

  class_attribute :aar_options
  self.aar_options = options

  # Item side: e.g. Book#similar_books.
  options[:on_class].class_eval do
    define_method "similar_#{options[:on]}" do
      Logic.similar_items(self, options)
    end
  end

  # User side: e.g. User#similar_users.
  define_method "similar_#{options[:class].name.underscore.pluralize}" do
    Logic.similar_users(self, options)
  end

  # e.g. User#recommended_books. The dataset switch is read from aar_options
  # at call time, so it can be changed after this declaration has run.
  define_method "recommended_#{options[:on_class].name.underscore.pluralize}" do
    if self.aar_options[:use_dataset]
      Logic.dataset_recommended(self, options)
    else
      Logic.recommended(self, options)
    end
  end

  # Memoized Hash of item id => item for everything reachable through the
  # join association. Each item gets an aar_score reader/writer holding its
  # rating.
  define_method "aar_items_with_scores" do
    @aar_items_with_scores ||= begin
      scored = {}
      self.__send__(self.aar_options[:through]).each do |ui|
        item = ui.__send__(self.aar_options[:on_singular])
        next unless item

        if self.aar_options[:score]
          score = ui.__send__(self.aar_options[:score]).to_f
          # Missing or non-positive ratings count as a plain 1.0.
          score = 1.0 if !score || score <= 0
        else
          score = 1.0
        end

        def item.aar_score; @aar_score; end
        def item.aar_score=(d); @aar_score = d; end
        item.aar_score = score
        scored[item.id] = item
      end
      scored
    end
  end
end
89
+ end
90
+
91
+ module Logic
92
+
93
# Builds the user-by-item preference table.
#
# Returns [item_ids, prefs], where prefs maps user id => { item id => score }
# for every item id (plucked from options[:on_class]) that appears in that
# user's aar_items_with_scores.
def self.matrix(options)
  item_ids = options[:on_class].pluck(:id)
  people   = options[:class].includes(options[:on])
  prefs    = {}

  people.each do |person|
    row = (prefs[person.id] ||= {})
    item_ids.each do |item_id|
      scored_item = person.aar_items_with_scores[item_id]
      next unless scored_item
      row[item_id] = scored_item.aar_score
    end
  end

  [item_ids, prefs]
end
108
+
109
# Builds the item-by-user preference table (the transpose of +matrix+).
#
# Returns [user_ids, prefs], where prefs maps item id => { user id => score }
# for every user whose aar_items_with_scores contains that item.
def self.inverted_matrix(options)
  item_ids = options[:on_class].pluck(:id)
  people   = options[:class].includes(options[:on])
  prefs    = {}

  item_ids.each do |item_id|
    column = (prefs[item_id] ||= {})
    people.each do |person|
      scored_item = person.aar_items_with_scores[item_id]
      next unless scored_item
      column[person.id] = scored_item.aar_score
    end
  end

  [people.collect(&:id), prefs]
end
124
+
125
# Euclidean-distance similarity between two rows of +prefs+.
#
# prefs            - Hash of row key => { column key => score }.
# items            - Array of column keys to compare over.
# person1, person2 - row keys; both must be present in +prefs+.
#
# A missing score counts as 0.0. Returns 1 / (1 + sum of squared
# differences) as a Float, or 0 when +items+ is empty.
def self.sim_distance(prefs, items, person1, person2)
  return 0 if items.length == 0

  sum_of_squares = items.inject(0.0) do |sum, item|
    diff = (prefs[person1][item] || 0.0) - (prefs[person2][item] || 0.0)
    sum + diff ** 2
  end

  # Float arithmetic throughout: with Integer ratings the previous
  # `1/(1 + sum)` was integer division and collapsed every non-identical
  # pair to 0.
  1.0 / (1.0 + sum_of_squares)
end
138
+
139
# Pearson correlation between two rows of +prefs+.
#
# prefs            - Hash of row key => { column key => score }.
# items            - Array of column keys to correlate over.
# person1, person2 - row keys; both must be present in +prefs+.
#
# A missing score counts as 0.0. Returns the correlation as a Float, or 0
# when +items+ is empty or either row has no variance.
def self.sim_pearson(prefs, items, person1, person2)
  n = items.length
  return 0 if n == 0

  sum1 = sum2 = sum1_sq = sum2_sq = p_sum = 0.0

  items.each do |item|
    prefs1_item = prefs[person1][item] || 0.0
    prefs2_item = prefs[person2][item] || 0.0
    sum1    += prefs1_item
    sum2    += prefs2_item
    sum1_sq += prefs1_item ** 2
    sum2_sq += prefs2_item ** 2
    p_sum   += prefs2_item * prefs1_item
  end

  num = p_sum - ((sum1 * sum2) / n)
  variance_product = (sum1_sq - (sum1 ** 2) / n) * (sum2_sq - (sum2 ** 2) / n)

  # A zero-variance row makes this product 0. Floating-point rounding can
  # push it slightly below 0, where Math.sqrt raises Math::DomainError, so
  # both cases are treated as "no correlation".
  return 0 if variance_product <= 0

  den = Math.sqrt(variance_product)
  return 0 if den == 0

  num / den
end
163
+
164
# Ranks the other users by similarity to +user+.
#
# user    - the record to compare against; only its id is used.
# options - the aar options Hash; reads :algorithm, :min_score, :limit and
#           :class here.
#
# Returns an Array of user records, most similar first, each extended with a
# similar_score reader/writer holding its score. Only scores strictly above
# :min_score are kept, at most :limit of them. Ids that the lookup no longer
# returns are skipped.
def self.similar_users(user, options)
  items, prefs = self.matrix(options)

  rankings = []
  prefs.each_key do |other_id|
    next if other_id == user.id
    rankings << [self.__send__(options[:algorithm], prefs, items, user.id, other_id), other_id]
  end

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # One lookup for all the ranked ids.
  ranking_ids = rankings.collect { |_, id| id }
  records = options[:class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = []
  rankings.each do |score, user_id|
    similar = records_by_id[user_id]
    # The lookup tolerates missing rows, so a ranked id may have no record;
    # skip it rather than decorate and return nil.
    next unless similar

    similar.singleton_class.send(:attr_accessor, :similar_score)
    similar.similar_score = score
    ranked << similar
  end
  ranked
end
189
+
190
# Ranks the other items by similarity to +item+.
#
# item    - the record to compare against; only its id is used.
# options - the aar options Hash; reads :use_dataset, :split_dataset, :on,
#           :algorithm, :min_score, :limit and :on_class here.
#
# With :use_dataset the rankings are read from Rails.cache, either one entry
# per item (:split_dataset) or one shared dataset. Otherwise they are
# computed from the inverted matrix.
#
# Returns an Array of item records, most similar first, each extended with a
# similar_score reader/writer. Returns [] when no rankings are available.
# Ids that the lookup no longer returns are skipped.
def self.similar_items(item, options)
  if options[:use_dataset]
    if options[:split_dataset]
      rankings = Rails.cache.read("aar_#{options[:on]}_#{item.id}")
    else
      cached_dataset = Rails.cache.read("aar_#{options[:on]}_dataset")
      logger.warn 'ActsRecommendable has an empty dataset - rebuild it' unless cached_dataset
      rankings = cached_dataset && cached_dataset[item.id]
    end
  else
    users, prefs = self.inverted_matrix(options)
    rankings = []
    prefs.each_key do |other_id|
      next if other_id == item.id
      rankings << [self.__send__(options[:algorithm], prefs, users, item.id, other_id), other_id]
    end
  end
  return [] unless rankings

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # One lookup for all the ranked ids.
  ranking_ids = rankings.collect { |_, id| id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = []
  rankings.each do |score, item_id|
    similar = records_by_id[item_id]
    # Computed or cached rankings can name items the lookup no longer
    # returns; skip them rather than decorate and return nil.
    next unless similar

    similar.singleton_class.send(:attr_accessor, :similar_score)
    similar.similar_score = score
    ranked << similar
  end
  ranked
end
226
+
227
# User-based recommendations computed from the full preference matrix.
#
# user    - the record to recommend for; only its id is used.
# options - the aar options Hash; reads :algorithm, :min_score, :limit and
#           :on_class here.
#
# Every other user with a positive similarity contributes their scores for
# items +user+ has not rated (or has rated 0), weighted by that similarity.
# Each item's total is divided by the sum of the contributing similarities.
#
# Returns an Array of item records, best first, each extended with a
# recommendation_score reader/writer. Ids that the lookup no longer returns
# are skipped.
def self.recommended(user, options)
  totals   = Hash.new(0)
  sim_sums = Hash.new(0)
  items, prefs = self.matrix(options)
  user_id = user.id

  prefs.keys.each do |other|
    # don't compare me to myself
    next if other == user_id

    sim = self.__send__(options[:algorithm], prefs, items, user_id, other)

    # ignore scores of zero or lower
    next if sim <= 0

    prefs[other].keys.each do |item|
      next unless !prefs[user_id].include?(item) || prefs[user_id][item] == 0

      # similarity * score
      totals[item] += prefs[other][item] * sim
      # sum of similarities
      sim_sums[item] += sim
    end
  end

  # Create a normalized list
  rankings = totals.collect { |item, total| [total / sim_sums[item], item] }

  # Keep the best-scoring entries
  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # So we can do everything in one SQL query
  ranking_ids = rankings.collect { |_, id| id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = []
  rankings.each do |score, item_id|
    recommendation = records_by_id[item_id]
    # The lookup tolerates missing rows; skip ids without a record rather
    # than decorate and return nil.
    next unless recommendation

    recommendation.singleton_class.send(:attr_accessor, :recommendation_score)
    recommendation.recommendation_score = score
    ranked << recommendation
  end
  ranked
end
280
+
281
# Computes, for each item, the similarity of every other item and yields the
# result.
#
# options - the aar options Hash; :algorithm names the similarity method.
# matrix  - optional precomputed [users, prefs] pair; when nil the inverted
#           matrix is built from +options+.
#
# Yields (item, scores) per item when a block is given, where scores is an
# Array of [similarity, other_item] pairs sorted highest first.
def self.generate_dataset(options, matrix = nil)
  users, prefs = matrix || self.inverted_matrix(options)

  prefs.keys.each do |item|
    others = prefs.keys.reject { |other| other == item }
    scores = others.map do |other|
      [self.__send__(options[:algorithm], prefs, users, item, other), other]
    end
    scores = scores.sort_by { |score, _| score }.reverse
    yield(item, scores) if block_given?
  end
end
293
+
294
# Item-based recommendations computed from the cached similarity dataset.
#
# user    - the record to recommend for; its aar_items_with_scores is used.
# options - the aar options Hash; reads :split_dataset, :on, :score,
#           :min_score, :limit and :on_class here.
#
# For each item the user already has, the cached [similarity, other_item_id]
# pairs are read from Rails.cache (one entry per item with :split_dataset,
# otherwise one shared dataset) and accumulated for items the user does not
# have yet.
#
# Returns an Array of item records, best first, each extended with a
# recommendation_score reader/writer. Ids that the lookup no longer returns
# are skipped.
def self.dataset_recommended(user, options)
  scores    = {}
  total_sim = {}
  items     = user.aar_items_with_scores
  item_ids  = items.values.collect(&:id)

  unless options[:split_dataset]
    cached_dataset = Rails.cache.read("aar_#{options[:on]}_dataset")
    logger.warn 'ActsRecommendable has an empty dataset - rebuild it' unless cached_dataset
  end

  item_ids.each do |item_id|
    ratings =
      if options[:split_dataset]
        Rails.cache.read("aar_#{options[:on]}_#{item_id}")
      else
        cached_dataset && cached_dataset[item_id]
      end
    next unless ratings

    ratings.each do |similarity, item2_id|
      # Ignore if this user has already rated this item
      next if item_ids.include?(item2_id)

      scores[item2_id]    ||= 0
      total_sim[item2_id] ||= 0
      if options[:score]
        # Weighted sum of rating times similarity
        scores[item2_id] += similarity * items[item_id].aar_score
        # Sum of all the similarities
        total_sim[item2_id] += similarity
      else
        scores[item2_id]    += similarity
        total_sim[item2_id] += 1.0
      end
    end
  end

  # Divide each total score by total weighting to get an average
  rankings = []
  scores.each do |item, score|
    next unless score > 0.0
    rankings << [score / total_sim[item], item]
  end

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  rankings = rankings[0..(options[:limit] - 1)]

  # So we can do everything in one SQL query
  ranking_ids = rankings.collect { |_, id| id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = []
  rankings.each do |score, item_id|
    recommendation = records_by_id[item_id]
    # The cached dataset can name items the lookup no longer returns; skip
    # them rather than decorate and return nil.
    next unless recommendation

    recommendation.singleton_class.send(:attr_accessor, :recommendation_score)
    recommendation.recommendation_score = score
    ranked << recommendation
  end
  ranked
end
355
+
356
# Logger used for the dataset warnings in this module.
#
# Returns Rails.logger when Rails exposes one. Otherwise falls back to the
# legacy RAILS_DEFAULT_LOGGER constant, which is the only logger the
# previous implementation knew about.
def self.logger
  return Rails.logger if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

  RAILS_DEFAULT_LOGGER
end
359
+
360
+ end
361
+
362
+ end
363
+ end
364
+
365
+ require 'railtie'