leobessa-acts_as_recommendable 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/MIT-LICENSE +20 -0
- data/README +123 -0
- data/Rakefile +40 -0
- data/VERSION +1 -0
- data/acts_as_recommendable.gemspec +48 -0
- data/init.rb +3 -0
- data/lib/active_record/acts/optimizations.rb +73 -0
- data/lib/active_record/acts/recommendable.rb +367 -0
- data/tasks/acts_as_recommendable_tasks.rake +55 -0
- data/test/.gitignore +3 -0
- data/test/recommendable_test.rb +133 -0
- metadata +65 -0
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
tmp/
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Made by Many Ltd
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
ActsAsRecommendable
|
2
|
+
===================
|
3
|
+
|
4
|
+
ActsAsRecommendable is a plugin for Rails that simplifies collaborative filtering
|
5
|
+
|
6
|
+
The plugin provides a mechanism for finding loose associations between users and items. It can tell you:
|
7
|
+
* Given a user, return other similar users based on what items they have all bought/bookmarked/rated/etc
|
8
|
+
* Given a user, return recommended items based on the items bought/bookmarked/rated/etc by that user and the items bought/bookmarked/rated/etc by other users.
|
9
|
+
|
10
|
+
The plugin calculations can be made online and offline and stored using the rails cache (such as memcache) for online retrieval. Online retrieval of recommendations uses item-based collaborative filtering using the offline items similarity matrix stored in the cache. This can give up-to-date results with a much lower processing overhead.
|
11
|
+
|
12
|
+
Many thanks to Toby Segaran and his excellent book Programming Collective Intelligence (http://oreilly.com/catalog/9780596529321/).
|
13
|
+
|
14
|
+
Features
|
15
|
+
========
|
16
|
+
|
17
|
+
Use join rating scores
|
18
|
+
Use arbitrary calculated scores
|
19
|
+
Similar Items
|
20
|
+
Recommended Users
|
21
|
+
Cached dataset
|
22
|
+
|
23
|
+
Current Release
|
24
|
+
===============
|
25
|
+
|
26
|
+
v0.1 should be considered early alpha and not ready for production applications.
|
27
|
+
|
28
|
+
Lots of performance optimisations still to be done.
|
29
|
+
|
30
|
+
Example
|
31
|
+
=======
|
32
|
+
|
33
|
+
class Book < ActiveRecord::Base
|
34
|
+
has_many :user_books
|
35
|
+
has_many :users, :through => :user_books
|
36
|
+
end
|
37
|
+
|
38
|
+
class UserBook < ActiveRecord::Base
|
39
|
+
belongs_to :book
|
40
|
+
belongs_to :user
|
41
|
+
end
|
42
|
+
|
43
|
+
class User < ActiveRecord::Base
|
44
|
+
has_many :user_books
|
45
|
+
has_many :books, :through => :user_books
|
46
|
+
acts_as_recommendable :books, :through => :user_books
|
47
|
+
end
|
48
|
+
|
49
|
+
user = User.find(:first)
|
50
|
+
user.similar_users #=> [...]
|
51
|
+
user.recommended_books #=> [...]
|
52
|
+
|
53
|
+
book = Book.find(:first)
|
54
|
+
book.similar_books #=> [...]
|
55
|
+
|
56
|
+
Example 2
|
57
|
+
=========
|
58
|
+
|
59
|
+
class Movie < ActiveRecord::Base
|
60
|
+
has_many :user_movies
|
61
|
+
has_many :users, :through => :user_movies
|
62
|
+
end
|
63
|
+
|
64
|
+
class UserMovie < ActiveRecord::Base
|
65
|
+
belongs_to :movie
|
66
|
+
belongs_to :user
|
67
|
+
end
|
68
|
+
|
69
|
+
class User < ActiveRecord::Base
|
70
|
+
has_many :user_movies
|
71
|
+
has_many :movies, :through => :user_movies
|
72
|
+
acts_as_recommendable :movies, :through => :user_movies, :score => :score
|
73
|
+
# 'score' is an attribute on the user_movies table
|
74
|
+
end
|
75
|
+
|
76
|
+
user = User.find(:first)
|
77
|
+
user.similar_users #=> [...]
|
78
|
+
user.recommended_movies #=> [...]
|
79
|
+
|
80
|
+
Example 3
|
81
|
+
=========
|
82
|
+
|
83
|
+
class Book < ActiveRecord::Base
|
84
|
+
has_many :user_books
|
85
|
+
has_many :users, :through => :user_books
|
86
|
+
# (the cached dataset is switched on in User below, via :use_dataset)
|
87
|
+
end
|
88
|
+
|
89
|
+
class UserBook < ActiveRecord::Base
|
90
|
+
belongs_to :book
|
91
|
+
belongs_to :user
|
92
|
+
end
|
93
|
+
|
94
|
+
class User < ActiveRecord::Base
|
95
|
+
has_many :user_books
|
96
|
+
has_many :books, :through => :user_books
|
97
|
+
acts_as_recommendable :books, :through => :user_books, :use_dataset => true
|
98
|
+
end
|
99
|
+
|
100
|
+
user = User.find(:first)
|
101
|
+
user.recommended_books #=> [...]
|
102
|
+
|
103
|
+
# The example above uses a cached dataset.
|
104
|
+
# You need to generate a cached dataset every so often (depending on how much your content changes)
|
105
|
+
# You can do that by running the rake task recommendations:build, ideally from a cron job.
|
106
|
+
|
107
|
+
|
108
|
+
# If you only want to use the dataset in production put this in production.rb:
|
109
|
+
User.aar_options[:use_dataset] = true
|
110
|
+
|
111
|
+
# Note:
|
112
|
+
# user.similar_users doesn't use the dataset
|
113
|
+
#
|
114
|
+
# The advantage of using a dataset is that you don't need to load all the users & items into
|
115
|
+
# memory (which you do normally). The disadvantage is that you won't get as accurate results.
|
116
|
+
#
|
117
|
+
|
118
|
+
Contact
|
119
|
+
=======
|
120
|
+
alex@madebymany.co.uk
|
121
|
+
|
122
|
+
|
123
|
+
Copyright (c) 2008 Made by Many Ltd, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
|
5
|
+
desc 'Default: run unit tests.'
|
6
|
+
task :default => :test
|
7
|
+
|
8
|
+
desc 'Test the acts_as_recommended plugin.'
|
9
|
+
Rake::TestTask.new(:test) do |test|
|
10
|
+
test.test_files = FileList.new('test/**/*_test.rb') do |list|
|
11
|
+
list.exclude 'test/test_helper.rb'
|
12
|
+
end
|
13
|
+
test.libs << 'test'
|
14
|
+
test.verbose = true
|
15
|
+
end
|
16
|
+
|
17
|
+
desc 'Generate documentation for the acts_as_recommendable plugin.'
|
18
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
19
|
+
rdoc.rdoc_dir = 'rdoc'
|
20
|
+
rdoc.title = 'ActsAsRecommendable'
|
21
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
22
|
+
rdoc.rdoc_files.include('README')
|
23
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
24
|
+
end
|
25
|
+
|
26
|
+
begin
|
27
|
+
require 'jeweler'
|
28
|
+
Jeweler::Tasks.new do |gemspec|
|
29
|
+
gemspec.name = "acts_as_recommendable"
|
30
|
+
gemspec.summary = "Colaborative Filtering Library"
|
31
|
+
gemspec.description = "Recommendation engine from the Programming Collective Inteligence book"
|
32
|
+
gemspec.email = "leobessa@gmail.com"
|
33
|
+
gemspec.homepage = "http://github.com/leobessa/acts_as_recommendable"
|
34
|
+
gemspec.authors = ["Leonardo Bessa"]
|
35
|
+
end
|
36
|
+
rescue LoadError
|
37
|
+
puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
|
38
|
+
end
|
39
|
+
|
40
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{acts_as_recommendable}
|
5
|
+
s.version = "0.0.0"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Leonardo Bessa"]
|
9
|
+
s.date = %q{2009-08-07}
|
10
|
+
s.description = %q{Recommendation engine from the Programming Collective Inteligence book}
|
11
|
+
s.email = %q{leobessa@gmail.com}
|
12
|
+
s.extra_rdoc_files = [
|
13
|
+
"README"
|
14
|
+
]
|
15
|
+
s.files = [
|
16
|
+
".gitignore",
|
17
|
+
"MIT-LICENSE",
|
18
|
+
"README",
|
19
|
+
"Rakefile",
|
20
|
+
"VERSION",
|
21
|
+
"acts_as_recommendable.gemspec",
|
22
|
+
"init.rb",
|
23
|
+
"lib/active_record/acts/optimizations.rb",
|
24
|
+
"lib/active_record/acts/recommendable.rb",
|
25
|
+
"tasks/acts_as_recommendable_tasks.rake",
|
26
|
+
"test/.gitignore",
|
27
|
+
"test/recommendable_test.rb"
|
28
|
+
]
|
29
|
+
s.has_rdoc = true
|
30
|
+
s.homepage = %q{http://github.com/leobessa/acts_as_recommendable}
|
31
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
32
|
+
s.require_paths = ["lib"]
|
33
|
+
s.rubygems_version = %q{1.3.1}
|
34
|
+
s.summary = %q{Colaborative Filtering Library}
|
35
|
+
s.test_files = [
|
36
|
+
"test/recommendable_test.rb"
|
37
|
+
]
|
38
|
+
|
39
|
+
if s.respond_to? :specification_version then
|
40
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
41
|
+
s.specification_version = 2
|
42
|
+
|
43
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
44
|
+
else
|
45
|
+
end
|
46
|
+
else
|
47
|
+
end
|
48
|
+
end
|
data/init.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'inline'
|
2
|
+
module MadeByMany
|
3
|
+
module ActsAsRecommendable
|
4
|
+
class Optimizations
|
5
|
+
InlineC = Module.new do
|
6
|
+
inline do |builder|
|
7
|
+
builder.c '
|
8
|
+
#include <math.h>
|
9
|
+
#include "ruby.h"
|
10
|
+
double c_sim_pearson(VALUE items, int n, VALUE prefs1, VALUE prefs2) {
|
11
|
+
double sum1 = 0.0;
|
12
|
+
double sum2 = 0.0;
|
13
|
+
double sum1Sq = 0.0;
|
14
|
+
double sum2Sq = 0.0;
|
15
|
+
double pSum = 0.0;
|
16
|
+
|
17
|
+
VALUE *items_a = RARRAY(items) ->ptr;
|
18
|
+
|
19
|
+
int i;
|
20
|
+
for(i=0; i<n; i++) {
|
21
|
+
VALUE prefs1_item_ob;
|
22
|
+
VALUE prefs2_item_ob;
|
23
|
+
|
24
|
+
double prefs1_item;
|
25
|
+
double prefs2_item;
|
26
|
+
|
27
|
+
if (!st_lookup(RHASH(prefs1)->tbl, items_a[i], &prefs1_item_ob)) {
|
28
|
+
prefs1_item = 0.0;
|
29
|
+
} else {
|
30
|
+
prefs1_item = NUM2DBL(prefs1_item_ob);
|
31
|
+
}
|
32
|
+
|
33
|
+
if (!st_lookup(RHASH(prefs2)->tbl, items_a[i], &prefs2_item_ob)) {
|
34
|
+
prefs2_item = 0.0;
|
35
|
+
} else {
|
36
|
+
prefs2_item = NUM2DBL(prefs2_item_ob);
|
37
|
+
}
|
38
|
+
|
39
|
+
sum1 += prefs1_item;
|
40
|
+
sum2 += prefs2_item;
|
41
|
+
sum1Sq += pow(prefs1_item, 2);
|
42
|
+
sum2Sq += pow(prefs2_item, 2);
|
43
|
+
pSum += prefs2_item * prefs1_item;
|
44
|
+
}
|
45
|
+
|
46
|
+
double num;
|
47
|
+
double den;
|
48
|
+
num = pSum - ( ( sum1 * sum2 ) / n );
|
49
|
+
den = sqrt( ( sum1Sq - ( pow(sum1, 2) ) / n ) * ( sum2Sq - ( pow(sum2, 2) ) / n ) );
|
50
|
+
if(den == 0){
|
51
|
+
return 0.0;
|
52
|
+
} else {
|
53
|
+
return num / den;
|
54
|
+
}
|
55
|
+
}'
|
56
|
+
end
|
57
|
+
end
|
58
|
+
class << self
|
59
|
+
include InlineC
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
module Logic
|
64
|
+
# Pearson score
|
65
|
+
# Pearson correlation between two people's preferences, delegated to the
# inline C implementation.
#
# prefs   - hash of person => { item => score }
# items   - array of item keys to compare over
# person1 - key of the first person in prefs
# person2 - key of the second person in prefs
#
# Returns 0 when there are no items; otherwise whatever
# Optimizations.c_sim_pearson returns.
def self.sim_pearson(prefs, items, person1, person2)
  item_count = items.length
  if item_count == 0
    0
  else
    Optimizations.c_sim_pearson(items, item_count, prefs[person1], prefs[person2])
  end
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,367 @@
|
|
1
|
+
# ActsAsRecommendable
|
2
|
+
module ActiveRecord
|
3
|
+
|
4
|
+
module Acts
|
5
|
+
|
6
|
+
module Recommendable
|
7
|
+
# Mixin hook: when this module is included into +base+, extend +base+ with
# ClassMethods so that acts_as_recommendable becomes available on the class.
def self.included(base)
  base.send(:extend, ClassMethods)
end
|
10
|
+
|
11
|
+
module ClassMethods
|
12
|
+
# Send an array to ActiveRecord without fear that some elements don't exist.
|
13
|
+
# Loads the records whose primary key is in +ids+ with a single IN (...)
# query, so ids that match no row are simply absent from the result.
#
# ids     - array of primary-key values; nil or empty yields [] without a query
# options - finder options passed on to find_every; :conditions, if given,
#           is ANDed with the id restriction
#
# The caller's +options+ hash is not modified.
def find_some_without_failing(ids, options = {})
  return [] if !ids || ids.empty?

  pk_column = columns_hash[primary_key]
  ids_list = ids.map { |id| quote_value(id, pk_column) }.join(',')

  conditions = "#{quoted_table_name}.#{connection.quote_column_name(primary_key)} IN (#{ids_list})"
  conditions << " AND (#{sanitize_sql(options[:conditions])})" if options[:conditions]

  # merge (not update) so the hash passed in by the caller is left untouched
  find_every(options.merge(:conditions => conditions))
end
|
21
|
+
|
22
|
+
# Configures this model as the "user" side of a recommendation relationship
# over the has_many :through association named +on+.
#
# on      - name of the has_many :through association to recommend
# options - overrides for the defaults below; an optional :score names the
#           method read from each join record to obtain its rating
#
# The merged options are stored in the class-level +aar_options+ accessor.
# Methods defined:
# * similar_<on> on the associated item class
# * similar_<this model, pluralized> on this class
# * recommended_<item class, pluralized> on this class (reads from the cached
#   dataset when aar_options[:use_dataset] is set)
# * aar_items_with_scores on this class: memoized hash of item id => item,
#   each item carrying an aar_score
#
# Raises RuntimeError if +on+ is not an association of this model or the
# association has no :through reflection.
def acts_as_recommendable(on, options = {})
  defaults = {
    :algorithm     => :sim_pearson,
    :use_dataset   => false,
    :split_dataset => true,
    :limit         => 10,
    :min_score     => 0.0
  }

  options = defaults.merge(options)

  # reflect on the specified association to derive the extra details we need
  options[:on] = on
  assoc = self.reflections[on.to_sym]
  # Validate each reflection before dereferencing it, so a bad association
  # name produces these messages instead of a NoMethodError on nil.
  raise "No association specified to recommend." if assoc.nil?
  through_assoc = assoc.through_reflection
  raise "The #{on} association does not have a :through association" unless through_assoc
  options[:through] = through_assoc.name

  on_class_name = assoc.class_name
  options[:on_singular] ||= on_class_name.underscore
  options[:on_class]    ||= assoc.klass

  options[:class] = self

  options[:through_singular] ||= through_assoc.class_name.downcase
  options[:through_class]    ||= through_assoc.klass

  class_inheritable_accessor :aar_options
  self.aar_options = options

  options[:on_class].class_eval do
    define_method "similar_#{options[:on]}" do
      Logic.similar_items(self, options)
    end
  end

  define_method "similar_#{options[:class].name.underscore.pluralize}" do
    Logic.similar_users(self, options)
  end

  define_method "recommended_#{options[:on_class].name.underscore.pluralize}" do
    if self.aar_options[:use_dataset]
      Logic.dataset_recommended(self, options)
    else
      Logic.recommended(self, options)
    end
  end

  define_method "aar_items_with_scores" do
    @aar_items_with_scores ||= begin
      scored = self.__send__(self.aar_options[:through]).collect { |ui|
        item = ui.__send__(self.aar_options[:on_singular])
        # join rows whose item no longer exists are dropped (see compact below)
        next unless item
        if self.aar_options[:score]
          score = ui.__send__(self.aar_options[:score]).to_f
          # non-positive ratings are treated as a plain 1.0
          score = 1.0 if score <= 0
        else
          score = 1.0
        end
        def item.aar_score; @aar_score; end
        def item.aar_score=(d); @aar_score = d; end
        item.aar_score = score
        item
      }
      scored.compact.inject({}) { |h, item| h[item.id] = item; h }
    end
  end

end
|
91
|
+
end
|
92
|
+
|
93
|
+
module Logic
|
94
|
+
|
95
|
+
# Builds the user-centric preference matrix.
#
# options - the aar_options hash; uses :on_class (item model), :class (user
#           model) and :on (association name, eager-loaded with the users)
#
# Returns [item_ids, prefs] where prefs is { user_id => { item_id => score } }.
# Every loaded user gets a row (possibly empty); only items whose id is among
# the loaded item ids are recorded.
def self.matrix(options)
  items = options[:on_class].find(:all).collect(&:id)
  users = options[:class].find(:all, :include => options[:on])

  known_item = {}
  items.each { |item_id| known_item[item_id] = true }

  prefs = {}
  users.each do |user|
    row = (prefs[user.id] ||= {})
    # Walk only the items this user has rated instead of probing every item
    # id for every user.
    user.aar_items_with_scores.each do |item_id, item|
      row[item_id] = item.aar_score if item && known_item[item_id]
    end
  end
  [items, prefs]
end
|
110
|
+
|
111
|
+
# Builds the item-centric preference matrix.
#
# options - the aar_options hash; uses :on_class (item model), :class (user
#           model) and :on (association name, eager-loaded with the users)
#
# Returns [user_ids, prefs] where prefs is { item_id => { user_id => score } }.
# Every loaded item gets a row (possibly empty).
def self.inverted_matrix(options)
  item_ids = options[:on_class].find(:all).collect(&:id)
  users = options[:class].find(:all, :include => options[:on])

  prefs = {}
  item_ids.each { |item_id| prefs[item_id] ||= {} }

  users.each do |user|
    # Walk only the items this user has rated instead of probing every
    # (item, user) pair.
    user.aar_items_with_scores.each do |item_id, item|
      row = prefs[item_id]
      row[user.id] = item.aar_score if row && item
    end
  end
  [users.collect(&:id), prefs]
end
|
126
|
+
|
127
|
+
# Euclidean distance
|
128
|
+
# Similarity derived from the squared Euclidean distance between two rows of
# +prefs+: 1 / (1 + sum of squared differences).
#
# prefs   - hash of key => { item => score }
# items   - array of item keys to compare over; a missing score counts as 0.0
# person1 - key of the first row in prefs
# person2 - key of the second row in prefs
#
# Returns 0 when +items+ is empty, otherwise a Float in (0, 1] where 1.0 means
# the two rows are identical over +items+.
def self.sim_distance(prefs, items, person1, person2)
  return 0 if items.length == 0

  first = prefs[person1]
  second = prefs[person2]

  sum_of_squares = items.inject(0.0) do |sum, item|
    sum + ((first[item] || 0.0) - (second[item] || 0.0)) ** 2
  end

  # 1.0 forces float division; with Integer scores `1 / (1 + n)` would
  # truncate to 0 for every non-identical pair.
  1.0 / (1 + sum_of_squares)
end
|
140
|
+
|
141
|
+
# Pearson score
|
142
|
+
# Pearson correlation coefficient between two rows of +prefs+.
#
# prefs   - hash of key => { item => score }
# items   - array of item keys to compare over; a missing score counts as 0.0
# person1 - key of the first row in prefs
# person2 - key of the second row in prefs
#
# Returns 0 when +items+ is empty or either row has no variance over +items+,
# otherwise num / den as a Float.
def self.sim_pearson(prefs, items, person1, person2)
  n = items.length
  return 0 if n == 0

  first = prefs[person1]
  second = prefs[person2]

  sum1 = sum2 = sum1_sq = sum2_sq = p_sum = 0.0

  items.each do |item|
    score1 = first[item] || 0.0
    score2 = second[item] || 0.0
    sum1 += score1
    sum2 += score2
    sum1_sq += score1 ** 2
    sum2_sq += score2 ** 2
    p_sum += score2 * score1
  end

  num = p_sum - ((sum1 * sum2) / n)
  radicand = (sum1_sq - (sum1 ** 2) / n) * (sum2_sq - (sum2 ** 2) / n)

  # A zero radicand means zero denominator. Floating-point rounding can also
  # leave it marginally negative, where Math.sqrt would raise instead of
  # returning 0.
  return 0 if radicand <= 0

  num / Math.sqrt(radicand)
end
|
165
|
+
|
166
|
+
# Ranks every other user by similarity to +user+.
#
# user    - the record to compare against (only its id is used)
# options - the aar_options hash; uses :algorithm (name of a similarity
#           method on this module), :min_score, :limit and :class
#
# Returns at most options[:limit] user records, most similar first, each
# responding to #similar_score. Only scores strictly above :min_score are
# kept. Ranked ids for which no record could be loaded are skipped.
def self.similar_users(user, options)
  items, prefs = self.matrix(options)

  rankings = []
  prefs.each_key do |other_id|
    next if other_id == user.id
    rankings << [self.__send__(options[:algorithm], prefs, items, user.id, other_id), other_id]
  end

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  # first(n) yields nothing for a limit of 0, whereas [0..(n - 1)] would
  # have returned the whole list
  rankings = rankings.first(options[:limit])

  # Load all ranked records with one query, then index them by id
  ranking_ids = rankings.collect { |_, other_id| other_id }
  records = options[:class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.collect { |score, other_id|
    record = records_by_id[other_id]
    # the lookup tolerates missing rows, so a ranked id may have no record
    next unless record
    class << record; attr_accessor :similar_score; end
    record.similar_score = score
    record
  }
  ranked.compact
end
|
191
|
+
|
192
|
+
# Ranks other items by similarity to +item+.
#
# item    - the record to compare against (only its id is used)
# options - the aar_options hash; uses :use_dataset, :split_dataset, :on,
#           :algorithm, :min_score, :limit and :on_class
#
# With :use_dataset the [score, item_id] pairs are read from Rails.cache:
# one entry per item when :split_dataset is set, otherwise one hash for the
# whole dataset. Without it they are computed from the inverted matrix.
#
# Returns at most options[:limit] item records, most similar first, each
# responding to #similar_score; [] when nothing is cached for the item.
# Ranked ids for which no record could be loaded are skipped.
def self.similar_items(item, options)
  if options[:use_dataset]
    if options[:split_dataset]
      rankings = Rails.cache.read("aar_#{options[:on]}_#{item.id}")
    else
      cached_dataset = Rails.cache.read("aar_#{options[:on]}_dataset")
      logger.warn 'ActsRecommendable has an empty dataset - rebuild it' unless cached_dataset
      rankings = cached_dataset && cached_dataset[item.id]
    end
  else
    users, prefs = self.inverted_matrix(options)
    rankings = []
    prefs.each_key do |other_id|
      next if other_id == item.id
      rankings << [self.__send__(options[:algorithm], prefs, users, item.id, other_id), other_id]
    end
  end
  return [] unless rankings

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  # first(n) yields nothing for a limit of 0, whereas [0..(n - 1)] would
  # have returned the whole list
  rankings = rankings.first(options[:limit])

  # Load all ranked records with one query, then index them by id
  ranking_ids = rankings.collect { |_, other_id| other_id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.collect { |score, other_id|
    record = records_by_id[other_id]
    # a cached ranking may mention an id with no matching row; skip it
    next unless record
    class << record; attr_accessor :similar_score; end
    record.similar_score = score
    record
  }
  ranked.compact
end
|
228
|
+
|
229
|
+
# User-based recommendations computed from the full preference matrix.
#
# user    - the record to recommend for (only its id is used)
# options - the aar_options hash; uses :algorithm, :min_score, :limit and
#           :on_class
#
# For every other user with a positive similarity, each item this user has
# not rated (or rated 0) accumulates similarity * rating; the total is then
# divided by the sum of the contributing similarities.
#
# Returns at most options[:limit] item records, best first, each responding
# to #recommendation_score. Only scores strictly above :min_score are kept.
# Ranked ids for which no record could be loaded are skipped.
def self.recommended(user, options)
  items, prefs = self.matrix(options)
  user_id = user.id
  user_ratings = prefs[user_id]

  totals = Hash.new(0)
  sim_sums = Hash.new(0)

  prefs.each_key do |other|
    # don't compare me to myself
    next if other == user_id

    sim = self.__send__(options[:algorithm], prefs, items, user_id, other)

    # ignore scores of zero or lower
    next if sim <= 0

    prefs[other].each do |item, rating|
      next unless !user_ratings.include?(item) || user_ratings[item] == 0
      # similarity * score
      totals[item] += rating * sim
      # sum of similarities
      sim_sums[item] += sim
    end
  end

  # Create a normalized list
  rankings = totals.collect { |item, total| [total / sim_sums[item], item] }

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  # first(n) yields nothing for a limit of 0, whereas [0..(n - 1)] would
  # have returned the whole list
  rankings = rankings.first(options[:limit])

  # So we can do everything in one SQL query
  ranking_ids = rankings.collect { |_, item_id| item_id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.collect { |score, item_id|
    record = records_by_id[item_id]
    # the lookup tolerates missing rows, so a ranked id may have no record
    next unless record
    class << record; attr_accessor :recommendation_score; end
    record.recommendation_score = score
    record
  }
  ranked.compact
end
|
282
|
+
|
283
|
+
# Computes, for every item in the inverted matrix, its similarity to every
# other item and hands the result to the caller's block.
#
# options - the aar_options hash; :algorithm names the similarity method
# matrix  - optional precomputed [user_ids, prefs] pair; when nil it is
#           built with inverted_matrix(options)
#
# Yields (item_id, scores) once per item, where scores is an array of
# [similarity, other_item_id] pairs sorted from most to least similar.
def self.generate_dataset(options, matrix = nil)
  users, prefs = matrix || self.inverted_matrix(options)

  item_ids = prefs.keys
  item_ids.each do |item|
    others = item_ids.reject { |candidate| candidate == item }
    pairs = others.collect do |other|
      [self.__send__(options[:algorithm], prefs, users, item, other), other]
    end
    ranked = pairs.sort_by { |score, _| score }.reverse
    yield(item, ranked) if block_given?
  end
end
|
295
|
+
|
296
|
+
# Item-based recommendations computed from the similarity dataset stored in
# Rails.cache.
#
# user    - the record to recommend for; its aar_items_with_scores hash
#           (item id => item with aar_score) supplies the items already rated
# options - the aar_options hash; uses :split_dataset, :on, :score,
#           :min_score, :limit and :on_class
#
# For each rated item, the cached [similarity, other_item_id] pairs are
# folded into a per-item average: weighted by the user's rating when :score
# is configured, otherwise a plain mean of the similarities.
#
# Returns at most options[:limit] item records, best first, each responding
# to #recommendation_score. Ranked ids for which no record could be loaded
# are skipped.
def self.dataset_recommended(user, options)
  items = user.aar_items_with_scores

  unless options[:split_dataset]
    cached_dataset = Rails.cache.read("aar_#{options[:on]}_dataset")
    logger.warn 'ActsRecommendable has an empty dataset - rebuild it' unless cached_dataset
  end

  scores = Hash.new(0)
  total_sim = Hash.new(0)

  items.each do |item_id, rated_item|
    if options[:split_dataset]
      ratings = Rails.cache.read("aar_#{options[:on]}_#{item_id}")
    else
      ratings = cached_dataset && cached_dataset[item_id]
    end
    next unless ratings

    ratings.each do |similarity, item2_id|
      # Ignore if this user has already rated this item (items is keyed by
      # item id, so this is a hash lookup rather than an array scan)
      next if items.key?(item2_id)

      if options[:score]
        # Weighted sum of rating times similarity
        scores[item2_id] += similarity * rated_item.aar_score

        # Sum of all the similarities
        total_sim[item2_id] += similarity
      else
        scores[item2_id] += similarity
        total_sim[item2_id] += 1.0
      end
    end
  end

  # Divide each total score by total weighting to get an average
  rankings = []
  scores.each do |item_id, score|
    next unless score > 0.0
    rankings << [score / total_sim[item_id], item_id]
  end

  rankings = rankings.select { |score, _| score > options[:min_score] }
  rankings = rankings.sort_by { |score, _| score }.reverse
  # first(n) yields nothing for a limit of 0, whereas [0..(n - 1)] would
  # have returned the whole list
  rankings = rankings.first(options[:limit])

  # So we can do everything in one SQL query
  ranking_ids = rankings.collect { |_, item_id| item_id }
  records = options[:on_class].find_some_without_failing(ranking_ids)
  records_by_id = records.inject({}) { |h, record| h[record.id] = record; h }

  ranked = rankings.collect { |score, item_id|
    record = records_by_id[item_id]
    # the cached dataset may mention an id with no matching row; skip it
    next unless record
    class << record; attr_accessor :recommendation_score; end
    record.recommendation_score = score
    record
  }
  ranked.compact
end
|
357
|
+
|
358
|
+
# Logger used for this module's warnings. Prefers Rails.logger when the
# Rails module exposes one and falls back to the RAILS_DEFAULT_LOGGER
# constant otherwise, so the lookup does not depend on that constant alone.
def self.logger
  if defined?(Rails) && Rails.respond_to?(:logger)
    Rails.logger
  else
    RAILS_DEFAULT_LOGGER
  end
end
|
361
|
+
|
362
|
+
end
|
363
|
+
|
364
|
+
end
|
365
|
+
|
366
|
+
end
|
367
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
namespace :recommendations do
|
2
|
+
task :build => [:environment] do
|
3
|
+
MadeByMany::ActsAsRecommendable::Logic.module_eval do
|
4
|
+
# This will need to change to your specific model:
|
5
|
+
options = User.aar_options
|
6
|
+
|
7
|
+
puts 'Finding items...'
|
8
|
+
|
9
|
+
# You may want to optimize this SQL, like this:
|
10
|
+
# items = options[:on_class].connection.select_values("SELECT id from #{options[:on_class].table_name}").collect(&:to_i)
|
11
|
+
items = options[:on_class].find(:all).collect(&:id)
|
12
|
+
|
13
|
+
prefs = {}
|
14
|
+
|
15
|
+
puts 'Finding users...'
|
16
|
+
|
17
|
+
# You may want to optimize this SQL
|
18
|
+
users = options[:class].find(:all, :include => options[:on])
|
19
|
+
|
20
|
+
pbar = MadeByMany::ProgressBar.new('Gen matrix', items.length)
|
21
|
+
items.each do |item_id|
|
22
|
+
prefs[item_id] ||= {}
|
23
|
+
users.each do |user|
|
24
|
+
if user.aar_items_with_scores[item_id]
|
25
|
+
score = user.aar_items_with_scores[item_id].aar_score
|
26
|
+
prefs[item_id][user.id] = score
|
27
|
+
end
|
28
|
+
end
|
29
|
+
pbar.inc
|
30
|
+
end
|
31
|
+
pbar.finish
|
32
|
+
matrix = [users.collect(&:id), prefs]
|
33
|
+
|
34
|
+
pbar = MadeByMany::ProgressBar.new('Gen dataset', prefs.keys.length)
|
35
|
+
|
36
|
+
if options[:split_dataset]
|
37
|
+
generate_dataset(options, matrix) {|item, scores|
|
38
|
+
Rails.cache.write("aar_#{options[:on]}_#{item}", scores)
|
39
|
+
pbar.inc
|
40
|
+
}
|
41
|
+
else
|
42
|
+
result = {}
|
43
|
+
generate_dataset(options, matrix) {|item, scores|
|
44
|
+
result[item] = scores
|
45
|
+
pbar.inc
|
46
|
+
}
|
47
|
+
Rails.cache.write("aar_#{options[:on]}_dataset", result)
|
48
|
+
end
|
49
|
+
|
50
|
+
pbar.finish
|
51
|
+
|
52
|
+
puts 'Rebuild successful'
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/test/.gitignore
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
gem 'activerecord', '>= 1.15.4.7794'
|
5
|
+
gem 'rails'
|
6
|
+
require 'active_record'
|
7
|
+
|
8
|
+
require "#{File.dirname(__FILE__)}/../init"
|
9
|
+
|
10
|
+
ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :dbfile => ":memory:")
|
11
|
+
|
12
|
+
def setup_db
|
13
|
+
ActiveRecord::Schema.define(:version => 1) do
|
14
|
+
create_table "books", :force => true do |t|
|
15
|
+
t.string "name"
|
16
|
+
t.datetime "created_at"
|
17
|
+
t.datetime "updated_at"
|
18
|
+
end
|
19
|
+
|
20
|
+
create_table "reviews", :force => true do |t|
|
21
|
+
t.integer "user_id", :null => false
|
22
|
+
t.integer "book_id", :null => false
|
23
|
+
t.integer "score", :default => 0
|
24
|
+
end
|
25
|
+
|
26
|
+
create_table "users", :force => true do |t|
|
27
|
+
t.string "name"
|
28
|
+
t.datetime "created_at"
|
29
|
+
t.datetime "updated_at"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def teardown_db
|
35
|
+
ActiveRecord::Base.connection.tables.each do |table|
|
36
|
+
ActiveRecord::Base.connection.drop_table(table)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
class Book < ActiveRecord::Base
|
41
|
+
has_many :reviews
|
42
|
+
has_many :users, :through => :reviews
|
43
|
+
end
|
44
|
+
|
45
|
+
class Review < ActiveRecord::Base
|
46
|
+
belongs_to :book
|
47
|
+
belongs_to :user
|
48
|
+
end
|
49
|
+
|
50
|
+
class User < ActiveRecord::Base
|
51
|
+
has_many :reviews
|
52
|
+
has_many :books, :through => :reviews
|
53
|
+
acts_as_recommendable :books, :through => :reviews
|
54
|
+
end
|
55
|
+
|
56
|
+
class RecommendableTest < Test::Unit::TestCase
|
57
|
+
|
58
|
+
def setup
|
59
|
+
setup_db
|
60
|
+
@leo = User.create :name => 'Leo'
|
61
|
+
@bru = User.create :name => 'Bru'
|
62
|
+
@carol = User.create :name => 'Carol'
|
63
|
+
@ipod = Book.create :name => 'ipod'
|
64
|
+
@wii = Book.create :name => 'wii'
|
65
|
+
@bla = Book.create :name => 'Dr. bla'
|
66
|
+
Review.create(:book => @ipod, :user =>@leo)
|
67
|
+
Review.create(:book => @wii, :user => @leo)
|
68
|
+
Review.create(:book => @wii, :user => @bru)
|
69
|
+
Review.create(:book => @bla, :user => @carol)
|
70
|
+
end
|
71
|
+
|
72
|
+
def teardown
|
73
|
+
teardown_db
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_available_methods
|
77
|
+
user = User.new
|
78
|
+
assert_not_nil user
|
79
|
+
assert_respond_to user, :similar_users
|
80
|
+
assert_respond_to user, :recommended_books
|
81
|
+
end
|
82
|
+
|
83
|
+
def test_similar_users
|
84
|
+
sim_users = @leo.similar_users
|
85
|
+
assert sim_users.include?(@bru)
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_similar_users_format
|
89
|
+
sim_users = @leo.similar_users
|
90
|
+
assert_kind_of Array, sim_users
|
91
|
+
assert_kind_of User, sim_users.first
|
92
|
+
assert_kind_of Numeric, sim_users.first.similar_score
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_similar_users_results
|
96
|
+
sim_users = @leo.similar_users
|
97
|
+
assert sim_users.include?(@bru)
|
98
|
+
assert_respond_to sim_users[0], :similar_score
|
99
|
+
assert !sim_users.include?(@carol)
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_similar_users_scores
|
103
|
+
sim_users = @leo.similar_users
|
104
|
+
assert_respond_to sim_users[0], :similar_score
|
105
|
+
assert sim_users[0].similar_score > 0
|
106
|
+
end
|
107
|
+
|
108
|
+
def test_recommended_books
|
109
|
+
recommended_books = @leo.recommended_books
|
110
|
+
assert_not_nil recommended_books
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_recommended_books_format
|
114
|
+
recommended_books = @bru.recommended_books
|
115
|
+
assert_kind_of Array, recommended_books
|
116
|
+
assert_kind_of Book, recommended_books.first
|
117
|
+
assert_kind_of Numeric, recommended_books.first.recommendation_score
|
118
|
+
end
|
119
|
+
|
120
|
+
def test_recommended_books_results
|
121
|
+
recommended_books = @bru.recommended_books
|
122
|
+
assert_equal true, recommended_books.include?(@ipod)
|
123
|
+
assert recommended_books.find {|b| b == @ipod }.recommendation_score > 0
|
124
|
+
end
|
125
|
+
|
126
|
+
def test_recommended_books_scores
|
127
|
+
recommended_books = @bru.recommended_books
|
128
|
+
assert_respond_to recommended_books[0], :recommendation_score
|
129
|
+
assert recommended_books[0].recommendation_score > 0
|
130
|
+
end
|
131
|
+
|
132
|
+
end
|
133
|
+
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: leobessa-acts_as_recommendable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leonardo Bessa
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-07 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Recommendation engine from the Programming Collective Inteligence book
|
17
|
+
email: leobessa@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
files:
|
25
|
+
- .gitignore
|
26
|
+
- MIT-LICENSE
|
27
|
+
- README
|
28
|
+
- Rakefile
|
29
|
+
- VERSION
|
30
|
+
- acts_as_recommendable.gemspec
|
31
|
+
- init.rb
|
32
|
+
- lib/active_record/acts/optimizations.rb
|
33
|
+
- lib/active_record/acts/recommendable.rb
|
34
|
+
- tasks/acts_as_recommendable_tasks.rake
|
35
|
+
- test/.gitignore
|
36
|
+
- test/recommendable_test.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: http://github.com/leobessa/acts_as_recommendable
|
39
|
+
licenses:
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options:
|
42
|
+
- --charset=UTF-8
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: "0"
|
50
|
+
version:
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: "0"
|
56
|
+
version:
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.3.5
|
61
|
+
signing_key:
|
62
|
+
specification_version: 2
|
63
|
+
summary: Colaborative Filtering Library
|
64
|
+
test_files:
|
65
|
+
- test/recommendable_test.rb
|