recommendify-ruby 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
module Recommendify
  # Mixin that keeps per-item counters and pairwise counters in redis.
  #
  # The including class has to provide +@opts+ (containing +:redis_prefix+ and
  # +:key+) and a +redis_key(append)+ method; both are used by the methods
  # below. Written in nested form so this file can be loaded even when the
  # Recommendify namespace has not been opened yet.
  module CCMatrix

    # Returns the memoized Recommendify::SparseMatrix that holds the pairwise
    # counters. It is stored under the key "<key>:ccmatrix".
    def ccmatrix
      @ccmatrix ||= Recommendify::SparseMatrix.new(
        :redis_prefix => @opts.fetch(:redis_prefix),
        :key => [@opts.fetch(:key), :ccmatrix].join(":")
      )
    end

    # Registers a set of items: bumps the counter of every item and the
    # pairwise counter of every distinct pair in the set.
    #
    # set_id   - not used by this implementation
    # item_ids - Array of item ids
    #
    # Returns the Array of results of the individual ccmatrix increments.
    def add_set(set_id, item_ids)
      # FIXPAUL: forbid | and : in item_ids
      item_ids.each { |item_id| item_count_incr(item_id) }

      all_pairs(item_ids).map do |pair|
        left, right = pair.split(":")
        ccmatrix.incr(left, right)
      end
    end

    # Registers a single item together with the items it was seen with. Only
    # the counter of +item_id+ is incremented; the counters of the other items
    # are left alone.
    def add_single(set_id, item_id, other_item_ids)
      item_count_incr(item_id)
      other_item_ids.each { |other_item| ccmatrix.incr(item_id, other_item) }
    end

    # Returns the ids of all items that have a counter.
    def all_items
      Recommendify.redis.hkeys(redis_key(:items))
    end

    # Removes the counter of +item_id+ and every pairwise counter it is part of.
    def delete_item(item_id)
      Recommendify.redis.hdel(redis_key(:items), item_id)
      # k_delall is private on SparseMatrix, hence the send
      ccmatrix.send(:k_delall, item_id)
    end

    private

    # Returns every distinct unordered pair of +keys+ as a "a:b" string, the
    # two members sorted. Duplicated keys never pair with themselves.
    #
    # combination(2) on the de-duplicated keys yields each pair exactly once
    # and in the same order the nested-map version produced them; the trailing
    # uniq only matters when different pairs join to the same string.
    def all_pairs(keys)
      keys.uniq.combination(2).map { |pair| pair.sort.join(":") }.uniq
    end

    # Increments the counter of +key+ by one.
    def item_count_incr(key)
      Recommendify.redis.hincrby(redis_key(:items), key, 1)
    end

    # Returns the counter of +key+ as an Integer (0 when there is none).
    def item_count(key)
      Recommendify.redis.hget(redis_key(:items), key).to_i
    end

  end
end
@@ -0,0 +1,7 @@
1
# Input matrix reserved for cosine similarity. The class body only mixes in
# Recommendify::CCMatrix; it defines no similarity methods of its own, so
# +similarity+ and +similarities_for+ are inherited from
# Recommendify::InputMatrix.
class Recommendify::CosineInputMatrix < Recommendify::InputMatrix
  include Recommendify::CCMatrix

  # here be dragons ;)
end
@@ -0,0 +1,52 @@
1
# Base class of all input matrices. It carries the shared option handling and
# declares the interface; every interface method raises until a subclass
# overrides it.
class Recommendify::InputMatrix

  # Builds the input matrix class selected by opts[:similarity_func]; e.g.
  # :jaccard is resolved to Recommendify::JaccardInputMatrix.
  def self.create(opts)
    prefix = Recommendify.capitalize(opts[:similarity_func])
    matrix_class = Recommendify.constantize("#{prefix}InputMatrix".intern)
    matrix_class.new(opts)
  end

  # opts - options hash; :redis_prefix and :key are required by redis_key,
  #        :weight is optional.
  def initialize(opts)
    @opts = opts
  end

  # Returns the redis key "<redis_prefix>:<key>[:<append>]". +append+ may be
  # nil, a single value or an array of values.
  def redis_key(append=nil)
    segments = [@opts.fetch(:redis_prefix), @opts.fetch(:key), append]
    segments.flatten.compact.join(":")
  end

  # Returns the configured weight as a Float, 1.0 when none is configured.
  def weight
    configured = @opts[:weight]
    configured ? configured.to_f : 1.0
  end

  # add a set of item_ids to the matrix
  def add_set(set_id, item_ids)
    raise "implemented in subclass"
  end

  # add a single item to a set of item_ids to the matrix
  def add_single(set_id, item_id, other_item_ids)
    raise "implemented in subclass"
  end

  # calculate the similarity between item1 and item2 (0.0-1.0)
  def similarity(item1, item2)
    raise "implemented in subclass"
  end

  # calculate all similarities to other items in the matrix for item1
  def similarities_for(item1)
    # return => [ ["item23", 0.6], ["item42", 0.23], (...) ]
    raise "implemented in subclass"
  end

  # retrieve all item_ids in the matrix
  def all_items
    # return => [ "item23", "item42", "item17", (...) ]
    raise "implemented in subclass"
  end

  # delete item_id from the matrix
  def delete_item(item_id)
    raise "implemented in subclass"
  end

end
@@ -0,0 +1,62 @@
1
# Input matrix that scores item pairs with the Jaccard index, computed from
# the counters maintained by Recommendify::CCMatrix. Optionally delegates the
# calculation to the bundled native helper binary.
class Recommendify::JaccardInputMatrix < Recommendify::InputMatrix

  include Recommendify::CCMatrix

  # opts - options hash. When opts[:native] is truthy the native helper must
  #        exist on disk.
  #
  # Raises RuntimeError if :native is requested but the binary is missing.
  def initialize(opts={})
    check_native if opts[:native]
    super(opts)
  end

  # Returns the Jaccard index of the two items as a Float.
  def similarity(item1, item2)
    calculate_jaccard_cached(item1, item2)
  end

  # Returns [[other_item_id, similarity], ...] for +item1+, using the native
  # helper when the :native option is set.
  def similarities_for(item1)
    return run_native(item1) if @opts[:native]
    calculate_similarities(item1)
  end

  private

  # Pure-ruby path: scores +item1+ against every other known item.
  def calculate_similarities(item1)
    (all_items - [item1]).map { |item2| [item2, similarity(item1, item2)] }
  end

  # Jaccard index from the stored counters: |A ∩ B| / (|A| + |B| - |A ∩ B|).
  # Returns 0.0 instead of NaN/Infinity when the union is empty, e.g. for
  # items that were never added.
  def calculate_jaccard_cached(item1, item2)
    intersection = ccmatrix[item1, item2]
    union = item_count(item1) + item_count(item2) - intersection
    return 0.0 if union.zero?
    intersection.to_f / union.to_f
  end

  # Jaccard index of two arrays. Returns 0.0 when both are empty.
  def calculate_jaccard(set1, set2)
    union_size = (set1 | set2).length
    return 0.0 if union_size.zero?
    (set1 & set2).length.to_f / union_size.to_f
  end

  # Runs the native helper for +item_id+ and parses its "OUT: (id) (score)"
  # lines into [[id, score], ...].
  #
  # The command is passed as an argument vector, so no shell is involved and
  # item ids containing quotes, "$" or backticks cannot inject commands.
  #
  # Raises RuntimeError on a non-zero exit, or when a line is not an OUT line
  # and the output does not contain 'exit:'.
  def run_native(item_id)
    command = [native_path, "--jaccard", redis_key.to_s, item_id.to_s, redis_url.to_s]
    res = IO.popen(command) { |io| io.read }
    raise "error: dirty exit (#{$?})" unless $?.success?

    res.split("\n").map do |line|
      sim = line.match(/OUT: \(([^\)]*)\) \(([^\)]*)\)/)
      if sim
        [sim[1], sim[2].to_f]
      else
        # evaluates to nil (dropped by compact) when no error is raised
        raise "error: #{res}" unless (res||"").include?('exit:')
      end
    end.compact
  end

  # Returns true when the native helper exists, raises otherwise.
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  def check_native
    return true if ::File.exist?(native_path)
    raise "recommendify_native not found - you need to run rake build_native first"
  end

  # Absolute path of the native helper binary.
  def native_path
    ::File.expand_path('../../../bin/recommendify', __FILE__)
  end

  # Location of the configured redis connection, handed to the native helper.
  def redis_url
    Recommendify.redis.client.location
  end

end
@@ -0,0 +1,19 @@
1
# Read-only view on one neighbor entry, a hash with the keys :item_id and
# :similarity.
class Recommendify::Neighbor

  # data - Hash with :item_id and :similarity
  def initialize(data)
    @data = data
  end

  # Returns the neighbor's item id as a String.
  # Raises KeyError when :item_id is missing.
  def item_id
    raw_id = @data.fetch(:item_id)
    raw_id.to_s
  end

  # Returns the stored similarity value.
  # Raises KeyError when :similarity is missing.
  def similarity
    @data.fetch(:similarity)
  end

  # Compares in reverse, so sorting puts the highest similarity first.
  def <=>(other)
    other.similarity <=> similarity
  end

end
@@ -0,0 +1,25 @@
1
# Top-level namespace: holds the shared redis connection and two small string
# helpers used to resolve input matrix classes.
module Recommendify

  # Number of neighbors kept per item when nothing else is configured.
  DEFAULT_MAX_NEIGHBORS = 50

  @@redis = nil

  # Sets the redis connection used by all Recommendify classes.
  def self.redis=(redis)
    @@redis = redis
  end

  # Returns the configured redis connection.
  #
  # Raises RuntimeError when no connection has been configured.
  def self.redis
    return @@redis unless @@redis.nil?
    raise "redis not configured! - Recommendify.redis = Redis.new"
  end

  # Returns +str_or_sym+ as a String with the first character upcased and the
  # rest downcased (:jaccard => "Jaccard"). Empty input yields "".
  def self.capitalize(str_or_sym)
    str_or_sym.to_s.capitalize
  end

  # Resolves the constant +klass+ (String or Symbol) inside the Recommendify
  # namespace.
  #
  # Uses const_get instead of evaluating an interpolated string, so the
  # argument can never be executed as code. The second argument (false)
  # restricts the lookup to Recommendify itself, without falling back to
  # top-level constants.
  #
  # Raises NameError when the constant does not exist or the name is invalid.
  def self.constantize(klass)
    const_get(klass, false)
  end

end
@@ -0,0 +1,62 @@
1
module Recommendify
  # Stores, per item, the list of its most similar neighbors. Updates are
  # collected in an in-memory write queue and written to a redis hash (one
  # field per item, value "id:score|id:score|...") by commit_item!.
  #
  # Written in nested form so this file can be loaded even when the
  # Recommendify namespace has not been opened yet.
  class SimilarityMatrix

    attr_reader :write_queue

    # opts - options hash; :redis_prefix and :key are required by redis_key,
    #        :max_neighbors is optional.
    def initialize(opts={})
      @opts = opts
      # item_id => { neighbor_id => accumulated score }
      @write_queue = Hash.new{ |h,k| h[k] = {} }
    end

    # Returns the redis key "<redis_prefix>:<key>[:<append>]".
    def redis_key(append=nil)
      [@opts.fetch(:redis_prefix), @opts.fetch(:key), append].flatten.compact.join(":")
    end

    # Maximum number of neighbors serialized per item.
    def max_neighbors
      @opts[:max_neighbors] || Recommendify::DEFAULT_MAX_NEIGHBORS
    end

    # Adds the scores of +neighbors+ ([[neighbor_id, score], ...]) to the
    # queued scores of +item_id+. Nothing is queued for an empty list.
    def update(item_id, neighbors)
      neighbors.each do |neighbor_id, score|
        if @write_queue[item_id].has_key?(neighbor_id)
          @write_queue[item_id][neighbor_id] += score
        else
          @write_queue[item_id][neighbor_id] = score
        end
      end
    end

    # Returns { neighbor_id => score } for +item_id+: the queued scores when
    # there are any, otherwise what is stored in redis.
    def [](item_id)
      if @write_queue.has_key?(item_id)
        @write_queue[item_id]
      else
        retrieve_item(item_id)
      end
    end

    # Writes the queued neighbors of +item_id+ to redis and drops them from
    # the write queue.
    def commit_item!(item_id)
      serialized = serialize_item(item_id)
      Recommendify.redis.hset(redis_key, item_id, serialized)
      @write_queue.delete(item_id)
    end

    # optimize: the items are already stored in a sorted fashion. we shouldn't
    # throw away this info by storing them in a hash (and re-sorting later). maybe
    # use activesupport's orderedhash?
    #
    # Returns { neighbor_id => score } as stored in redis, {} when nothing is
    # stored for +item_id+.
    def retrieve_item(item_id)
      data = Recommendify.redis.hget(redis_key, item_id)
      return {} if data.nil?

      pairs = data.split("|").map do |entry|
        neighbor_id, score = entry.split(":")
        [neighbor_id, score.to_f]
      end
      Hash[pairs]
    end

    private

    # optimize: implement a better sort. never add more than 50 items to the array
    #
    # Serializes the queued neighbors of +item_id+ as "id:score|id:score|...",
    # highest score first, limited to max_neighbors entries. Entries with a
    # score <= 0 are left out.
    def serialize_item(item_id, max_precision=5)
      ranked = @write_queue[item_id].to_a.sort{ |a,b| b[1] <=> a[1] }
      # first(n) also behaves for n == 0, where [0..n-1] would return everything
      ranked = ranked.first([max_neighbors, 0].max)
      ranked = ranked.map{ |id, score| score > 0 ? "#{id}:#{format_score(score, max_precision)}" : nil }
      ranked.compact * "|"
    end

    # Renders +score+ cut to max_precision+1 characters, without ever cutting
    # into the integer part and without cutting an exponent off.
    #
    # Plain truncation of score.to_s turns 1.234e-05 into "1.234e" (read back
    # as 1.234) and 1234567.0 into "123456"; both are handled here, while
    # ordinary scores such as 0.7654321 still come out as "0.7654".
    def format_score(score, max_precision)
      str = score.to_s
      # expand scientific notation into plain decimal digits first
      str = format("%.#{max_precision}f", score) if str.include?("e")
      integer_digits = str.index(".") || str.length
      str[0..[max_precision, integer_digits - 1].max]
    end

  end
end
@@ -0,0 +1,53 @@
1
module Recommendify
  # Symmetric sparse matrix stored in one redis hash. The cell (x, y) lives in
  # the hash field made of the two sorted coordinates joined by ":", so (x, y)
  # and (y, x) address the same cell.
  #
  # Written in nested form so this file can be loaded even when the
  # Recommendify namespace has not been opened yet.
  class SparseMatrix

    # opts - options hash; :redis_prefix and :key are required by redis_key.
    def initialize(opts={})
      @opts = opts
    end

    # Returns the name of the redis hash, "<redis_prefix>:<key>".
    def redis_key
      [@opts.fetch(:redis_prefix), @opts.fetch(:key)].join(":")
    end

    # Returns the value of cell (x, y) as a Float, 0.0 when it is not set.
    def [](x,y)
      k_get(key(x,y))
    end

    # Sets cell (x, y) to +v+; assigning 0 removes the cell.
    def []=(x,y,v)
      v == 0 ? k_del(key(x,y)) : k_set(key(x,y), v)
    end

    # Increments cell (x, y) by one.
    def incr(x,y)
      k_incr(key(x,y))
    end

    private

    # Hash field for cell (x, y): the sorted coordinates joined by ":".
    def key(x,y)
      [x,y].sort.join(":")
    end

    def k_set(key, val)
      Recommendify.redis.hset(redis_key, key, val)
    end

    def k_del(key)
      Recommendify.redis.hdel(redis_key, key)
    end

    def k_get(key)
      Recommendify.redis.hget(redis_key, key).to_f
    end

    def k_incr(key)
      Recommendify.redis.hincrby(redis_key, key, 1)
    end

    # OPTIMIZE: use scripting/lua in redis 2.6
    #
    # Deletes every cell that has one of +keys+ as a coordinate.
    #
    # The coordinates read back from a hash field are Strings, so the given
    # keys are converted to Strings before comparing; without that, Integer or
    # Symbol ids would never match and nothing would be deleted.
    def k_delall(*keys)
      doomed = keys.map{ |k| k.to_s }
      Recommendify.redis.hkeys(redis_key).each do |iikey|
        next if (iikey.split(":") & doomed).empty?
        Recommendify.redis.hdel(redis_key, iikey)
      end
    end

  end
end
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for recommendify-ruby.

# Date.today is used below; RubyGems is not guaranteed to have loaded the
# date library, so it is required explicitly.
require "date"

$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "recommendify-ruby"
  s.version     = "0.3.8"
  s.date        = Date.today.to_s
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Paul Asmuth", "Raj Shah"]
  s.email       = ["paul@paulasmuth.com", "brainix@gmail.com"]
  s.homepage    = "http://github.com/brainix/recommendify"
  s.summary     = %q{ruby/redis based recommendation engine (collaborative filtering)}
  s.description = %q{Recommendify is a distributed, incremental item-based recommendation engine for binary input ratings. It's based on ruby and redis and uses an approach called "Collaborative Filtering"}
  s.licenses    = ["MIT"]

  # s.extensions = ['ext/extconf.rb']

  s.add_dependency "redis", ">= 2.2.2"

  s.add_development_dependency "rspec", "~> 2.8.0"

  # package everything tracked by git except the repository dotfiles
  s.files         = `git ls-files`.split("\n") - [".gitignore", ".rspec", ".travis.yml"]
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.require_paths = ["lib"]
end
data/spec/base_spec.rb ADDED
@@ -0,0 +1,188 @@
1
+ require ::File.expand_path('../spec_helper', __FILE__)
2
+
3
# Specs for Recommendify::Base: class-level configuration, processing of
# items through the configured input matrices, neighbor lookup and deletion.
describe Recommendify::Base do

  # Start every example from an empty redis and an unconfigured Base class.
  before(:each) do
    flush_redis!
    Recommendify::Base.send(:class_variable_set, :@@max_neighbors, nil)
    Recommendify::Base.send(:class_variable_set, :@@input_matrices, {})
  end

  describe "configuration" do

    it "should return default max_neighbors if not configured" do
      Recommendify::DEFAULT_MAX_NEIGHBORS.should == 50
      sm = Recommendify::Base.new
      sm.max_neighbors.should == 50
    end

    it "should remember max_neighbors if configured" do
      Recommendify::Base.max_neighbors(23)
      sm = Recommendify::Base.new
      sm.max_neighbors.should == 23
    end

    it "should add an input_matrix by 'key'" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      Recommendify::Base.send(:class_variable_get, :@@input_matrices).keys.should == [:myinput]
    end

    it "should retrieve an input_matrix on a new instance" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      sm = Recommendify::Base.new
      lambda { sm.myinput }.should_not raise_error
    end

    it "should retrieve an input_matrix on a new instance and correctly overload respond_to?" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      sm = Recommendify::Base.new
      sm.respond_to?(:process!).should be_true
      sm.respond_to?(:myinput).should be_true
      sm.respond_to?(:fnord).should be_false
    end

    it "should retrieve an input_matrix on a new instance and intialize the correct class" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      sm = Recommendify::Base.new
      sm.myinput.should be_a(Recommendify::JaccardInputMatrix)
    end

  end

  describe "process_item!" do

    it "should call similarities_for on each input_matrix" do
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      sm = Recommendify::Base.new
      sm.myfirstinput.should_receive(:similarities_for).with("fnorditem").and_return([["fooitem", 0.5]])
      sm.mysecondinput.should_receive(:similarities_for).with("fnorditem").and_return([["fooitem", 0.5]])
      sm.similarity_matrix.stub!(:update)
      sm.process_item!("fnorditem")
    end

    it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix" do
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      sm = Recommendify::Base.new
      sm.myfirstinput.should_receive(:similarities_for).and_return([["fooitem", 0.5]])
      sm.mysecondinput.should_receive(:similarities_for).and_return([["fooitem", 0.75], ["baritem", 1.0]])
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem", 0.5]])
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem", 0.75], ["baritem", 1.0]])
      sm.process_item!("fnorditem")
    end

    it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix with weight" do
      # the first matrix is weighted 4.0, so its 0.5 is expected as 2.0
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard, :weight => 4.0)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      sm = Recommendify::Base.new
      sm.myfirstinput.should_receive(:similarities_for).and_return([["fooitem", 0.5]])
      sm.mysecondinput.should_receive(:similarities_for).and_return([["fooitem", 0.75], ["baritem", 1.0]])
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem", 2.0]])
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem", 0.75], ["baritem", 1.0]])
      sm.process_item!("fnorditem")
    end

    it "should retrieve all items from all input matrices" do
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "shmoo"])
      sm = Recommendify::Base.new
      sm.all_items.length.should == 4
      sm.all_items.should include("foo")
      sm.all_items.should include("bar")
      sm.all_items.should include("fnord")
      sm.all_items.should include("shmoo")
    end

    it "should retrieve all items from all input matrices (uniquely)" do
      # "bar" is present in both matrices and must be reported once
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "bar"])
      sm = Recommendify::Base.new
      sm.all_items.length.should == 3
      sm.all_items.should include("foo")
      sm.all_items.should include("bar")
      sm.all_items.should include("fnord")
    end

  end

  describe "process!" do

    it "should call process_item for all input_matrix.all_items's" do
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "shmoo"])
      sm = Recommendify::Base.new
      sm.should_receive(:process_item!).exactly(4).times
      sm.process!
    end

    it "should call process_item for all input_matrix.all_items's (uniquely)" do
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "bar"])
      sm = Recommendify::Base.new
      sm.should_receive(:process_item!).exactly(3).times
      sm.process!
    end

  end

  describe "for(item_id)" do

    it "should retrieve the n-most similar neighbors" do
      sm = Recommendify::Base.new
      sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
      sm.for("fnorditem").length.should == 2
    end

    it "should not throw exception for non existing items" do
      sm = Recommendify::Base.new
      sm.for("not_existing_item").length.should == 0
    end

    it "should retrieve the n-most similar neighbors as Recommendify::Neighbor objects" do
      sm = Recommendify::Base.new
      sm.similarity_matrix.should_receive(:[]).exactly(2).times.with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
      sm.for("fnorditem").first.should be_a(Recommendify::Neighbor)
      sm.for("fnorditem").last.should be_a(Recommendify::Neighbor)
    end

    it "should retrieve the n-most similar neighbors in the correct order" do
      sm = Recommendify::Base.new
      sm.similarity_matrix.should_receive(:[]).exactly(4).times.with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
      sm.for("fnorditem").first.similarity.should == 1.5
      sm.for("fnorditem").first.item_id.should == "baritem"
      sm.for("fnorditem").last.similarity.should == 0.4
      sm.for("fnorditem").last.item_id.should == "fooitem"
    end

    it "should return an empty array if the item if no neighbors were found" do
      sm = Recommendify::Base.new
      sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return({})
      sm.for("fnorditem").should == []
    end

    it "should not call split on nil when retrieving a non-existent item (return an empty array)" do
      sm = Recommendify::Base.new
      sm.for("NONEXISTENT").should == []
    end

  end

  describe "delete_item!" do

    it "should call delete_item on each input_matrix" do
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      sm = Recommendify::Base.new
      sm.myfirstinput.should_receive(:delete_item).with("fnorditem")
      sm.mysecondinput.should_receive(:delete_item).with("fnorditem")
      sm.delete_item!("fnorditem")
    end

    # pending examples (no body yet)
    it "should delete the item from the similarity matrix"

    it "should delete all occurences of the item in other similarity sets from the similarity matrix"

  end

end