recommendify-ruby 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
# Co-occurrence ("cc") matrix behaviour shared by input matrices.
#
# The including class is expected to provide +@opts+ (with +:redis_prefix+
# and +:key+) and a +redis_key(append)+ method, as Recommendify::InputMatrix
# does. Per-item occurrence counts are kept in the redis hash at
# redis_key(:items); pairwise counts are kept in a Recommendify::SparseMatrix.
module Recommendify::CCMatrix

  # Returns the memoized sparse matrix that stores pairwise co-occurrence
  # counts under "<key>:ccmatrix".
  def ccmatrix
    @ccmatrix ||= Recommendify::SparseMatrix.new(
      :redis_prefix => @opts.fetch(:redis_prefix),
      :key => [@opts.fetch(:key), :ccmatrix].join(":")
    )
  end

  # Records one set of co-occurring items.
  #
  # Increments the occurrence count of every entry of +item_ids+, then the
  # co-occurrence count of every distinct pair of ids. +set_id+ is accepted
  # but not used by this implementation.
  #
  # Returns an array holding the result of each pair increment.
  def add_set(set_id, item_ids)
    # FIXPAUL: forbid | and : in item_ids
    item_ids.each { |item_id| item_count_incr(item_id) }
    # Increment from the pairs themselves. Round-tripping each pair through an
    # "a:b" string and splitting it again mis-addressed ids containing ":".
    # Ids are still handed over as strings, exactly as the split used to do.
    unique_pairs(item_ids).map { |i1, i2| ccmatrix.incr(i1.to_s, i2.to_s) }
  end

  # Records +item_id+ as co-occurring with each of +other_item_ids+.
  # Only the occurrence count of +item_id+ itself is incremented.
  def add_single(set_id, item_id, other_item_ids)
    item_count_incr(item_id)
    other_item_ids.each do |other_item|
      ccmatrix.incr(item_id, other_item)
    end
  end

  # Returns the ids of all items that have an occurrence count.
  def all_items
    Recommendify.redis.hkeys(redis_key(:items))
  end

  # Removes the occurrence count of +item_id+ and every co-occurrence cell
  # that involves it.
  def delete_item(item_id)
    Recommendify.redis.hdel(redis_key(:items), item_id)
    # k_delall is private on SparseMatrix, hence the send
    ccmatrix.send(:k_delall, item_id)
  end

  private

  # Returns every distinct pair of +keys+ as a sorted "a:b" string, in order
  # of first appearance.
  def all_pairs(keys)
    unique_pairs(keys).map { |pair| pair.sort.join(":") }.uniq
  end

  # Returns every unordered pair of distinct values in +keys+ as a
  # two-element array; duplicates in +keys+ are ignored.
  def unique_pairs(keys)
    keys.uniq.combination(2).to_a
  end

  # Increments the occurrence count of +key+ by one.
  def item_count_incr(key)
    Recommendify.redis.hincrby(redis_key(:items), key, 1)
  end

  # Returns the occurrence count of +key+ (0 when the item is unknown).
  def item_count(key)
    Recommendify.redis.hget(redis_key(:items), key).to_i
  end

end
@@ -0,0 +1,7 @@
1
+ class Recommendify::CosineInputMatrix < Recommendify::InputMatrix
2
+
3
+ include Recommendify::CCMatrix
4
+
5
+ # here be dragons ;) -- NOTE(review): this class body defines no similarity / similarities_for of its own; as shown, those calls reach the raising stubs on InputMatrix and only the CCMatrix storage methods are available
6
+
7
+ end
@@ -0,0 +1,52 @@
1
# Abstract base class for input matrices.
#
# Holds the option hash and the redis key helper; the data and similarity
# methods below are placeholders that concrete subclasses override.
class Recommendify::InputMatrix

  # Builds the input matrix class selected by opts[:similarity_func]
  # (e.g. :jaccard => Recommendify::JaccardInputMatrix).
  #
  # Raises ArgumentError when :similarity_func is missing or empty, instead
  # of failing later with an unrelated NoMethodError on nil.
  def self.create(opts)
    similarity_func = opts[:similarity_func]
    if similarity_func.to_s.empty?
      raise ArgumentError, "missing :similarity_func option"
    end
    klass = "#{Recommendify.capitalize(similarity_func)}InputMatrix"
    Recommendify.constantize(klass.intern).new(opts)
  end

  # opts - option hash; :redis_prefix and :key are required by redis_key,
  #        :weight is optional.
  def initialize(opts)
    @opts = opts
  end

  # Returns "<redis_prefix>:<key>" with +append+ (a value or an array of
  # values) added as further ":"-separated segments; nil parts are dropped.
  def redis_key(append=nil)
    [@opts.fetch(:redis_prefix), @opts.fetch(:key), append].flatten.compact.join(":")
  end

  # Returns the configured :weight as a float (1.0 when not configured).
  def weight
    (@opts[:weight] || 1).to_f
  end

  # add a set of item_ids to the matrix
  def add_set(set_id, item_ids)
    raise "implemented in subclass"
  end

  # add a single item to a set of item_ids to the matrix
  def add_single(set_id, item_id, other_item_ids)
    raise "implemented in subclass"
  end

  # calculate the similarity between item1 and item2 (0.0-1.0)
  def similarity(item1, item2)
    raise "implemented in subclass"
  end

  # calculate all similarities to other items in the matrix for item1
  def similarities_for(item1)
    # return => [ ["item23", 0.6], ["item42", 0.23], (...) ]
    raise "implemented in subclass"
  end

  # retrieve all item_ids in the matrix
  def all_items
    # return => [ "item23", "item42", "item17", (...) ]
    raise "implemented in subclass"
  end

  # delete item_id from the matrix
  def delete_item(item_id)
    raise "implemented in subclass"
  end

end
@@ -0,0 +1,62 @@
1
require "shellwords"

# Input matrix that scores item pairs with the Jaccard index, computed from
# the co-occurrence counts maintained by Recommendify::CCMatrix.
class Recommendify::JaccardInputMatrix < Recommendify::InputMatrix

  include Recommendify::CCMatrix

  # opts - see Recommendify::InputMatrix; with :native set, the native helper
  #        binary must exist or a RuntimeError is raised.
  def initialize(opts={})
    check_native if opts[:native]
    super(opts)
  end

  # Returns the Jaccard similarity of the two items.
  def similarity(item1, item2)
    calculate_jaccard_cached(item1, item2)
  end

  # Returns [[other_item_id, similarity], ...] for +item1+, computed by the
  # native helper when the :native option is set and in ruby otherwise.
  def similarities_for(item1)
    return run_native(item1) if @opts[:native]
    calculate_similarities(item1)
  end

  private

  # Pure-ruby path: scores item1 against every other known item.
  def calculate_similarities(item1)
    (all_items - [item1]).map { |item2| [item2, similarity(item1, item2)] }
  end

  # Jaccard index from stored counts: co-occurrences / (count1 + count2 -
  # co-occurrences). Returns 0.0 when the union is empty, e.g. for unknown
  # items, where the plain division would produce NaN.
  def calculate_jaccard_cached(item1, item2)
    val = ccmatrix[item1, item2]
    union = item_count(item1) + item_count(item2) - val
    return 0.0 unless union > 0
    val.to_f / union.to_f
  end

  # Jaccard index of two in-memory collections; 0.0 when both are empty.
  def calculate_jaccard(set1, set2)
    union_size = (set1 | set2).length
    return 0.0 if union_size.zero?
    (set1 & set2).length.to_f / union_size.to_f
  end

  # Runs the native helper for +item_id+ and parses its "OUT: (id) (score)"
  # lines into [[id, score], ...].
  #
  # Raises RuntimeError when the helper exits unsuccessfully, or when an
  # unparseable line appears and the output contains no 'exit:' marker.
  def run_native(item_id)
    # Every argument is shell-escaped: the values are interpolated into a
    # shell command line, so quotes, spaces or $(...) in an item id or path
    # must not be able to change the command being run.
    args = [redis_key, item_id, redis_url].map { |arg| Shellwords.escape(arg.to_s) }
    res = %x{#{Shellwords.escape(native_path)} --jaccard #{args.join(" ")}}
    raise "error: dirty exit (#{$?})" unless $?.success?
    res.split("\n").map do |line|
      sim = line.match(/OUT: \(([^\)]*)\) \(([^\)]*)\)/)
      if sim
        [sim[1], sim[2].to_f]
      else
        raise "error: #{res}" unless res.include?('exit:')
        nil # non-result line; removed by compact below
      end
    end.compact
  end

  # Returns true when the native helper binary exists, raises otherwise.
  def check_native
    # File.exist? rather than File.exists?, which was removed in ruby 3.2
    return true if ::File.exist?(native_path)
    raise "recommendify_native not found - you need to run rake build_native first"
  end

  # Path of the native helper binary, resolved relative to this file.
  def native_path
    ::File.expand_path('../../../bin/recommendify', __FILE__)
  end

  # Location of the configured redis connection, passed to the native helper.
  def redis_url
    Recommendify.redis.client.location
  end

end
@@ -0,0 +1,19 @@
1
# Read-only wrapper around one neighbor entry: a hash carrying an :item_id
# and a :similarity.
class Recommendify::Neighbor

  # data - hash with the keys :item_id and :similarity
  def initialize(data)
    @data = data
  end

  # The neighbor's item id, always as a string.
  def item_id
    raw_id = @data.fetch(:item_id)
    raw_id.to_s
  end

  # The neighbor's similarity score, exactly as stored.
  def similarity
    @data.fetch(:similarity)
  end

  # Orders neighbors by descending similarity: the operands are swapped on
  # purpose so that the most similar neighbor sorts first.
  def <=>(other)
    theirs = other.similarity
    theirs <=> similarity
  end

end
@@ -0,0 +1,25 @@
1
# Top-level namespace: holds the shared redis connection and two small
# string/constant helpers used when resolving input matrix classes.
module Recommendify

  DEFAULT_MAX_NEIGHBORS = 50

  @@redis = nil

  # Sets the redis connection used by all Recommendify classes.
  def self.redis=(redis)
    @@redis = redis
  end

  # Returns the configured redis connection.
  # Raises RuntimeError when none has been configured yet.
  def self.redis
    return @@redis unless @@redis.nil?
    raise "redis not configured! - Recommendify.redis = Redis.new"
  end

  # Returns +str_or_sym+ as a string with the first character upcased and
  # the rest downcased ("" for an empty input).
  def self.capitalize(str_or_sym)
    str_or_sym.to_s.capitalize
  end

  # Returns the constant named +klass+ (symbol or string) defined directly
  # inside Recommendify. Raises NameError when there is no such constant or
  # the name is not a valid constant name.
  def self.constantize(klass)
    # Plain constant lookup instead of evaluating a built string: the name is
    # derived from configuration and must never be executed as code. The
    # +false+ keeps the lookup inside Recommendify (no fallback to Object).
    const_get(klass, false)
  end

end
@@ -0,0 +1,62 @@
1
# Stores, per item, its most similar neighbors. Updates are collected in an
# in-memory write queue and written to a redis hash by commit_item!, one
# "id:score|id:score|..." string per item.
class Recommendify::SimilarityMatrix

  attr_reader :write_queue

  # opts - :redis_prefix and :key are required by redis_key;
  #        :max_neighbors is optional.
  def initialize(opts={})
    @opts = opts
    @write_queue = Hash.new{ |h,k| h[k] = {} }
  end

  # Returns "<redis_prefix>:<key>" with +append+ added as further
  # ":"-separated segments; nil parts are dropped.
  def redis_key(append=nil)
    [@opts.fetch(:redis_prefix), @opts.fetch(:key), append].flatten.compact.join(":")
  end

  # Maximum number of neighbors kept per item.
  def max_neighbors
    @opts[:max_neighbors] || Recommendify::DEFAULT_MAX_NEIGHBORS
  end

  # Adds +neighbors+ ([[neighbor_id, score], ...]) to the queued entry of
  # +item_id+; scores for an already queued neighbor are summed.
  def update(item_id, neighbors)
    neighbors.each do |neighbor_id, score|
      if @write_queue[item_id].has_key?(neighbor_id)
        @write_queue[item_id][neighbor_id] += score
      else
        @write_queue[item_id][neighbor_id] = score
      end
    end
  end

  # Returns the neighbors of +item_id+ as { neighbor_id => score }: the
  # queued (uncommitted) entry when there is one, the stored entry otherwise.
  def [](item_id)
    if @write_queue.has_key?(item_id)
      @write_queue[item_id]
    else
      retrieve_item(item_id)
    end
  end

  # Writes the queued neighbors of +item_id+ to redis and drops them from
  # the write queue.
  def commit_item!(item_id)
    serialized = serialize_item(item_id)
    Recommendify.redis.hset(redis_key, item_id, serialized)
    @write_queue.delete(item_id)
  end

  # optimize: the items are already stored in a sorted fashion. we shouldn't
  # throw away this info by storing them in a hash (and re-sorting later). maybe
  # use activesupport's orderedhash?
  #
  # Returns the stored neighbors of +item_id+ as { neighbor_id => score },
  # or {} when nothing is stored.
  def retrieve_item(item_id)
    data = Recommendify.redis.hget(redis_key, item_id)
    return {} if data.nil?
    pairs = data.split("|").map do |entry|
      # The score follows the LAST colon, so a neighbor id that itself
      # contains ":" is read back intact.
      id, separator, score = entry.rpartition(":")
      separator.empty? ? [entry, 0.0] : [id, score.to_f]
    end
    Hash[pairs]
  end

  private

  # optimize: implement a better sort. never add more than 50 items to the array
  #
  # Returns the queued neighbors of +item_id+ as "id:score|id:score|...",
  # highest score first, limited to max_neighbors, without non-positive scores.
  def serialize_item(item_id, max_precision=5)
    items = @write_queue[item_id].to_a
    items.sort!{ |a,b| b[1] <=> a[1] }
    items = items[0..max_neighbors-1]
    items = items.map{ |i,s| s>0 ? "#{i}:#{format_score(s, max_precision)}" : nil }
    items.compact * "|"
  end

  # Returns the textual form of +score+, cut to max_precision+1 characters
  # when that only drops fractional digits.
  def format_score(score, max_precision)
    text = score.to_s
    whole_digits = text.index(".") || text.length
    # Cutting exponent notation ("1.0e-05" -> "1.0e-0") or a long integer
    # part ("1234567.5" -> "123456") would change the magnitude of the value
    # that is read back, so such scores are written out in full.
    return text if text.include?("e") || whole_digits > max_precision + 1
    text[0..max_precision]
  end

end
@@ -0,0 +1,53 @@
1
# Symmetric sparse matrix of numbers stored in a single redis hash. The cell
# (x, y) lives in the hash field "a:b", where a and b are the two ids in
# sorted order, so (x, y) and (y, x) address the same cell.
class Recommendify::SparseMatrix

  # opts - :redis_prefix and :key are required by redis_key.
  def initialize(opts={})
    @opts = opts
  end

  # Name of the redis hash holding the matrix: "<redis_prefix>:<key>".
  def redis_key
    [@opts.fetch(:redis_prefix), @opts.fetch(:key)].join(":")
  end

  # Returns the value of cell (x, y) as a float (0.0 for an unset cell).
  def [](x,y)
    k_get(key(x,y))
  end

  # Sets cell (x, y) to +v+; assigning 0 removes the cell.
  def []=(x,y,v)
    v == 0 ? k_del(key(x,y)) : k_set(key(x,y), v)
  end

  # Increments cell (x, y) by one.
  def incr(x,y)
    k_incr(key(x,y))
  end

  private

  # Hash field for the cell (x, y). Ids are converted to strings before
  # sorting: fields read back from redis are strings, so sorting raw values
  # would file integer ids under a different field than their string form
  # (9, 10 vs "10", "9") and would raise on mixed types.
  def key(x,y)
    [x,y].map{ |id| id.to_s }.sort.join(":")
  end

  def k_set(key, val)
    Recommendify.redis.hset(redis_key, key, val)
  end

  def k_del(key)
    Recommendify.redis.hdel(redis_key, key)
  end

  def k_get(key)
    Recommendify.redis.hget(redis_key, key).to_f
  end

  def k_incr(key)
    Recommendify.redis.hincrby(redis_key, key, 1)
  end

  # OPTIMIZE: use scripting/lua in redis 2.6
  #
  # Deletes every cell whose field involves one of +keys+.
  def k_delall(*keys)
    # Compare as strings (the field parts are strings), so that non-string
    # ids match as well; computed once instead of per field.
    doomed = keys.map{ |id| id.to_s }
    Recommendify.redis.hkeys(redis_key).each do |iikey|
      next if (iikey.split(":") & doomed).empty?
      Recommendify.redis.hdel(redis_key, iikey)
    end
  end

end
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for recommendify-ruby.

# Date.today is used for s.date below; load it explicitly rather than relying
# on something else having required "date" before this file is evaluated.
require "date"

$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "recommendify-ruby"
  s.version     = "0.3.8"
  s.date        = Date.today.to_s
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Paul Asmuth", "Raj Shah"]
  s.email       = ["paul@paulasmuth.com", "brainix@gmail.com"]
  s.homepage    = "http://github.com/brainix/recommendify"
  s.summary     = %q{ruby/redis based recommendation engine (collaborative filtering)}
  s.description = %q{Recommendify is a distributed, incremental item-based recommendation engine for binary input ratings. It's based on ruby and redis and uses an approach called "Collaborative Filtering"}
  s.licenses    = ["MIT"]

  # s.extensions = ['ext/extconf.rb']

  s.add_dependency "redis", ">= 2.2.2"

  s.add_development_dependency "rspec", "~> 2.8.0"

  # file lists come from git, so the gem has to be built from a git checkout
  s.files         = `git ls-files`.split("\n") - [".gitignore", ".rspec", ".travis.yml"]
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.require_paths = ["lib"]
end
data/spec/base_spec.rb ADDED
@@ -0,0 +1,188 @@
1
require ::File.expand_path('../spec_helper', __FILE__)

describe Recommendify::Base do

  # Built on first use inside an example, i.e. after the example has
  # registered its input matrices / max_neighbors on the class.
  let(:sm) { Recommendify::Base.new }

  # Canned similarity-matrix row returned by the stubs in "for(item_id)".
  let(:stored_neighbors) { { :fooitem => 0.4, :baritem => 1.5 } }

  before do
    flush_redis!
    # reset the class-level configuration before every example
    [[:@@max_neighbors, nil], [:@@input_matrices, {}]].each do |name, value|
      Recommendify::Base.send(:class_variable_set, name, value)
    end
  end

  describe "configuration" do

    it "should return default max_neighbors if not configured" do
      Recommendify::DEFAULT_MAX_NEIGHBORS.should == 50
      sm.max_neighbors.should == 50
    end

    it "should remember max_neighbors if configured" do
      Recommendify::Base.max_neighbors(23)
      sm.max_neighbors.should == 23
    end

    it "should add an input_matrix by 'key'" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      registered = Recommendify::Base.send(:class_variable_get, :@@input_matrices)
      registered.keys.should == [:myinput]
    end

    it "should retrieve an input_matrix on a new instance" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      sm # instantiate before the lambda, as the assertion is only about #myinput
      lambda{ sm.myinput }.should_not raise_error
    end

    it "should retrieve an input_matrix on a new instance and correctly overload respond_to?" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      [:process!, :myinput].each do |method_name|
        sm.respond_to?(method_name).should be_true
      end
      sm.respond_to?(:fnord).should be_false
    end

    it "should retrieve an input_matrix on a new instance and intialize the correct class" do
      Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
      sm.myinput.should be_a(Recommendify::JaccardInputMatrix)
    end

  end

  describe "process_item!" do

    it "should call similarities_for on each input_matrix" do
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      [sm.myfirstinput, sm.mysecondinput].each do |input|
        input.should_receive(:similarities_for).with("fnorditem").and_return([["fooitem",0.5]])
      end
      sm.similarity_matrix.stub!(:update)
      sm.process_item!("fnorditem")
    end

    it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix" do
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      first_output  = [["fooitem",0.5]]
      second_output = [["fooitem",0.75], ["baritem", 1.0]]
      sm.myfirstinput.should_receive(:similarities_for).and_return(first_output)
      sm.mysecondinput.should_receive(:similarities_for).and_return(second_output)
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.5]])
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.75], ["baritem", 1.0]])
      sm.process_item!("fnorditem")
    end

    it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix with weight" do
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard, :weight => 4.0)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      first_output  = [["fooitem",0.5]]
      second_output = [["fooitem",0.75], ["baritem", 1.0]]
      sm.myfirstinput.should_receive(:similarities_for).and_return(first_output)
      sm.mysecondinput.should_receive(:similarities_for).and_return(second_output)
      # the first matrix carries weight 4.0, so its 0.5 is expected as 2.0
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",2.0]])
      sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.75], ["baritem", 1.0]])
      sm.process_item!("fnorditem")
    end

    it "should retrieve all items from all input matrices" do
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "shmoo"])
      sm.all_items.length.should == 4
      ["foo", "bar", "fnord", "shmoo"].each do |item|
        sm.all_items.should include(item)
      end
    end

    it "should retrieve all items from all input matrices (uniquely)" do
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "bar"])
      sm.all_items.length.should == 3
      ["foo", "bar", "fnord"].each do |item|
        sm.all_items.should include(item)
      end
    end

  end

  describe "process!" do

    it "should call process_item for all input_matrix.all_items's" do
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "shmoo"])
      sm.should_receive(:process_item!).exactly(4).times
      sm.process!
    end

    it "should call process_item for all input_matrix.all_items's (uniquely)" do
      Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
      Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "bar"])
      sm.should_receive(:process_item!).exactly(3).times
      sm.process!
    end

  end

  describe "for(item_id)" do

    it "should retrieve the n-most similar neighbors" do
      sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return(stored_neighbors)
      sm.for("fnorditem").length.should == 2
    end

    it "should not throw exception for non existing items" do
      sm.for("not_existing_item").length.should == 0
    end

    it "should retrieve the n-most similar neighbors as Recommendify::Neighbor objects" do
      sm.similarity_matrix.should_receive(:[]).exactly(2).times.with("fnorditem").and_return(stored_neighbors)
      [:first, :last].each do |position|
        sm.for("fnorditem").send(position).should be_a(Recommendify::Neighbor)
      end
    end

    it "should retrieve the n-most similar neighbors in the correct order" do
      sm.similarity_matrix.should_receive(:[]).exactly(4).times.with("fnorditem").and_return(stored_neighbors)
      # one lookup per assertion: four calls in total, matching the stub above
      [[:first, 1.5, "baritem"], [:last, 0.4, "fooitem"]].each do |position, score, id|
        sm.for("fnorditem").send(position).similarity.should == score
        sm.for("fnorditem").send(position).item_id.should == id
      end
    end

    it "should return an empty array if the item if no neighbors were found" do
      sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return({})
      sm.for("fnorditem").should == []
    end

    it "should not call split on nil when retrieving a non-existent item (return an empty array)" do
      sm.for("NONEXISTENT").should == []
    end

  end

  describe "delete_item!" do

    it "should call delete_item on each input_matrix" do
      Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
      Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
      [sm.myfirstinput, sm.mysecondinput].each do |input|
        input.should_receive(:delete_item).with("fnorditem")
      end
      sm.delete_item!("fnorditem")
    end

    # pending: no example body yet
    it "should delete the item from the similarity matrix"

    # pending: no example body yet
    it "should delete all occurences of the item in other similarity sets from the similarity matrix"

  end

end