recommendify 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
1
+ require "recommendify/recommendify"
2
+ require "recommendify/sparse_matrix"
3
+ require "recommendify/cc_matrix"
4
+ require "recommendify/similarity_matrix"
5
+ require "recommendify/input_matrix"
6
+ require "recommendify/jaccard_input_matrix"
7
+ require "recommendify/cosine_input_matrix"
8
+ require "recommendify/base"
9
+ require "recommendify/neighbor"
@@ -0,0 +1,80 @@
1
# Front object of the recommender.
#
# Input matrices are registered on the class with `input_matrix`; every
# instance builds one Recommendify::InputMatrix per registration and one
# Recommendify::SimilarityMatrix that collects the weighted similarities.
# Each registered matrix is reachable as a reader named after its key
# (see #method_missing).
class Recommendify::Base

  attr_reader :similarity_matrix, :input_matrices

  # NOTE: these are class variables, so the configuration is shared between
  # Recommendify::Base and every subclass.
  @@max_neighbors = nil
  @@input_matrices = {}

  # Reads the configured neighbor limit when called without an argument,
  # stores +n+ as the new limit otherwise.
  def self.max_neighbors(n=nil)
    return @@max_neighbors unless n
    @@max_neighbors = n
  end

  # Registers the options of an input matrix under +key+.
  def self.input_matrix(key, opts)
    @@input_matrices[key] = opts
  end

  # Returns the registered input matrix options, keyed by matrix key.
  def self.input_matrices
    @@input_matrices
  end

  # Instantiates all registered input matrices and the similarity matrix.
  def initialize
    @input_matrices = Hash[self.class.input_matrices.map{ |key, opts|
      # merge (not merge!) so the shared class-level options stay untouched
      matrix_opts = opts.merge(:key => key, :redis_prefix => redis_prefix)
      [ key, Recommendify::InputMatrix.create(matrix_opts) ]
    }]
    @similarity_matrix = Recommendify::SimilarityMatrix.new(
      :max_neighbors => max_neighbors,
      :key => :similarities,
      :redis_prefix => redis_prefix
    )
  end

  # Prefix handed to every matrix for building its redis keys.
  def redis_prefix
    "recommendify"
  end

  # Configured neighbor limit, or Recommendify::DEFAULT_MAX_NEIGHBORS.
  def max_neighbors
    self.class.max_neighbors || Recommendify::DEFAULT_MAX_NEIGHBORS
  end

  # Exposes every input matrix as a reader named after its key; anything else
  # goes to the default implementation, which raises NoMethodError.
  def method_missing(method, *args, &block)
    return @input_matrices[method] if input_matrix?(method)
    super
  end

  # Keeps respond_to? (including its two-argument form) in sync with
  # #method_missing.
  def respond_to_missing?(method, include_private = false)
    input_matrix?(method) || super
  end

  # Returns the unique item ids known to any input matrix.
  def all_items
    @input_matrices.values.map{ |matrix| matrix.all_items }.flatten.uniq
  end

  # Returns the neighbors of +item_id+ as sorted Recommendify::Neighbor objects.
  def for(item_id)
    neighbors = similarity_matrix[item_id].map do |neighbor_id, similarity|
      Recommendify::Neighbor.new(
        :item_id => neighbor_id,
        :similarity => similarity
      )
    end
    neighbors.sort
  end

  # Recomputes the similarities of every known item.
  def process!
    all_items.each{ |item_id| process_item!(item_id) }
  end

  # Collects the similarities of +item_id+ from every input matrix, scaled by
  # the matrix weight, queues them on the similarity matrix and commits.
  def process_item!(item_id)
    input_matrices.each do |_key, matrix|
      weighted = matrix.similarities_for(item_id).map do |neighbor_id, score|
        [neighbor_id, score * matrix.weight]
      end
      similarity_matrix.update(item_id, weighted)
    end
    similarity_matrix.commit_item!(item_id)
  end

  private

  # True when +name+ is the key of an instantiated input matrix. Guards against
  # being asked before #initialize has populated @input_matrices.
  def input_matrix?(name)
    !@input_matrices.nil? && @input_matrices.has_key?(name)
  end

end
@@ -0,0 +1,46 @@
1
# Co-occurrence bookkeeping mixed into input matrices.
#
# Relies on the including class for @opts and #redis_key (both are defined by
# Recommendify::InputMatrix). Per-item counts live in the redis hash
# redis_key(:items); pair counts live in a Recommendify::SparseMatrix.
module Recommendify::CCMatrix

  # Lazily built sparse matrix holding the pair counts, stored under
  # "<key>:ccmatrix".
  def ccmatrix
    @ccmatrix ||= Recommendify::SparseMatrix.new(
      :redis_prefix => @opts.fetch(:redis_prefix),
      :key => [@opts.fetch(:key), :ccmatrix].join(":")
    )
  end

  # Counts one occurrence for each entry of +item_ids+ and one co-occurrence
  # for each unordered pair of distinct ids. +set_id+ is accepted but not used.
  def add_set(set_id, item_ids)
    item_ids.each do |item_id|
      item_count_incr(item_id)
    end
    # Pairs are passed on as two values; they are never joined into "a:b" and
    # split again, so an id containing ":" no longer yields a wrong pair.
    item_pairs(item_ids).map do |i1, i2|
      ccmatrix.incr(i1, i2)
    end
  end

  def add_single(set_id, item_id, other_item_ids)
    # todo: add single item to set after set was added (incrementally)
    # item_count_incr(item_id)
    # other_item_ids.each do |other_item|
    # @ccmatrix.incr(item_id, other_item)
    # end
  end

  # Returns every item id that has an occurrence count.
  def all_items
    Recommendify.redis.hkeys(redis_key(:items))
  end

  private

  # Unordered pairs of distinct ids. Ids are stringified first because that is
  # the form in which redis hands them back.
  def item_pairs(item_ids)
    item_ids.map{ |item_id| item_id.to_s }.uniq.combination(2).to_a
  end

  # Same pairs rendered as "a:b" strings; no longer used by add_set but kept
  # with its original output.
  def all_pairs(keys)
    keys.uniq.combination(2).map{ |pair| pair.sort.join(":") }.uniq
  end

  def item_count_incr(key)
    Recommendify.redis.hincrby(redis_key(:items), key, 1)
  end

  def item_count(key)
    Recommendify.redis.hget(redis_key(:items), key).to_i
  end

end
@@ -0,0 +1,7 @@
1
# Input matrix selected by :similarity_func => :cosine.
#
# Placeholder: it only mixes in the co-occurrence bookkeeping and defines no
# similarity methods of its own.
class Recommendify::CosineInputMatrix < Recommendify::InputMatrix
  include Recommendify::CCMatrix

  # here be dragons ;)
end
@@ -0,0 +1,47 @@
1
# Abstract base of all input matrices. It stores the options and derives redis
# keys and the weight from them; the data methods must come from a subclass.
class Recommendify::InputMatrix

  # Builds the subclass named after opts[:similarity_func]
  # (e.g. :jaccard => Recommendify::JaccardInputMatrix).
  def self.create(opts)
    class_name = Recommendify.capitalize(opts[:similarity_func]) + "InputMatrix"
    matrix_class = Recommendify.constantize(class_name.to_sym)
    matrix_class.new(opts)
  end

  def initialize(opts)
    @opts = opts
  end

  # Joins prefix, matrix key and the optional suffix(es) with ":".
  def redis_key(append=nil)
    parts = [@opts.fetch(:redis_prefix), @opts.fetch(:key), append]
    parts.flatten.compact.join(":")
  end

  # Weight of this matrix as a float; 1.0 unless :weight is configured.
  def weight
    configured = @opts[:weight]
    (configured ? configured : 1).to_f
  end

  # Add a set of item_ids to the matrix.
  def add_set(set_id, item_ids)
    raise RuntimeError, "implemented in subclass"
  end

  # Add a single item to an already added set of item_ids.
  def add_single(set_id, item_id, other_item_ids)
    raise RuntimeError, "implemented in subclass"
  end

  # Calculate the similarity between item1 and item2 (0.0-1.0).
  def similarity(item1, item2)
    raise RuntimeError, "implemented in subclass"
  end

  # Calculate the similarities of item1 to all other items in the matrix.
  # return => [ ["item23", 0.6], ["item42", 0.23], (...) ]
  def similarities_for(item1)
    raise RuntimeError, "implemented in subclass"
  end

  # Retrieve all item_ids in the matrix.
  # return => [ "item23", "item42", "item17", (...) ]
  def all_items
    raise RuntimeError, "implemented in subclass"
  end

end
@@ -0,0 +1,35 @@
1
# Input matrix that scores two items with the Jaccard index, computed from the
# counts maintained by Recommendify::CCMatrix.
class Recommendify::JaccardInputMatrix < Recommendify::InputMatrix

  include Recommendify::CCMatrix

  def initialize(opts={})
    super(opts)
  end

  # Jaccard similarity of two items based on the stored counts.
  def similarity(item1, item2)
    calculate_jaccard_cached(item1, item2)
  end

  # Returns [[other_item_id, similarity], ...] for every other known item.
  #
  # optimize: get all item-counts and the cc-row with 2 redis hmgets.
  # optimize: don't return more than sm.max_neighbors items (truncate set while collecting)
  def similarities_for(item1)
    # todo: optimize native. execute with own redis conn and write top K to stdout
    # native_output = %x{recommendify_native jaccard "#{redis_key}" "#{item1}"}
    # return native_output.split("\n").map{ |l| l.split(",") }

    # all_items comes back from redis as strings; compare like with like so the
    # item itself is always excluded from its own neighbor list.
    item_id = item1.to_s
    (all_items - [item_id]).map do |item2|
      [item2, similarity(item_id, item2)]
    end
  end

  private

  # co-occurrences / (count1 + count2 - co-occurrences)
  def calculate_jaccard_cached(item1, item2)
    val = ccmatrix[item1, item2]
    union = item_count(item1) + item_count(item2) - val
    # two items without any recorded occurrence would divide 0 by 0 (NaN)
    return 0.0 if union.zero?
    val.to_f / union.to_f
  end

  # Jaccard index of two plain arrays; 0.0 when both are empty.
  def calculate_jaccard(set1, set2)
    union = set1 | set2
    return 0.0 if union.empty?
    (set1 & set2).length.to_f / union.length.to_f
  end

end
@@ -0,0 +1,19 @@
1
# One recommendation result: an item id together with its similarity score.
class Recommendify::Neighbor

  # +data+ must provide the keys :item_id and :similarity.
  def initialize(data)
    @data = data
  end

  # The neighbor's id, always as a String.
  def item_id
    raw_id = @data.fetch(:item_id)
    raw_id.to_s
  end

  # The similarity score exactly as it was passed in.
  def similarity
    @data.fetch(:similarity)
  end

  # Compares in reverse, so sorting puts the most similar neighbor first.
  def <=>(other)
    theirs = other.similarity
    theirs <=> similarity
  end

end
@@ -0,0 +1,25 @@
1
# Namespace of the library; also holds the shared redis connection and two
# small naming helpers.
module Recommendify

  DEFAULT_MAX_NEIGHBORS = 50

  @@redis = nil

  # Sets the redis connection used by all matrices.
  def self.redis=(redis)
    @@redis = redis
  end

  # Returns the configured redis connection.
  # Raises a RuntimeError while no connection has been assigned.
  def self.redis
    return @@redis unless @@redis.nil?
    raise "redis not configured! - Recommendify.redis = Redis.new"
  end

  # Upcases the first character and downcases the rest (:jaccard => "Jaccard").
  # An empty or nil input yields "" instead of failing on nil.upcase.
  def self.capitalize(str_or_sym)
    str_or_sym.to_s.capitalize
  end

  # Looks up the constant +klass+ directly inside this module.
  # A plain constant lookup replaces the former module_eval, so the argument is
  # never executed as code; unknown or malformed names raise NameError.
  def self.constantize(klass)
    const_get(klass.to_s, false)
  end

end
@@ -0,0 +1,61 @@
1
# Stores the neighbors of each item. Updates are collected in an in-memory
# write queue and written to a redis hash field per item on commit, serialized
# as "neighbor_id:score|neighbor_id:score|...".
class Recommendify::SimilarityMatrix

  attr_reader :write_queue

  def initialize(opts={})
    @opts = opts
    @write_queue = Hash.new{ |h,k| h[k] = {} }
  end

  # Joins prefix, matrix key and the optional suffix(es) with ":".
  def redis_key(append=nil)
    [@opts.fetch(:redis_prefix), @opts.fetch(:key), append].flatten.compact.join(":")
  end

  # Configured neighbor limit, or Recommendify::DEFAULT_MAX_NEIGHBORS.
  def max_neighbors
    @opts[:max_neighbors] || Recommendify::DEFAULT_MAX_NEIGHBORS
  end

  # Adds the [neighbor_id, score] pairs to the queued scores of +item_id+;
  # scores for an already queued neighbor are summed.
  def update(item_id, neighbors)
    neighbors.each do |neighbor_id, score|
      if @write_queue[item_id].has_key?(neighbor_id)
        @write_queue[item_id][neighbor_id] += score
      else
        @write_queue[item_id][neighbor_id] = score
      end
    end
  end

  # Returns { neighbor_id => score }: the queued scores when there are
  # uncommitted updates for +item_id+, the stored ones otherwise.
  def [](item_id)
    if @write_queue.has_key?(item_id)
      @write_queue[item_id]
    else
      retrieve_item(item_id)
    end
  end

  # Serializes the queued scores of +item_id+ into redis and drops them from
  # the queue.
  def commit_item!(item_id)
    serialized = serialize_item(item_id)
    Recommendify.redis.hset(redis_key, item_id, serialized)
    @write_queue.delete(item_id)
  end

  # Reads the stored neighbors of +item_id+; an item that was never committed
  # yields an empty hash.
  #
  # optimize: the items are already stored in a sorted fashion. we shouldn't
  # throw away this info by storing them in a hash (and re-sorting later). maybe
  # use activesupport's orderedhash?
  def retrieve_item(item_id)
    data = Recommendify.redis.hget(redis_key, item_id)
    return {} if data.nil?
    Hash[data.split("|").map{ |entry| parse_entry(entry) }]
  end

  private

  # Keeps the max_neighbors best scored neighbors and renders the positive ones.
  #
  # optimize: implement a better sort. never add more than 50 items to the array
  def serialize_item(item_id, max_precision=5)
    items = @write_queue[item_id].to_a
    items.sort!{ |a,b| b[1] <=> a[1] }
    # first(n) instead of [0..n-1]: a limit of 0 must keep nothing, not everything
    items = items.first([max_neighbors, 0].max)
    items = items.map{ |i,s| s>0 ? "#{i}:#{format_score(s, max_precision)}" : nil }
    items.compact * "|"
  end

  # Renders a score cut to max_precision+1 characters, as before, with two
  # safeguards: scientific notation is expanded first (cutting "1.0e-05" would
  # read back as 1.0) and the integer part is never cut short.
  def format_score(score, max_precision)
    text = score.to_s
    text = format("%.#{max_precision}f", score) if text.include?("e")
    whole_digits = text.index(".") || text.length
    text[0..[max_precision, whole_digits - 1].max]
  end

  # Splits "id:score" at the last colon, because that is where serialize_item
  # puts the score; an entry without a colon gets the score 0.0.
  def parse_entry(entry)
    neighbor_id, separator, score = entry.rpartition(":")
    separator.empty? ? [entry, 0.0] : [neighbor_id, score.to_f]
  end

end
@@ -0,0 +1,45 @@
1
# Symmetric sparse matrix kept in a single redis hash: the cells (x, y) and
# (y, x) share one field and a cell set to 0 is deleted.
class Recommendify::SparseMatrix

  def initialize(opts={})
    @opts = opts
  end

  # Name of the redis hash holding the matrix.
  def redis_key
    [@opts.fetch(:redis_prefix), @opts.fetch(:key)].join(":")
  end

  # Value of the cell as a float; a missing cell reads as 0.0.
  def [](x,y)
    k_get(key(x,y))
  end

  # Stores +v+ in the cell, or removes the cell when +v+ is 0.
  def []=(x,y,v)
    v == 0 ? k_del(key(x,y)) : k_set(key(x,y), v)
  end

  # Increments the cell by one.
  def incr(x,y)
    k_incr(key(x,y))
  end

  private

  # Order-independent field name of the cell. The coordinates are compared as
  # strings, so (2, 10) and ("2", "10") address the same cell and mixed types
  # no longer fail to sort.
  def key(x,y)
    [x.to_s, y.to_s].sort.join(":")
  end

  def k_set(key, val)
    Recommendify.redis.hset(redis_key, key, val)
  end

  def k_del(key)
    Recommendify.redis.hdel(redis_key, key)
  end

  def k_get(key)
    Recommendify.redis.hget(redis_key, key).to_f
  end

  def k_incr(key)
    Recommendify.redis.hincrby(redis_key, key, 1)
  end

end
@@ -0,0 +1,161 @@
1
+ require ::File.expand_path('../spec_helper', __FILE__)
2
+
3
describe Recommendify::Base do

  # Registers a jaccard input matrix under +key+ (plus optional extra options).
  def jaccard_input(key, extra_opts = {})
    Recommendify::Base.input_matrix(key, { :similarity_func => :jaccard }.merge(extra_opts))
  end

  # Registers a :test input matrix under +key+ that reports +item_ids+.
  def test_input(key, item_ids)
    Recommendify::Base.input_matrix(key, :similarity_func => :test, :all_items => item_ids)
  end

  # Expects exactly +times+ similarity lookups for "fnorditem" answering +result+.
  def expect_lookups(recommender, times, result)
    recommender.similarity_matrix.should_receive(:[]).exactly(times).times.with("fnorditem").and_return(result)
  end

  # Neighbor hash shared by the for(item_id) examples.
  def two_neighbors
    { :fooitem => 0.4, :baritem => 1.5 }
  end

  # Every example starts from an empty redis and an unconfigured Base class.
  before(:each) do
    flush_redis!
    Recommendify::Base.class_variable_set(:@@max_neighbors, nil)
    Recommendify::Base.class_variable_set(:@@input_matrices, {})
  end

  describe "configuration" do

    it "should return default max_neighbors if not configured" do
      Recommendify::DEFAULT_MAX_NEIGHBORS.should == 50
      recommender = Recommendify::Base.new
      recommender.max_neighbors.should == 50
    end

    it "should remember max_neighbors if configured" do
      Recommendify::Base.max_neighbors(23)
      recommender = Recommendify::Base.new
      recommender.max_neighbors.should == 23
    end

    it "should add an input_matrix by 'key'" do
      jaccard_input(:myinput)
      registered = Recommendify::Base.class_variable_get(:@@input_matrices)
      registered.keys.should == [:myinput]
    end

    it "should retrieve an input_matrix on a new instance" do
      jaccard_input(:myinput)
      recommender = Recommendify::Base.new
      lambda{ recommender.myinput }.should_not raise_error
    end

    it "should retrieve an input_matrix on a new instance and correctly overload respond_to?" do
      jaccard_input(:myinput)
      recommender = Recommendify::Base.new
      recommender.respond_to?(:process!).should be_true
      recommender.respond_to?(:myinput).should be_true
      recommender.respond_to?(:fnord).should be_false
    end

    it "should retrieve an input_matrix on a new instance and intialize the correct class" do
      jaccard_input(:myinput)
      recommender = Recommendify::Base.new
      recommender.myinput.should be_a(Recommendify::JaccardInputMatrix)
    end

  end

  describe "process_item!" do

    it "should call similarities_for on each input_matrix" do
      jaccard_input(:myfirstinput)
      jaccard_input(:mysecondinput)
      recommender = Recommendify::Base.new
      recommender.myfirstinput.should_receive(:similarities_for).with("fnorditem").and_return([["fooitem",0.5]])
      recommender.mysecondinput.should_receive(:similarities_for).with("fnorditem").and_return([["fooitem",0.5]])
      recommender.similarity_matrix.stub!(:update)
      recommender.process_item!("fnorditem")
    end

    it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix" do
      jaccard_input(:myfirstinput)
      jaccard_input(:mysecondinput)
      recommender = Recommendify::Base.new
      recommender.myfirstinput.should_receive(:similarities_for).and_return([["fooitem",0.5]])
      recommender.mysecondinput.should_receive(:similarities_for).and_return([["fooitem",0.75], ["baritem", 1.0]])
      recommender.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.5]])
      recommender.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.75], ["baritem", 1.0]])
      recommender.process_item!("fnorditem")
    end

    it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix with weight" do
      jaccard_input(:myfirstinput, :weight => 4.0)
      jaccard_input(:mysecondinput)
      recommender = Recommendify::Base.new
      recommender.myfirstinput.should_receive(:similarities_for).and_return([["fooitem",0.5]])
      recommender.mysecondinput.should_receive(:similarities_for).and_return([["fooitem",0.75], ["baritem", 1.0]])
      recommender.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",2.0]])
      recommender.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.75], ["baritem", 1.0]])
      recommender.process_item!("fnorditem")
    end

    it "should retrieve all items from all input matrices" do
      test_input(:anotherinput, ["foo", "bar"])
      test_input(:yetanotherinput, ["fnord", "shmoo"])
      recommender = Recommendify::Base.new
      recommender.all_items.length.should == 4
      recommender.all_items.should include("foo")
      recommender.all_items.should include("bar")
      recommender.all_items.should include("fnord")
      recommender.all_items.should include("shmoo")
    end

    it "should retrieve all items from all input matrices (uniquely)" do
      test_input(:anotherinput, ["foo", "bar"])
      test_input(:yetanotherinput, ["fnord", "bar"])
      recommender = Recommendify::Base.new
      recommender.all_items.length.should == 3
      recommender.all_items.should include("foo")
      recommender.all_items.should include("bar")
      recommender.all_items.should include("fnord")
    end

  end

  describe "process!" do

    it "should call process_item for all input_matrix.all_items's" do
      test_input(:anotherinput, ["foo", "bar"])
      test_input(:yetanotherinput, ["fnord", "shmoo"])
      recommender = Recommendify::Base.new
      recommender.should_receive(:process_item!).exactly(4).times
      recommender.process!
    end

    it "should call process_item for all input_matrix.all_items's (uniquely)" do
      test_input(:anotherinput, ["foo", "bar"])
      test_input(:yetanotherinput, ["fnord", "bar"])
      recommender = Recommendify::Base.new
      recommender.should_receive(:process_item!).exactly(3).times
      recommender.process!
    end

  end

  describe "for(item_id)" do

    it "should retrieve the n-most similar neighbors" do
      recommender = Recommendify::Base.new
      expect_lookups(recommender, 1, two_neighbors)
      recommender.for("fnorditem").length.should == 2
    end

    it "should retrieve the n-most similar neighbors as Recommendify::Neighbor objects" do
      recommender = Recommendify::Base.new
      expect_lookups(recommender, 2, two_neighbors)
      recommender.for("fnorditem").first.should be_a(Recommendify::Neighbor)
      recommender.for("fnorditem").last.should be_a(Recommendify::Neighbor)
    end

    it "should retrieve the n-most similar neighbors in the correct order" do
      recommender = Recommendify::Base.new
      expect_lookups(recommender, 4, two_neighbors)
      recommender.for("fnorditem").first.similarity.should == 1.5
      recommender.for("fnorditem").first.item_id.should == "baritem"
      recommender.for("fnorditem").last.similarity.should == 0.4
      recommender.for("fnorditem").last.item_id.should == "fooitem"
    end

    it "should return an empty array if the item if no neighbors were found" do
      recommender = Recommendify::Base.new
      expect_lookups(recommender, 1, {})
      recommender.for("fnorditem").should == []
    end

  end

end