recommendify 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ require "recommendify/recommendify"
2
+ require "recommendify/sparse_matrix"
3
+ require "recommendify/cc_matrix"
4
+ require "recommendify/similarity_matrix"
5
+ require "recommendify/input_matrix"
6
+ require "recommendify/jaccard_input_matrix"
7
+ require "recommendify/cosine_input_matrix"
8
+ require "recommendify/base"
9
+ require "recommendify/neighbor"
@@ -0,0 +1,80 @@
1
# Entry point of a recommender.
#
# Input matrices are declared at class level with +input_matrix+ and the
# neighbor limit with +max_neighbors+. Each instance builds one
# Recommendify::InputMatrix per declared key (reachable as a method named
# after the key) plus one Recommendify::SimilarityMatrix that collects the
# weighted similarities.
class Recommendify::Base

  attr_reader :similarity_matrix, :input_matrices

  # NOTE: class variables are shared between this class and its subclasses.
  @@max_neighbors = nil
  @@input_matrices = {}

  # Reads the configured neighbor limit when called without an argument
  # (nil if never configured); stores and returns +n+ otherwise.
  def self.max_neighbors(n=nil)
    return @@max_neighbors unless n
    @@max_neighbors = n
  end

  # Registers the options hash +opts+ for the input matrix named +key+.
  def self.input_matrix(key, opts)
    @@input_matrices[key] = opts
  end

  # Returns the hash of registered input matrix options, keyed by name.
  def self.input_matrices
    @@input_matrices
  end

  # Instantiates every registered input matrix and the similarity matrix.
  def initialize
    @input_matrices = Hash[self.class.input_matrices.map{ |key, opts|
      # Non-destructive merge: the class-level configuration hash must not be
      # modified as a side effect of creating an instance.
      matrix_opts = opts.merge(:key => key, :redis_prefix => redis_prefix)
      [ key, Recommendify::InputMatrix.create(matrix_opts) ]
    }]
    @similarity_matrix = Recommendify::SimilarityMatrix.new(
      :max_neighbors => max_neighbors,
      :key => :similarities,
      :redis_prefix => redis_prefix
    )
  end

  # Prefix used for every redis key written by this recommender.
  def redis_prefix
    "recommendify"
  end

  # The class-level limit, or Recommendify::DEFAULT_MAX_NEIGHBORS if unset.
  def max_neighbors
    self.class.max_neighbors || Recommendify::DEFAULT_MAX_NEIGHBORS
  end

  # Exposes each input matrix as a method named after its key. Anything else
  # is delegated to +super+, which raises the regular NoMethodError.
  def method_missing(method, *args, &block)
    if input_matrix_key?(method)
      @input_matrices[method]
    else
      super
    end
  end

  # Keeps respond_to? (with or without the include_private flag) and
  # Object#method consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    input_matrix_key?(method) || super
  end

  # Unique item ids known to any of the input matrices.
  def all_items
    @input_matrices.map{ |_key, matrix| matrix.all_items }.flatten.uniq
  end

  # Returns the neighbors of +item_id+ as Recommendify::Neighbor objects,
  # sorted with Neighbor#<=>.
  def for(item_id)
    similarity_matrix[item_id].map do |neighbor_id, similarity|
      Recommendify::Neighbor.new(
        :item_id => neighbor_id,
        :similarity => similarity
      )
    end.sort
  end

  # Runs process_item! once for every item returned by all_items.
  def process!
    all_items.each{ |item_id| process_item!(item_id) }
  end

  # Queues the similarities of +item_id+ from every input matrix (each score
  # multiplied by that matrix's weight) and then commits the item.
  def process_item!(item_id)
    input_matrices.each do |_key, matrix|
      weight = matrix.weight
      neighbors = matrix.similarities_for(item_id).map do |neighbor_id, score|
        [neighbor_id, score * weight]
      end
      similarity_matrix.update(item_id, neighbors)
    end
    similarity_matrix.commit_item!(item_id)
  end

  private

  # True when +name+ is the key of an instantiated input matrix. Guards
  # against being called before @input_matrices has been assigned.
  def input_matrix_key?(name)
    !@input_matrices.nil? && @input_matrices.has_key?(name)
  end

end
@@ -0,0 +1,46 @@
1
# Mixin that maintains a co-occurrence matrix in redis: a hash of per-item
# counts under redis_key(:items) plus a Recommendify::SparseMatrix counting
# how often two items appeared in the same set. The including class must
# provide @opts (with :redis_prefix and :key) and redis_key.
module Recommendify::CCMatrix

  # Lazily created sparse matrix holding the pair counts.
  def ccmatrix
    @ccmatrix ||= Recommendify::SparseMatrix.new(
      :redis_prefix => @opts.fetch(:redis_prefix),
      :key => [@opts.fetch(:key), :ccmatrix].join(":")
    )
  end

  # Increments the count of every item in +item_ids+ and the co-occurrence
  # count of every unordered pair of distinct items.
  # Returns the array of results of the pair increments.
  def add_set(set_id, item_ids)
    item_ids.each do |item_id|
      item_count_incr(item_id)
    end
    # Pairs stay as two-element arrays; joining them with ":" and splitting
    # again would mangle any item id that itself contains a ":".
    item_pairs(item_ids).map do |i1, i2|
      ccmatrix.incr(i1, i2)
    end
  end

  def add_single(set_id, item_id, other_item_ids)
    # todo: add single item to set after set was added (incrementally)
    # item_count_incr(item_id)
    # other_item_ids.each do |other_item|
    #   ccmatrix.incr(item_id, other_item)
    # end
  end

  # All item ids that have a count stored under redis_key(:items).
  def all_items
    Recommendify.redis.hkeys(redis_key(:items))
  end

private

  # Unordered pairs of distinct item ids as [id1, id2] string arrays.
  def item_pairs(item_ids)
    item_ids.map{ |id| id.to_s }.uniq.combination(2).to_a
  end

  # Unordered pairs of distinct keys, each rendered as "smaller:larger".
  # No longer used by add_set; kept so existing callers of the string form
  # continue to work.
  def all_pairs(keys)
    keys.uniq.combination(2).map{ |pair| pair.sort.join(":") }.uniq
  end

  def item_count_incr(key)
    Recommendify.redis.hincrby(redis_key(:items), key, 1)
  end

  def item_count(key)
    Recommendify.redis.hget(redis_key(:items), key).to_i
  end

end
@@ -0,0 +1,7 @@
1
# Input matrix selected by :similarity_func => :cosine. It only mixes in the
# co-occurrence bookkeeping of Recommendify::CCMatrix; no similarity
# calculation is defined here, so similarity / similarities_for fall back to
# the Recommendify::InputMatrix versions.
class Recommendify::CosineInputMatrix < Recommendify::InputMatrix
  include Recommendify::CCMatrix

  # here be dragons ;)
end
@@ -0,0 +1,47 @@
1
# Abstract base class of all input matrices. Concrete subclasses are named
# "<Func>InputMatrix" inside the Recommendify namespace and are picked by
# InputMatrix.create from the :similarity_func option.
class Recommendify::InputMatrix

  # Builds the input matrix class selected by opts[:similarity_func]
  # (e.g. :jaccard => Recommendify::JaccardInputMatrix) and instantiates it
  # with +opts+.
  #
  # Raises ArgumentError when :similarity_func is missing or empty, instead
  # of failing later with an unrelated error.
  def self.create(opts)
    similarity_func = opts[:similarity_func]
    if similarity_func.nil? || similarity_func.to_s.empty?
      raise ArgumentError, "missing :similarity_func option"
    end
    klass = "#{Recommendify.capitalize(similarity_func)}InputMatrix"
    Recommendify.constantize(klass.intern).new(opts)
  end

  def initialize(opts)
    @opts = opts
  end

  # Redis key of this matrix: "<redis_prefix>:<key>[:<append>...]".
  # Both :redis_prefix and :key are required options.
  def redis_key(append=nil)
    [@opts.fetch(:redis_prefix), @opts.fetch(:key), append].flatten.compact.join(":")
  end

  # Weight of this matrix as a float; defaults to 1.0.
  def weight
    (@opts[:weight] || 1).to_f
  end

  # add a set of item_ids to the matrix
  def add_set(set_id, item_ids)
    raise "implemented in subclass"
  end

  # add a single item to a set of item_ids to the matrix
  def add_single(set_id, item_id, other_item_ids)
    raise "implemented in subclass"
  end

  # calculate the similarity between item1 and item2 (0.0-1.0)
  def similarity(item1, item2)
    raise "implemented in subclass"
  end

  # calculate all similarities to other items in the matrix for item1
  def similarities_for(item1)
    # return => [ ["item23", 0.6], ["item42", 0.23], (...) ]
    raise "implemented in subclass"
  end

  # retrieve all item_ids in the matrix
  def all_items
    # return => [ "item23", "item42", "item17", (...) ]
    raise "implemented in subclass"
  end

end
@@ -0,0 +1,35 @@
1
# Input matrix that scores two items with the Jaccard index, computed from
# the counts kept by Recommendify::CCMatrix:
#   co-occurrences / (count(item1) + count(item2) - co-occurrences)
class Recommendify::JaccardInputMatrix < Recommendify::InputMatrix

  include Recommendify::CCMatrix

  def initialize(opts={})
    super(opts)
  end

  # Jaccard similarity of item1 and item2 based on the stored counts.
  def similarity(item1, item2)
    calculate_jaccard_cached(item1, item2)
  end

  # Returns [[item2, similarity], ...] for every known item except item1.
  #
  # optimize: get all item-counts and the cc-row with 2 redis hmgets.
  # optimize: don't return more than sm.max_neighbors items (truncate set while collecting)
  def similarities_for(item1)
    # todo: optimize native. execute with own redis conn and write top K to stdout
    # native_output = %x{recommendify_native jaccard "#{redis_key}" "#{item1}"}
    # return native_output.split("\n").map{ |l| l.split(",") }
    (all_items - [item1]).map do |item2|
      [item2, similarity(item1, item2)]
    end
  end

private

  # Jaccard index from the cached counts. Returns 0.0 when the union is
  # empty (e.g. neither item was ever added) rather than dividing by zero,
  # which would yield NaN.
  def calculate_jaccard_cached(item1, item2)
    val = ccmatrix[item1, item2]
    union = item_count(item1) + item_count(item2) - val
    return 0.0 if union.zero?
    val.to_f / union.to_f
  end

  # Jaccard index of two arrays treated as sets; 0.0 when both are empty.
  def calculate_jaccard(set1, set2)
    union = (set1 | set2).length
    return 0.0 if union.zero?
    (set1 & set2).length.to_f / union.to_f
  end

end
@@ -0,0 +1,19 @@
1
# One recommended item together with its similarity score. Wraps a hash
# that must contain the keys :item_id and :similarity.
class Recommendify::Neighbor

  def initialize(data)
    @data = data
  end

  # The neighbor's item id, always as a String.
  # Raises if the wrapped hash has no :item_id key.
  def item_id
    raw_id = @data.fetch(:item_id)
    raw_id.to_s
  end

  # The similarity score exactly as stored.
  # Raises if the wrapped hash has no :similarity key.
  def similarity
    @data.fetch(:similarity)
  end

  # Inverted comparison: the neighbor with the higher similarity sorts first.
  def <=>(other)
    theirs = other.similarity
    theirs <=> similarity
  end

end
@@ -0,0 +1,25 @@
1
# Top-level namespace. Holds the shared redis connection and two small
# string/constant helpers used to pick input matrix classes.
module Recommendify

  DEFAULT_MAX_NEIGHBORS = 50

  @@redis = nil

  # Sets the redis connection used by every matrix.
  def self.redis=(redis)
    @@redis = redis
  end

  # Returns the configured redis connection.
  # Raises RuntimeError if none has been configured yet.
  def self.redis
    return @@redis unless @@redis.nil?
    raise "redis not configured! - Recommendify.redis = Redis.new"
  end

  # Returns +str_or_sym+ as a String with the first character upcased and
  # the rest downcased (:jaccard => "Jaccard"). Empty input yields "".
  def self.capitalize(str_or_sym)
    str_or_sym.to_s.capitalize
  end

  # Resolves the constant named +klass+ (String or Symbol) directly inside
  # the Recommendify namespace. Uses const_get rather than evaluating a
  # string, so +klass+ can never be executed as code; an invalid or unknown
  # name raises NameError.
  def self.constantize(klass)
    const_get(klass, false)
  end

end
@@ -0,0 +1,61 @@
1
# Stores, per item, its most similar neighbors in one redis hash
# (field = item id, value = "neighbor:score|neighbor:score|...").
# Scores are accumulated in an in-memory write queue by +update+ and written
# to redis by +commit_item!+.
class Recommendify::SimilarityMatrix

  attr_reader :write_queue

  def initialize(opts={})
    @opts = opts
    @write_queue = Hash.new{ |h,k| h[k] = {} }
  end

  # Redis key of this matrix: "<redis_prefix>:<key>[:<append>...]".
  def redis_key(append=nil)
    [@opts.fetch(:redis_prefix), @opts.fetch(:key), append].flatten.compact.join(":")
  end

  # Maximum number of neighbors stored per item.
  def max_neighbors
    @opts[:max_neighbors] || Recommendify::DEFAULT_MAX_NEIGHBORS
  end

  # Adds each [neighbor_id, score] pair to the queued scores of +item_id+;
  # scores for an already queued neighbor are summed.
  def update(item_id, neighbors)
    neighbors.each do |neighbor_id, score|
      if @write_queue[item_id].has_key?(neighbor_id)
        @write_queue[item_id][neighbor_id] += score
      else
        @write_queue[item_id][neighbor_id] = score
      end
    end
  end

  # Returns the neighbor => score hash of +item_id+: the queued (uncommitted)
  # scores if there are any, the committed ones from redis otherwise.
  def [](item_id)
    if @write_queue.has_key?(item_id)
      @write_queue[item_id]
    else
      retrieve_item(item_id)
    end
  end

  # Serializes the queued scores of +item_id+, stores them in redis and
  # removes the item from the write queue.
  def commit_item!(item_id)
    serialized = serialize_item(item_id)
    Recommendify.redis.hset(redis_key, item_id, serialized)
    @write_queue.delete(item_id)
  end

  # Loads the committed neighbors of +item_id+ as a { neighbor_id => score }
  # hash. Returns an empty hash when nothing is stored for the item.
  #
  # optimize: the items are already stored in a sorted fashion. we shouldn't
  # throw away this info by storing them in a hash (and re-sorting later). maybe
  # use activesupport's orderedhash?
  def retrieve_item(item_id)
    data = Recommendify.redis.hget(redis_key, item_id)
    return {} if data.nil?
    Hash[data.split("|").map{ |entry|
      # Split on the LAST ":" so neighbor ids that contain ":" survive.
      neighbor_id, separator, score = entry.rpartition(":")
      separator.empty? ? [entry, 0.0] : [neighbor_id, score.to_f]
    }]
  end

private

  # Renders the queued neighbors of +item_id+ as "id:score|id:score|...",
  # best score first, limited to max_neighbors entries; neighbors whose score
  # is not positive are left out.
  #
  # optimize: implement a better sort. never add more than 50 items to the array
  def serialize_item(item_id, max_precision=5)
    items = @write_queue[item_id].to_a
    items = items.sort{ |a,b| b[1] <=> a[1] }
    # first(n) (unlike items[0..n-1]) yields nothing when the limit is 0
    items = items.first([max_neighbors, 0].max)
    items = items.map{ |i,s| s>0 ? "#{i}:#{format_score(s, max_precision)}" : nil }
    items.compact * "|"
  end

  # Shortens +score+ to at most max_precision+1 characters without ever
  # changing its magnitude: exponent notation ("1.0e-05") is expanded to
  # fixed-point first, and the integer part is never cut.
  def format_score(score, max_precision)
    text = score.to_s
    text = "%.#{max_precision}f" % score if text =~ /e/i
    dot = text.index(".")
    return text if dot.nil?
    text[0..[max_precision, dot - 1].max]
  end

end
@@ -0,0 +1,45 @@
1
# Symmetric two-dimensional matrix kept in a single redis hash. The cell of
# (x, y) and (y, x) is the same hash field, named after the sorted pair
# joined with ":".
class Recommendify::SparseMatrix

  def initialize(opts={})
    @opts = opts
  end

  # Name of the redis hash: "<redis_prefix>:<key>" (both options required).
  def redis_key
    prefix = @opts.fetch(:redis_prefix)
    name = @opts.fetch(:key)
    [prefix, name].join(":")
  end

  # Reads the cell (x, y) as a float.
  def [](x,y)
    cell = key(x,y)
    k_get(cell)
  end

  # Writes +v+ into the cell (x, y); writing 0 deletes the cell instead.
  def []=(x,y,v)
    if v == 0
      k_del(key(x,y))
    else
      k_set(key(x,y), v)
    end
  end

  # Increments the cell (x, y) by one.
  def incr(x,y)
    cell = key(x,y)
    k_incr(cell)
  end

private

  # Hash field of the cell, independent of the argument order.
  def key(x,y)
    ordered = [x,y].sort
    ordered.join(":")
  end

  def k_set(key, val)
    Recommendify.redis.hset(redis_key, key, val)
  end

  def k_del(key)
    Recommendify.redis.hdel(redis_key, key)
  end

  def k_get(key)
    stored = Recommendify.redis.hget(redis_key, key)
    stored.to_f
  end

  def k_incr(key)
    Recommendify.redis.hincrby(redis_key, key, 1)
  end

end
@@ -0,0 +1,161 @@
1
+ require ::File.expand_path('../spec_helper', __FILE__)
2
+
3
+ describe Recommendify::Base do
4
+ # Before each example: flush redis (flush_redis! is defined outside this file, presumably in spec_helper) and reset Base's class-level configuration.
5
+ before(:each) do
6
+ flush_redis!
7
+ Recommendify::Base.class_variable_set(:@@max_neighbors, nil)
8
+ Recommendify::Base.class_variable_set(:@@input_matrices, {})
9
+ end
10
+
11
+ describe "configuration" do
12
+ # Class-level configuration (max_neighbors, input_matrix) and how a new instance exposes it.
13
+ it "should return default max_neighbors if not configured" do
14
+ Recommendify::DEFAULT_MAX_NEIGHBORS.should == 50
15
+ sm = Recommendify::Base.new
16
+ sm.max_neighbors.should == 50
17
+ end
18
+
19
+ it "should remember max_neighbors if configured" do
20
+ Recommendify::Base.max_neighbors(23)
21
+ sm = Recommendify::Base.new
22
+ sm.max_neighbors.should == 23
23
+ end
24
+
25
+ it "should add an input_matrix by 'key'" do
26
+ Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
27
+ Recommendify::Base.class_variable_get(:@@input_matrices).keys.should == [:myinput]
28
+ end
29
+
30
+ it "should retrieve an input_matrix on a new instance" do
31
+ Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
32
+ sm = Recommendify::Base.new
33
+ lambda{ sm.myinput }.should_not raise_error
34
+ end
35
+
36
+ it "should retrieve an input_matrix on a new instance and correctly overload respond_to?" do
37
+ Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
38
+ sm = Recommendify::Base.new
39
+ sm.respond_to?(:process!).should be_true
40
+ sm.respond_to?(:myinput).should be_true
41
+ sm.respond_to?(:fnord).should be_false
42
+ end
43
+
44
+ it "should retrieve an input_matrix on a new instance and intialize the correct class" do
45
+ Recommendify::Base.input_matrix(:myinput, :similarity_func => :jaccard)
46
+ sm = Recommendify::Base.new
47
+ sm.myinput.should be_a(Recommendify::JaccardInputMatrix)
48
+ end
49
+
50
+ end
51
+
52
+ describe "process_item!" do
53
+ # process_item! must pass each input matrix's similarities, multiplied by that matrix's weight, to similarity_matrix.update.
54
+ it "should call similarities_for on each input_matrix" do
55
+ Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
56
+ Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
57
+ sm = Recommendify::Base.new
58
+ sm.myfirstinput.should_receive(:similarities_for).with("fnorditem").and_return([["fooitem",0.5]])
59
+ sm.mysecondinput.should_receive(:similarities_for).with("fnorditem").and_return([["fooitem",0.5]])
60
+ sm.similarity_matrix.stub!(:update)
61
+ sm.process_item!("fnorditem")
62
+ end
63
+
64
+ it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix" do
65
+ Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
66
+ Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
67
+ sm = Recommendify::Base.new
68
+ sm.myfirstinput.should_receive(:similarities_for).and_return([["fooitem",0.5]])
69
+ sm.mysecondinput.should_receive(:similarities_for).and_return([["fooitem",0.75], ["baritem", 1.0]])
70
+ sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.5]])
71
+ sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.75], ["baritem", 1.0]])
72
+ sm.process_item!("fnorditem")
73
+ end
74
+
75
+ it "should call similarities_for on each input_matrix and add all outputs to the similarity matrix with weight" do
76
+ Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard, :weight => 4.0)
77
+ Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
78
+ sm = Recommendify::Base.new
79
+ sm.myfirstinput.should_receive(:similarities_for).and_return([["fooitem",0.5]])
80
+ sm.mysecondinput.should_receive(:similarities_for).and_return([["fooitem",0.75], ["baritem", 1.0]])
81
+ sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",2.0]])
82
+ sm.similarity_matrix.should_receive(:update).with("fnorditem", [["fooitem",0.75], ["baritem", 1.0]])
83
+ sm.process_item!("fnorditem")
84
+ end
85
+ # NOTE(review): :similarity_func => :test resolves to a TestInputMatrix that is not in this file; presumably a spec_helper stub returning opts[:all_items] - confirm.
86
+ it "should retrieve all items from all input matrices" do
87
+ Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
88
+ Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "shmoo"])
89
+ sm = Recommendify::Base.new
90
+ sm.all_items.length.should == 4
91
+ sm.all_items.should include("foo")
92
+ sm.all_items.should include("bar")
93
+ sm.all_items.should include("fnord")
94
+ sm.all_items.should include("shmoo")
95
+ end
96
+
97
+ it "should retrieve all items from all input matrices (uniquely)" do
98
+ Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
99
+ Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "bar"])
100
+ sm = Recommendify::Base.new
101
+ sm.all_items.length.should == 3
102
+ sm.all_items.should include("foo")
103
+ sm.all_items.should include("bar")
104
+ sm.all_items.should include("fnord")
105
+ end
106
+
107
+ end
108
+
109
+ describe "process!" do
110
+ # process! must call process_item! exactly once per unique item id across all input matrices.
111
+ it "should call process_item for all input_matrix.all_items's" do
112
+ Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
113
+ Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "shmoo"])
114
+ sm = Recommendify::Base.new
115
+ sm.should_receive(:process_item!).exactly(4).times
116
+ sm.process!
117
+ end
118
+
119
+ it "should call process_item for all input_matrix.all_items's (uniquely)" do
120
+ Recommendify::Base.input_matrix(:anotherinput, :similarity_func => :test, :all_items => ["foo", "bar"])
121
+ Recommendify::Base.input_matrix(:yetanotherinput, :similarity_func => :test, :all_items => ["fnord", "bar"])
122
+ sm = Recommendify::Base.new
123
+ sm.should_receive(:process_item!).exactly(3).times
124
+ sm.process!
125
+ end
126
+
127
+ end
128
+
129
+ describe "for(item_id)" do
130
+ # for(item_id) must wrap similarity_matrix[item_id] in Recommendify::Neighbor objects, highest similarity first.
131
+ it "should retrieve the n-most similar neighbors" do
132
+ sm = Recommendify::Base.new
133
+ sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
134
+ sm.for("fnorditem").length.should == 2
135
+ end
136
+
137
+ it "should retrieve the n-most similar neighbors as Recommendify::Neighbor objects" do
138
+ sm = Recommendify::Base.new
139
+ sm.similarity_matrix.should_receive(:[]).exactly(2).times.with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
140
+ sm.for("fnorditem").first.should be_a(Recommendify::Neighbor)
141
+ sm.for("fnorditem").last.should be_a(Recommendify::Neighbor)
142
+ end
143
+
144
+ it "should retrieve the n-most similar neighbors in the correct order" do
145
+ sm = Recommendify::Base.new
146
+ sm.similarity_matrix.should_receive(:[]).exactly(4).times.with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
147
+ sm.for("fnorditem").first.similarity.should == 1.5
148
+ sm.for("fnorditem").first.item_id.should == "baritem"
149
+ sm.for("fnorditem").last.similarity.should == 0.4
150
+ sm.for("fnorditem").last.item_id.should == "fooitem"
151
+ end
152
+
153
+ it "should return an empty array if the item if no neighbors were found" do
154
+ sm = Recommendify::Base.new
155
+ sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return({})
156
+ sm.for("fnorditem").should == []
157
+ end
158
+
159
+ end
160
+
161
+ end