recommendify 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/doc/example.rb +3 -1
- data/lib/recommendify/base.rb +6 -0
- data/lib/recommendify/cc_matrix.rb +5 -0
- data/lib/recommendify/input_matrix.rb +5 -0
- data/lib/recommendify/sparse_matrix.rb +8 -0
- data/recommendify.gemspec +1 -1
- data/spec/base_spec.rb +17 -0
- data/spec/cc_matrix_shared.rb +21 -0
- metadata +1 -1
data/README.md
CHANGED
@@ -86,7 +86,7 @@ recommender.for("item23")
|
|
86
86
|
|
87
87
|
# remove "item23" from the similarity matrix and the input matrices. you should
|
88
88
|
# do this if your items 'expire', since it will speed up the calculation
|
89
|
-
recommender.
|
89
|
+
recommender.delete_item!("item23")
|
90
90
|
```
|
91
91
|
|
92
92
|
### how it works
|
@@ -114,7 +114,7 @@ These recommendations were calculated from 2,3mb "profile visit"-data (taken fro
|
|
114
114
|
|
115
115
|
full snippet: http://falbala.23loc.com/~paul/recommendify_out_1.html
|
116
116
|
|
117
|
-
Initially processing the 120.047 `visitor_id->profile_id` pairs currently takes around half an hour
|
117
|
+
Initially processing the 120.047 `visitor_id->profile_id` pairs currently takes around half an hour with the ruby-only implementation and ~130 seconds with the native/c implementation on a single core. It creates a 24.1mb hashtable in redis (with user_rows truncated to at most 100 items). In another real data set with very short user rows (purchase/payment data) it used only 3.4mb for 90k items with very good results. You can try this for yourself; the complete data and code are in `doc/example.rb` and `doc/example_data.csv`.
|
118
118
|
|
119
119
|
|
120
120
|
|
data/doc/example.rb
CHANGED
data/lib/recommendify/base.rb
CHANGED
@@ -29,6 +29,11 @@ module Recommendify::CCMatrix
|
|
29
29
|
Recommendify.redis.hkeys(redis_key(:items))
|
30
30
|
end
|
31
31
|
|
32
|
+
def delete_item(item_id)
|
33
|
+
Recommendify.redis.hdel(redis_key(:items), item_id)
|
34
|
+
@ccmatrix.send(:k_delall, item_id)
|
35
|
+
end
|
36
|
+
|
32
37
|
private
|
33
38
|
|
34
39
|
def all_pairs(keys)
|
@@ -42,4 +42,12 @@ private
|
|
42
42
|
Recommendify.redis.hincrby(redis_key, key, 1)
|
43
43
|
end
|
44
44
|
|
45
|
+
# OPTIMIZE: use scripting/lua in redis 2.6
|
46
|
+
def k_delall(key)
|
47
|
+
Recommendify.redis.hkeys(redis_key).each do |iikey|
|
48
|
+
next unless iikey.split(":").include?(key)
|
49
|
+
Recommendify.redis.hdel(redis_key, iikey)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
45
53
|
end
|
data/recommendify.gemspec
CHANGED
data/spec/base_spec.rb
CHANGED
@@ -158,4 +158,21 @@ describe Recommendify::Base do
|
|
158
158
|
|
159
159
|
end
|
160
160
|
|
161
|
+
describe "delete_item!" do
|
162
|
+
|
163
|
+
it "should call delete_item on each input_matrix" do
|
164
|
+
Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
|
165
|
+
Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
|
166
|
+
sm = Recommendify::Base.new
|
167
|
+
sm.myfirstinput.should_receive(:delete_item).with("fnorditem")
|
168
|
+
sm.mysecondinput.should_receive(:delete_item).with("fnorditem")
|
169
|
+
sm.delete_item!("fnorditem")
|
170
|
+
end
|
171
|
+
|
172
|
+
it "should delete the item from the similarity matrix"
|
173
|
+
|
174
|
+
it "should delete all occurences of the item in other similarity sets from the similarity matrix"
|
175
|
+
|
176
|
+
end
|
177
|
+
|
161
178
|
end
|
data/spec/cc_matrix_shared.rb
CHANGED
@@ -51,4 +51,25 @@ share_examples_for Recommendify::CCMatrix do
|
|
51
51
|
@matrix.all_items.should include("wurst")
|
52
52
|
end
|
53
53
|
|
54
|
+
it "should delete all item<->item pairs on item deletion" do
|
55
|
+
@matrix.ccmatrix["foo", "fnord"] = 2
|
56
|
+
@matrix.add_set("user123", ["foo", "bar", "fnord"])
|
57
|
+
@matrix.add_set("user456", ["fnord", "blubb"])
|
58
|
+
@matrix.ccmatrix["bar", "foo"].should == 1
|
59
|
+
@matrix.ccmatrix["foo", "fnord"].should == 3
|
60
|
+
@matrix.ccmatrix["blubb", "fnord"].should == 1
|
61
|
+
@matrix.delete_item("fnord")
|
62
|
+
@matrix.ccmatrix["bar", "foo"].should == 1
|
63
|
+
@matrix.ccmatrix["foo", "fnord"].should == 0
|
64
|
+
@matrix.ccmatrix["blubb", "fnord"].should == 0
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should delete the item count on deletion" do
|
68
|
+
@matrix.add_set("user123", ["foo", "bar", "fnord"])
|
69
|
+
@matrix.add_set("user456", ["fnord", "blubb"])
|
70
|
+
@matrix.send(:item_count, "fnord").should == 2
|
71
|
+
@matrix.delete_item("fnord")
|
72
|
+
@matrix.send(:item_count, "fnord").should == 0
|
73
|
+
end
|
74
|
+
|
54
75
|
end
|