recommendify 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -86,7 +86,7 @@ recommender.for("item23")
86
86
 
87
87
  # remove "item23" from the similarity matrix and the input matrices. you should
88
88
  # do this if your items 'expire', since it will speed up the calculation
89
- recommender.remove_item!("item23")
89
+ recommender.delete_item!("item23")
90
90
  ```
91
91
 
92
92
  ### how it works
@@ -114,7 +114,7 @@ These recommendations were calculated from 2,3mb "profile visit"-data (taken fro
114
114
 
115
115
  full snippet: http://falbala.23loc.com/~paul/recommendify_out_1.html
116
116
 
117
- Initially processing the 120.047 `visitor_id->profile_id` pairs currently takes around half an hour on a single core and creates a 126.64mb hashtable in redis. The high memory usage of >100mb for only 5000 items is due to the very long user rows. If you limit the user rows to 100 items (mahout's default) it shrinks to 31mb for the 5k items from example_data.csv. In another real data set with very short user rows (purchase/payment data) it used only 3.4mb for 90k items with very good results. You can try this for yourself; the complete data and code is in `doc/example.rb` and `doc/example_data.csv`.
117
+ Initially processing the 120.047 `visitor_id->profile_id` pairs currently takes around half an hour with the ruby-only implementation and ~130 seconds with the native/c implementation on a single core. It creates a 24.1mb hashtable in redis (with user rows truncated to at most 100 items). In another real data set with very short user rows (purchase/payment data) it used only 3.4mb for 90k items with very good results. You can try this for yourself; the complete data and code is in `doc/example.rb` and `doc/example_data.csv`.
118
118
 
119
119
 
120
120
 
@@ -11,7 +11,9 @@ class UserRecommender < Recommendify::Base
11
11
  #max_neighbors 50
12
12
 
13
13
  input_matrix :visits,
14
- :similarity_func => :jaccard
14
+ :similarity_func => :jaccard,
15
+ :native => true
16
+
15
17
  end
16
18
 
17
19
  recommender = UserRecommender.new
@@ -77,4 +77,10 @@ class Recommendify::Base
77
77
  similarity_matrix.commit_item!(item_id)
78
78
  end
79
79
 
80
+ def delete_item!(item_id)
81
+ input_matrices.map do |k,m|
82
+ m.delete_item(item_id)
83
+ end
84
+ end
85
+
80
86
  end
@@ -29,6 +29,11 @@ module Recommendify::CCMatrix
29
29
  Recommendify.redis.hkeys(redis_key(:items))
30
30
  end
31
31
 
32
+ def delete_item(item_id)
33
+ Recommendify.redis.hdel(redis_key(:items), item_id)
34
+ @ccmatrix.send(:k_delall, item_id)
35
+ end
36
+
32
37
  private
33
38
 
34
39
  def all_pairs(keys)
@@ -44,4 +44,9 @@ class Recommendify::InputMatrix
44
44
  raise "implemented in subclass"
45
45
  end
46
46
 
47
+ # delete item_id from the matrix
48
+ def delete_item(item_id)
49
+ raise "implemented in subclass"
50
+ end
51
+
47
52
  end
@@ -42,4 +42,12 @@ private
42
42
  Recommendify.redis.hincrby(redis_key, key, 1)
43
43
  end
44
44
 
45
+ # OPTIMIZE: use scripting/lua in redis 2.6
46
+ def k_delall(key)
47
+ Recommendify.redis.hkeys(redis_key).each do |iikey|
48
+ next unless iikey.split(":").include?(key)
49
+ Recommendify.redis.hdel(redis_key, iikey)
50
+ end
51
+ end
52
+
45
53
  end
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "recommendify"
6
- s.version = "0.1.0"
6
+ s.version = "0.2.0"
7
7
  s.date = Date.today.to_s
8
8
  s.platform = Gem::Platform::RUBY
9
9
  s.authors = ["Paul Asmuth"]
@@ -158,4 +158,21 @@ describe Recommendify::Base do
158
158
 
159
159
  end
160
160
 
161
+ describe "delete_item!" do
162
+
163
+ it "should call delete_item on each input_matrix" do
164
+ Recommendify::Base.input_matrix(:myfirstinput, :similarity_func => :jaccard)
165
+ Recommendify::Base.input_matrix(:mysecondinput, :similarity_func => :jaccard)
166
+ sm = Recommendify::Base.new
167
+ sm.myfirstinput.should_receive(:delete_item).with("fnorditem")
168
+ sm.mysecondinput.should_receive(:delete_item).with("fnorditem")
169
+ sm.delete_item!("fnorditem")
170
+ end
171
+
172
+ it "should delete the item from the similarity matrix"
173
+
174
+ it "should delete all occurences of the item in other similarity sets from the similarity matrix"
175
+
176
+ end
177
+
161
178
  end
@@ -51,4 +51,25 @@ share_examples_for Recommendify::CCMatrix do
51
51
  @matrix.all_items.should include("wurst")
52
52
  end
53
53
 
54
+ it "should delete all item<->item pairs on item deletion" do
55
+ @matrix.ccmatrix["foo", "fnord"] = 2
56
+ @matrix.add_set("user123", ["foo", "bar", "fnord"])
57
+ @matrix.add_set("user456", ["fnord", "blubb"])
58
+ @matrix.ccmatrix["bar", "foo"].should == 1
59
+ @matrix.ccmatrix["foo", "fnord"].should == 3
60
+ @matrix.ccmatrix["blubb", "fnord"].should == 1
61
+ @matrix.delete_item("fnord")
62
+ @matrix.ccmatrix["bar", "foo"].should == 1
63
+ @matrix.ccmatrix["foo", "fnord"].should == 0
64
+ @matrix.ccmatrix["blubb", "fnord"].should == 0
65
+ end
66
+
67
+ it "should delete the item count on deletion" do
68
+ @matrix.add_set("user123", ["foo", "bar", "fnord"])
69
+ @matrix.add_set("user456", ["fnord", "blubb"])
70
+ @matrix.send(:item_count, "fnord").should == 2
71
+ @matrix.delete_item("fnord")
72
+ @matrix.send(:item_count, "fnord").should == 0
73
+ end
74
+
54
75
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: recommendify
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.0
5
+ version: 0.2.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Paul Asmuth