recommendify 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +58 -53
- data/lib/recommendify/similarity_matrix.rb +1 -0
- data/recommendify.gemspec +1 -1
- data/spec/base_spec.rb +5 -0
- metadata +43 -37
- data/Gemfile.lock +0 -24
data/README.md
CHANGED
@@ -1,97 +1,102 @@
|
|
1
1
|
recommendify
|
2
2
|
============
|
3
3
|
|
4
|
-
|
4
|
+
_Recommendify is a ruby/redis based recommendation engine_ - The recommendations can be updated/processed incrementally and on multiple hosts. The worker is implemented in plain ruby and native C.
|
5
5
|
|
6
6
|
[ ![Build status - Travis-ci](https://secure.travis-ci.org/paulasmuth/recommendify.png) ](http://travis-ci.org/paulasmuth/recommendify)
|
7
7
|
|
8
|
+
---
|
8
9
|
|
9
|
-
|
10
|
+
#### usecases
|
10
11
|
|
11
|
-
+ "Users that bought this product also bought..."
|
12
|
-
+ "Users that viewed this video also viewed..."
|
13
|
-
+ "Users that
|
12
|
+
+ __"Users that bought this product also bought..."__ from `user_id--bought-->product_id` pairs
|
13
|
+
+ __"Users that viewed this video also viewed..."__ from `user_id--viewed-->video_id` pairs
|
14
|
+
+ __"Users that like this venue also like..."__ from `user_id--likes-->venue_id` pairs
|
14
15
|
|
15
16
|
|
16
|
-
usage
|
17
|
-
-----
|
18
17
|
|
19
|
-
|
18
|
+
synopsis
|
19
|
+
--------
|
20
|
+
|
21
|
+
Your input data (the so-called interaction-sets) should look like this:
|
22
|
+
|
23
|
+
```
|
24
|
+
# FORMAT A: user bought products (select buyerid, productid from sales group_by buyerid)
|
25
|
+
[user23] product5 product42 product17
|
26
|
+
[user42] product8 product16 product5
|
27
|
+
|
28
|
+
# FORMAT B: user watched video (this can be transformed to the upper representation with a map/reduce)
|
29
|
+
user3 -> video3
|
30
|
+
user6 -> video19
|
31
|
+
user3 -> video6
|
32
|
+
user1 -> video42
|
33
|
+
```
|
34
|
+
|
35
|
+
The output data will look like this:
|
20
36
|
|
21
37
|
```
|
22
|
-
#
|
23
|
-
|
24
|
-
|
38
|
+
# similar products based on co-concurrent buys
|
39
|
+
product5 => product17 (0.78), product8 (0.43), product42 (0.31)
|
40
|
+
product17 => product5 (0.36), product8 (0.21), product42 (0.18)
|
25
41
|
|
26
|
-
#
|
27
|
-
|
28
|
-
|
42
|
+
# similar videos based on co-concurrent views
|
43
|
+
video19 => video3 (0.93), video6 (0.56), video42 (0.34)
|
44
|
+
video42 => video19 (0.32), video3 (0.21), video6 (0.08)
|
29
45
|
```
|
30
46
|
|
31
|
-
You can add new interaction-sets to the processor incrementally, but the
|
47
|
+
You can add new interaction-sets to the processor incrementally, but the similarities for changed items have to be re-processed after new interactions were added. You can either re-process all items (recommender.process!) from time to time or keep track of the updates and only process the changed items (recommender.process_item!)
|
48
|
+
|
49
|
+
|
50
|
+
usage
|
51
|
+
-----
|
32
52
|
|
33
53
|
```ruby
|
34
54
|
|
35
55
|
# Our similarity matrix, we calculate the similarity via co-concurrence
|
36
|
-
# of
|
37
|
-
# two `item x item` matrices and the jaccard/cosine similarity measure.
|
56
|
+
# of products in "orders" using the jaccard similarity measure.
|
38
57
|
class MyRecommender < Recommendify::Base
|
39
58
|
|
40
|
-
# store
|
59
|
+
# store only the top fifty neighbors per item
|
41
60
|
max_neighbors 50
|
42
61
|
|
43
|
-
# define an input data set "
|
62
|
+
# define an input data set "order_items". we'll add "order_id->product_id"
|
44
63
|
# pairs to this input and use the jaccard coefficient to retrieve a
|
45
64
|
# "customers that ordered item i1 also ordered item i2" statement and apply
|
46
65
|
# the result to the item<->item similarity matrix with a weight of 5.0
|
47
|
-
input_matrix :order_items,
|
48
|
-
:
|
66
|
+
input_matrix :order_items,
|
67
|
+
# :native => true,
|
68
|
+
:similarity_func => :jaccard,
|
49
69
|
:weight => 5.0
|
50
|
-
|
51
|
-
# define an input data set "like_item_s". we'll add "user_id->item_id"
|
52
|
-
# pairs to this input and use a cosine-based similarity measure to retrieve
|
53
|
-
# a "users that liked item i1 also liked item i2" statement and apply the
|
54
|
-
# result to the item<->item similarity matrix with a weight of 1.0
|
55
|
-
input_matrix :like_items
|
56
|
-
:similarity_func => :cosine,
|
57
|
-
:weight => 1.0
|
58
70
|
|
59
71
|
end
|
60
72
|
|
61
73
|
recommender = MyRecommender.new
|
62
74
|
|
63
|
-
# add `order_id->
|
75
|
+
# add `order_id->product_id` interactions to the order_item_sim input
|
64
76
|
# you can add data incrementally and call RecommendedItem.process! to update
|
65
77
|
# the similarity matrix at any time.
|
66
|
-
recommender.order_items.add_set("order1", ["
|
67
|
-
recommender.order_items.add_set("order2", ["
|
68
|
-
|
69
|
-
# add `user_id->item_id` interactions to the like_time_sim input
|
70
|
-
recommender.like_items.add_set("user1", ["item23", "item65", "item23"])
|
71
|
-
recommender.like_items.add_set("user2", ["item14", "item23"])
|
72
|
-
|
78
|
+
recommender.order_items.add_set("order1", ["product23", "product65", "product23"])
|
79
|
+
recommender.order_items.add_set("order2", ["product14", "product23"])
|
73
80
|
|
74
81
|
# Calculate all elements of the similarity matrix
|
75
82
|
recommender.process!
|
76
83
|
|
77
84
|
# ...or calculate a specific row of the similarity matrix (a specific item)
|
78
85
|
# use this to avoid re-processing the whole matrix after incremental updates
|
79
|
-
recommender.process_item!("
|
86
|
+
recommender.process_item!("product65")
|
80
87
|
|
81
|
-
|
82
|
-
# retrieve similar items to "item23"
|
88
|
+
# retrieve similar products to "product23"
|
83
89
|
recommender.for("product23")
|
84
|
-
=> [ <Recommendify::Neighbor item_id:"
|
85
|
-
|
90
|
+
=> [ <Recommendify::Neighbor item_id:"product65" similarity:0.23>, (...) ]
|
86
91
|
|
87
|
-
# remove "
|
92
|
+
# remove "product23" from the similarity matrix and the input matrices. you should
|
88
93
|
# do this if your items 'expire', since it will speed up the calculation
|
89
|
-
recommender.delete_item!("
|
94
|
+
recommender.delete_item!("product23")
|
90
95
|
```
|
91
96
|
|
92
97
|
### how it works
|
93
98
|
|
94
|
-
Recommendify keeps an incrementally updated `item x item` matrix, the "co-concurrency matrix". This matrix stores the number of times that a combination of two items has appeared in an interaction/preferrence set. The co-concurrence counts are processed with a similarity measure to retrieve another `item x item` similarity matrix, which is used to find the N most similar items for each item. This
|
99
|
+
Recommendify keeps an incrementally updated `item x item` matrix, the "co-concurrency matrix". This matrix stores the number of times that a combination of two items has appeared in an interaction/preference set. The co-concurrence counts are processed with a jaccard similarity measure to retrieve another `item x item` similarity matrix, which is used to find the N most similar items for each item. This is also called "Item-based Collaborative Filtering with binary ratings" (see Miranda, Alipio et al. [1])
|
95
100
|
|
96
101
|
1. Group the input user->item pairs by user-id and store them into interaction sets
|
97
102
|
2. For each item<->item combination in the interaction set increment the respective element in the co-concurrence matrix
|
@@ -103,7 +108,14 @@ Recommendify keeps an incrementally updated `item x item` matrix, the "co-concur
|
|
103
108
|
|
104
109
|
The maximum number of entries in the co-concurrence and similarity matrix is k(n) = (n^2)-(n/2), it grows O(n^2). However, in a real scenario it is very unlikely that all item<->item combinations appear in an interaction set and we use a sparse matrix which will only use memory for elements with a value > 0. The size of the similarity matrix grows O(n).
|
105
110
|
|
111
|
+
### native/fast worker
|
106
112
|
|
113
|
+
After you have compiled the native worker, you can pass the `:native => true` option to the input_matrix. This speeds up processing by at least 10x.
|
114
|
+
|
115
|
+
```
|
116
|
+
cd ~/.rvm/gems/ruby-1.9.3-p0/gems/recommendify-0.2.2/
|
117
|
+
bundle exec rake build_native
|
118
|
+
```
|
107
119
|
|
108
120
|
example
|
109
121
|
-------
|
@@ -145,15 +157,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
145
157
|
|
146
158
|
### todo
|
147
159
|
|
148
|
-
+ rake benchmark CLASS=MySimilarityMatrix
|
149
|
-
+ optimize JaccardInputMatrix
|
150
160
|
+ implement CosineInputMatrix
|
151
|
-
+ implement NativeJaccardInputMatrix (C)
|
152
|
-
+ implement NativeCosineInputMatrix (C)
|
153
|
-
+ todo: remove item (remove from all matrices)
|
154
|
-
+ redis prefix issue
|
155
161
|
+ forbid ':' and '|' in item_ids
|
156
162
|
+ recommendify::base no key part issue
|
157
|
-
+ optimize sparsematrix memory usage (somehow)
|
158
163
|
+ make max_row length configurable
|
159
|
-
|
164
|
+
|
@@ -44,6 +44,7 @@ class Recommendify::SimilarityMatrix
|
|
44
44
|
# use activesupport's orderedhash?
|
45
45
|
def retrieve_item(item_id)
|
46
46
|
data = Recommendify.redis.hget(redis_key, item_id)
|
47
|
+
return {} if data.nil?
|
47
48
|
Hash[data.split("|").map{ |i| (k,s=i.split(":")) && [k,s.to_f] }]
|
48
49
|
end
|
49
50
|
|
data/recommendify.gemspec
CHANGED
data/spec/base_spec.rb
CHANGED
@@ -133,6 +133,11 @@ describe Recommendify::Base do
|
|
133
133
|
sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
|
134
134
|
sm.for("fnorditem").length.should == 2
|
135
135
|
end
|
136
|
+
|
137
|
+
it "should not throw exception for non existing items" do
|
138
|
+
sm = Recommendify::Base.new
|
139
|
+
sm.for("not_existing_item").length.should == 0
|
140
|
+
end
|
136
141
|
|
137
142
|
it "should retrieve the n-most similar neighbors as Recommendify::Neighbor objects" do
|
138
143
|
sm = Recommendify::Base.new
|
metadata
CHANGED
@@ -1,47 +1,51 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: recommendify
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.2.3
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Paul Asmuth
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
|
13
|
+
date: 2012-02-25 00:00:00 +01:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
15
17
|
name: redis
|
16
|
-
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
20
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
21
24
|
version: 2.2.2
|
22
25
|
type: :runtime
|
23
|
-
|
24
|
-
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
26
28
|
name: rspec
|
27
|
-
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
28
31
|
none: false
|
29
|
-
requirements:
|
32
|
+
requirements:
|
30
33
|
- - ~>
|
31
|
-
- !ruby/object:Gem::Version
|
34
|
+
- !ruby/object:Gem::Version
|
32
35
|
version: 2.8.0
|
33
36
|
type: :development
|
34
|
-
|
35
|
-
version_requirements: *71976450
|
37
|
+
version_requirements: *id002
|
36
38
|
description: Distributed item-based "Collaborative Filtering" with ruby and redis
|
37
|
-
email:
|
39
|
+
email:
|
38
40
|
- paul@paulasmuth.com
|
39
41
|
executables: []
|
42
|
+
|
40
43
|
extensions: []
|
44
|
+
|
41
45
|
extra_rdoc_files: []
|
42
|
-
|
46
|
+
|
47
|
+
files:
|
43
48
|
- Gemfile
|
44
|
-
- Gemfile.lock
|
45
49
|
- README.md
|
46
50
|
- Rakefile
|
47
51
|
- doc/example.png
|
@@ -76,32 +80,35 @@ files:
|
|
76
80
|
- src/recommendify.c
|
77
81
|
- src/sort.c
|
78
82
|
- src/version.h
|
83
|
+
has_rdoc: true
|
79
84
|
homepage: http://github.com/paulasmuth/recommendify
|
80
|
-
licenses:
|
85
|
+
licenses:
|
81
86
|
- MIT
|
82
87
|
post_install_message:
|
83
88
|
rdoc_options: []
|
84
|
-
|
89
|
+
|
90
|
+
require_paths:
|
85
91
|
- lib
|
86
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
93
|
none: false
|
88
|
-
requirements:
|
89
|
-
- -
|
90
|
-
- !ruby/object:Gem::Version
|
91
|
-
version:
|
92
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: "0"
|
98
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
99
|
none: false
|
94
|
-
requirements:
|
95
|
-
- -
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
version:
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: "0"
|
98
104
|
requirements: []
|
105
|
+
|
99
106
|
rubyforge_project:
|
100
|
-
rubygems_version: 1.
|
107
|
+
rubygems_version: 1.6.2
|
101
108
|
signing_key:
|
102
109
|
specification_version: 3
|
103
110
|
summary: Distributed item-based "Collaborative Filtering" with ruby and redis
|
104
|
-
test_files:
|
111
|
+
test_files:
|
105
112
|
- spec/base_spec.rb
|
106
113
|
- spec/cc_matrix_shared.rb
|
107
114
|
- spec/cosine_input_matrix_spec.rb
|
@@ -112,4 +119,3 @@ test_files:
|
|
112
119
|
- spec/similarity_matrix_spec.rb
|
113
120
|
- spec/sparse_matrix_spec.rb
|
114
121
|
- spec/spec_helper.rb
|
115
|
-
has_rdoc:
|
data/Gemfile.lock
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: http://rubygems.org/
|
3
|
-
specs:
|
4
|
-
diff-lcs (1.1.3)
|
5
|
-
rake (0.9.2.2)
|
6
|
-
redis (2.2.2)
|
7
|
-
rspec (2.8.0)
|
8
|
-
rspec-core (~> 2.8.0)
|
9
|
-
rspec-expectations (~> 2.8.0)
|
10
|
-
rspec-mocks (~> 2.8.0)
|
11
|
-
rspec-core (2.8.0)
|
12
|
-
rspec-expectations (2.8.0)
|
13
|
-
diff-lcs (~> 1.1.2)
|
14
|
-
rspec-mocks (2.8.0)
|
15
|
-
yard (0.7.4)
|
16
|
-
|
17
|
-
PLATFORMS
|
18
|
-
ruby
|
19
|
-
|
20
|
-
DEPENDENCIES
|
21
|
-
rake
|
22
|
-
redis
|
23
|
-
rspec
|
24
|
-
yard
|