recommendify 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +58 -53
- data/lib/recommendify/similarity_matrix.rb +1 -0
- data/recommendify.gemspec +1 -1
- data/spec/base_spec.rb +5 -0
- metadata +43 -37
- data/Gemfile.lock +0 -24
data/README.md
CHANGED
@@ -1,97 +1,102 @@
|
|
1
1
|
recommendify
|
2
2
|
============
|
3
3
|
|
4
|
-
|
4
|
+
_Recommendify is a ruby/redis based recommendation engine_ - The recommendations can be updated/processed incrementally and on multiple hosts. The worker is implemented in plain ruby and native C.
|
5
5
|
|
6
6
|
[  ](http://travis-ci.org/paulasmuth/recommendify)
|
7
7
|
|
8
|
+
---
|
8
9
|
|
9
|
-
|
10
|
+
#### usecases
|
10
11
|
|
11
|
-
+ "Users that bought this product also bought..."
|
12
|
-
+ "Users that viewed this video also viewed..."
|
13
|
-
+ "Users that
|
12
|
+
+ __"Users that bought this product also bought..."__ from `user_id--bought-->product_id` pairs
|
13
|
+
+ __"Users that viewed this video also viewed..."__ from `user_id--viewed-->video_id` pairs
|
14
|
+
+ __"Users that like this venue also like..."__ from `user_id--likes-->venue_id` pairs
|
14
15
|
|
15
16
|
|
16
|
-
usage
|
17
|
-
-----
|
18
17
|
|
19
|
-
|
18
|
+
synopsis
|
19
|
+
--------
|
20
|
+
|
21
|
+
Your input data (the so-called interaction-sets) should look like this:
|
22
|
+
|
23
|
+
```
|
24
|
+
# FORMAT A: user bought products (select buyerid, productid from sales group_by buyerid)
|
25
|
+
[user23] product5 product42 product17
|
26
|
+
[user42] product8 product16 product5
|
27
|
+
|
28
|
+
# FORMAT B: user watched video (this can be transformed to the upper representation with a map/reduce)
|
29
|
+
user3 -> video3
|
30
|
+
user6 -> video19
|
31
|
+
user3 -> video6
|
32
|
+
user1 -> video42
|
33
|
+
```
|
34
|
+
|
35
|
+
The output data will look like this:
|
20
36
|
|
21
37
|
```
|
22
|
-
#
|
23
|
-
|
24
|
-
|
38
|
+
# similar products based on co-concurrent buys
|
39
|
+
product5 => product17 (0.78), product8 (0.43), product42 (0.31)
|
40
|
+
product17 => product5 (0.36), product8 (0.21), product42 (0.18)
|
25
41
|
|
26
|
-
#
|
27
|
-
|
28
|
-
|
42
|
+
# similar videos based on co-concurrent views
|
43
|
+
video19 => video3 (0.93), video6 (0.56), video42 (0.34)
|
44
|
+
video42 => video19 (0.32), video3 (0.21), video6 (0.08)
|
29
45
|
```
|
30
46
|
|
31
|
-
You can add new interaction-sets to the processor incrementally, but the
|
47
|
+
You can add new interaction-sets to the processor incrementally, but the similarities for changed items have to be re-processed after new interactions were added. You can either re-process all items (recommender.process!) from time to time or keep track of the updates and only process the changed items (recommender.process_item!)
|
48
|
+
|
49
|
+
|
50
|
+
usage
|
51
|
+
-----
|
32
52
|
|
33
53
|
```ruby
|
34
54
|
|
35
55
|
# Our similarity matrix, we calculate the similarity via co-concurrence
|
36
|
-
# of
|
37
|
-
# two `item x item` matrices and the jaccard/cosine similarity measure.
|
56
|
+
# of products in "orders" using the jaccard similarity measure.
|
38
57
|
class MyRecommender < Recommendify::Base
|
39
58
|
|
40
|
-
# store
|
59
|
+
# store only the top fifty neighbors per item
|
41
60
|
max_neighbors 50
|
42
61
|
|
43
|
-
# define an input data set "
|
62
|
+
# define an input data set "order_items". we'll add "order_id->product_id"
|
44
63
|
# pairs to this input and use the jaccard coefficient to retrieve a
|
45
64
|
# "customers that ordered item i1 also ordered item i2" statement and apply
|
46
65
|
# the result to the item<->item similarity matrix with a weight of 5.0
|
47
|
-
input_matrix :order_items,
|
48
|
-
:
|
66
|
+
input_matrix :order_items,
|
67
|
+
# :native => true,
|
68
|
+
:similarity_func => :jaccard,
|
49
69
|
:weight => 5.0
|
50
|
-
|
51
|
-
# define an input data set "like_item_s". we'll add "user_id->item_id"
|
52
|
-
# pairs to this input and use a cosine-based similarity measure to retrieve
|
53
|
-
# a "users that liked item i1 also liked item i2" statement and apply the
|
54
|
-
# result to the item<->item similarity matrix with a weight of 1.0
|
55
|
-
input_matrix :like_items
|
56
|
-
:similarity_func => :cosine,
|
57
|
-
:weight => 1.0
|
58
70
|
|
59
71
|
end
|
60
72
|
|
61
73
|
recommender = MyRecommender.new
|
62
74
|
|
63
|
-
# add `order_id->
|
75
|
+
# add `order_id->product_id` interactions to the order_items input
|
64
76
|
# you can add data incrementally and call recommender.process! to update
|
65
77
|
# the similarity matrix at any time.
|
66
|
-
recommender.order_items.add_set("order1", ["
|
67
|
-
recommender.order_items.add_set("order2", ["
|
68
|
-
|
69
|
-
# add `user_id->item_id` interactions to the like_time_sim input
|
70
|
-
recommender.like_items.add_set("user1", ["item23", "item65", "item23"])
|
71
|
-
recommender.like_items.add_set("user2", ["item14", "item23"])
|
72
|
-
|
78
|
+
recommender.order_items.add_set("order1", ["product23", "product65", "product23"])
|
79
|
+
recommender.order_items.add_set("order2", ["product14", "product23"])
|
73
80
|
|
74
81
|
# Calculate all elements of the similarity matrix
|
75
82
|
recommender.process!
|
76
83
|
|
77
84
|
# ...or calculate a specific row of the similarity matrix (a specific item)
|
78
85
|
# use this to avoid re-processing the whole matrix after incremental updates
|
79
|
-
recommender.process_item!("
|
86
|
+
recommender.process_item!("product65")
|
80
87
|
|
81
|
-
|
82
|
-
# retrieve similar items to "item23"
|
88
|
+
# retrieve similar products to "product23"
|
83
89
|
recommender.for("product23")
|
84
|
-
=> [ <Recommendify::Neighbor item_id:"
|
85
|
-
|
90
|
+
=> [ <Recommendify::Neighbor item_id:"product65" similarity:0.23>, (...) ]
|
86
91
|
|
87
|
-
# remove "
|
92
|
+
# remove "product23" from the similarity matrix and the input matrices. you should
|
88
93
|
# do this if your items 'expire', since it will speed up the calculation
|
89
|
-
recommender.delete_item!("
|
94
|
+
recommender.delete_item!("product23")
|
90
95
|
```
|
91
96
|
|
92
97
|
### how it works
|
93
98
|
|
94
|
-
Recommendify keeps an incrementally updated `item x item` matrix, the "co-concurrency matrix". This matrix stores the number of times that a combination of two items has appeared in an interaction/preferrence set. The co-concurrence counts are processed with a similarity measure to retrieve another `item x item` similarity matrix, which is used to find the N most similar items for each item. This
|
99
|
+
Recommendify keeps an incrementally updated `item x item` matrix, the "co-concurrency matrix". This matrix stores the number of times that a combination of two items has appeared in an interaction/preference set. The co-concurrence counts are processed with a jaccard similarity measure to retrieve another `item x item` similarity matrix, which is used to find the N most similar items for each item. This is also called "Item-based Collaborative Filtering with binary ratings" (see Miranda, Alipio et al. [1])
|
95
100
|
|
96
101
|
1. Group the input user->item pairs by user-id and store them into interaction sets
|
97
102
|
2. For each item<->item combination in the interaction set increment the respective element in the co-concurrence matrix
|
@@ -103,7 +108,14 @@ Recommendify keeps an incrementally updated `item x item` matrix, the "co-concur
|
|
103
108
|
|
104
109
|
The maximum number of entries in the co-concurrence and similarity matrix is k(n) = (n^2)-(n/2), it grows O(n^2). However, in a real scenario it is very unlikely that all item<->item combinations appear in an interaction set and we use a sparse matrix which will only use memory for elements with a value > 0. The size of the similarity matrix grows O(n).
|
105
110
|
|
111
|
+
### native/fast worker
|
106
112
|
|
113
|
+
After you have compiled the native worker, you can pass the `:native => true` option to the input_matrix. This speeds up processing by at least 10x.
|
114
|
+
|
115
|
+
```
|
116
|
+
cd ~/.rvm/gems/ruby-1.9.3-p0/gems/recommendify-0.2.2/
|
117
|
+
bundle exec rake build_native
|
118
|
+
```
|
107
119
|
|
108
120
|
example
|
109
121
|
-------
|
@@ -145,15 +157,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
145
157
|
|
146
158
|
### todo
|
147
159
|
|
148
|
-
+ rake benchmark CLASS=MySimilarityMatrix
|
149
|
-
+ optimize JaccardInputMatrix
|
150
160
|
+ implement CosineInputMatrix
|
151
|
-
+ implement NativeJaccardInputMatrix (C)
|
152
|
-
+ implement NativeCosineInputMatrix (C)
|
153
|
-
+ todo: remove item (remove from all matrices)
|
154
|
-
+ redis prefix issue
|
155
161
|
+ forbid ':' and '|' in item_ids
|
156
162
|
+ recommendify::base no key part issue
|
157
|
-
+ optimize sparsematrix memory usage (somehow)
|
158
163
|
+ make max_row length configurable
|
159
|
-
|
164
|
+
|
@@ -44,6 +44,7 @@ class Recommendify::SimilarityMatrix
|
|
44
44
|
# use activesupport's orderedhash?
|
45
45
|
def retrieve_item(item_id)
|
46
46
|
data = Recommendify.redis.hget(redis_key, item_id)
|
47
|
+
return {} if data.nil?
|
47
48
|
Hash[data.split("|").map{ |i| (k,s=i.split(":")) && [k,s.to_f] }]
|
48
49
|
end
|
49
50
|
|
data/recommendify.gemspec
CHANGED
data/spec/base_spec.rb
CHANGED
@@ -133,6 +133,11 @@ describe Recommendify::Base do
|
|
133
133
|
sm.similarity_matrix.should_receive(:[]).with("fnorditem").and_return({:fooitem => 0.4, :baritem => 1.5})
|
134
134
|
sm.for("fnorditem").length.should == 2
|
135
135
|
end
|
136
|
+
|
137
|
+
it "should not throw exception for non existing items" do
|
138
|
+
sm = Recommendify::Base.new
|
139
|
+
sm.for("not_existing_item").length.should == 0
|
140
|
+
end
|
136
141
|
|
137
142
|
it "should retrieve the n-most similar neighbors as Recommendify::Neighbor objects" do
|
138
143
|
sm = Recommendify::Base.new
|
metadata
CHANGED
@@ -1,47 +1,51 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: recommendify
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.2.3
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Paul Asmuth
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
|
13
|
+
date: 2012-02-25 00:00:00 +01:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
15
17
|
name: redis
|
16
|
-
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
20
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
21
24
|
version: 2.2.2
|
22
25
|
type: :runtime
|
23
|
-
|
24
|
-
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
26
28
|
name: rspec
|
27
|
-
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
28
31
|
none: false
|
29
|
-
requirements:
|
32
|
+
requirements:
|
30
33
|
- - ~>
|
31
|
-
- !ruby/object:Gem::Version
|
34
|
+
- !ruby/object:Gem::Version
|
32
35
|
version: 2.8.0
|
33
36
|
type: :development
|
34
|
-
|
35
|
-
version_requirements: *71976450
|
37
|
+
version_requirements: *id002
|
36
38
|
description: Distributed item-based "Collaborative Filtering" with ruby and redis
|
37
|
-
email:
|
39
|
+
email:
|
38
40
|
- paul@paulasmuth.com
|
39
41
|
executables: []
|
42
|
+
|
40
43
|
extensions: []
|
44
|
+
|
41
45
|
extra_rdoc_files: []
|
42
|
-
|
46
|
+
|
47
|
+
files:
|
43
48
|
- Gemfile
|
44
|
-
- Gemfile.lock
|
45
49
|
- README.md
|
46
50
|
- Rakefile
|
47
51
|
- doc/example.png
|
@@ -76,32 +80,35 @@ files:
|
|
76
80
|
- src/recommendify.c
|
77
81
|
- src/sort.c
|
78
82
|
- src/version.h
|
83
|
+
has_rdoc: true
|
79
84
|
homepage: http://github.com/paulasmuth/recommendify
|
80
|
-
licenses:
|
85
|
+
licenses:
|
81
86
|
- MIT
|
82
87
|
post_install_message:
|
83
88
|
rdoc_options: []
|
84
|
-
|
89
|
+
|
90
|
+
require_paths:
|
85
91
|
- lib
|
86
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
93
|
none: false
|
88
|
-
requirements:
|
89
|
-
- -
|
90
|
-
- !ruby/object:Gem::Version
|
91
|
-
version:
|
92
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: "0"
|
98
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
99
|
none: false
|
94
|
-
requirements:
|
95
|
-
- -
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
version:
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: "0"
|
98
104
|
requirements: []
|
105
|
+
|
99
106
|
rubyforge_project:
|
100
|
-
rubygems_version: 1.
|
107
|
+
rubygems_version: 1.6.2
|
101
108
|
signing_key:
|
102
109
|
specification_version: 3
|
103
110
|
summary: Distributed item-based "Collaborative Filtering" with ruby and redis
|
104
|
-
test_files:
|
111
|
+
test_files:
|
105
112
|
- spec/base_spec.rb
|
106
113
|
- spec/cc_matrix_shared.rb
|
107
114
|
- spec/cosine_input_matrix_spec.rb
|
@@ -112,4 +119,3 @@ test_files:
|
|
112
119
|
- spec/similarity_matrix_spec.rb
|
113
120
|
- spec/sparse_matrix_spec.rb
|
114
121
|
- spec/spec_helper.rb
|
115
|
-
has_rdoc:
|
data/Gemfile.lock
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: http://rubygems.org/
|
3
|
-
specs:
|
4
|
-
diff-lcs (1.1.3)
|
5
|
-
rake (0.9.2.2)
|
6
|
-
redis (2.2.2)
|
7
|
-
rspec (2.8.0)
|
8
|
-
rspec-core (~> 2.8.0)
|
9
|
-
rspec-expectations (~> 2.8.0)
|
10
|
-
rspec-mocks (~> 2.8.0)
|
11
|
-
rspec-core (2.8.0)
|
12
|
-
rspec-expectations (2.8.0)
|
13
|
-
diff-lcs (~> 1.1.2)
|
14
|
-
rspec-mocks (2.8.0)
|
15
|
-
yard (0.7.4)
|
16
|
-
|
17
|
-
PLATFORMS
|
18
|
-
ruby
|
19
|
-
|
20
|
-
DEPENDENCIES
|
21
|
-
rake
|
22
|
-
redis
|
23
|
-
rspec
|
24
|
-
yard
|