cmfrec 0.1.2 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE.txt +1 -1
- data/README.md +74 -0
- data/lib/cmfrec.rb +5 -1
- data/lib/cmfrec/recommender.rb +364 -139
- data/lib/cmfrec/version.rb +1 -1
- data/vendor/libcmfrec.arm64.dylib +0 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34e8dc08914cbc418470cd7eb3adf3d33013b786319f4510212c80bf3629f3ca
|
4
|
+
data.tar.gz: c1b91a1f77b4b51a5ca4491376f8a02230ea54873f8c1b2b06f4761d6ddd0686
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3c57734379199196a4e3f51d9ec02b19ef1abac13d57a10ca3c20e9b76c9ee5db4b17d790330d41a9576c2ba28a9eeccafeb5760b54cfdf80a7431368895068
|
7
|
+
data.tar.gz: 5a24a77a6665854abb38916a22e8141a6cae637a51f98e3df3762566f2e73cb60b9bd9a25303df0411ea53dec3211bf7534711dc55c510311620341cbe4e4ac3
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,28 @@
|
|
1
|
+
## 0.1.6 (2021-08-12)
|
2
|
+
|
3
|
+
- Added `user_ids` and `item_ids` methods
|
4
|
+
- Added `user_id` argument to `user_factors`
|
5
|
+
- Added `item_id` argument to `item_factors`
|
6
|
+
- Added `user_id` argument to `user_bias`
|
7
|
+
- Added `item_id` argument to `item_bias`
|
8
|
+
- Added `item_ids` argument to `new_user_recs`
|
9
|
+
- Fixed order for `user_recs`
|
10
|
+
|
11
|
+
## 0.1.5 (2021-08-10)
|
12
|
+
|
13
|
+
- Fixed issue with `user_recs` and `new_user_recs` returning rated items
|
14
|
+
- Fixed error with `new_user_recs`
|
15
|
+
|
16
|
+
## 0.1.4 (2021-02-04)
|
17
|
+
|
18
|
+
- Added support for saving and loading recommenders
|
19
|
+
- Added `similar_users` and `similar_items`
|
20
|
+
- Improved ARM detection
|
21
|
+
|
22
|
+
## 0.1.3 (2020-12-28)
|
23
|
+
|
24
|
+
- Added ARM shared library for Mac
|
25
|
+
|
1
26
|
## 0.1.2 (2020-12-09)
|
2
27
|
|
3
28
|
- Added `load_movielens` method
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -107,6 +107,26 @@ Get recommendations with only side information
|
|
107
107
|
recommender.new_user_recs([], user_info: {cats: 0, dogs: 2})
|
108
108
|
```
|
109
109
|
|
110
|
+
## Similarity
|
111
|
+
|
112
|
+
Add this line to your application’s Gemfile:
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
gem 'ngt'
|
116
|
+
```
|
117
|
+
|
118
|
+
Get similar users
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
recommender.similar_users(user_id)
|
122
|
+
```
|
123
|
+
|
124
|
+
Get similar items - “users who liked this item also liked”
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
recommender.similar_items(item_id)
|
128
|
+
```
|
129
|
+
|
110
130
|
## Examples
|
111
131
|
|
112
132
|
### MovieLens
|
@@ -125,6 +145,35 @@ recommender.fit(ratings.first(80000), user_info: user_info, item_info: item_info
|
|
125
145
|
recommender.predict(ratings.last(20000))
|
126
146
|
```
|
127
147
|
|
148
|
+
### Ahoy
|
149
|
+
|
150
|
+
[Ahoy](https://github.com/ankane/ahoy) is a great source for implicit feedback
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
views = Ahoy::Event.
|
154
|
+
where(name: "Viewed post").
|
155
|
+
group(:user_id).
|
156
|
+
group("properties->>'post_id'"). # postgres syntax
|
157
|
+
count
|
158
|
+
|
159
|
+
data =
|
160
|
+
views.map do |(user_id, post_id), count|
|
161
|
+
{
|
162
|
+
user_id: user_id,
|
163
|
+
item_id: post_id,
|
164
|
+
value: count
|
165
|
+
}
|
166
|
+
end
|
167
|
+
```
|
168
|
+
|
169
|
+
Create a recommender and get recommended posts for a user
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
recommender = Cmfrec::Recommender.new
|
173
|
+
recommender.fit(data)
|
174
|
+
recommender.user_recs(current_user.id)
|
175
|
+
```
|
176
|
+
|
128
177
|
## Options
|
129
178
|
|
130
179
|
Specify the number of factors and epochs
|
@@ -163,8 +212,33 @@ Or a Rover data frame
|
|
163
212
|
Rover.read_csv("ratings.csv")
|
164
213
|
```
|
165
214
|
|
215
|
+
## Storing Recommenders
|
216
|
+
|
217
|
+
Store the recommender
|
218
|
+
|
219
|
+
```ruby
|
220
|
+
bin = Marshal.dump(recommender)
|
221
|
+
File.binwrite("recommender.bin", bin)
|
222
|
+
```
|
223
|
+
|
224
|
+
> You can save it to a file, database, or any other storage system
|
225
|
+
|
226
|
+
Load a recommender
|
227
|
+
|
228
|
+
```ruby
|
229
|
+
bin = File.binread("recommender.bin")
|
230
|
+
recommender = Marshal.load(bin)
|
231
|
+
```
|
232
|
+
|
166
233
|
## Reference
|
167
234
|
|
235
|
+
Get ids
|
236
|
+
|
237
|
+
```ruby
|
238
|
+
recommender.user_ids
|
239
|
+
recommender.item_ids
|
240
|
+
```
|
241
|
+
|
168
242
|
Get the global mean
|
169
243
|
|
170
244
|
```ruby
|
data/lib/cmfrec.rb
CHANGED
@@ -19,7 +19,11 @@ module Cmfrec
|
|
19
19
|
if Gem.win_platform?
|
20
20
|
"cmfrec.dll"
|
21
21
|
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
22
|
-
"
|
22
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm/i
|
23
|
+
"libcmfrec.arm64.dylib"
|
24
|
+
else
|
25
|
+
"libcmfrec.dylib"
|
26
|
+
end
|
23
27
|
else
|
24
28
|
"libcmfrec.so"
|
25
29
|
end
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -11,29 +11,193 @@ module Cmfrec
|
|
11
11
|
item_bias: item_bias,
|
12
12
|
add_implicit_features: add_implicit_features
|
13
13
|
)
|
14
|
+
|
15
|
+
@fit = false
|
16
|
+
@user_map = {}
|
17
|
+
@item_map = {}
|
18
|
+
@user_info_map = {}
|
19
|
+
@item_info_map = {}
|
14
20
|
end
|
15
21
|
|
16
22
|
def fit(train_set, user_info: nil, item_info: nil)
|
23
|
+
reset
|
24
|
+
partial_fit(train_set, user_info: user_info, item_info: item_info)
|
25
|
+
end
|
26
|
+
|
27
|
+
def predict(data)
|
28
|
+
check_fit
|
29
|
+
|
30
|
+
data = to_dataset(data)
|
31
|
+
|
32
|
+
u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
|
33
|
+
i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
|
34
|
+
|
35
|
+
row = int_ptr(u)
|
36
|
+
col = int_ptr(i)
|
37
|
+
n_predict = data.size
|
38
|
+
predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
|
39
|
+
|
40
|
+
if @implicit
|
41
|
+
check_status FFI.predict_X_old_collective_implicit(
|
42
|
+
row, col, predicted, n_predict,
|
43
|
+
@a, @b,
|
44
|
+
@k, @k_user, @k_item, @k_main,
|
45
|
+
@m, @n,
|
46
|
+
@nthreads
|
47
|
+
)
|
48
|
+
else
|
49
|
+
check_status FFI.predict_X_old_collective_explicit(
|
50
|
+
row, col, predicted, n_predict,
|
51
|
+
@a, @bias_a,
|
52
|
+
@b, @bias_b,
|
53
|
+
@global_mean,
|
54
|
+
@k, @k_user, @k_item, @k_main,
|
55
|
+
@m, @n,
|
56
|
+
@nthreads
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
predictions = real_array(predicted)
|
61
|
+
predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
|
62
|
+
predictions
|
63
|
+
end
|
64
|
+
|
65
|
+
def user_recs(user_id, count: 5, item_ids: nil)
|
66
|
+
check_fit
|
67
|
+
user = @user_map[user_id]
|
68
|
+
|
69
|
+
if user
|
70
|
+
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
71
|
+
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
72
|
+
# @rated[user] will be nil for recommenders saved before 0.1.5
|
73
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys, item_ids: item_ids)
|
74
|
+
else
|
75
|
+
# no items if user is unknown
|
76
|
+
# TODO maybe most popular items
|
77
|
+
[]
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def new_user_recs(data, count: 5, user_info: nil, item_ids: nil)
|
82
|
+
check_fit
|
83
|
+
|
84
|
+
a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
|
85
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated, item_ids: item_ids)
|
86
|
+
end
|
87
|
+
|
88
|
+
def user_ids
|
89
|
+
@user_map.keys
|
90
|
+
end
|
91
|
+
|
92
|
+
def item_ids
|
93
|
+
@item_map.keys
|
94
|
+
end
|
95
|
+
|
96
|
+
def user_factors(user_id = nil)
|
97
|
+
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main, user_id, @user_map)
|
98
|
+
end
|
99
|
+
|
100
|
+
def item_factors(item_id = nil)
|
101
|
+
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main, item_id, @item_map)
|
102
|
+
end
|
103
|
+
|
104
|
+
def user_bias(user_id = nil)
|
105
|
+
read_bias(@bias_a, user_id, @user_map) if @bias_a
|
106
|
+
end
|
107
|
+
|
108
|
+
def item_bias(item_id = nil)
|
109
|
+
read_bias(@bias_b, item_id, @item_map) if @bias_b
|
110
|
+
end
|
111
|
+
|
112
|
+
def similar_items(item_id, count: 5)
|
113
|
+
check_fit
|
114
|
+
similar(item_id, @item_map, item_factors, count, item_index)
|
115
|
+
end
|
116
|
+
alias_method :item_recs, :similar_items
|
117
|
+
|
118
|
+
def similar_users(user_id, count: 5)
|
119
|
+
check_fit
|
120
|
+
similar(user_id, @user_map, user_factors, count, user_index)
|
121
|
+
end
|
122
|
+
|
123
|
+
private
|
124
|
+
|
125
|
+
def user_index
|
126
|
+
@user_index ||= create_index(user_factors)
|
127
|
+
end
|
128
|
+
|
129
|
+
def item_index
|
130
|
+
@item_index ||= create_index(item_factors)
|
131
|
+
end
|
132
|
+
|
133
|
+
def create_index(factors)
|
134
|
+
require "ngt"
|
135
|
+
|
136
|
+
index = Ngt::Index.new(@k, distance_type: "Cosine")
|
137
|
+
index.batch_insert(factors)
|
138
|
+
index
|
139
|
+
end
|
140
|
+
|
141
|
+
# TODO include bias
|
142
|
+
def similar(id, map, factors, count, index)
|
143
|
+
i = map[id]
|
144
|
+
if i
|
145
|
+
keys = map.keys
|
146
|
+
result = index.search(factors[i], size: count + 1)[1..-1]
|
147
|
+
result.map do |v|
|
148
|
+
{
|
149
|
+
# ids from batch_insert start at 1 instead of 0
|
150
|
+
item_id: keys[v[:id] - 1],
|
151
|
+
# convert cosine distance to cosine similarity
|
152
|
+
score: 1 - v[:distance]
|
153
|
+
}
|
154
|
+
end
|
155
|
+
else
|
156
|
+
[]
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def reset
|
161
|
+
@fit = false
|
162
|
+
@user_map.clear
|
163
|
+
@item_map.clear
|
164
|
+
@user_info_map.clear
|
165
|
+
@item_info_map.clear
|
166
|
+
@user_index = nil
|
167
|
+
@item_index = nil
|
168
|
+
end
|
169
|
+
|
170
|
+
# TODO resize pointers as needed and reset values for new memory
|
171
|
+
def partial_fit(train_set, user_info: nil, item_info: nil)
|
17
172
|
train_set = to_dataset(train_set)
|
18
173
|
|
19
|
-
@
|
174
|
+
unless @fit
|
175
|
+
@implicit = !train_set.any? { |v| v[:rating] }
|
176
|
+
end
|
177
|
+
|
20
178
|
unless @implicit
|
21
179
|
ratings = train_set.map { |o| o[:rating] }
|
22
180
|
check_ratings(ratings)
|
23
181
|
end
|
24
182
|
|
25
183
|
check_training_set(train_set)
|
26
|
-
|
184
|
+
update_maps(train_set)
|
27
185
|
|
28
186
|
x_row = []
|
29
187
|
x_col = []
|
30
188
|
x_val = []
|
31
189
|
value_key = @implicit ? :value : :rating
|
190
|
+
@rated = Hash.new { |hash, key| hash[key] = {} }
|
32
191
|
train_set.each do |v|
|
33
|
-
|
34
|
-
|
192
|
+
u = @user_map[v[:user_id]]
|
193
|
+
i = @item_map[v[:item_id]]
|
194
|
+
@rated[u][i] = true
|
195
|
+
|
196
|
+
x_row << u
|
197
|
+
x_col << i
|
35
198
|
x_val << (v[value_key] || 1)
|
36
199
|
end
|
200
|
+
@rated.default = nil
|
37
201
|
|
38
202
|
@m = @user_map.size
|
39
203
|
@n = @item_map.size
|
@@ -52,16 +216,14 @@ module Cmfrec
|
|
52
216
|
uu = nil
|
53
217
|
ii = nil
|
54
218
|
|
55
|
-
|
219
|
+
# side info
|
56
220
|
u_row, u_col, u_sp, nnz_u, @m_u, p_ = process_info(user_info, @user_map, @user_info_map, :user_id)
|
57
|
-
|
58
|
-
@item_info_map = {}
|
59
221
|
i_row, i_col, i_sp, nnz_i, @n_i, q = process_info(item_info, @item_map, @item_info_map, :item_id)
|
60
222
|
|
61
223
|
@precompute_for_predictions = false
|
62
224
|
|
63
225
|
# initialize w/ normal distribution
|
64
|
-
reset_values =
|
226
|
+
reset_values = !@fit
|
65
227
|
|
66
228
|
@a = Fiddle::Pointer.malloc([@m, @m_u].max * (@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
67
229
|
@b = Fiddle::Pointer.malloc([@n, @n_i].max * (@k_item + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
@@ -75,16 +237,7 @@ module Cmfrec
|
|
75
237
|
i_colmeans = Fiddle::Pointer.malloc(q * Fiddle::SIZEOF_DOUBLE)
|
76
238
|
|
77
239
|
if @implicit
|
78
|
-
|
79
|
-
@alpha = 1.0
|
80
|
-
@adjust_weight = false # downweight?
|
81
|
-
@apply_log_transf = false
|
82
|
-
|
83
|
-
# different defaults
|
84
|
-
@lambda_ = 1e0
|
85
|
-
@w_user = 10
|
86
|
-
@w_item = 10
|
87
|
-
@finalize_chol = false
|
240
|
+
set_implicit_vars
|
88
241
|
|
89
242
|
args = [
|
90
243
|
@a, @b,
|
@@ -175,104 +328,13 @@ module Cmfrec
|
|
175
328
|
@global_mean = real_array(glob_mean).first
|
176
329
|
end
|
177
330
|
|
178
|
-
@u_colmeans =
|
179
|
-
@i_colmeans = real_array(i_colmeans)
|
180
|
-
@u_colmeans_ptr = u_colmeans
|
181
|
-
|
182
|
-
self
|
183
|
-
end
|
184
|
-
|
185
|
-
def predict(data)
|
186
|
-
check_fit
|
187
|
-
|
188
|
-
data = to_dataset(data)
|
189
|
-
|
190
|
-
u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
|
191
|
-
i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
|
192
|
-
|
193
|
-
row = int_ptr(u)
|
194
|
-
col = int_ptr(i)
|
195
|
-
n_predict = data.size
|
196
|
-
predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
|
197
|
-
|
198
|
-
if @implicit
|
199
|
-
check_status FFI.predict_X_old_collective_implicit(
|
200
|
-
row, col, predicted, n_predict,
|
201
|
-
@a, @b,
|
202
|
-
@k, @k_user, @k_item, @k_main,
|
203
|
-
@m, @n,
|
204
|
-
@nthreads
|
205
|
-
)
|
206
|
-
else
|
207
|
-
check_status FFI.predict_X_old_collective_explicit(
|
208
|
-
row, col, predicted, n_predict,
|
209
|
-
@a, @bias_a,
|
210
|
-
@b, @bias_b,
|
211
|
-
@global_mean,
|
212
|
-
@k, @k_user, @k_item, @k_main,
|
213
|
-
@m, @n,
|
214
|
-
@nthreads
|
215
|
-
)
|
216
|
-
end
|
217
|
-
|
218
|
-
predictions = real_array(predicted)
|
219
|
-
predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
|
220
|
-
predictions
|
221
|
-
end
|
222
|
-
|
223
|
-
def user_recs(user_id, count: 5, item_ids: nil)
|
224
|
-
check_fit
|
225
|
-
user = @user_map[user_id]
|
226
|
-
|
227
|
-
if user
|
228
|
-
if item_ids
|
229
|
-
# remove missing ids
|
230
|
-
item_ids = item_ids.select { |v| @item_map[v] }
|
231
|
-
|
232
|
-
data = item_ids.map { |v| {user_id: user_id, item_id: v} }
|
233
|
-
scores = predict(data)
|
234
|
-
|
235
|
-
item_ids.zip(scores).map do |item_id, score|
|
236
|
-
{item_id: item_id, score: score}
|
237
|
-
end
|
238
|
-
else
|
239
|
-
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
240
|
-
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
241
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
242
|
-
end
|
243
|
-
else
|
244
|
-
# no items if user is unknown
|
245
|
-
# TODO maybe most popular items
|
246
|
-
[]
|
247
|
-
end
|
248
|
-
end
|
249
|
-
|
250
|
-
# TODO add item_ids
|
251
|
-
def new_user_recs(data, count: 5, user_info: nil)
|
252
|
-
check_fit
|
253
|
-
|
254
|
-
a_vec, a_bias = factors_warm(data, user_info: user_info)
|
255
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
256
|
-
end
|
257
|
-
|
258
|
-
def user_factors
|
259
|
-
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
|
260
|
-
end
|
261
|
-
|
262
|
-
def item_factors
|
263
|
-
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
|
264
|
-
end
|
331
|
+
@u_colmeans = u_colmeans
|
265
332
|
|
266
|
-
|
267
|
-
read_bias(@bias_a) if @bias_a
|
268
|
-
end
|
333
|
+
@fit = true
|
269
334
|
|
270
|
-
|
271
|
-
read_bias(@bias_b) if @bias_b
|
335
|
+
self
|
272
336
|
end
|
273
337
|
|
274
|
-
private
|
275
|
-
|
276
338
|
def set_params(
|
277
339
|
k: 40, lambda_: 1e+1, method: "als", use_cg: true, user_bias: true,
|
278
340
|
item_bias: true, add_implicit_features: false,
|
@@ -329,15 +391,14 @@ module Cmfrec
|
|
329
391
|
@nthreads = nthreads
|
330
392
|
end
|
331
393
|
|
332
|
-
def
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
|
337
|
-
raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
|
394
|
+
def update_maps(train_set)
|
395
|
+
raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
|
396
|
+
raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
|
338
397
|
|
339
|
-
|
340
|
-
|
398
|
+
train_set.each do |v|
|
399
|
+
@user_map[v[:user_id]] ||= @user_map.size
|
400
|
+
@item_map[v[:item_id]] ||= @item_map.size
|
401
|
+
end
|
341
402
|
end
|
342
403
|
|
343
404
|
def check_ratings(ratings)
|
@@ -354,7 +415,7 @@ module Cmfrec
|
|
354
415
|
end
|
355
416
|
|
356
417
|
def check_fit
|
357
|
-
raise "Not fit" unless
|
418
|
+
raise "Not fit" unless @fit
|
358
419
|
end
|
359
420
|
|
360
421
|
def to_dataset(dataset)
|
@@ -376,26 +437,59 @@ module Cmfrec
|
|
376
437
|
end
|
377
438
|
end
|
378
439
|
|
379
|
-
def read_factors(ptr, d1, d2)
|
380
|
-
arr = []
|
381
|
-
offset = 0
|
440
|
+
def read_factors(ptr, d1, d2, id, map)
|
382
441
|
width = d2 * Fiddle::SIZEOF_DOUBLE
|
383
|
-
|
384
|
-
|
385
|
-
|
442
|
+
if id
|
443
|
+
i = map[id]
|
444
|
+
ptr[i * width, width].unpack("d*") if i
|
445
|
+
else
|
446
|
+
arr = []
|
447
|
+
offset = 0
|
448
|
+
d1.times do |i|
|
449
|
+
arr << ptr[offset, width].unpack("d*")
|
450
|
+
offset += width
|
451
|
+
end
|
452
|
+
arr
|
386
453
|
end
|
387
|
-
arr
|
388
454
|
end
|
389
455
|
|
390
|
-
def read_bias(ptr)
|
391
|
-
|
456
|
+
def read_bias(ptr, id, map)
|
457
|
+
if id
|
458
|
+
i = map[id]
|
459
|
+
ptr[i * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") if i
|
460
|
+
else
|
461
|
+
real_array(ptr)
|
462
|
+
end
|
392
463
|
end
|
393
464
|
|
394
|
-
def top_n(a_vec:, a_bias:, count:)
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
465
|
+
def top_n(a_vec:, a_bias:, count:, rated: nil, item_ids: nil)
|
466
|
+
if item_ids
|
467
|
+
# remove missing ids
|
468
|
+
item_ids = item_ids.map { |v| @item_map[v] }.compact
|
469
|
+
return [] if item_ids.empty?
|
470
|
+
|
471
|
+
include_ix = int_ptr(item_ids)
|
472
|
+
n_include = item_ids.size
|
473
|
+
|
474
|
+
# TODO uncomment in 0.2.0
|
475
|
+
count = n_include # if n_include < count
|
476
|
+
else
|
477
|
+
include_ix = nil
|
478
|
+
n_include = 0
|
479
|
+
end
|
480
|
+
|
481
|
+
if rated && !item_ids
|
482
|
+
# assumes rated is unique and all items are known
|
483
|
+
# calling code is responsible for this
|
484
|
+
exclude_ix = int_ptr(rated)
|
485
|
+
n_exclude = rated.size
|
486
|
+
remaining = @item_map.size - n_exclude
|
487
|
+
return [] if remaining == 0
|
488
|
+
count = remaining if remaining < count
|
489
|
+
else
|
490
|
+
exclude_ix = nil
|
491
|
+
n_exclude = 0
|
492
|
+
end
|
399
493
|
|
400
494
|
outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
|
401
495
|
outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
|
@@ -425,6 +519,16 @@ module Cmfrec
|
|
425
519
|
data = to_dataset(data)
|
426
520
|
user_info = to_dataset(user_info) if user_info
|
427
521
|
|
522
|
+
# remove unknown items
|
523
|
+
data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
|
524
|
+
|
525
|
+
if unknown_data.any?
|
526
|
+
# TODO warn for unknown items?
|
527
|
+
# warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
|
528
|
+
end
|
529
|
+
|
530
|
+
item_ids = data.map { |d| @item_map[d[:item_id]] }
|
531
|
+
|
428
532
|
nnz = data.size
|
429
533
|
a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
430
534
|
bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
@@ -465,7 +569,7 @@ module Cmfrec
|
|
465
569
|
check_ratings(ratings)
|
466
570
|
end
|
467
571
|
xa = real_ptr(ratings)
|
468
|
-
x_col = int_ptr(
|
572
|
+
x_col = int_ptr(item_ids)
|
469
573
|
else
|
470
574
|
xa = nil
|
471
575
|
x_col = nil
|
@@ -479,7 +583,7 @@ module Cmfrec
|
|
479
583
|
u_vec_sp, u_vec_x_col, nnz_u_vec,
|
480
584
|
@na_as_zero_user,
|
481
585
|
@nonneg,
|
482
|
-
@
|
586
|
+
@u_colmeans,
|
483
587
|
@b, @n, @c,
|
484
588
|
xa, x_col, nnz,
|
485
589
|
@k, @k_user, @k_item, @k_main,
|
@@ -505,7 +609,7 @@ module Cmfrec
|
|
505
609
|
@na_as_zero_user, @na_as_zero,
|
506
610
|
@nonneg,
|
507
611
|
@c, cb,
|
508
|
-
@global_mean, @bias_b, @
|
612
|
+
@global_mean, @bias_b, @u_colmeans,
|
509
613
|
xa, x_col, nnz, xa_dense,
|
510
614
|
@n, weight, @b, @bi,
|
511
615
|
@add_implicit_features,
|
@@ -528,7 +632,7 @@ module Cmfrec
|
|
528
632
|
check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
|
529
633
|
end
|
530
634
|
|
531
|
-
[a_vec, real_array(bias_a).first]
|
635
|
+
[a_vec, real_array(bias_a).first, item_ids.uniq]
|
532
636
|
end
|
533
637
|
|
534
638
|
# convert boolean to int
|
@@ -585,5 +689,126 @@ module Cmfrec
|
|
585
689
|
def real_array(ptr)
|
586
690
|
ptr.to_s(ptr.size).unpack("d*")
|
587
691
|
end
|
692
|
+
|
693
|
+
def set_implicit_vars
|
694
|
+
@w_main_multiplier = 1.0
|
695
|
+
@alpha = 1.0
|
696
|
+
@adjust_weight = false # downweight?
|
697
|
+
@apply_log_transf = false
|
698
|
+
|
699
|
+
# different defaults
|
700
|
+
@lambda_ = 1e0
|
701
|
+
@w_user = 10
|
702
|
+
@w_item = 10
|
703
|
+
@finalize_chol = false
|
704
|
+
end
|
705
|
+
|
706
|
+
def dump_ptr(ptr)
|
707
|
+
ptr.to_s(ptr.size) if ptr
|
708
|
+
end
|
709
|
+
|
710
|
+
def load_ptr(str)
|
711
|
+
Fiddle::Pointer[str] if str
|
712
|
+
end
|
713
|
+
|
714
|
+
def marshal_dump
|
715
|
+
obj = {
|
716
|
+
implicit: @implicit
|
717
|
+
}
|
718
|
+
|
719
|
+
# options
|
720
|
+
obj[:factors] = @k
|
721
|
+
obj[:epochs] = @niter
|
722
|
+
obj[:verbose] = @verbose
|
723
|
+
|
724
|
+
# factors
|
725
|
+
obj[:user_map] = @user_map
|
726
|
+
obj[:item_map] = @item_map
|
727
|
+
obj[:rated] = @rated
|
728
|
+
obj[:user_factors] = dump_ptr(@a)
|
729
|
+
obj[:item_factors] = dump_ptr(@b)
|
730
|
+
|
731
|
+
# bias
|
732
|
+
obj[:user_bias] = dump_ptr(@bias_a)
|
733
|
+
obj[:item_bias] = dump_ptr(@bias_b)
|
734
|
+
|
735
|
+
# mean
|
736
|
+
obj[:global_mean] = @global_mean
|
737
|
+
|
738
|
+
# side info
|
739
|
+
obj[:user_info_map] = @user_info_map
|
740
|
+
obj[:item_info_map] = @item_info_map
|
741
|
+
obj[:user_info_factors] = dump_ptr(@c)
|
742
|
+
obj[:item_info_factors] = dump_ptr(@d)
|
743
|
+
|
744
|
+
# implicit features
|
745
|
+
obj[:add_implicit_features] = @add_implicit_features
|
746
|
+
obj[:user_factors_implicit] = dump_ptr(@ai)
|
747
|
+
obj[:item_factors_implicit] = dump_ptr(@bi)
|
748
|
+
|
749
|
+
unless @implicit
|
750
|
+
obj[:min_rating] = @min_rating
|
751
|
+
obj[:max_rating] = @max_rating
|
752
|
+
end
|
753
|
+
|
754
|
+
obj[:user_means] = dump_ptr(@u_colmeans)
|
755
|
+
|
756
|
+
obj
|
757
|
+
end
|
758
|
+
|
759
|
+
def marshal_load(obj)
|
760
|
+
@implicit = obj[:implicit]
|
761
|
+
|
762
|
+
# options
|
763
|
+
set_params(
|
764
|
+
k: obj[:factors],
|
765
|
+
niter: obj[:epochs],
|
766
|
+
verbose: obj[:verbose],
|
767
|
+
user_bias: !obj[:user_bias].nil?,
|
768
|
+
item_bias: !obj[:item_bias].nil?,
|
769
|
+
add_implicit_features: obj[:add_implicit_features]
|
770
|
+
)
|
771
|
+
|
772
|
+
# factors
|
773
|
+
@user_map = obj[:user_map]
|
774
|
+
@item_map = obj[:item_map]
|
775
|
+
@rated = obj[:rated] || {}
|
776
|
+
@a = load_ptr(obj[:user_factors])
|
777
|
+
@b = load_ptr(obj[:item_factors])
|
778
|
+
|
779
|
+
# bias
|
780
|
+
@bias_a = load_ptr(obj[:user_bias])
|
781
|
+
@bias_b = load_ptr(obj[:item_bias])
|
782
|
+
|
783
|
+
# mean
|
784
|
+
@global_mean = obj[:global_mean]
|
785
|
+
|
786
|
+
# side info
|
787
|
+
@user_info_map = obj[:user_info_map]
|
788
|
+
@item_info_map = obj[:item_info_map]
|
789
|
+
@c = load_ptr(obj[:user_info_factors])
|
790
|
+
@d = load_ptr(obj[:item_info_factors])
|
791
|
+
|
792
|
+
# implicit features
|
793
|
+
@add_implicit_features = obj[:add_implicit_features]
|
794
|
+
@ai = load_ptr(obj[:user_factors_implicit])
|
795
|
+
@bi = load_ptr(obj[:item_factors_implicit])
|
796
|
+
|
797
|
+
unless @implicit
|
798
|
+
@min_rating = obj[:min_rating]
|
799
|
+
@max_rating = obj[:max_rating]
|
800
|
+
end
|
801
|
+
|
802
|
+
@u_colmeans = load_ptr(obj[:user_means])
|
803
|
+
|
804
|
+
@m = @user_map.size
|
805
|
+
@n = @item_map.size
|
806
|
+
@m_u = @user_info_map.size
|
807
|
+
@n_i = @item_info_map.size
|
808
|
+
|
809
|
+
set_implicit_vars if @implicit
|
810
|
+
|
811
|
+
@fit = @m > 0
|
812
|
+
end
|
588
813
|
end
|
589
814
|
end
|
data/lib/cmfrec/version.rb
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
|
-
email: andrew@
|
14
|
+
email: andrew@ankane.org
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
@@ -25,6 +25,7 @@ files:
|
|
25
25
|
- lib/cmfrec/recommender.rb
|
26
26
|
- lib/cmfrec/version.rb
|
27
27
|
- vendor/LICENSE.txt
|
28
|
+
- vendor/libcmfrec.arm64.dylib
|
28
29
|
- vendor/libcmfrec.dylib
|
29
30
|
- vendor/libcmfrec.so
|
30
31
|
homepage: https://github.com/ankane/cmfrec
|
@@ -46,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
46
47
|
- !ruby/object:Gem::Version
|
47
48
|
version: '0'
|
48
49
|
requirements: []
|
49
|
-
rubygems_version: 3.
|
50
|
+
rubygems_version: 3.2.22
|
50
51
|
signing_key:
|
51
52
|
specification_version: 4
|
52
53
|
summary: Recommendations for Ruby using collective matrix factorization
|