cmfrec 0.1.2 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE.txt +1 -1
- data/README.md +74 -0
- data/lib/cmfrec.rb +5 -1
- data/lib/cmfrec/recommender.rb +364 -139
- data/lib/cmfrec/version.rb +1 -1
- data/vendor/libcmfrec.arm64.dylib +0 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34e8dc08914cbc418470cd7eb3adf3d33013b786319f4510212c80bf3629f3ca
|
4
|
+
data.tar.gz: c1b91a1f77b4b51a5ca4491376f8a02230ea54873f8c1b2b06f4761d6ddd0686
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3c57734379199196a4e3f51d9ec02b19ef1abac13d57a10ca3c20e9b76c9ee5db4b17d790330d41a9576c2ba28a9eeccafeb5760b54cfdf80a7431368895068
|
7
|
+
data.tar.gz: 5a24a77a6665854abb38916a22e8141a6cae637a51f98e3df3762566f2e73cb60b9bd9a25303df0411ea53dec3211bf7534711dc55c510311620341cbe4e4ac3
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,28 @@
|
|
1
|
+
## 0.1.6 (2021-08-12)
|
2
|
+
|
3
|
+
- Added `user_ids` and `item_ids` methods
|
4
|
+
- Added `user_id` argument to `user_factors`
|
5
|
+
- Added `item_id` argument to `item_factors`
|
6
|
+
- Added `user_id` argument to `user_bias`
|
7
|
+
- Added `item_id` argument to `item_bias`
|
8
|
+
- Added `item_ids` argument to `new_user_recs`
|
9
|
+
- Fixed order for `user_recs`
|
10
|
+
|
11
|
+
## 0.1.5 (2021-08-10)
|
12
|
+
|
13
|
+
- Fixed issue with `user_recs` and `new_user_recs` returning rated items
|
14
|
+
- Fixed error with `new_user_recs`
|
15
|
+
|
16
|
+
## 0.1.4 (2021-02-04)
|
17
|
+
|
18
|
+
- Added support for saving and loading recommenders
|
19
|
+
- Added `similar_users` and `similar_items`
|
20
|
+
- Improved ARM detection
|
21
|
+
|
22
|
+
## 0.1.3 (2020-12-28)
|
23
|
+
|
24
|
+
- Added ARM shared library for Mac
|
25
|
+
|
1
26
|
## 0.1.2 (2020-12-09)
|
2
27
|
|
3
28
|
- Added `load_movielens` method
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -107,6 +107,26 @@ Get recommendations with only side information
|
|
107
107
|
recommender.new_user_recs([], user_info: {cats: 0, dogs: 2})
|
108
108
|
```
|
109
109
|
|
110
|
+
## Similarity
|
111
|
+
|
112
|
+
Add this line to your application’s Gemfile:
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
gem 'ngt'
|
116
|
+
```
|
117
|
+
|
118
|
+
Get similar users
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
recommender.similar_users(user_id)
|
122
|
+
```
|
123
|
+
|
124
|
+
Get similar items - “users who liked this item also liked”
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
recommender.similar_items(item_id)
|
128
|
+
```
|
129
|
+
|
110
130
|
## Examples
|
111
131
|
|
112
132
|
### MovieLens
|
@@ -125,6 +145,35 @@ recommender.fit(ratings.first(80000), user_info: user_info, item_info: item_info
|
|
125
145
|
recommender.predict(ratings.last(20000))
|
126
146
|
```
|
127
147
|
|
148
|
+
### Ahoy
|
149
|
+
|
150
|
+
[Ahoy](https://github.com/ankane/ahoy) is a great source for implicit feedback
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
views = Ahoy::Event.
|
154
|
+
where(name: "Viewed post").
|
155
|
+
group(:user_id).
|
156
|
+
group("properties->>'post_id'"). # postgres syntax
|
157
|
+
count
|
158
|
+
|
159
|
+
data =
|
160
|
+
views.map do |(user_id, post_id), count|
|
161
|
+
{
|
162
|
+
user_id: user_id,
|
163
|
+
item_id: post_id,
|
164
|
+
value: count
|
165
|
+
}
|
166
|
+
end
|
167
|
+
```
|
168
|
+
|
169
|
+
Create a recommender and get recommended posts for a user
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
recommender = Cmfrec::Recommender.new
|
173
|
+
recommender.fit(data)
|
174
|
+
recommender.user_recs(current_user.id)
|
175
|
+
```
|
176
|
+
|
128
177
|
## Options
|
129
178
|
|
130
179
|
Specify the number of factors and epochs
|
@@ -163,8 +212,33 @@ Or a Rover data frame
|
|
163
212
|
Rover.read_csv("ratings.csv")
|
164
213
|
```
|
165
214
|
|
215
|
+
## Storing Recommenders
|
216
|
+
|
217
|
+
Store the recommender
|
218
|
+
|
219
|
+
```ruby
|
220
|
+
bin = Marshal.dump(recommender)
|
221
|
+
File.binwrite("recommender.bin", bin)
|
222
|
+
```
|
223
|
+
|
224
|
+
> You can save it to a file, database, or any other storage system
|
225
|
+
|
226
|
+
Load a recommender
|
227
|
+
|
228
|
+
```ruby
|
229
|
+
bin = File.binread("recommender.bin")
|
230
|
+
recommender = Marshal.load(bin)
|
231
|
+
```
|
232
|
+
|
166
233
|
## Reference
|
167
234
|
|
235
|
+
Get ids
|
236
|
+
|
237
|
+
```ruby
|
238
|
+
recommender.user_ids
|
239
|
+
recommender.item_ids
|
240
|
+
```
|
241
|
+
|
168
242
|
Get the global mean
|
169
243
|
|
170
244
|
```ruby
|
data/lib/cmfrec.rb
CHANGED
@@ -19,7 +19,11 @@ module Cmfrec
|
|
19
19
|
if Gem.win_platform?
|
20
20
|
"cmfrec.dll"
|
21
21
|
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
22
|
-
"libcmfrec.dylib"
|
22
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm/i
|
23
|
+
"libcmfrec.arm64.dylib"
|
24
|
+
else
|
25
|
+
"libcmfrec.dylib"
|
26
|
+
end
|
23
27
|
else
|
24
28
|
"libcmfrec.so"
|
25
29
|
end
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -11,29 +11,193 @@ module Cmfrec
|
|
11
11
|
item_bias: item_bias,
|
12
12
|
add_implicit_features: add_implicit_features
|
13
13
|
)
|
14
|
+
|
15
|
+
@fit = false
|
16
|
+
@user_map = {}
|
17
|
+
@item_map = {}
|
18
|
+
@user_info_map = {}
|
19
|
+
@item_info_map = {}
|
14
20
|
end
|
15
21
|
|
16
22
|
def fit(train_set, user_info: nil, item_info: nil)
|
23
|
+
reset
|
24
|
+
partial_fit(train_set, user_info: user_info, item_info: item_info)
|
25
|
+
end
|
26
|
+
|
27
|
+
def predict(data)
|
28
|
+
check_fit
|
29
|
+
|
30
|
+
data = to_dataset(data)
|
31
|
+
|
32
|
+
u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
|
33
|
+
i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
|
34
|
+
|
35
|
+
row = int_ptr(u)
|
36
|
+
col = int_ptr(i)
|
37
|
+
n_predict = data.size
|
38
|
+
predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
|
39
|
+
|
40
|
+
if @implicit
|
41
|
+
check_status FFI.predict_X_old_collective_implicit(
|
42
|
+
row, col, predicted, n_predict,
|
43
|
+
@a, @b,
|
44
|
+
@k, @k_user, @k_item, @k_main,
|
45
|
+
@m, @n,
|
46
|
+
@nthreads
|
47
|
+
)
|
48
|
+
else
|
49
|
+
check_status FFI.predict_X_old_collective_explicit(
|
50
|
+
row, col, predicted, n_predict,
|
51
|
+
@a, @bias_a,
|
52
|
+
@b, @bias_b,
|
53
|
+
@global_mean,
|
54
|
+
@k, @k_user, @k_item, @k_main,
|
55
|
+
@m, @n,
|
56
|
+
@nthreads
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
predictions = real_array(predicted)
|
61
|
+
predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
|
62
|
+
predictions
|
63
|
+
end
|
64
|
+
|
65
|
+
def user_recs(user_id, count: 5, item_ids: nil)
|
66
|
+
check_fit
|
67
|
+
user = @user_map[user_id]
|
68
|
+
|
69
|
+
if user
|
70
|
+
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
71
|
+
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
72
|
+
# @rated[user] will be nil for recommenders saved before 0.1.5
|
73
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys, item_ids: item_ids)
|
74
|
+
else
|
75
|
+
# no items if user is unknown
|
76
|
+
# TODO maybe most popular items
|
77
|
+
[]
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def new_user_recs(data, count: 5, user_info: nil, item_ids: nil)
|
82
|
+
check_fit
|
83
|
+
|
84
|
+
a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
|
85
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated, item_ids: item_ids)
|
86
|
+
end
|
87
|
+
|
88
|
+
def user_ids
|
89
|
+
@user_map.keys
|
90
|
+
end
|
91
|
+
|
92
|
+
def item_ids
|
93
|
+
@item_map.keys
|
94
|
+
end
|
95
|
+
|
96
|
+
def user_factors(user_id = nil)
|
97
|
+
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main, user_id, @user_map)
|
98
|
+
end
|
99
|
+
|
100
|
+
def item_factors(item_id = nil)
|
101
|
+
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main, item_id, @item_map)
|
102
|
+
end
|
103
|
+
|
104
|
+
def user_bias(user_id = nil)
|
105
|
+
read_bias(@bias_a, user_id, @user_map) if @bias_a
|
106
|
+
end
|
107
|
+
|
108
|
+
def item_bias(item_id = nil)
|
109
|
+
read_bias(@bias_b, item_id, @item_map) if @bias_b
|
110
|
+
end
|
111
|
+
|
112
|
+
def similar_items(item_id, count: 5)
|
113
|
+
check_fit
|
114
|
+
similar(item_id, @item_map, item_factors, count, item_index)
|
115
|
+
end
|
116
|
+
alias_method :item_recs, :similar_items
|
117
|
+
|
118
|
+
def similar_users(user_id, count: 5)
|
119
|
+
check_fit
|
120
|
+
similar(user_id, @user_map, user_factors, count, user_index)
|
121
|
+
end
|
122
|
+
|
123
|
+
private
|
124
|
+
|
125
|
+
def user_index
|
126
|
+
@user_index ||= create_index(user_factors)
|
127
|
+
end
|
128
|
+
|
129
|
+
def item_index
|
130
|
+
@item_index ||= create_index(item_factors)
|
131
|
+
end
|
132
|
+
|
133
|
+
def create_index(factors)
|
134
|
+
require "ngt"
|
135
|
+
|
136
|
+
index = Ngt::Index.new(@k, distance_type: "Cosine")
|
137
|
+
index.batch_insert(factors)
|
138
|
+
index
|
139
|
+
end
|
140
|
+
|
141
|
+
# TODO include bias
|
142
|
+
def similar(id, map, factors, count, index)
|
143
|
+
i = map[id]
|
144
|
+
if i
|
145
|
+
keys = map.keys
|
146
|
+
result = index.search(factors[i], size: count + 1)[1..-1]
|
147
|
+
result.map do |v|
|
148
|
+
{
|
149
|
+
# ids from batch_insert start at 1 instead of 0
|
150
|
+
item_id: keys[v[:id] - 1],
|
151
|
+
# convert cosine distance to cosine similarity
|
152
|
+
score: 1 - v[:distance]
|
153
|
+
}
|
154
|
+
end
|
155
|
+
else
|
156
|
+
[]
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def reset
|
161
|
+
@fit = false
|
162
|
+
@user_map.clear
|
163
|
+
@item_map.clear
|
164
|
+
@user_info_map.clear
|
165
|
+
@item_info_map.clear
|
166
|
+
@user_index = nil
|
167
|
+
@item_index = nil
|
168
|
+
end
|
169
|
+
|
170
|
+
# TODO resize pointers as needed and reset values for new memory
|
171
|
+
def partial_fit(train_set, user_info: nil, item_info: nil)
|
17
172
|
train_set = to_dataset(train_set)
|
18
173
|
|
19
|
-
@
|
174
|
+
unless @fit
|
175
|
+
@implicit = !train_set.any? { |v| v[:rating] }
|
176
|
+
end
|
177
|
+
|
20
178
|
unless @implicit
|
21
179
|
ratings = train_set.map { |o| o[:rating] }
|
22
180
|
check_ratings(ratings)
|
23
181
|
end
|
24
182
|
|
25
183
|
check_training_set(train_set)
|
26
|
-
|
184
|
+
update_maps(train_set)
|
27
185
|
|
28
186
|
x_row = []
|
29
187
|
x_col = []
|
30
188
|
x_val = []
|
31
189
|
value_key = @implicit ? :value : :rating
|
190
|
+
@rated = Hash.new { |hash, key| hash[key] = {} }
|
32
191
|
train_set.each do |v|
|
33
|
-
|
34
|
-
|
192
|
+
u = @user_map[v[:user_id]]
|
193
|
+
i = @item_map[v[:item_id]]
|
194
|
+
@rated[u][i] = true
|
195
|
+
|
196
|
+
x_row << u
|
197
|
+
x_col << i
|
35
198
|
x_val << (v[value_key] || 1)
|
36
199
|
end
|
200
|
+
@rated.default = nil
|
37
201
|
|
38
202
|
@m = @user_map.size
|
39
203
|
@n = @item_map.size
|
@@ -52,16 +216,14 @@ module Cmfrec
|
|
52
216
|
uu = nil
|
53
217
|
ii = nil
|
54
218
|
|
55
|
-
|
219
|
+
# side info
|
56
220
|
u_row, u_col, u_sp, nnz_u, @m_u, p_ = process_info(user_info, @user_map, @user_info_map, :user_id)
|
57
|
-
|
58
|
-
@item_info_map = {}
|
59
221
|
i_row, i_col, i_sp, nnz_i, @n_i, q = process_info(item_info, @item_map, @item_info_map, :item_id)
|
60
222
|
|
61
223
|
@precompute_for_predictions = false
|
62
224
|
|
63
225
|
# initialize w/ normal distribution
|
64
|
-
reset_values =
|
226
|
+
reset_values = !@fit
|
65
227
|
|
66
228
|
@a = Fiddle::Pointer.malloc([@m, @m_u].max * (@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
67
229
|
@b = Fiddle::Pointer.malloc([@n, @n_i].max * (@k_item + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
@@ -75,16 +237,7 @@ module Cmfrec
|
|
75
237
|
i_colmeans = Fiddle::Pointer.malloc(q * Fiddle::SIZEOF_DOUBLE)
|
76
238
|
|
77
239
|
if @implicit
|
78
|
-
|
79
|
-
@alpha = 1.0
|
80
|
-
@adjust_weight = false # downweight?
|
81
|
-
@apply_log_transf = false
|
82
|
-
|
83
|
-
# different defaults
|
84
|
-
@lambda_ = 1e0
|
85
|
-
@w_user = 10
|
86
|
-
@w_item = 10
|
87
|
-
@finalize_chol = false
|
240
|
+
set_implicit_vars
|
88
241
|
|
89
242
|
args = [
|
90
243
|
@a, @b,
|
@@ -175,104 +328,13 @@ module Cmfrec
|
|
175
328
|
@global_mean = real_array(glob_mean).first
|
176
329
|
end
|
177
330
|
|
178
|
-
@u_colmeans = real_array(u_colmeans)
|
179
|
-
@i_colmeans = real_array(i_colmeans)
|
180
|
-
@u_colmeans_ptr = u_colmeans
|
181
|
-
|
182
|
-
self
|
183
|
-
end
|
184
|
-
|
185
|
-
def predict(data)
|
186
|
-
check_fit
|
187
|
-
|
188
|
-
data = to_dataset(data)
|
189
|
-
|
190
|
-
u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
|
191
|
-
i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
|
192
|
-
|
193
|
-
row = int_ptr(u)
|
194
|
-
col = int_ptr(i)
|
195
|
-
n_predict = data.size
|
196
|
-
predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
|
197
|
-
|
198
|
-
if @implicit
|
199
|
-
check_status FFI.predict_X_old_collective_implicit(
|
200
|
-
row, col, predicted, n_predict,
|
201
|
-
@a, @b,
|
202
|
-
@k, @k_user, @k_item, @k_main,
|
203
|
-
@m, @n,
|
204
|
-
@nthreads
|
205
|
-
)
|
206
|
-
else
|
207
|
-
check_status FFI.predict_X_old_collective_explicit(
|
208
|
-
row, col, predicted, n_predict,
|
209
|
-
@a, @bias_a,
|
210
|
-
@b, @bias_b,
|
211
|
-
@global_mean,
|
212
|
-
@k, @k_user, @k_item, @k_main,
|
213
|
-
@m, @n,
|
214
|
-
@nthreads
|
215
|
-
)
|
216
|
-
end
|
217
|
-
|
218
|
-
predictions = real_array(predicted)
|
219
|
-
predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
|
220
|
-
predictions
|
221
|
-
end
|
222
|
-
|
223
|
-
def user_recs(user_id, count: 5, item_ids: nil)
|
224
|
-
check_fit
|
225
|
-
user = @user_map[user_id]
|
226
|
-
|
227
|
-
if user
|
228
|
-
if item_ids
|
229
|
-
# remove missing ids
|
230
|
-
item_ids = item_ids.select { |v| @item_map[v] }
|
231
|
-
|
232
|
-
data = item_ids.map { |v| {user_id: user_id, item_id: v} }
|
233
|
-
scores = predict(data)
|
234
|
-
|
235
|
-
item_ids.zip(scores).map do |item_id, score|
|
236
|
-
{item_id: item_id, score: score}
|
237
|
-
end
|
238
|
-
else
|
239
|
-
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
240
|
-
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
241
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
242
|
-
end
|
243
|
-
else
|
244
|
-
# no items if user is unknown
|
245
|
-
# TODO maybe most popular items
|
246
|
-
[]
|
247
|
-
end
|
248
|
-
end
|
249
|
-
|
250
|
-
# TODO add item_ids
|
251
|
-
def new_user_recs(data, count: 5, user_info: nil)
|
252
|
-
check_fit
|
253
|
-
|
254
|
-
a_vec, a_bias = factors_warm(data, user_info: user_info)
|
255
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
256
|
-
end
|
257
|
-
|
258
|
-
def user_factors
|
259
|
-
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
|
260
|
-
end
|
261
|
-
|
262
|
-
def item_factors
|
263
|
-
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
|
264
|
-
end
|
331
|
+
@u_colmeans = u_colmeans
|
265
332
|
|
266
|
-
|
267
|
-
read_bias(@bias_a) if @bias_a
|
268
|
-
end
|
333
|
+
@fit = true
|
269
334
|
|
270
|
-
|
271
|
-
read_bias(@bias_b) if @bias_b
|
335
|
+
self
|
272
336
|
end
|
273
337
|
|
274
|
-
private
|
275
|
-
|
276
338
|
def set_params(
|
277
339
|
k: 40, lambda_: 1e+1, method: "als", use_cg: true, user_bias: true,
|
278
340
|
item_bias: true, add_implicit_features: false,
|
@@ -329,15 +391,14 @@ module Cmfrec
|
|
329
391
|
@nthreads = nthreads
|
330
392
|
end
|
331
393
|
|
332
|
-
def
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
|
337
|
-
raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
|
394
|
+
def update_maps(train_set)
|
395
|
+
raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
|
396
|
+
raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
|
338
397
|
|
339
|
-
|
340
|
-
|
398
|
+
train_set.each do |v|
|
399
|
+
@user_map[v[:user_id]] ||= @user_map.size
|
400
|
+
@item_map[v[:item_id]] ||= @item_map.size
|
401
|
+
end
|
341
402
|
end
|
342
403
|
|
343
404
|
def check_ratings(ratings)
|
@@ -354,7 +415,7 @@ module Cmfrec
|
|
354
415
|
end
|
355
416
|
|
356
417
|
def check_fit
|
357
|
-
raise "Not fit" unless
|
418
|
+
raise "Not fit" unless @fit
|
358
419
|
end
|
359
420
|
|
360
421
|
def to_dataset(dataset)
|
@@ -376,26 +437,59 @@ module Cmfrec
|
|
376
437
|
end
|
377
438
|
end
|
378
439
|
|
379
|
-
def read_factors(ptr, d1, d2)
|
380
|
-
arr = []
|
381
|
-
offset = 0
|
440
|
+
def read_factors(ptr, d1, d2, id, map)
|
382
441
|
width = d2 * Fiddle::SIZEOF_DOUBLE
|
383
|
-
|
384
|
-
|
385
|
-
|
442
|
+
if id
|
443
|
+
i = map[id]
|
444
|
+
ptr[i * width, width].unpack("d*") if i
|
445
|
+
else
|
446
|
+
arr = []
|
447
|
+
offset = 0
|
448
|
+
d1.times do |i|
|
449
|
+
arr << ptr[offset, width].unpack("d*")
|
450
|
+
offset += width
|
451
|
+
end
|
452
|
+
arr
|
386
453
|
end
|
387
|
-
arr
|
388
454
|
end
|
389
455
|
|
390
|
-
def read_bias(ptr)
|
391
|
-
|
456
|
+
def read_bias(ptr, id, map)
|
457
|
+
if id
|
458
|
+
i = map[id]
|
459
|
+
ptr[i * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") if i
|
460
|
+
else
|
461
|
+
real_array(ptr)
|
462
|
+
end
|
392
463
|
end
|
393
464
|
|
394
|
-
def top_n(a_vec:, a_bias:, count:)
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
465
|
+
def top_n(a_vec:, a_bias:, count:, rated: nil, item_ids: nil)
|
466
|
+
if item_ids
|
467
|
+
# remove missing ids
|
468
|
+
item_ids = item_ids.map { |v| @item_map[v] }.compact
|
469
|
+
return [] if item_ids.empty?
|
470
|
+
|
471
|
+
include_ix = int_ptr(item_ids)
|
472
|
+
n_include = item_ids.size
|
473
|
+
|
474
|
+
# TODO uncomment in 0.2.0
|
475
|
+
count = n_include # if n_include < count
|
476
|
+
else
|
477
|
+
include_ix = nil
|
478
|
+
n_include = 0
|
479
|
+
end
|
480
|
+
|
481
|
+
if rated && !item_ids
|
482
|
+
# assumes rated is unique and all items are known
|
483
|
+
# calling code is responsible for this
|
484
|
+
exclude_ix = int_ptr(rated)
|
485
|
+
n_exclude = rated.size
|
486
|
+
remaining = @item_map.size - n_exclude
|
487
|
+
return [] if remaining == 0
|
488
|
+
count = remaining if remaining < count
|
489
|
+
else
|
490
|
+
exclude_ix = nil
|
491
|
+
n_exclude = 0
|
492
|
+
end
|
399
493
|
|
400
494
|
outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
|
401
495
|
outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
|
@@ -425,6 +519,16 @@ module Cmfrec
|
|
425
519
|
data = to_dataset(data)
|
426
520
|
user_info = to_dataset(user_info) if user_info
|
427
521
|
|
522
|
+
# remove unknown items
|
523
|
+
data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
|
524
|
+
|
525
|
+
if unknown_data.any?
|
526
|
+
# TODO warn for unknown items?
|
527
|
+
# warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
|
528
|
+
end
|
529
|
+
|
530
|
+
item_ids = data.map { |d| @item_map[d[:item_id]] }
|
531
|
+
|
428
532
|
nnz = data.size
|
429
533
|
a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
430
534
|
bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
@@ -465,7 +569,7 @@ module Cmfrec
|
|
465
569
|
check_ratings(ratings)
|
466
570
|
end
|
467
571
|
xa = real_ptr(ratings)
|
468
|
-
x_col = int_ptr(
|
572
|
+
x_col = int_ptr(item_ids)
|
469
573
|
else
|
470
574
|
xa = nil
|
471
575
|
x_col = nil
|
@@ -479,7 +583,7 @@ module Cmfrec
|
|
479
583
|
u_vec_sp, u_vec_x_col, nnz_u_vec,
|
480
584
|
@na_as_zero_user,
|
481
585
|
@nonneg,
|
482
|
-
@u_colmeans_ptr,
|
586
|
+
@u_colmeans,
|
483
587
|
@b, @n, @c,
|
484
588
|
xa, x_col, nnz,
|
485
589
|
@k, @k_user, @k_item, @k_main,
|
@@ -505,7 +609,7 @@ module Cmfrec
|
|
505
609
|
@na_as_zero_user, @na_as_zero,
|
506
610
|
@nonneg,
|
507
611
|
@c, cb,
|
508
|
-
@global_mean, @bias_b, @u_colmeans_ptr,
|
612
|
+
@global_mean, @bias_b, @u_colmeans,
|
509
613
|
xa, x_col, nnz, xa_dense,
|
510
614
|
@n, weight, @b, @bi,
|
511
615
|
@add_implicit_features,
|
@@ -528,7 +632,7 @@ module Cmfrec
|
|
528
632
|
check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
|
529
633
|
end
|
530
634
|
|
531
|
-
[a_vec, real_array(bias_a).first]
|
635
|
+
[a_vec, real_array(bias_a).first, item_ids.uniq]
|
532
636
|
end
|
533
637
|
|
534
638
|
# convert boolean to int
|
@@ -585,5 +689,126 @@ module Cmfrec
|
|
585
689
|
def real_array(ptr)
|
586
690
|
ptr.to_s(ptr.size).unpack("d*")
|
587
691
|
end
|
692
|
+
|
693
|
+
def set_implicit_vars
|
694
|
+
@w_main_multiplier = 1.0
|
695
|
+
@alpha = 1.0
|
696
|
+
@adjust_weight = false # downweight?
|
697
|
+
@apply_log_transf = false
|
698
|
+
|
699
|
+
# different defaults
|
700
|
+
@lambda_ = 1e0
|
701
|
+
@w_user = 10
|
702
|
+
@w_item = 10
|
703
|
+
@finalize_chol = false
|
704
|
+
end
|
705
|
+
|
706
|
+
def dump_ptr(ptr)
|
707
|
+
ptr.to_s(ptr.size) if ptr
|
708
|
+
end
|
709
|
+
|
710
|
+
def load_ptr(str)
|
711
|
+
Fiddle::Pointer[str] if str
|
712
|
+
end
|
713
|
+
|
714
|
+
def marshal_dump
|
715
|
+
obj = {
|
716
|
+
implicit: @implicit
|
717
|
+
}
|
718
|
+
|
719
|
+
# options
|
720
|
+
obj[:factors] = @k
|
721
|
+
obj[:epochs] = @niter
|
722
|
+
obj[:verbose] = @verbose
|
723
|
+
|
724
|
+
# factors
|
725
|
+
obj[:user_map] = @user_map
|
726
|
+
obj[:item_map] = @item_map
|
727
|
+
obj[:rated] = @rated
|
728
|
+
obj[:user_factors] = dump_ptr(@a)
|
729
|
+
obj[:item_factors] = dump_ptr(@b)
|
730
|
+
|
731
|
+
# bias
|
732
|
+
obj[:user_bias] = dump_ptr(@bias_a)
|
733
|
+
obj[:item_bias] = dump_ptr(@bias_b)
|
734
|
+
|
735
|
+
# mean
|
736
|
+
obj[:global_mean] = @global_mean
|
737
|
+
|
738
|
+
# side info
|
739
|
+
obj[:user_info_map] = @user_info_map
|
740
|
+
obj[:item_info_map] = @item_info_map
|
741
|
+
obj[:user_info_factors] = dump_ptr(@c)
|
742
|
+
obj[:item_info_factors] = dump_ptr(@d)
|
743
|
+
|
744
|
+
# implicit features
|
745
|
+
obj[:add_implicit_features] = @add_implicit_features
|
746
|
+
obj[:user_factors_implicit] = dump_ptr(@ai)
|
747
|
+
obj[:item_factors_implicit] = dump_ptr(@bi)
|
748
|
+
|
749
|
+
unless @implicit
|
750
|
+
obj[:min_rating] = @min_rating
|
751
|
+
obj[:max_rating] = @max_rating
|
752
|
+
end
|
753
|
+
|
754
|
+
obj[:user_means] = dump_ptr(@u_colmeans)
|
755
|
+
|
756
|
+
obj
|
757
|
+
end
|
758
|
+
|
759
|
+
def marshal_load(obj)
|
760
|
+
@implicit = obj[:implicit]
|
761
|
+
|
762
|
+
# options
|
763
|
+
set_params(
|
764
|
+
k: obj[:factors],
|
765
|
+
niter: obj[:epochs],
|
766
|
+
verbose: obj[:verbose],
|
767
|
+
user_bias: !obj[:user_bias].nil?,
|
768
|
+
item_bias: !obj[:item_bias].nil?,
|
769
|
+
add_implicit_features: obj[:add_implicit_features]
|
770
|
+
)
|
771
|
+
|
772
|
+
# factors
|
773
|
+
@user_map = obj[:user_map]
|
774
|
+
@item_map = obj[:item_map]
|
775
|
+
@rated = obj[:rated] || {}
|
776
|
+
@a = load_ptr(obj[:user_factors])
|
777
|
+
@b = load_ptr(obj[:item_factors])
|
778
|
+
|
779
|
+
# bias
|
780
|
+
@bias_a = load_ptr(obj[:user_bias])
|
781
|
+
@bias_b = load_ptr(obj[:item_bias])
|
782
|
+
|
783
|
+
# mean
|
784
|
+
@global_mean = obj[:global_mean]
|
785
|
+
|
786
|
+
# side info
|
787
|
+
@user_info_map = obj[:user_info_map]
|
788
|
+
@item_info_map = obj[:item_info_map]
|
789
|
+
@c = load_ptr(obj[:user_info_factors])
|
790
|
+
@d = load_ptr(obj[:item_info_factors])
|
791
|
+
|
792
|
+
# implicit features
|
793
|
+
@add_implicit_features = obj[:add_implicit_features]
|
794
|
+
@ai = load_ptr(obj[:user_factors_implicit])
|
795
|
+
@bi = load_ptr(obj[:item_factors_implicit])
|
796
|
+
|
797
|
+
unless @implicit
|
798
|
+
@min_rating = obj[:min_rating]
|
799
|
+
@max_rating = obj[:max_rating]
|
800
|
+
end
|
801
|
+
|
802
|
+
@u_colmeans = load_ptr(obj[:user_means])
|
803
|
+
|
804
|
+
@m = @user_map.size
|
805
|
+
@n = @item_map.size
|
806
|
+
@m_u = @user_info_map.size
|
807
|
+
@n_i = @item_info_map.size
|
808
|
+
|
809
|
+
set_implicit_vars if @implicit
|
810
|
+
|
811
|
+
@fit = @m > 0
|
812
|
+
end
|
588
813
|
end
|
589
814
|
end
|
data/lib/cmfrec/version.rb
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
|
-
email: andrew@
|
14
|
+
email: andrew@ankane.org
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
@@ -25,6 +25,7 @@ files:
|
|
25
25
|
- lib/cmfrec/recommender.rb
|
26
26
|
- lib/cmfrec/version.rb
|
27
27
|
- vendor/LICENSE.txt
|
28
|
+
- vendor/libcmfrec.arm64.dylib
|
28
29
|
- vendor/libcmfrec.dylib
|
29
30
|
- vendor/libcmfrec.so
|
30
31
|
homepage: https://github.com/ankane/cmfrec
|
@@ -46,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
46
47
|
- !ruby/object:Gem::Version
|
47
48
|
version: '0'
|
48
49
|
requirements: []
|
49
|
-
rubygems_version: 3.
|
50
|
+
rubygems_version: 3.2.22
|
50
51
|
signing_key:
|
51
52
|
specification_version: 4
|
52
53
|
summary: Recommendations for Ruby using collective matrix factorization
|