cmfrec 0.1.1 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/LICENSE.txt +1 -1
- data/README.md +94 -1
- data/lib/cmfrec.rb +8 -1
- data/lib/cmfrec/data.rb +100 -0
- data/lib/cmfrec/ffi.rb +7 -5
- data/lib/cmfrec/recommender.rb +358 -136
- data/lib/cmfrec/version.rb +1 -1
- data/vendor/libcmfrec.arm64.dylib +0 -0
- data/vendor/libcmfrec.dylib +0 -0
- data/vendor/libcmfrec.so +0 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a200b99780aeee83c5a0190e593c3806c4140aa4c096b0ef2c112fd21a858b7
|
4
|
+
data.tar.gz: 27b354dd491ca1d7a728d4ef2318c72c56d76c691286bc267c74d3b8dafd7c5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c1ceefeac9131a804d94b6da78c7a6614f50c15d144841277829e5279ce2583872a681e972e336e21cb82d49466dee9f5f1de6a482fe8a99ab7aa5176ab0e5c
|
7
|
+
data.tar.gz: 930b20b017b92555071699b38d903230da3ca1fc7b8b91b9e2f96ae3ae4dda9c8c289a47117beaa1e2539e5c9d622fe958d431f5106e8ab7c4809ea17e9fb6e8
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
## 0.1.5 (2021-08-10)
|
2
|
+
|
3
|
+
- Fixed issue with `user_recs` and `new_user_recs` returning rated items
|
4
|
+
- Fixed error with `new_user_recs`
|
5
|
+
|
6
|
+
## 0.1.4 (2021-02-04)
|
7
|
+
|
8
|
+
- Added support for saving and loading recommenders
|
9
|
+
- Added `similar_users` and `similar_items`
|
10
|
+
- Improved ARM detection
|
11
|
+
|
12
|
+
## 0.1.3 (2020-12-28)
|
13
|
+
|
14
|
+
- Added ARM shared library for Mac
|
15
|
+
|
16
|
+
## 0.1.2 (2020-12-09)
|
17
|
+
|
18
|
+
- Added `load_movielens` method
|
19
|
+
- Updated cmfrec to 2.4.1
|
20
|
+
|
1
21
|
## 0.1.1 (2020-11-28)
|
2
22
|
|
3
23
|
- Added `predict` method
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -16,7 +16,7 @@ Add this line to your application’s Gemfile:
|
|
16
16
|
gem 'cmfrec'
|
17
17
|
```
|
18
18
|
|
19
|
-
|
19
|
+
For Windows, also follow [these instructions](#windows-installation).
|
20
20
|
|
21
21
|
## Getting Started
|
22
22
|
|
@@ -107,6 +107,73 @@ Get recommendations with only side information
|
|
107
107
|
recommender.new_user_recs([], user_info: {cats: 0, dogs: 2})
|
108
108
|
```
|
109
109
|
|
110
|
+
## Similarity
|
111
|
+
|
112
|
+
Add this line to your application’s Gemfile:
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
gem 'ngt'
|
116
|
+
```
|
117
|
+
|
118
|
+
Get similar users
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
recommender.similar_users(user_id)
|
122
|
+
```
|
123
|
+
|
124
|
+
Get similar items - “users who liked this item also liked”
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
recommender.similar_items(item_id)
|
128
|
+
```
|
129
|
+
|
130
|
+
## Examples
|
131
|
+
|
132
|
+
### MovieLens
|
133
|
+
|
134
|
+
Load the data
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
ratings, user_info, item_info = Cmfrec.load_movielens
|
138
|
+
```
|
139
|
+
|
140
|
+
Create a recommender and get predictions
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
recommender = Cmfrec::Recommender.new(factors: 20)
|
144
|
+
recommender.fit(ratings.first(80000), user_info: user_info, item_info: item_info)
|
145
|
+
recommender.predict(ratings.last(20000))
|
146
|
+
```
|
147
|
+
|
148
|
+
### Ahoy
|
149
|
+
|
150
|
+
[Ahoy](https://github.com/ankane/ahoy) is a great source for implicit feedback
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
views = Ahoy::Event.
|
154
|
+
where(name: "Viewed post").
|
155
|
+
group(:user_id).
|
156
|
+
group("properties->>'post_id'"). # postgres syntax
|
157
|
+
count
|
158
|
+
|
159
|
+
data =
|
160
|
+
views.map do |(user_id, post_id), count|
|
161
|
+
{
|
162
|
+
user_id: user_id,
|
163
|
+
item_id: post_id,
|
164
|
+
value: count
|
165
|
+
}
|
166
|
+
end
|
167
|
+
```
|
168
|
+
|
169
|
+
Create a recommender and get recommended posts for a user
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
recommender = Cmfrec::Recommender.new
|
173
|
+
recommender.fit(data)
|
174
|
+
recommender.user_recs(current_user.id)
|
175
|
+
```
|
176
|
+
|
110
177
|
## Options
|
111
178
|
|
112
179
|
Specify the number of factors and epochs
|
@@ -145,6 +212,24 @@ Or a Rover data frame
|
|
145
212
|
Rover.read_csv("ratings.csv")
|
146
213
|
```
|
147
214
|
|
215
|
+
## Storing Recommenders
|
216
|
+
|
217
|
+
Store the recommender
|
218
|
+
|
219
|
+
```ruby
|
220
|
+
bin = Marshal.dump(recommender)
|
221
|
+
File.binwrite("recommender.bin", bin)
|
222
|
+
```
|
223
|
+
|
224
|
+
> You can save it to a file, database, or any other storage system
|
225
|
+
|
226
|
+
Load a recommender
|
227
|
+
|
228
|
+
```ruby
|
229
|
+
bin = File.binread("recommender.bin")
|
230
|
+
recommender = Marshal.load(bin)
|
231
|
+
```
|
232
|
+
|
148
233
|
## Reference
|
149
234
|
|
150
235
|
Get the global mean
|
@@ -167,6 +252,14 @@ recommender.user_bias
|
|
167
252
|
recommender.item_bias
|
168
253
|
```
|
169
254
|
|
255
|
+
## Windows Installation
|
256
|
+
|
257
|
+
On Windows, build the [cmfrec C shared library](https://github.com/david-cortes/cmfrec#instalation) and set:
|
258
|
+
|
259
|
+
```ruby
|
260
|
+
Cmfrec.ffi_lib = "path/to/cmfrec.dll"
|
261
|
+
```
|
262
|
+
|
170
263
|
## History
|
171
264
|
|
172
265
|
View the [changelog](https://github.com/ankane/cmfrec/blob/master/CHANGELOG.md)
|
data/lib/cmfrec.rb
CHANGED
@@ -3,12 +3,15 @@ require "etc"
|
|
3
3
|
require "fiddle/import"
|
4
4
|
|
5
5
|
# modules
|
6
|
+
require "cmfrec/data"
|
6
7
|
require "cmfrec/recommender"
|
7
8
|
require "cmfrec/version"
|
8
9
|
|
9
10
|
module Cmfrec
|
10
11
|
class Error < StandardError; end
|
11
12
|
|
13
|
+
extend Data
|
14
|
+
|
12
15
|
class << self
|
13
16
|
attr_accessor :ffi_lib
|
14
17
|
end
|
@@ -16,7 +19,11 @@ module Cmfrec
|
|
16
19
|
if Gem.win_platform?
|
17
20
|
"cmfrec.dll"
|
18
21
|
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
19
|
-
"
|
22
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm/i
|
23
|
+
"libcmfrec.arm64.dylib"
|
24
|
+
else
|
25
|
+
"libcmfrec.dylib"
|
26
|
+
end
|
20
27
|
else
|
21
28
|
"libcmfrec.so"
|
22
29
|
end
|
data/lib/cmfrec/data.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
module Cmfrec
|
2
|
+
module Data
|
3
|
+
def load_movielens
|
4
|
+
require "csv"
|
5
|
+
|
6
|
+
data_path = download_file("ml-100k/u.data", "http://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
7
|
+
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
8
|
+
user_path = download_file("ml-100k/u.user", "http://files.grouplens.org/datasets/movielens/ml-100k/u.user",
|
9
|
+
file_hash: "f120e114da2e8cf314fd28f99417c94ae9ddf1cb6db8ce0e4b5995d40e90e62c")
|
10
|
+
item_path = download_file("ml-100k/u.item", "http://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
11
|
+
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
12
|
+
|
13
|
+
# convert u.item to utf-8
|
14
|
+
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
|
15
|
+
|
16
|
+
user_info = []
|
17
|
+
CSV.foreach(user_path, col_sep: "|") do |row|
|
18
|
+
user = {user_id: row[0].to_i}
|
19
|
+
10.times do |i|
|
20
|
+
user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
|
21
|
+
end
|
22
|
+
user_info << user
|
23
|
+
end
|
24
|
+
|
25
|
+
item_info = []
|
26
|
+
movies = {}
|
27
|
+
genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
|
28
|
+
CSV.parse(movies_str, col_sep: "|", converters: [:numeric]) do |row|
|
29
|
+
movies[row[0]] = row[1]
|
30
|
+
item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
|
31
|
+
genres.each_with_index do |genre, i|
|
32
|
+
item[:"genre_#{genre}"] = row[i + 5]
|
33
|
+
end
|
34
|
+
item_info << item
|
35
|
+
end
|
36
|
+
|
37
|
+
data = []
|
38
|
+
CSV.foreach(data_path, col_sep: "\t", converters: [:numeric]) do |row|
|
39
|
+
data << {
|
40
|
+
user_id: row[0],
|
41
|
+
item_id: movies[row[1]],
|
42
|
+
rating: row[2]
|
43
|
+
}
|
44
|
+
end
|
45
|
+
|
46
|
+
[data, user_info, item_info]
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def download_file(fname, origin, file_hash:)
|
52
|
+
require "fileutils"
|
53
|
+
|
54
|
+
# TODO handle this better
|
55
|
+
raise "No HOME" unless ENV["HOME"]
|
56
|
+
dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
|
57
|
+
FileUtils.mkdir_p(File.dirname(dest))
|
58
|
+
|
59
|
+
return dest if File.exist?(dest)
|
60
|
+
|
61
|
+
require "digest"
|
62
|
+
require "net/http"
|
63
|
+
require "tmpdir"
|
64
|
+
|
65
|
+
temp_path = "#{Dir.tmpdir}/cmfrec-#{Time.now.to_f}" # TODO better name
|
66
|
+
|
67
|
+
digest = Digest::SHA2.new
|
68
|
+
|
69
|
+
uri = URI(origin)
|
70
|
+
|
71
|
+
# Net::HTTP automatically adds Accept-Encoding for compression
|
72
|
+
# of response bodies and automatically decompresses gzip
|
73
|
+
# and deflate responses unless a Range header was sent.
|
74
|
+
# https://ruby-doc.org/stdlib-2.6.4/libdoc/net/http/rdoc/Net/HTTP.html
|
75
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
|
76
|
+
request = Net::HTTP::Get.new(uri)
|
77
|
+
|
78
|
+
puts "Downloading data from #{origin}"
|
79
|
+
File.open(temp_path, "wb") do |f|
|
80
|
+
http.request(request) do |response|
|
81
|
+
response.read_body do |chunk|
|
82
|
+
f.write(chunk)
|
83
|
+
digest.update(chunk)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
if digest.hexdigest != file_hash
|
90
|
+
raise Error, "Bad hash: #{digest.hexdigest}"
|
91
|
+
end
|
92
|
+
|
93
|
+
puts "Hash verified: #{file_hash}"
|
94
|
+
|
95
|
+
FileUtils.mv(temp_path, dest)
|
96
|
+
|
97
|
+
dest
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
data/lib/cmfrec/ffi.rb
CHANGED
@@ -10,17 +10,19 @@ module Cmfrec
|
|
10
10
|
raise e
|
11
11
|
end
|
12
12
|
|
13
|
+
# https://github.com/david-cortes/cmfrec/blob/master/src/cmfrec.h
|
14
|
+
|
13
15
|
typealias "bool", "char"
|
14
16
|
# determined by CMakeLists.txt
|
15
17
|
typealias "int_t", "int"
|
16
18
|
typealias "real_t", "double"
|
17
19
|
|
18
|
-
extern "int_t fit_collective_explicit_als(real_t *biasA, real_t *biasB, real_t *A, real_t *B, real_t *C, real_t *D, real_t *Ai, real_t *Bi, bool add_implicit_features, bool reset_values, int_t seed, real_t *glob_mean, real_t *U_colmeans, real_t *I_colmeans, int_t m, int_t n, int_t k, int_t
|
19
|
-
extern "int_t fit_collective_implicit_als(real_t *A, real_t *B, real_t *C, real_t *D, bool reset_values, int_t seed, real_t *U_colmeans, real_t *I_colmeans, int_t m, int_t n, int_t k, int_t
|
20
|
-
extern "int_t factors_collective_explicit_single(real_t *a_vec, real_t *a_bias,
|
21
|
-
extern "int_t factors_collective_implicit_single(real_t *a_vec,
|
22
|
-
extern "void predict_multiple(real_t *restrict A, int_t k_user, real_t *restrict B, int_t k_item, real_t *restrict biasA, real_t *restrict biasB, real_t glob_mean, int_t k, int_t k_main, int_t m, int_t n, int_t predA[], int_t predB[], size_t nnz, real_t *restrict outp, int_t nthreads)"
|
20
|
+
extern "int_t fit_collective_explicit_als(real_t *restrict biasA, real_t *restrict biasB, real_t *restrict A, real_t *restrict B, real_t *restrict C, real_t *restrict D, real_t *restrict Ai, real_t *restrict Bi, bool add_implicit_features, bool reset_values, int_t seed, real_t *restrict glob_mean, real_t *restrict U_colmeans, real_t *restrict I_colmeans, int_t m, int_t n, int_t k, int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz, real_t *restrict Xfull, real_t *restrict weight, bool user_bias, bool item_bias, bool center, real_t lam, real_t *restrict lam_unique, real_t l1_lam, real_t *restrict l1_lam_unique, bool scale_lam, bool scale_lam_sideinfo, real_t *restrict U, int_t m_u, int_t p, real_t *restrict II, int_t n_i, int_t q, int_t U_row[], int_t U_col[], real_t *restrict U_sp, size_t nnz_U, int_t I_row[], int_t I_col[], real_t *restrict I_sp, size_t nnz_I, bool NA_as_zero_X, bool NA_as_zero_U, bool NA_as_zero_I, int_t k_main, int_t k_user, int_t k_item, real_t w_main, real_t w_user, real_t w_item, real_t w_implicit, int_t niter, int_t nthreads, bool verbose, bool handle_interrupt, bool use_cg, int_t max_cg_steps, bool finalize_chol, bool nonneg, int_t max_cd_steps, bool nonneg_C, bool nonneg_D, bool precompute_for_predictions, bool include_all_X, real_t *restrict B_plus_bias, real_t *restrict precomputedBtB, real_t *restrict precomputedTransBtBinvBt, real_t *restrict precomputedBtXbias, real_t *restrict precomputedBeTBeChol, real_t *restrict precomputedBiTBi, real_t *restrict precomputedTransCtCinvCt, real_t *restrict precomputedCtCw)"
|
21
|
+
extern "int_t fit_collective_implicit_als(real_t *restrict A, real_t *restrict B, real_t *restrict C, real_t *restrict D, bool reset_values, int_t seed, real_t *restrict U_colmeans, real_t *restrict I_colmeans, int_t m, int_t n, int_t k, int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz, real_t lam, real_t *restrict lam_unique, real_t l1_lam, real_t *restrict l1_lam_unique, real_t *restrict U, int_t m_u, int_t p, real_t *restrict II, int_t n_i, int_t q, int_t U_row[], int_t U_col[], real_t *restrict U_sp, size_t nnz_U, int_t I_row[], int_t I_col[], real_t *restrict I_sp, size_t nnz_I, bool NA_as_zero_U, bool NA_as_zero_I, int_t k_main, int_t k_user, int_t k_item, real_t w_main, real_t w_user, real_t w_item, real_t *restrict w_main_multiplier, real_t alpha, bool adjust_weight, bool apply_log_transf, int_t niter, int_t nthreads, bool verbose, bool handle_interrupt, bool use_cg, int_t max_cg_steps, bool finalize_chol, bool nonneg, int_t max_cd_steps, bool nonneg_C, bool nonneg_D, bool precompute_for_predictions, real_t *restrict precomputedBtB, real_t *restrict precomputedBeTBe, real_t *restrict precomputedBeTBeChol)"
|
22
|
+
extern "int_t factors_collective_explicit_single(real_t *restrict a_vec, real_t *restrict a_bias,real_t *restrict u_vec, int_t p,real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,real_t *restrict u_bin_vec, int_t pbin,bool NA_as_zero_U, bool NA_as_zero_X,bool nonneg,real_t *restrict C, real_t *restrict Cb,real_t glob_mean, real_t *restrict biasB,real_t *restrict U_colmeans,real_t *restrict Xa, int_t ixB[], size_t nnz,real_t *restrict Xa_dense, int_t n,real_t *restrict weight,real_t *restrict B,real_t *restrict Bi, bool add_implicit_features,int_t k, int_t k_user, int_t k_item, int_t k_main,real_t lam, real_t *restrict lam_unique,real_t l1_lam, real_t *restrict l1_lam_unique,bool scale_lam, bool scale_lam_sideinfo,real_t w_main, real_t w_user, real_t w_implicit,int_t n_max, bool include_all_X,real_t *restrict BtB,real_t *restrict TransBtBinvBt,real_t *restrict BtXbias,real_t *restrict BeTBeChol,real_t *restrict BiTBi,real_t *restrict CtCw,real_t *restrict TransCtCinvCt,real_t *restrict B_plus_bias)"
|
23
|
+
extern "int_t factors_collective_implicit_single(real_t *restrict a_vec,real_t *restrict u_vec, int_t p,real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,bool NA_as_zero_U,bool nonneg,real_t *restrict U_colmeans,real_t *restrict B, int_t n, real_t *restrict C,real_t *restrict Xa, int_t ixB[], size_t nnz,int_t k, int_t k_user, int_t k_item, int_t k_main,real_t lam, real_t l1_lam, real_t alpha, real_t w_main, real_t w_user,real_t w_main_multiplier,bool apply_log_transf,real_t *restrict BeTBe,real_t *restrict BtB,real_t *restrict BeTBeChol)"
|
23
24
|
extern "int_t predict_X_old_collective_explicit(int_t row[], int_t col[], real_t *restrict predicted, size_t n_predict, real_t *restrict A, real_t *restrict biasA, real_t *restrict B, real_t *restrict biasB, real_t glob_mean, int_t k, int_t k_user, int_t k_item, int_t k_main, int_t m, int_t n_max, int_t nthreads)"
|
25
|
+
extern "int_t predict_X_old_collective_implicit(int_t row[], int_t col[], real_t *restrict predicted, size_t n_predict, real_t *restrict A, real_t *restrict B, int_t k, int_t k_user, int_t k_item, int_t k_main, int_t m, int_t n, int_t nthreads)"
|
24
26
|
extern "int_t topN(real_t *restrict a_vec, int_t k_user, real_t *restrict B, int_t k_item, real_t *restrict biasB, real_t glob_mean, real_t biasA, int_t k, int_t k_main, int_t *restrict include_ix, int_t n_include, int_t *restrict exclude_ix, int_t n_exclude, int_t *restrict outp_ix, real_t *restrict outp_score, int_t n_top, int_t n, int_t nthreads)"
|
25
27
|
end
|
26
28
|
end
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -11,29 +11,199 @@ module Cmfrec
|
|
11
11
|
item_bias: item_bias,
|
12
12
|
add_implicit_features: add_implicit_features
|
13
13
|
)
|
14
|
+
|
15
|
+
@fit = false
|
16
|
+
@user_map = {}
|
17
|
+
@item_map = {}
|
18
|
+
@user_info_map = {}
|
19
|
+
@item_info_map = {}
|
14
20
|
end
|
15
21
|
|
16
22
|
def fit(train_set, user_info: nil, item_info: nil)
|
23
|
+
reset
|
24
|
+
partial_fit(train_set, user_info: user_info, item_info: item_info)
|
25
|
+
end
|
26
|
+
|
27
|
+
def predict(data)
|
28
|
+
check_fit
|
29
|
+
|
30
|
+
data = to_dataset(data)
|
31
|
+
|
32
|
+
u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
|
33
|
+
i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
|
34
|
+
|
35
|
+
row = int_ptr(u)
|
36
|
+
col = int_ptr(i)
|
37
|
+
n_predict = data.size
|
38
|
+
predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
|
39
|
+
|
40
|
+
if @implicit
|
41
|
+
check_status FFI.predict_X_old_collective_implicit(
|
42
|
+
row, col, predicted, n_predict,
|
43
|
+
@a, @b,
|
44
|
+
@k, @k_user, @k_item, @k_main,
|
45
|
+
@m, @n,
|
46
|
+
@nthreads
|
47
|
+
)
|
48
|
+
else
|
49
|
+
check_status FFI.predict_X_old_collective_explicit(
|
50
|
+
row, col, predicted, n_predict,
|
51
|
+
@a, @bias_a,
|
52
|
+
@b, @bias_b,
|
53
|
+
@global_mean,
|
54
|
+
@k, @k_user, @k_item, @k_main,
|
55
|
+
@m, @n,
|
56
|
+
@nthreads
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
predictions = real_array(predicted)
|
61
|
+
predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
|
62
|
+
predictions
|
63
|
+
end
|
64
|
+
|
65
|
+
def user_recs(user_id, count: 5, item_ids: nil)
|
66
|
+
check_fit
|
67
|
+
user = @user_map[user_id]
|
68
|
+
|
69
|
+
if user
|
70
|
+
# TODO use top_n for item_ids as well
|
71
|
+
if item_ids
|
72
|
+
# remove missing ids
|
73
|
+
item_ids = item_ids.select { |v| @item_map[v] }
|
74
|
+
|
75
|
+
data = item_ids.map { |v| {user_id: user_id, item_id: v} }
|
76
|
+
scores = predict(data)
|
77
|
+
|
78
|
+
item_ids.zip(scores).map do |item_id, score|
|
79
|
+
{item_id: item_id, score: score}
|
80
|
+
end
|
81
|
+
else
|
82
|
+
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
83
|
+
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
84
|
+
# @rated[user] will be nil for recommenders saved before 0.1.5
|
85
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys)
|
86
|
+
end
|
87
|
+
else
|
88
|
+
# no items if user is unknown
|
89
|
+
# TODO maybe most popular items
|
90
|
+
[]
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
# TODO add item_ids
|
95
|
+
def new_user_recs(data, count: 5, user_info: nil)
|
96
|
+
check_fit
|
97
|
+
|
98
|
+
a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
|
99
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated)
|
100
|
+
end
|
101
|
+
|
102
|
+
def user_factors
|
103
|
+
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
|
104
|
+
end
|
105
|
+
|
106
|
+
def item_factors
|
107
|
+
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
|
108
|
+
end
|
109
|
+
|
110
|
+
def user_bias
|
111
|
+
read_bias(@bias_a) if @bias_a
|
112
|
+
end
|
113
|
+
|
114
|
+
def item_bias
|
115
|
+
read_bias(@bias_b) if @bias_b
|
116
|
+
end
|
117
|
+
|
118
|
+
def similar_items(item_id, count: 5)
|
119
|
+
check_fit
|
120
|
+
similar(item_id, @item_map, item_factors, count, item_index)
|
121
|
+
end
|
122
|
+
alias_method :item_recs, :similar_items
|
123
|
+
|
124
|
+
def similar_users(user_id, count: 5)
|
125
|
+
check_fit
|
126
|
+
similar(user_id, @user_map, user_factors, count, user_index)
|
127
|
+
end
|
128
|
+
|
129
|
+
private
|
130
|
+
|
131
|
+
def user_index
|
132
|
+
@user_index ||= create_index(user_factors)
|
133
|
+
end
|
134
|
+
|
135
|
+
def item_index
|
136
|
+
@item_index ||= create_index(item_factors)
|
137
|
+
end
|
138
|
+
|
139
|
+
def create_index(factors)
|
140
|
+
require "ngt"
|
141
|
+
|
142
|
+
index = Ngt::Index.new(@k, distance_type: "Cosine")
|
143
|
+
index.batch_insert(factors)
|
144
|
+
index
|
145
|
+
end
|
146
|
+
|
147
|
+
# TODO include bias
|
148
|
+
def similar(id, map, factors, count, index)
|
149
|
+
i = map[id]
|
150
|
+
if i
|
151
|
+
keys = map.keys
|
152
|
+
result = index.search(factors[i], size: count + 1)[1..-1]
|
153
|
+
result.map do |v|
|
154
|
+
{
|
155
|
+
# ids from batch_insert start at 1 instead of 0
|
156
|
+
item_id: keys[v[:id] - 1],
|
157
|
+
# convert cosine distance to cosine similarity
|
158
|
+
score: 1 - v[:distance]
|
159
|
+
}
|
160
|
+
end
|
161
|
+
else
|
162
|
+
[]
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
def reset
|
167
|
+
@fit = false
|
168
|
+
@user_map.clear
|
169
|
+
@item_map.clear
|
170
|
+
@user_info_map.clear
|
171
|
+
@item_info_map.clear
|
172
|
+
@user_index = nil
|
173
|
+
@item_index = nil
|
174
|
+
end
|
175
|
+
|
176
|
+
# TODO resize pointers as needed and reset values for new memory
|
177
|
+
def partial_fit(train_set, user_info: nil, item_info: nil)
|
17
178
|
train_set = to_dataset(train_set)
|
18
179
|
|
19
|
-
@
|
180
|
+
unless @fit
|
181
|
+
@implicit = !train_set.any? { |v| v[:rating] }
|
182
|
+
end
|
183
|
+
|
20
184
|
unless @implicit
|
21
185
|
ratings = train_set.map { |o| o[:rating] }
|
22
186
|
check_ratings(ratings)
|
23
187
|
end
|
24
188
|
|
25
189
|
check_training_set(train_set)
|
26
|
-
|
190
|
+
update_maps(train_set)
|
27
191
|
|
28
192
|
x_row = []
|
29
193
|
x_col = []
|
30
194
|
x_val = []
|
31
195
|
value_key = @implicit ? :value : :rating
|
196
|
+
@rated = Hash.new { |hash, key| hash[key] = {} }
|
32
197
|
train_set.each do |v|
|
33
|
-
|
34
|
-
|
198
|
+
u = @user_map[v[:user_id]]
|
199
|
+
i = @item_map[v[:item_id]]
|
200
|
+
@rated[u][i] = true
|
201
|
+
|
202
|
+
x_row << u
|
203
|
+
x_col << i
|
35
204
|
x_val << (v[value_key] || 1)
|
36
205
|
end
|
206
|
+
@rated.default = nil
|
37
207
|
|
38
208
|
@m = @user_map.size
|
39
209
|
@n = @item_map.size
|
@@ -46,20 +216,20 @@ module Cmfrec
|
|
46
216
|
x_full = nil
|
47
217
|
weight = nil
|
48
218
|
lam_unique = nil
|
219
|
+
l1_lambda = 0
|
220
|
+
l1_lam_unique = nil
|
49
221
|
|
50
222
|
uu = nil
|
51
223
|
ii = nil
|
52
224
|
|
53
|
-
|
225
|
+
# side info
|
54
226
|
u_row, u_col, u_sp, nnz_u, @m_u, p_ = process_info(user_info, @user_map, @user_info_map, :user_id)
|
55
|
-
|
56
|
-
@item_info_map = {}
|
57
227
|
i_row, i_col, i_sp, nnz_i, @n_i, q = process_info(item_info, @item_map, @item_info_map, :item_id)
|
58
228
|
|
59
229
|
@precompute_for_predictions = false
|
60
230
|
|
61
231
|
# initialize w/ normal distribution
|
62
|
-
reset_values =
|
232
|
+
reset_values = !@fit
|
63
233
|
|
64
234
|
@a = Fiddle::Pointer.malloc([@m, @m_u].max * (@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
65
235
|
@b = Fiddle::Pointer.malloc([@n, @n_i].max * (@k_item + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
@@ -73,16 +243,7 @@ module Cmfrec
|
|
73
243
|
i_colmeans = Fiddle::Pointer.malloc(q * Fiddle::SIZEOF_DOUBLE)
|
74
244
|
|
75
245
|
if @implicit
|
76
|
-
|
77
|
-
@alpha = 1.0
|
78
|
-
@adjust_weight = false # downweight?
|
79
|
-
@apply_log_transf = false
|
80
|
-
|
81
|
-
# different defaults
|
82
|
-
@lambda_ = 1e0
|
83
|
-
@w_user = 10
|
84
|
-
@w_item = 10
|
85
|
-
@finalize_chol = false
|
246
|
+
set_implicit_vars
|
86
247
|
|
87
248
|
args = [
|
88
249
|
@a, @b,
|
@@ -92,6 +253,7 @@ module Cmfrec
|
|
92
253
|
@m, @n, @k,
|
93
254
|
x_row, x_col, x, nnz,
|
94
255
|
@lambda_, lam_unique,
|
256
|
+
l1_lambda, l1_lam_unique,
|
95
257
|
uu, @m_u, p_,
|
96
258
|
ii, @n_i, q,
|
97
259
|
u_row, u_col, u_sp, nnz_u,
|
@@ -125,6 +287,10 @@ module Cmfrec
|
|
125
287
|
|
126
288
|
glob_mean = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
127
289
|
|
290
|
+
center = true
|
291
|
+
scale_lam = false
|
292
|
+
scale_lam_sideinfo = false
|
293
|
+
|
128
294
|
args = [
|
129
295
|
@bias_a, @bias_b,
|
130
296
|
@a, @b,
|
@@ -138,8 +304,10 @@ module Cmfrec
|
|
138
304
|
x_row, x_col, x, nnz,
|
139
305
|
x_full,
|
140
306
|
weight,
|
141
|
-
@user_bias, @item_bias,
|
307
|
+
@user_bias, @item_bias, center,
|
142
308
|
@lambda_, lam_unique,
|
309
|
+
l1_lambda, l1_lam_unique,
|
310
|
+
scale_lam, scale_lam_sideinfo,
|
143
311
|
uu, @m_u, p_,
|
144
312
|
ii, @n_i, q,
|
145
313
|
u_row, u_col, u_sp, nnz_u,
|
@@ -155,6 +323,7 @@ module Cmfrec
|
|
155
323
|
nil, #B_plus_bias,
|
156
324
|
nil, #precomputedBtB,
|
157
325
|
nil, #precomputedTransBtBinvBt,
|
326
|
+
nil, #precomputedBtXbias
|
158
327
|
nil, #precomputedBeTBeChol,
|
159
328
|
nil, #precomputedBiTBi,
|
160
329
|
nil, #precomputedTransCtCinvCt,
|
@@ -165,109 +334,13 @@ module Cmfrec
|
|
165
334
|
@global_mean = real_array(glob_mean).first
|
166
335
|
end
|
167
336
|
|
168
|
-
@u_colmeans =
|
169
|
-
@i_colmeans = real_array(i_colmeans)
|
170
|
-
@u_colmeans_ptr = u_colmeans
|
171
|
-
|
172
|
-
self
|
173
|
-
end
|
174
|
-
|
175
|
-
def predict(data)
|
176
|
-
check_fit
|
177
|
-
|
178
|
-
data = to_dataset(data)
|
179
|
-
|
180
|
-
u = data.map { |v| @user_map[v[:user_id]] || -1 }
|
181
|
-
i = data.map { |v| @item_map[v[:item_id]] || -1 }
|
182
|
-
|
183
|
-
pred_a = int_ptr(u)
|
184
|
-
pred_b = int_ptr(i)
|
185
|
-
nnz = data.size
|
186
|
-
outp = Fiddle::Pointer.malloc(nnz * Fiddle::SIZEOF_DOUBLE)
|
187
|
-
|
188
|
-
FFI.predict_multiple(
|
189
|
-
@a, @k_user,
|
190
|
-
@b, @k_item,
|
191
|
-
@bias_a, @bias_b,
|
192
|
-
@global_mean,
|
193
|
-
@k, @k_main,
|
194
|
-
@m, @n,
|
195
|
-
pred_a, pred_b, nnz,
|
196
|
-
outp,
|
197
|
-
@nthreads
|
198
|
-
)
|
199
|
-
|
200
|
-
predictions = real_array(outp)
|
201
|
-
|
202
|
-
nan_index = predictions.each_index.select { |j| predictions[j].nan? }
|
203
|
-
if nan_index.any?
|
204
|
-
# TODO improve performance
|
205
|
-
user_bias = send(:user_bias)
|
206
|
-
item_bias = send(:item_bias)
|
207
|
-
nan_index.each do |j|
|
208
|
-
v = @global_mean
|
209
|
-
v += user_bias[u[j]] if user_bias && u[j] != -1
|
210
|
-
v += item_bias[i[j]] if item_bias && i[j] != -1
|
211
|
-
predictions[j] = v
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
predictions
|
216
|
-
end
|
217
|
-
|
218
|
-
def user_recs(user_id, count: 5, item_ids: nil)
|
219
|
-
check_fit
|
220
|
-
user = @user_map[user_id]
|
221
|
-
|
222
|
-
if user
|
223
|
-
if item_ids
|
224
|
-
# remove missing ids
|
225
|
-
item_ids = item_ids.select { |v| @item_map[v] }
|
226
|
-
|
227
|
-
data = item_ids.map { |v| {user_id: user_id, item_id: v} }
|
228
|
-
scores = predict(data)
|
229
|
-
|
230
|
-
item_ids.zip(scores).map do |item_id, score|
|
231
|
-
{item_id: item_id, score: score}
|
232
|
-
end
|
233
|
-
else
|
234
|
-
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
235
|
-
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
236
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
237
|
-
end
|
238
|
-
else
|
239
|
-
# no items if user is unknown
|
240
|
-
# TODO maybe most popular items
|
241
|
-
[]
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
# TODO add item_ids
|
246
|
-
def new_user_recs(data, count: 5, user_info: nil)
|
247
|
-
check_fit
|
248
|
-
|
249
|
-
a_vec, a_bias = factors_warm(data, user_info: user_info)
|
250
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
251
|
-
end
|
252
|
-
|
253
|
-
def user_factors
|
254
|
-
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
|
255
|
-
end
|
256
|
-
|
257
|
-
def item_factors
|
258
|
-
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
|
259
|
-
end
|
337
|
+
@u_colmeans = u_colmeans
|
260
338
|
|
261
|
-
|
262
|
-
read_bias(@bias_a) if @bias_a
|
263
|
-
end
|
339
|
+
@fit = true
|
264
340
|
|
265
|
-
|
266
|
-
read_bias(@bias_b) if @bias_b
|
341
|
+
self
|
267
342
|
end
|
268
343
|
|
269
|
-
private
|
270
|
-
|
271
344
|
def set_params(
|
272
345
|
k: 40, lambda_: 1e+1, method: "als", use_cg: true, user_bias: true,
|
273
346
|
item_bias: true, add_implicit_features: false,
|
@@ -324,15 +397,14 @@ module Cmfrec
|
|
324
397
|
@nthreads = nthreads
|
325
398
|
end
|
326
399
|
|
327
|
-
def
|
328
|
-
|
329
|
-
|
400
|
+
def update_maps(train_set)
|
401
|
+
raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
|
402
|
+
raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
|
330
403
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
@item_map = item_ids.zip(item_ids.size.times).to_h
|
404
|
+
train_set.each do |v|
|
405
|
+
@user_map[v[:user_id]] ||= @user_map.size
|
406
|
+
@item_map[v[:item_id]] ||= @item_map.size
|
407
|
+
end
|
336
408
|
end
|
337
409
|
|
338
410
|
def check_ratings(ratings)
|
@@ -349,7 +421,7 @@ module Cmfrec
|
|
349
421
|
end
|
350
422
|
|
351
423
|
def check_fit
|
352
|
-
raise "Not fit" unless
|
424
|
+
raise "Not fit" unless @fit
|
353
425
|
end
|
354
426
|
|
355
427
|
def to_dataset(dataset)
|
@@ -386,11 +458,22 @@ module Cmfrec
|
|
386
458
|
real_array(ptr)
|
387
459
|
end
|
388
460
|
|
389
|
-
def top_n(a_vec:, a_bias:, count:)
|
461
|
+
def top_n(a_vec:, a_bias:, count:, rated: nil)
|
390
462
|
include_ix = nil
|
391
463
|
n_include = 0
|
392
|
-
|
393
|
-
|
464
|
+
|
465
|
+
if rated
|
466
|
+
# assumes rated is unique and all items are known
|
467
|
+
# calling code is responsible for this
|
468
|
+
exclude_ix = int_ptr(rated)
|
469
|
+
n_exclude = rated.size
|
470
|
+
remaining = @item_map.size - n_exclude
|
471
|
+
return [] if remaining == 0
|
472
|
+
count = remaining if remaining < count
|
473
|
+
else
|
474
|
+
exclude_ix = nil
|
475
|
+
n_exclude = 0
|
476
|
+
end
|
394
477
|
|
395
478
|
outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
|
396
479
|
outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
|
@@ -420,6 +503,16 @@ module Cmfrec
|
|
420
503
|
data = to_dataset(data)
|
421
504
|
user_info = to_dataset(user_info) if user_info
|
422
505
|
|
506
|
+
# remove unknown items
|
507
|
+
data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
|
508
|
+
|
509
|
+
if unknown_data.any?
|
510
|
+
# TODO warn for unknown items?
|
511
|
+
# warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
|
512
|
+
end
|
513
|
+
|
514
|
+
item_ids = data.map { |d| @item_map[d[:item_id]] }
|
515
|
+
|
423
516
|
nnz = data.size
|
424
517
|
a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
425
518
|
bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
@@ -448,6 +541,8 @@ module Cmfrec
|
|
448
541
|
|
449
542
|
weight = nil
|
450
543
|
lam_unique = nil
|
544
|
+
l1_lambda = 0
|
545
|
+
l1_lam_unique = nil
|
451
546
|
n_max = @n
|
452
547
|
|
453
548
|
if data.any?
|
@@ -458,7 +553,7 @@ module Cmfrec
|
|
458
553
|
check_ratings(ratings)
|
459
554
|
end
|
460
555
|
xa = real_ptr(ratings)
|
461
|
-
x_col = int_ptr(
|
556
|
+
x_col = int_ptr(item_ids)
|
462
557
|
else
|
463
558
|
xa = nil
|
464
559
|
x_col = nil
|
@@ -472,11 +567,11 @@ module Cmfrec
|
|
472
567
|
u_vec_sp, u_vec_x_col, nnz_u_vec,
|
473
568
|
@na_as_zero_user,
|
474
569
|
@nonneg,
|
475
|
-
@
|
570
|
+
@u_colmeans,
|
476
571
|
@b, @n, @c,
|
477
572
|
xa, x_col, nnz,
|
478
573
|
@k, @k_user, @k_item, @k_main,
|
479
|
-
@lambda_, @alpha,
|
574
|
+
@lambda_, l1_lambda, @alpha,
|
480
575
|
@w_main, @w_user, @w_main_multiplier,
|
481
576
|
@apply_log_transf,
|
482
577
|
nil, #BeTBe,
|
@@ -487,6 +582,9 @@ module Cmfrec
|
|
487
582
|
else
|
488
583
|
cb = nil
|
489
584
|
|
585
|
+
scale_lam = false
|
586
|
+
scale_lam_sideinfo = false
|
587
|
+
|
490
588
|
args = [
|
491
589
|
a_vec, bias_a,
|
492
590
|
u_vec, p_,
|
@@ -495,17 +593,20 @@ module Cmfrec
|
|
495
593
|
@na_as_zero_user, @na_as_zero,
|
496
594
|
@nonneg,
|
497
595
|
@c, cb,
|
498
|
-
@global_mean, @bias_b, @
|
596
|
+
@global_mean, @bias_b, @u_colmeans,
|
499
597
|
xa, x_col, nnz, xa_dense,
|
500
598
|
@n, weight, @b, @bi,
|
501
599
|
@add_implicit_features,
|
502
600
|
@k, @k_user, @k_item, @k_main,
|
503
601
|
@lambda_, lam_unique,
|
602
|
+
l1_lambda, l1_lam_unique,
|
603
|
+
scale_lam, scale_lam_sideinfo,
|
504
604
|
@w_main, @w_user, @w_implicit,
|
505
605
|
n_max,
|
506
606
|
@include_all_x,
|
507
|
-
nil, #TransBtBinvBt,
|
508
607
|
nil, #BtB,
|
608
|
+
nil, #TransBtBinvBt,
|
609
|
+
nil, #BtXbias,
|
509
610
|
nil, #BeTBeChol,
|
510
611
|
nil, #BiTBi,
|
511
612
|
nil, #CtCw,
|
@@ -515,7 +616,7 @@ module Cmfrec
|
|
515
616
|
check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
|
516
617
|
end
|
517
618
|
|
518
|
-
[a_vec, real_array(bias_a).first]
|
619
|
+
[a_vec, real_array(bias_a).first, item_ids.uniq]
|
519
620
|
end
|
520
621
|
|
521
622
|
# convert boolean to int
|
@@ -572,5 +673,126 @@ module Cmfrec
|
|
572
673
|
def real_array(ptr)
|
573
674
|
ptr.to_s(ptr.size).unpack("d*")
|
574
675
|
end
|
676
|
+
|
677
|
+
def set_implicit_vars
|
678
|
+
@w_main_multiplier = 1.0
|
679
|
+
@alpha = 1.0
|
680
|
+
@adjust_weight = false # downweight?
|
681
|
+
@apply_log_transf = false
|
682
|
+
|
683
|
+
# different defaults
|
684
|
+
@lambda_ = 1e0
|
685
|
+
@w_user = 10
|
686
|
+
@w_item = 10
|
687
|
+
@finalize_chol = false
|
688
|
+
end
|
689
|
+
|
690
|
+
def dump_ptr(ptr)
|
691
|
+
ptr.to_s(ptr.size) if ptr
|
692
|
+
end
|
693
|
+
|
694
|
+
def load_ptr(str)
|
695
|
+
Fiddle::Pointer[str] if str
|
696
|
+
end
|
697
|
+
|
698
|
+
def marshal_dump
|
699
|
+
obj = {
|
700
|
+
implicit: @implicit
|
701
|
+
}
|
702
|
+
|
703
|
+
# options
|
704
|
+
obj[:factors] = @k
|
705
|
+
obj[:epochs] = @niter
|
706
|
+
obj[:verbose] = @verbose
|
707
|
+
|
708
|
+
# factors
|
709
|
+
obj[:user_map] = @user_map
|
710
|
+
obj[:item_map] = @item_map
|
711
|
+
obj[:rated] = @rated
|
712
|
+
obj[:user_factors] = dump_ptr(@a)
|
713
|
+
obj[:item_factors] = dump_ptr(@b)
|
714
|
+
|
715
|
+
# bias
|
716
|
+
obj[:user_bias] = dump_ptr(@bias_a)
|
717
|
+
obj[:item_bias] = dump_ptr(@bias_b)
|
718
|
+
|
719
|
+
# mean
|
720
|
+
obj[:global_mean] = @global_mean
|
721
|
+
|
722
|
+
# side info
|
723
|
+
obj[:user_info_map] = @user_info_map
|
724
|
+
obj[:item_info_map] = @item_info_map
|
725
|
+
obj[:user_info_factors] = dump_ptr(@c)
|
726
|
+
obj[:item_info_factors] = dump_ptr(@d)
|
727
|
+
|
728
|
+
# implicit features
|
729
|
+
obj[:add_implicit_features] = @add_implicit_features
|
730
|
+
obj[:user_factors_implicit] = dump_ptr(@ai)
|
731
|
+
obj[:item_factors_implicit] = dump_ptr(@bi)
|
732
|
+
|
733
|
+
unless @implicit
|
734
|
+
obj[:min_rating] = @min_rating
|
735
|
+
obj[:max_rating] = @max_rating
|
736
|
+
end
|
737
|
+
|
738
|
+
obj[:user_means] = dump_ptr(@u_colmeans)
|
739
|
+
|
740
|
+
obj
|
741
|
+
end
|
742
|
+
|
743
|
+
def marshal_load(obj)
|
744
|
+
@implicit = obj[:implicit]
|
745
|
+
|
746
|
+
# options
|
747
|
+
set_params(
|
748
|
+
k: obj[:factors],
|
749
|
+
niter: obj[:epochs],
|
750
|
+
verbose: obj[:verbose],
|
751
|
+
user_bias: !obj[:user_bias].nil?,
|
752
|
+
item_bias: !obj[:item_bias].nil?,
|
753
|
+
add_implicit_features: obj[:add_implicit_features]
|
754
|
+
)
|
755
|
+
|
756
|
+
# factors
|
757
|
+
@user_map = obj[:user_map]
|
758
|
+
@item_map = obj[:item_map]
|
759
|
+
@rated = obj[:rated] || {}
|
760
|
+
@a = load_ptr(obj[:user_factors])
|
761
|
+
@b = load_ptr(obj[:item_factors])
|
762
|
+
|
763
|
+
# bias
|
764
|
+
@bias_a = load_ptr(obj[:user_bias])
|
765
|
+
@bias_b = load_ptr(obj[:item_bias])
|
766
|
+
|
767
|
+
# mean
|
768
|
+
@global_mean = obj[:global_mean]
|
769
|
+
|
770
|
+
# side info
|
771
|
+
@user_info_map = obj[:user_info_map]
|
772
|
+
@item_info_map = obj[:item_info_map]
|
773
|
+
@c = load_ptr(obj[:user_info_factors])
|
774
|
+
@d = load_ptr(obj[:item_info_factors])
|
775
|
+
|
776
|
+
# implicit features
|
777
|
+
@add_implicit_features = obj[:add_implicit_features]
|
778
|
+
@ai = load_ptr(obj[:user_factors_implicit])
|
779
|
+
@bi = load_ptr(obj[:item_factors_implicit])
|
780
|
+
|
781
|
+
unless @implicit
|
782
|
+
@min_rating = obj[:min_rating]
|
783
|
+
@max_rating = obj[:max_rating]
|
784
|
+
end
|
785
|
+
|
786
|
+
@u_colmeans = load_ptr(obj[:user_means])
|
787
|
+
|
788
|
+
@m = @user_map.size
|
789
|
+
@n = @item_map.size
|
790
|
+
@m_u = @user_info_map.size
|
791
|
+
@n_i = @item_info_map.size
|
792
|
+
|
793
|
+
set_implicit_vars if @implicit
|
794
|
+
|
795
|
+
@fit = @m > 0
|
796
|
+
end
|
575
797
|
end
|
576
798
|
end
|
data/lib/cmfrec/version.rb
CHANGED
Binary file
|
data/vendor/libcmfrec.dylib
CHANGED
Binary file
|
data/vendor/libcmfrec.so
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
|
-
email: andrew@
|
14
|
+
email: andrew@ankane.org
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
@@ -20,10 +20,12 @@ files:
|
|
20
20
|
- LICENSE.txt
|
21
21
|
- README.md
|
22
22
|
- lib/cmfrec.rb
|
23
|
+
- lib/cmfrec/data.rb
|
23
24
|
- lib/cmfrec/ffi.rb
|
24
25
|
- lib/cmfrec/recommender.rb
|
25
26
|
- lib/cmfrec/version.rb
|
26
27
|
- vendor/LICENSE.txt
|
28
|
+
- vendor/libcmfrec.arm64.dylib
|
27
29
|
- vendor/libcmfrec.dylib
|
28
30
|
- vendor/libcmfrec.so
|
29
31
|
homepage: https://github.com/ankane/cmfrec
|
@@ -45,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
45
47
|
- !ruby/object:Gem::Version
|
46
48
|
version: '0'
|
47
49
|
requirements: []
|
48
|
-
rubygems_version: 3.
|
50
|
+
rubygems_version: 3.2.22
|
49
51
|
signing_key:
|
50
52
|
specification_version: 4
|
51
53
|
summary: Recommendations for Ruby using collective matrix factorization
|