cmfrec 0.1.1 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/LICENSE.txt +1 -1
- data/README.md +94 -1
- data/lib/cmfrec.rb +8 -1
- data/lib/cmfrec/data.rb +100 -0
- data/lib/cmfrec/ffi.rb +7 -5
- data/lib/cmfrec/recommender.rb +358 -136
- data/lib/cmfrec/version.rb +1 -1
- data/vendor/libcmfrec.arm64.dylib +0 -0
- data/vendor/libcmfrec.dylib +0 -0
- data/vendor/libcmfrec.so +0 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a200b99780aeee83c5a0190e593c3806c4140aa4c096b0ef2c112fd21a858b7
|
4
|
+
data.tar.gz: 27b354dd491ca1d7a728d4ef2318c72c56d76c691286bc267c74d3b8dafd7c5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c1ceefeac9131a804d94b6da78c7a6614f50c15d144841277829e5279ce2583872a681e972e336e21cb82d49466dee9f5f1de6a482fe8a99ab7aa5176ab0e5c
|
7
|
+
data.tar.gz: 930b20b017b92555071699b38d903230da3ca1fc7b8b91b9e2f96ae3ae4dda9c8c289a47117beaa1e2539e5c9d622fe958d431f5106e8ab7c4809ea17e9fb6e8
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
## 0.1.5 (2021-08-10)
|
2
|
+
|
3
|
+
- Fixed issue with `user_recs` and `new_user_recs` returning rated items
|
4
|
+
- Fixed error with `new_user_recs`
|
5
|
+
|
6
|
+
## 0.1.4 (2021-02-04)
|
7
|
+
|
8
|
+
- Added support for saving and loading recommenders
|
9
|
+
- Added `similar_users` and `similar_items`
|
10
|
+
- Improved ARM detection
|
11
|
+
|
12
|
+
## 0.1.3 (2020-12-28)
|
13
|
+
|
14
|
+
- Added ARM shared library for Mac
|
15
|
+
|
16
|
+
## 0.1.2 (2020-12-09)
|
17
|
+
|
18
|
+
- Added `load_movielens` method
|
19
|
+
- Updated cmfrec to 2.4.1
|
20
|
+
|
1
21
|
## 0.1.1 (2020-11-28)
|
2
22
|
|
3
23
|
- Added `predict` method
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -16,7 +16,7 @@ Add this line to your application’s Gemfile:
|
|
16
16
|
gem 'cmfrec'
|
17
17
|
```
|
18
18
|
|
19
|
-
|
19
|
+
For Windows, also follow [these instructions](#windows-installation).
|
20
20
|
|
21
21
|
## Getting Started
|
22
22
|
|
@@ -107,6 +107,73 @@ Get recommendations with only side information
|
|
107
107
|
recommender.new_user_recs([], user_info: {cats: 0, dogs: 2})
|
108
108
|
```
|
109
109
|
|
110
|
+
## Similarity
|
111
|
+
|
112
|
+
Add this line to your application’s Gemfile:
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
gem 'ngt'
|
116
|
+
```
|
117
|
+
|
118
|
+
Get similar users
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
recommender.similar_users(user_id)
|
122
|
+
```
|
123
|
+
|
124
|
+
Get similar items - “users who liked this item also liked”
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
recommender.similar_items(item_id)
|
128
|
+
```
|
129
|
+
|
130
|
+
## Examples
|
131
|
+
|
132
|
+
### MovieLens
|
133
|
+
|
134
|
+
Load the data
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
ratings, user_info, item_info = Cmfrec.load_movielens
|
138
|
+
```
|
139
|
+
|
140
|
+
Create a recommender and get predictions
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
recommender = Cmfrec::Recommender.new(factors: 20)
|
144
|
+
recommender.fit(ratings.first(80000), user_info: user_info, item_info: item_info)
|
145
|
+
recommender.predict(ratings.last(20000))
|
146
|
+
```
|
147
|
+
|
148
|
+
### Ahoy
|
149
|
+
|
150
|
+
[Ahoy](https://github.com/ankane/ahoy) is a great source for implicit feedback
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
views = Ahoy::Event.
|
154
|
+
where(name: "Viewed post").
|
155
|
+
group(:user_id).
|
156
|
+
group("properties->>'post_id'"). # postgres syntax
|
157
|
+
count
|
158
|
+
|
159
|
+
data =
|
160
|
+
views.map do |(user_id, post_id), count|
|
161
|
+
{
|
162
|
+
user_id: user_id,
|
163
|
+
item_id: post_id,
|
164
|
+
value: count
|
165
|
+
}
|
166
|
+
end
|
167
|
+
```
|
168
|
+
|
169
|
+
Create a recommender and get recommended posts for a user
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
recommender = Cmfrec::Recommender.new
|
173
|
+
recommender.fit(data)
|
174
|
+
recommender.user_recs(current_user.id)
|
175
|
+
```
|
176
|
+
|
110
177
|
## Options
|
111
178
|
|
112
179
|
Specify the number of factors and epochs
|
@@ -145,6 +212,24 @@ Or a Rover data frame
|
|
145
212
|
Rover.read_csv("ratings.csv")
|
146
213
|
```
|
147
214
|
|
215
|
+
## Storing Recommenders
|
216
|
+
|
217
|
+
Store the recommender
|
218
|
+
|
219
|
+
```ruby
|
220
|
+
bin = Marshal.dump(recommender)
|
221
|
+
File.binwrite("recommender.bin", bin)
|
222
|
+
```
|
223
|
+
|
224
|
+
> You can save it to a file, database, or any other storage system
|
225
|
+
|
226
|
+
Load a recommender
|
227
|
+
|
228
|
+
```ruby
|
229
|
+
bin = File.binread("recommender.bin")
|
230
|
+
recommender = Marshal.load(bin)
|
231
|
+
```
|
232
|
+
|
148
233
|
## Reference
|
149
234
|
|
150
235
|
Get the global mean
|
@@ -167,6 +252,14 @@ recommender.user_bias
|
|
167
252
|
recommender.item_bias
|
168
253
|
```
|
169
254
|
|
255
|
+
## Windows Installation
|
256
|
+
|
257
|
+
On Windows, build the [cmfrec C shared library](https://github.com/david-cortes/cmfrec#instalation) and set:
|
258
|
+
|
259
|
+
```ruby
|
260
|
+
Cmfrec.ffi_lib = "path/to/cmfrec.dll"
|
261
|
+
```
|
262
|
+
|
170
263
|
## History
|
171
264
|
|
172
265
|
View the [changelog](https://github.com/ankane/cmfrec/blob/master/CHANGELOG.md)
|
data/lib/cmfrec.rb
CHANGED
@@ -3,12 +3,15 @@ require "etc"
|
|
3
3
|
require "fiddle/import"
|
4
4
|
|
5
5
|
# modules
|
6
|
+
require "cmfrec/data"
|
6
7
|
require "cmfrec/recommender"
|
7
8
|
require "cmfrec/version"
|
8
9
|
|
9
10
|
module Cmfrec
|
10
11
|
class Error < StandardError; end
|
11
12
|
|
13
|
+
extend Data
|
14
|
+
|
12
15
|
class << self
|
13
16
|
attr_accessor :ffi_lib
|
14
17
|
end
|
@@ -16,7 +19,11 @@ module Cmfrec
|
|
16
19
|
if Gem.win_platform?
|
17
20
|
"cmfrec.dll"
|
18
21
|
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
19
|
-
"
|
22
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm/i
|
23
|
+
"libcmfrec.arm64.dylib"
|
24
|
+
else
|
25
|
+
"libcmfrec.dylib"
|
26
|
+
end
|
20
27
|
else
|
21
28
|
"libcmfrec.so"
|
22
29
|
end
|
data/lib/cmfrec/data.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
module Cmfrec
|
2
|
+
module Data
|
3
|
+
def load_movielens
|
4
|
+
require "csv"
|
5
|
+
|
6
|
+
data_path = download_file("ml-100k/u.data", "http://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
7
|
+
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
8
|
+
user_path = download_file("ml-100k/u.user", "http://files.grouplens.org/datasets/movielens/ml-100k/u.user",
|
9
|
+
file_hash: "f120e114da2e8cf314fd28f99417c94ae9ddf1cb6db8ce0e4b5995d40e90e62c")
|
10
|
+
item_path = download_file("ml-100k/u.item", "http://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
11
|
+
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
12
|
+
|
13
|
+
# convert u.item to utf-8
|
14
|
+
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
|
15
|
+
|
16
|
+
user_info = []
|
17
|
+
CSV.foreach(user_path, col_sep: "|") do |row|
|
18
|
+
user = {user_id: row[0].to_i}
|
19
|
+
10.times do |i|
|
20
|
+
user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
|
21
|
+
end
|
22
|
+
user_info << user
|
23
|
+
end
|
24
|
+
|
25
|
+
item_info = []
|
26
|
+
movies = {}
|
27
|
+
genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
|
28
|
+
CSV.parse(movies_str, col_sep: "|", converters: [:numeric]) do |row|
|
29
|
+
movies[row[0]] = row[1]
|
30
|
+
item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
|
31
|
+
genres.each_with_index do |genre, i|
|
32
|
+
item[:"genre_#{genre}"] = row[i + 5]
|
33
|
+
end
|
34
|
+
item_info << item
|
35
|
+
end
|
36
|
+
|
37
|
+
data = []
|
38
|
+
CSV.foreach(data_path, col_sep: "\t", converters: [:numeric]) do |row|
|
39
|
+
data << {
|
40
|
+
user_id: row[0],
|
41
|
+
item_id: movies[row[1]],
|
42
|
+
rating: row[2]
|
43
|
+
}
|
44
|
+
end
|
45
|
+
|
46
|
+
[data, user_info, item_info]
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def download_file(fname, origin, file_hash:)
|
52
|
+
require "fileutils"
|
53
|
+
|
54
|
+
# TODO handle this better
|
55
|
+
raise "No HOME" unless ENV["HOME"]
|
56
|
+
dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
|
57
|
+
FileUtils.mkdir_p(File.dirname(dest))
|
58
|
+
|
59
|
+
return dest if File.exist?(dest)
|
60
|
+
|
61
|
+
require "digest"
|
62
|
+
require "net/http"
|
63
|
+
require "tmpdir"
|
64
|
+
|
65
|
+
temp_path = "#{Dir.tmpdir}/cmfrec-#{Time.now.to_f}" # TODO better name
|
66
|
+
|
67
|
+
digest = Digest::SHA2.new
|
68
|
+
|
69
|
+
uri = URI(origin)
|
70
|
+
|
71
|
+
# Net::HTTP automatically adds Accept-Encoding for compression
|
72
|
+
# of response bodies and automatically decompresses gzip
|
73
|
+
# and deflate responses unless a Range header was sent.
|
74
|
+
# https://ruby-doc.org/stdlib-2.6.4/libdoc/net/http/rdoc/Net/HTTP.html
|
75
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
|
76
|
+
request = Net::HTTP::Get.new(uri)
|
77
|
+
|
78
|
+
puts "Downloading data from #{origin}"
|
79
|
+
File.open(temp_path, "wb") do |f|
|
80
|
+
http.request(request) do |response|
|
81
|
+
response.read_body do |chunk|
|
82
|
+
f.write(chunk)
|
83
|
+
digest.update(chunk)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
if digest.hexdigest != file_hash
|
90
|
+
raise Error, "Bad hash: #{digest.hexdigest}"
|
91
|
+
end
|
92
|
+
|
93
|
+
puts "Hash verified: #{file_hash}"
|
94
|
+
|
95
|
+
FileUtils.mv(temp_path, dest)
|
96
|
+
|
97
|
+
dest
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
data/lib/cmfrec/ffi.rb
CHANGED
@@ -10,17 +10,19 @@ module Cmfrec
|
|
10
10
|
raise e
|
11
11
|
end
|
12
12
|
|
13
|
+
# https://github.com/david-cortes/cmfrec/blob/master/src/cmfrec.h
|
14
|
+
|
13
15
|
typealias "bool", "char"
|
14
16
|
# determined by CMakeLists.txt
|
15
17
|
typealias "int_t", "int"
|
16
18
|
typealias "real_t", "double"
|
17
19
|
|
18
|
-
extern "int_t fit_collective_explicit_als(real_t *biasA, real_t *biasB, real_t *A, real_t *B, real_t *C, real_t *D, real_t *Ai, real_t *Bi, bool add_implicit_features, bool reset_values, int_t seed, real_t *glob_mean, real_t *U_colmeans, real_t *I_colmeans, int_t m, int_t n, int_t k, int_t
|
19
|
-
extern "int_t fit_collective_implicit_als(real_t *A, real_t *B, real_t *C, real_t *D, bool reset_values, int_t seed, real_t *U_colmeans, real_t *I_colmeans, int_t m, int_t n, int_t k, int_t
|
20
|
-
extern "int_t factors_collective_explicit_single(real_t *a_vec, real_t *a_bias,
|
21
|
-
extern "int_t factors_collective_implicit_single(real_t *a_vec,
|
22
|
-
extern "void predict_multiple(real_t *restrict A, int_t k_user, real_t *restrict B, int_t k_item, real_t *restrict biasA, real_t *restrict biasB, real_t glob_mean, int_t k, int_t k_main, int_t m, int_t n, int_t predA[], int_t predB[], size_t nnz, real_t *restrict outp, int_t nthreads)"
|
20
|
+
extern "int_t fit_collective_explicit_als(real_t *restrict biasA, real_t *restrict biasB, real_t *restrict A, real_t *restrict B, real_t *restrict C, real_t *restrict D, real_t *restrict Ai, real_t *restrict Bi, bool add_implicit_features, bool reset_values, int_t seed, real_t *restrict glob_mean, real_t *restrict U_colmeans, real_t *restrict I_colmeans, int_t m, int_t n, int_t k, int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz, real_t *restrict Xfull, real_t *restrict weight, bool user_bias, bool item_bias, bool center, real_t lam, real_t *restrict lam_unique, real_t l1_lam, real_t *restrict l1_lam_unique, bool scale_lam, bool scale_lam_sideinfo, real_t *restrict U, int_t m_u, int_t p, real_t *restrict II, int_t n_i, int_t q, int_t U_row[], int_t U_col[], real_t *restrict U_sp, size_t nnz_U, int_t I_row[], int_t I_col[], real_t *restrict I_sp, size_t nnz_I, bool NA_as_zero_X, bool NA_as_zero_U, bool NA_as_zero_I, int_t k_main, int_t k_user, int_t k_item, real_t w_main, real_t w_user, real_t w_item, real_t w_implicit, int_t niter, int_t nthreads, bool verbose, bool handle_interrupt, bool use_cg, int_t max_cg_steps, bool finalize_chol, bool nonneg, int_t max_cd_steps, bool nonneg_C, bool nonneg_D, bool precompute_for_predictions, bool include_all_X, real_t *restrict B_plus_bias, real_t *restrict precomputedBtB, real_t *restrict precomputedTransBtBinvBt, real_t *restrict precomputedBtXbias, real_t *restrict precomputedBeTBeChol, real_t *restrict precomputedBiTBi, real_t *restrict precomputedTransCtCinvCt, real_t *restrict precomputedCtCw)"
|
21
|
+
extern "int_t fit_collective_implicit_als(real_t *restrict A, real_t *restrict B, real_t *restrict C, real_t *restrict D, bool reset_values, int_t seed, real_t *restrict U_colmeans, real_t *restrict I_colmeans, int_t m, int_t n, int_t k, int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz, real_t lam, real_t *restrict lam_unique, real_t l1_lam, real_t *restrict l1_lam_unique, real_t *restrict U, int_t m_u, int_t p, real_t *restrict II, int_t n_i, int_t q, int_t U_row[], int_t U_col[], real_t *restrict U_sp, size_t nnz_U, int_t I_row[], int_t I_col[], real_t *restrict I_sp, size_t nnz_I, bool NA_as_zero_U, bool NA_as_zero_I, int_t k_main, int_t k_user, int_t k_item, real_t w_main, real_t w_user, real_t w_item, real_t *restrict w_main_multiplier, real_t alpha, bool adjust_weight, bool apply_log_transf, int_t niter, int_t nthreads, bool verbose, bool handle_interrupt, bool use_cg, int_t max_cg_steps, bool finalize_chol, bool nonneg, int_t max_cd_steps, bool nonneg_C, bool nonneg_D, bool precompute_for_predictions, real_t *restrict precomputedBtB, real_t *restrict precomputedBeTBe, real_t *restrict precomputedBeTBeChol)"
|
22
|
+
extern "int_t factors_collective_explicit_single(real_t *restrict a_vec, real_t *restrict a_bias,real_t *restrict u_vec, int_t p,real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,real_t *restrict u_bin_vec, int_t pbin,bool NA_as_zero_U, bool NA_as_zero_X,bool nonneg,real_t *restrict C, real_t *restrict Cb,real_t glob_mean, real_t *restrict biasB,real_t *restrict U_colmeans,real_t *restrict Xa, int_t ixB[], size_t nnz,real_t *restrict Xa_dense, int_t n,real_t *restrict weight,real_t *restrict B,real_t *restrict Bi, bool add_implicit_features,int_t k, int_t k_user, int_t k_item, int_t k_main,real_t lam, real_t *restrict lam_unique,real_t l1_lam, real_t *restrict l1_lam_unique,bool scale_lam, bool scale_lam_sideinfo,real_t w_main, real_t w_user, real_t w_implicit,int_t n_max, bool include_all_X,real_t *restrict BtB,real_t *restrict TransBtBinvBt,real_t *restrict BtXbias,real_t *restrict BeTBeChol,real_t *restrict BiTBi,real_t *restrict CtCw,real_t *restrict TransCtCinvCt,real_t *restrict B_plus_bias)"
|
23
|
+
extern "int_t factors_collective_implicit_single(real_t *restrict a_vec,real_t *restrict u_vec, int_t p,real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,bool NA_as_zero_U,bool nonneg,real_t *restrict U_colmeans,real_t *restrict B, int_t n, real_t *restrict C,real_t *restrict Xa, int_t ixB[], size_t nnz,int_t k, int_t k_user, int_t k_item, int_t k_main,real_t lam, real_t l1_lam, real_t alpha, real_t w_main, real_t w_user,real_t w_main_multiplier,bool apply_log_transf,real_t *restrict BeTBe,real_t *restrict BtB,real_t *restrict BeTBeChol)"
|
23
24
|
extern "int_t predict_X_old_collective_explicit(int_t row[], int_t col[], real_t *restrict predicted, size_t n_predict, real_t *restrict A, real_t *restrict biasA, real_t *restrict B, real_t *restrict biasB, real_t glob_mean, int_t k, int_t k_user, int_t k_item, int_t k_main, int_t m, int_t n_max, int_t nthreads)"
|
25
|
+
extern "int_t predict_X_old_collective_implicit(int_t row[], int_t col[], real_t *restrict predicted, size_t n_predict, real_t *restrict A, real_t *restrict B, int_t k, int_t k_user, int_t k_item, int_t k_main, int_t m, int_t n, int_t nthreads)"
|
24
26
|
extern "int_t topN(real_t *restrict a_vec, int_t k_user, real_t *restrict B, int_t k_item, real_t *restrict biasB, real_t glob_mean, real_t biasA, int_t k, int_t k_main, int_t *restrict include_ix, int_t n_include, int_t *restrict exclude_ix, int_t n_exclude, int_t *restrict outp_ix, real_t *restrict outp_score, int_t n_top, int_t n, int_t nthreads)"
|
25
27
|
end
|
26
28
|
end
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -11,29 +11,199 @@ module Cmfrec
|
|
11
11
|
item_bias: item_bias,
|
12
12
|
add_implicit_features: add_implicit_features
|
13
13
|
)
|
14
|
+
|
15
|
+
@fit = false
|
16
|
+
@user_map = {}
|
17
|
+
@item_map = {}
|
18
|
+
@user_info_map = {}
|
19
|
+
@item_info_map = {}
|
14
20
|
end
|
15
21
|
|
16
22
|
def fit(train_set, user_info: nil, item_info: nil)
|
23
|
+
reset
|
24
|
+
partial_fit(train_set, user_info: user_info, item_info: item_info)
|
25
|
+
end
|
26
|
+
|
27
|
+
def predict(data)
|
28
|
+
check_fit
|
29
|
+
|
30
|
+
data = to_dataset(data)
|
31
|
+
|
32
|
+
u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
|
33
|
+
i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
|
34
|
+
|
35
|
+
row = int_ptr(u)
|
36
|
+
col = int_ptr(i)
|
37
|
+
n_predict = data.size
|
38
|
+
predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
|
39
|
+
|
40
|
+
if @implicit
|
41
|
+
check_status FFI.predict_X_old_collective_implicit(
|
42
|
+
row, col, predicted, n_predict,
|
43
|
+
@a, @b,
|
44
|
+
@k, @k_user, @k_item, @k_main,
|
45
|
+
@m, @n,
|
46
|
+
@nthreads
|
47
|
+
)
|
48
|
+
else
|
49
|
+
check_status FFI.predict_X_old_collective_explicit(
|
50
|
+
row, col, predicted, n_predict,
|
51
|
+
@a, @bias_a,
|
52
|
+
@b, @bias_b,
|
53
|
+
@global_mean,
|
54
|
+
@k, @k_user, @k_item, @k_main,
|
55
|
+
@m, @n,
|
56
|
+
@nthreads
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
predictions = real_array(predicted)
|
61
|
+
predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
|
62
|
+
predictions
|
63
|
+
end
|
64
|
+
|
65
|
+
def user_recs(user_id, count: 5, item_ids: nil)
|
66
|
+
check_fit
|
67
|
+
user = @user_map[user_id]
|
68
|
+
|
69
|
+
if user
|
70
|
+
# TODO use top_n for item_ids as well
|
71
|
+
if item_ids
|
72
|
+
# remove missing ids
|
73
|
+
item_ids = item_ids.select { |v| @item_map[v] }
|
74
|
+
|
75
|
+
data = item_ids.map { |v| {user_id: user_id, item_id: v} }
|
76
|
+
scores = predict(data)
|
77
|
+
|
78
|
+
item_ids.zip(scores).map do |item_id, score|
|
79
|
+
{item_id: item_id, score: score}
|
80
|
+
end
|
81
|
+
else
|
82
|
+
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
83
|
+
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
84
|
+
# @rated[user] will be nil for recommenders saved before 0.1.5
|
85
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys)
|
86
|
+
end
|
87
|
+
else
|
88
|
+
# no items if user is unknown
|
89
|
+
# TODO maybe most popular items
|
90
|
+
[]
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
# TODO add item_ids
|
95
|
+
def new_user_recs(data, count: 5, user_info: nil)
|
96
|
+
check_fit
|
97
|
+
|
98
|
+
a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
|
99
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated)
|
100
|
+
end
|
101
|
+
|
102
|
+
def user_factors
|
103
|
+
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
|
104
|
+
end
|
105
|
+
|
106
|
+
def item_factors
|
107
|
+
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
|
108
|
+
end
|
109
|
+
|
110
|
+
def user_bias
|
111
|
+
read_bias(@bias_a) if @bias_a
|
112
|
+
end
|
113
|
+
|
114
|
+
def item_bias
|
115
|
+
read_bias(@bias_b) if @bias_b
|
116
|
+
end
|
117
|
+
|
118
|
+
def similar_items(item_id, count: 5)
|
119
|
+
check_fit
|
120
|
+
similar(item_id, @item_map, item_factors, count, item_index)
|
121
|
+
end
|
122
|
+
alias_method :item_recs, :similar_items
|
123
|
+
|
124
|
+
def similar_users(user_id, count: 5)
|
125
|
+
check_fit
|
126
|
+
similar(user_id, @user_map, user_factors, count, user_index)
|
127
|
+
end
|
128
|
+
|
129
|
+
private
|
130
|
+
|
131
|
+
def user_index
|
132
|
+
@user_index ||= create_index(user_factors)
|
133
|
+
end
|
134
|
+
|
135
|
+
def item_index
|
136
|
+
@item_index ||= create_index(item_factors)
|
137
|
+
end
|
138
|
+
|
139
|
+
def create_index(factors)
|
140
|
+
require "ngt"
|
141
|
+
|
142
|
+
index = Ngt::Index.new(@k, distance_type: "Cosine")
|
143
|
+
index.batch_insert(factors)
|
144
|
+
index
|
145
|
+
end
|
146
|
+
|
147
|
+
# TODO include bias
|
148
|
+
def similar(id, map, factors, count, index)
|
149
|
+
i = map[id]
|
150
|
+
if i
|
151
|
+
keys = map.keys
|
152
|
+
result = index.search(factors[i], size: count + 1)[1..-1]
|
153
|
+
result.map do |v|
|
154
|
+
{
|
155
|
+
# ids from batch_insert start at 1 instead of 0
|
156
|
+
item_id: keys[v[:id] - 1],
|
157
|
+
# convert cosine distance to cosine similarity
|
158
|
+
score: 1 - v[:distance]
|
159
|
+
}
|
160
|
+
end
|
161
|
+
else
|
162
|
+
[]
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
def reset
|
167
|
+
@fit = false
|
168
|
+
@user_map.clear
|
169
|
+
@item_map.clear
|
170
|
+
@user_info_map.clear
|
171
|
+
@item_info_map.clear
|
172
|
+
@user_index = nil
|
173
|
+
@item_index = nil
|
174
|
+
end
|
175
|
+
|
176
|
+
# TODO resize pointers as needed and reset values for new memory
|
177
|
+
def partial_fit(train_set, user_info: nil, item_info: nil)
|
17
178
|
train_set = to_dataset(train_set)
|
18
179
|
|
19
|
-
@
|
180
|
+
unless @fit
|
181
|
+
@implicit = !train_set.any? { |v| v[:rating] }
|
182
|
+
end
|
183
|
+
|
20
184
|
unless @implicit
|
21
185
|
ratings = train_set.map { |o| o[:rating] }
|
22
186
|
check_ratings(ratings)
|
23
187
|
end
|
24
188
|
|
25
189
|
check_training_set(train_set)
|
26
|
-
|
190
|
+
update_maps(train_set)
|
27
191
|
|
28
192
|
x_row = []
|
29
193
|
x_col = []
|
30
194
|
x_val = []
|
31
195
|
value_key = @implicit ? :value : :rating
|
196
|
+
@rated = Hash.new { |hash, key| hash[key] = {} }
|
32
197
|
train_set.each do |v|
|
33
|
-
|
34
|
-
|
198
|
+
u = @user_map[v[:user_id]]
|
199
|
+
i = @item_map[v[:item_id]]
|
200
|
+
@rated[u][i] = true
|
201
|
+
|
202
|
+
x_row << u
|
203
|
+
x_col << i
|
35
204
|
x_val << (v[value_key] || 1)
|
36
205
|
end
|
206
|
+
@rated.default = nil
|
37
207
|
|
38
208
|
@m = @user_map.size
|
39
209
|
@n = @item_map.size
|
@@ -46,20 +216,20 @@ module Cmfrec
|
|
46
216
|
x_full = nil
|
47
217
|
weight = nil
|
48
218
|
lam_unique = nil
|
219
|
+
l1_lambda = 0
|
220
|
+
l1_lam_unique = nil
|
49
221
|
|
50
222
|
uu = nil
|
51
223
|
ii = nil
|
52
224
|
|
53
|
-
|
225
|
+
# side info
|
54
226
|
u_row, u_col, u_sp, nnz_u, @m_u, p_ = process_info(user_info, @user_map, @user_info_map, :user_id)
|
55
|
-
|
56
|
-
@item_info_map = {}
|
57
227
|
i_row, i_col, i_sp, nnz_i, @n_i, q = process_info(item_info, @item_map, @item_info_map, :item_id)
|
58
228
|
|
59
229
|
@precompute_for_predictions = false
|
60
230
|
|
61
231
|
# initialize w/ normal distribution
|
62
|
-
reset_values =
|
232
|
+
reset_values = !@fit
|
63
233
|
|
64
234
|
@a = Fiddle::Pointer.malloc([@m, @m_u].max * (@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
65
235
|
@b = Fiddle::Pointer.malloc([@n, @n_i].max * (@k_item + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
@@ -73,16 +243,7 @@ module Cmfrec
|
|
73
243
|
i_colmeans = Fiddle::Pointer.malloc(q * Fiddle::SIZEOF_DOUBLE)
|
74
244
|
|
75
245
|
if @implicit
|
76
|
-
|
77
|
-
@alpha = 1.0
|
78
|
-
@adjust_weight = false # downweight?
|
79
|
-
@apply_log_transf = false
|
80
|
-
|
81
|
-
# different defaults
|
82
|
-
@lambda_ = 1e0
|
83
|
-
@w_user = 10
|
84
|
-
@w_item = 10
|
85
|
-
@finalize_chol = false
|
246
|
+
set_implicit_vars
|
86
247
|
|
87
248
|
args = [
|
88
249
|
@a, @b,
|
@@ -92,6 +253,7 @@ module Cmfrec
|
|
92
253
|
@m, @n, @k,
|
93
254
|
x_row, x_col, x, nnz,
|
94
255
|
@lambda_, lam_unique,
|
256
|
+
l1_lambda, l1_lam_unique,
|
95
257
|
uu, @m_u, p_,
|
96
258
|
ii, @n_i, q,
|
97
259
|
u_row, u_col, u_sp, nnz_u,
|
@@ -125,6 +287,10 @@ module Cmfrec
|
|
125
287
|
|
126
288
|
glob_mean = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
127
289
|
|
290
|
+
center = true
|
291
|
+
scale_lam = false
|
292
|
+
scale_lam_sideinfo = false
|
293
|
+
|
128
294
|
args = [
|
129
295
|
@bias_a, @bias_b,
|
130
296
|
@a, @b,
|
@@ -138,8 +304,10 @@ module Cmfrec
|
|
138
304
|
x_row, x_col, x, nnz,
|
139
305
|
x_full,
|
140
306
|
weight,
|
141
|
-
@user_bias, @item_bias,
|
307
|
+
@user_bias, @item_bias, center,
|
142
308
|
@lambda_, lam_unique,
|
309
|
+
l1_lambda, l1_lam_unique,
|
310
|
+
scale_lam, scale_lam_sideinfo,
|
143
311
|
uu, @m_u, p_,
|
144
312
|
ii, @n_i, q,
|
145
313
|
u_row, u_col, u_sp, nnz_u,
|
@@ -155,6 +323,7 @@ module Cmfrec
|
|
155
323
|
nil, #B_plus_bias,
|
156
324
|
nil, #precomputedBtB,
|
157
325
|
nil, #precomputedTransBtBinvBt,
|
326
|
+
nil, #precomputedBtXbias
|
158
327
|
nil, #precomputedBeTBeChol,
|
159
328
|
nil, #precomputedBiTBi,
|
160
329
|
nil, #precomputedTransCtCinvCt,
|
@@ -165,109 +334,13 @@ module Cmfrec
|
|
165
334
|
@global_mean = real_array(glob_mean).first
|
166
335
|
end
|
167
336
|
|
168
|
-
@u_colmeans =
|
169
|
-
@i_colmeans = real_array(i_colmeans)
|
170
|
-
@u_colmeans_ptr = u_colmeans
|
171
|
-
|
172
|
-
self
|
173
|
-
end
|
174
|
-
|
175
|
-
def predict(data)
|
176
|
-
check_fit
|
177
|
-
|
178
|
-
data = to_dataset(data)
|
179
|
-
|
180
|
-
u = data.map { |v| @user_map[v[:user_id]] || -1 }
|
181
|
-
i = data.map { |v| @item_map[v[:item_id]] || -1 }
|
182
|
-
|
183
|
-
pred_a = int_ptr(u)
|
184
|
-
pred_b = int_ptr(i)
|
185
|
-
nnz = data.size
|
186
|
-
outp = Fiddle::Pointer.malloc(nnz * Fiddle::SIZEOF_DOUBLE)
|
187
|
-
|
188
|
-
FFI.predict_multiple(
|
189
|
-
@a, @k_user,
|
190
|
-
@b, @k_item,
|
191
|
-
@bias_a, @bias_b,
|
192
|
-
@global_mean,
|
193
|
-
@k, @k_main,
|
194
|
-
@m, @n,
|
195
|
-
pred_a, pred_b, nnz,
|
196
|
-
outp,
|
197
|
-
@nthreads
|
198
|
-
)
|
199
|
-
|
200
|
-
predictions = real_array(outp)
|
201
|
-
|
202
|
-
nan_index = predictions.each_index.select { |j| predictions[j].nan? }
|
203
|
-
if nan_index.any?
|
204
|
-
# TODO improve performance
|
205
|
-
user_bias = send(:user_bias)
|
206
|
-
item_bias = send(:item_bias)
|
207
|
-
nan_index.each do |j|
|
208
|
-
v = @global_mean
|
209
|
-
v += user_bias[u[j]] if user_bias && u[j] != -1
|
210
|
-
v += item_bias[i[j]] if item_bias && i[j] != -1
|
211
|
-
predictions[j] = v
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
predictions
|
216
|
-
end
|
217
|
-
|
218
|
-
def user_recs(user_id, count: 5, item_ids: nil)
|
219
|
-
check_fit
|
220
|
-
user = @user_map[user_id]
|
221
|
-
|
222
|
-
if user
|
223
|
-
if item_ids
|
224
|
-
# remove missing ids
|
225
|
-
item_ids = item_ids.select { |v| @item_map[v] }
|
226
|
-
|
227
|
-
data = item_ids.map { |v| {user_id: user_id, item_id: v} }
|
228
|
-
scores = predict(data)
|
229
|
-
|
230
|
-
item_ids.zip(scores).map do |item_id, score|
|
231
|
-
{item_id: item_id, score: score}
|
232
|
-
end
|
233
|
-
else
|
234
|
-
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
235
|
-
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
236
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
237
|
-
end
|
238
|
-
else
|
239
|
-
# no items if user is unknown
|
240
|
-
# TODO maybe most popular items
|
241
|
-
[]
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
# TODO add item_ids
|
246
|
-
def new_user_recs(data, count: 5, user_info: nil)
|
247
|
-
check_fit
|
248
|
-
|
249
|
-
a_vec, a_bias = factors_warm(data, user_info: user_info)
|
250
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
251
|
-
end
|
252
|
-
|
253
|
-
def user_factors
|
254
|
-
read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
|
255
|
-
end
|
256
|
-
|
257
|
-
def item_factors
|
258
|
-
read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
|
259
|
-
end
|
337
|
+
@u_colmeans = u_colmeans
|
260
338
|
|
261
|
-
|
262
|
-
read_bias(@bias_a) if @bias_a
|
263
|
-
end
|
339
|
+
@fit = true
|
264
340
|
|
265
|
-
|
266
|
-
read_bias(@bias_b) if @bias_b
|
341
|
+
self
|
267
342
|
end
|
268
343
|
|
269
|
-
private
|
270
|
-
|
271
344
|
def set_params(
|
272
345
|
k: 40, lambda_: 1e+1, method: "als", use_cg: true, user_bias: true,
|
273
346
|
item_bias: true, add_implicit_features: false,
|
@@ -324,15 +397,14 @@ module Cmfrec
|
|
324
397
|
@nthreads = nthreads
|
325
398
|
end
|
326
399
|
|
327
|
-
def
|
328
|
-
|
329
|
-
|
400
|
+
def update_maps(train_set)
|
401
|
+
raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
|
402
|
+
raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
|
330
403
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
@item_map = item_ids.zip(item_ids.size.times).to_h
|
404
|
+
train_set.each do |v|
|
405
|
+
@user_map[v[:user_id]] ||= @user_map.size
|
406
|
+
@item_map[v[:item_id]] ||= @item_map.size
|
407
|
+
end
|
336
408
|
end
|
337
409
|
|
338
410
|
def check_ratings(ratings)
|
@@ -349,7 +421,7 @@ module Cmfrec
|
|
349
421
|
end
|
350
422
|
|
351
423
|
def check_fit
|
352
|
-
raise "Not fit" unless
|
424
|
+
raise "Not fit" unless @fit
|
353
425
|
end
|
354
426
|
|
355
427
|
def to_dataset(dataset)
|
@@ -386,11 +458,22 @@ module Cmfrec
|
|
386
458
|
real_array(ptr)
|
387
459
|
end
|
388
460
|
|
389
|
-
def top_n(a_vec:, a_bias:, count:)
|
461
|
+
def top_n(a_vec:, a_bias:, count:, rated: nil)
|
390
462
|
include_ix = nil
|
391
463
|
n_include = 0
|
392
|
-
|
393
|
-
|
464
|
+
|
465
|
+
if rated
|
466
|
+
# assumes rated is unique and all items are known
|
467
|
+
# calling code is responsible for this
|
468
|
+
exclude_ix = int_ptr(rated)
|
469
|
+
n_exclude = rated.size
|
470
|
+
remaining = @item_map.size - n_exclude
|
471
|
+
return [] if remaining == 0
|
472
|
+
count = remaining if remaining < count
|
473
|
+
else
|
474
|
+
exclude_ix = nil
|
475
|
+
n_exclude = 0
|
476
|
+
end
|
394
477
|
|
395
478
|
outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
|
396
479
|
outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
|
@@ -420,6 +503,16 @@ module Cmfrec
|
|
420
503
|
data = to_dataset(data)
|
421
504
|
user_info = to_dataset(user_info) if user_info
|
422
505
|
|
506
|
+
# remove unknown items
|
507
|
+
data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
|
508
|
+
|
509
|
+
if unknown_data.any?
|
510
|
+
# TODO warn for unknown items?
|
511
|
+
# warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
|
512
|
+
end
|
513
|
+
|
514
|
+
item_ids = data.map { |d| @item_map[d[:item_id]] }
|
515
|
+
|
423
516
|
nnz = data.size
|
424
517
|
a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
425
518
|
bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
@@ -448,6 +541,8 @@ module Cmfrec
|
|
448
541
|
|
449
542
|
weight = nil
|
450
543
|
lam_unique = nil
|
544
|
+
l1_lambda = 0
|
545
|
+
l1_lam_unique = nil
|
451
546
|
n_max = @n
|
452
547
|
|
453
548
|
if data.any?
|
@@ -458,7 +553,7 @@ module Cmfrec
|
|
458
553
|
check_ratings(ratings)
|
459
554
|
end
|
460
555
|
xa = real_ptr(ratings)
|
461
|
-
x_col = int_ptr(
|
556
|
+
x_col = int_ptr(item_ids)
|
462
557
|
else
|
463
558
|
xa = nil
|
464
559
|
x_col = nil
|
@@ -472,11 +567,11 @@ module Cmfrec
|
|
472
567
|
u_vec_sp, u_vec_x_col, nnz_u_vec,
|
473
568
|
@na_as_zero_user,
|
474
569
|
@nonneg,
|
475
|
-
@
|
570
|
+
@u_colmeans,
|
476
571
|
@b, @n, @c,
|
477
572
|
xa, x_col, nnz,
|
478
573
|
@k, @k_user, @k_item, @k_main,
|
479
|
-
@lambda_, @alpha,
|
574
|
+
@lambda_, l1_lambda, @alpha,
|
480
575
|
@w_main, @w_user, @w_main_multiplier,
|
481
576
|
@apply_log_transf,
|
482
577
|
nil, #BeTBe,
|
@@ -487,6 +582,9 @@ module Cmfrec
|
|
487
582
|
else
|
488
583
|
cb = nil
|
489
584
|
|
585
|
+
scale_lam = false
|
586
|
+
scale_lam_sideinfo = false
|
587
|
+
|
490
588
|
args = [
|
491
589
|
a_vec, bias_a,
|
492
590
|
u_vec, p_,
|
@@ -495,17 +593,20 @@ module Cmfrec
|
|
495
593
|
@na_as_zero_user, @na_as_zero,
|
496
594
|
@nonneg,
|
497
595
|
@c, cb,
|
498
|
-
@global_mean, @bias_b, @
|
596
|
+
@global_mean, @bias_b, @u_colmeans,
|
499
597
|
xa, x_col, nnz, xa_dense,
|
500
598
|
@n, weight, @b, @bi,
|
501
599
|
@add_implicit_features,
|
502
600
|
@k, @k_user, @k_item, @k_main,
|
503
601
|
@lambda_, lam_unique,
|
602
|
+
l1_lambda, l1_lam_unique,
|
603
|
+
scale_lam, scale_lam_sideinfo,
|
504
604
|
@w_main, @w_user, @w_implicit,
|
505
605
|
n_max,
|
506
606
|
@include_all_x,
|
507
|
-
nil, #TransBtBinvBt,
|
508
607
|
nil, #BtB,
|
608
|
+
nil, #TransBtBinvBt,
|
609
|
+
nil, #BtXbias,
|
509
610
|
nil, #BeTBeChol,
|
510
611
|
nil, #BiTBi,
|
511
612
|
nil, #CtCw,
|
@@ -515,7 +616,7 @@ module Cmfrec
|
|
515
616
|
check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
|
516
617
|
end
|
517
618
|
|
518
|
-
[a_vec, real_array(bias_a).first]
|
619
|
+
[a_vec, real_array(bias_a).first, item_ids.uniq]
|
519
620
|
end
|
520
621
|
|
521
622
|
# convert boolean to int
|
@@ -572,5 +673,126 @@ module Cmfrec
|
|
572
673
|
def real_array(ptr)
|
573
674
|
ptr.to_s(ptr.size).unpack("d*")
|
574
675
|
end
|
676
|
+
|
677
|
+
def set_implicit_vars
|
678
|
+
@w_main_multiplier = 1.0
|
679
|
+
@alpha = 1.0
|
680
|
+
@adjust_weight = false # downweight?
|
681
|
+
@apply_log_transf = false
|
682
|
+
|
683
|
+
# different defaults
|
684
|
+
@lambda_ = 1e0
|
685
|
+
@w_user = 10
|
686
|
+
@w_item = 10
|
687
|
+
@finalize_chol = false
|
688
|
+
end
|
689
|
+
|
690
|
+
def dump_ptr(ptr)
|
691
|
+
ptr.to_s(ptr.size) if ptr
|
692
|
+
end
|
693
|
+
|
694
|
+
def load_ptr(str)
|
695
|
+
Fiddle::Pointer[str] if str
|
696
|
+
end
|
697
|
+
|
698
|
+
def marshal_dump
|
699
|
+
obj = {
|
700
|
+
implicit: @implicit
|
701
|
+
}
|
702
|
+
|
703
|
+
# options
|
704
|
+
obj[:factors] = @k
|
705
|
+
obj[:epochs] = @niter
|
706
|
+
obj[:verbose] = @verbose
|
707
|
+
|
708
|
+
# factors
|
709
|
+
obj[:user_map] = @user_map
|
710
|
+
obj[:item_map] = @item_map
|
711
|
+
obj[:rated] = @rated
|
712
|
+
obj[:user_factors] = dump_ptr(@a)
|
713
|
+
obj[:item_factors] = dump_ptr(@b)
|
714
|
+
|
715
|
+
# bias
|
716
|
+
obj[:user_bias] = dump_ptr(@bias_a)
|
717
|
+
obj[:item_bias] = dump_ptr(@bias_b)
|
718
|
+
|
719
|
+
# mean
|
720
|
+
obj[:global_mean] = @global_mean
|
721
|
+
|
722
|
+
# side info
|
723
|
+
obj[:user_info_map] = @user_info_map
|
724
|
+
obj[:item_info_map] = @item_info_map
|
725
|
+
obj[:user_info_factors] = dump_ptr(@c)
|
726
|
+
obj[:item_info_factors] = dump_ptr(@d)
|
727
|
+
|
728
|
+
# implicit features
|
729
|
+
obj[:add_implicit_features] = @add_implicit_features
|
730
|
+
obj[:user_factors_implicit] = dump_ptr(@ai)
|
731
|
+
obj[:item_factors_implicit] = dump_ptr(@bi)
|
732
|
+
|
733
|
+
unless @implicit
|
734
|
+
obj[:min_rating] = @min_rating
|
735
|
+
obj[:max_rating] = @max_rating
|
736
|
+
end
|
737
|
+
|
738
|
+
obj[:user_means] = dump_ptr(@u_colmeans)
|
739
|
+
|
740
|
+
obj
|
741
|
+
end
|
742
|
+
|
743
|
+
def marshal_load(obj)
|
744
|
+
@implicit = obj[:implicit]
|
745
|
+
|
746
|
+
# options
|
747
|
+
set_params(
|
748
|
+
k: obj[:factors],
|
749
|
+
niter: obj[:epochs],
|
750
|
+
verbose: obj[:verbose],
|
751
|
+
user_bias: !obj[:user_bias].nil?,
|
752
|
+
item_bias: !obj[:item_bias].nil?,
|
753
|
+
add_implicit_features: obj[:add_implicit_features]
|
754
|
+
)
|
755
|
+
|
756
|
+
# factors
|
757
|
+
@user_map = obj[:user_map]
|
758
|
+
@item_map = obj[:item_map]
|
759
|
+
@rated = obj[:rated] || {}
|
760
|
+
@a = load_ptr(obj[:user_factors])
|
761
|
+
@b = load_ptr(obj[:item_factors])
|
762
|
+
|
763
|
+
# bias
|
764
|
+
@bias_a = load_ptr(obj[:user_bias])
|
765
|
+
@bias_b = load_ptr(obj[:item_bias])
|
766
|
+
|
767
|
+
# mean
|
768
|
+
@global_mean = obj[:global_mean]
|
769
|
+
|
770
|
+
# side info
|
771
|
+
@user_info_map = obj[:user_info_map]
|
772
|
+
@item_info_map = obj[:item_info_map]
|
773
|
+
@c = load_ptr(obj[:user_info_factors])
|
774
|
+
@d = load_ptr(obj[:item_info_factors])
|
775
|
+
|
776
|
+
# implicit features
|
777
|
+
@add_implicit_features = obj[:add_implicit_features]
|
778
|
+
@ai = load_ptr(obj[:user_factors_implicit])
|
779
|
+
@bi = load_ptr(obj[:item_factors_implicit])
|
780
|
+
|
781
|
+
unless @implicit
|
782
|
+
@min_rating = obj[:min_rating]
|
783
|
+
@max_rating = obj[:max_rating]
|
784
|
+
end
|
785
|
+
|
786
|
+
@u_colmeans = load_ptr(obj[:user_means])
|
787
|
+
|
788
|
+
@m = @user_map.size
|
789
|
+
@n = @item_map.size
|
790
|
+
@m_u = @user_info_map.size
|
791
|
+
@n_i = @item_info_map.size
|
792
|
+
|
793
|
+
set_implicit_vars if @implicit
|
794
|
+
|
795
|
+
@fit = @m > 0
|
796
|
+
end
|
575
797
|
end
|
576
798
|
end
|
data/lib/cmfrec/version.rb
CHANGED
Binary file
|
data/vendor/libcmfrec.dylib
CHANGED
Binary file
|
data/vendor/libcmfrec.so
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
|
-
email: andrew@
|
14
|
+
email: andrew@ankane.org
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
@@ -20,10 +20,12 @@ files:
|
|
20
20
|
- LICENSE.txt
|
21
21
|
- README.md
|
22
22
|
- lib/cmfrec.rb
|
23
|
+
- lib/cmfrec/data.rb
|
23
24
|
- lib/cmfrec/ffi.rb
|
24
25
|
- lib/cmfrec/recommender.rb
|
25
26
|
- lib/cmfrec/version.rb
|
26
27
|
- vendor/LICENSE.txt
|
28
|
+
- vendor/libcmfrec.arm64.dylib
|
27
29
|
- vendor/libcmfrec.dylib
|
28
30
|
- vendor/libcmfrec.so
|
29
31
|
homepage: https://github.com/ankane/cmfrec
|
@@ -45,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
45
47
|
- !ruby/object:Gem::Version
|
46
48
|
version: '0'
|
47
49
|
requirements: []
|
48
|
-
rubygems_version: 3.
|
50
|
+
rubygems_version: 3.2.22
|
49
51
|
signing_key:
|
50
52
|
specification_version: 4
|
51
53
|
summary: Recommendations for Ruby using collective matrix factorization
|