cmfrec 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/lib/cmfrec/recommender.rb +127 -0
- data/lib/cmfrec/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
|
4
|
+
data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
|
7
|
+
data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -82,11 +82,11 @@ Add side information about users, items, or both
|
|
82
82
|
```ruby
|
83
83
|
user_info = [
|
84
84
|
{user_id: 1, cats: 1, dogs: 0},
|
85
|
-
{user_id: 2, cats: 2, dogs: 1}
|
85
|
+
{user_id: 2, cats: 2, dogs: 1}
|
86
86
|
]
|
87
87
|
item_info = [
|
88
88
|
{item_id: 1, genre_comedy: 1, genre_drama: 0},
|
89
|
-
{item_id: 2, genre_comedy: 0, genre_drama: 1}
|
89
|
+
{item_id: 2, genre_comedy: 0, genre_drama: 1}
|
90
90
|
]
|
91
91
|
recommender.fit(ratings, user_info: user_info, item_info: item_info)
|
92
92
|
```
|
@@ -213,17 +213,17 @@ Rover.read_csv("ratings.csv")
|
|
213
213
|
Store the recommender
|
214
214
|
|
215
215
|
```ruby
|
216
|
-
|
217
|
-
File.
|
216
|
+
json = recommender.to_json
|
217
|
+
File.write("recommender.json", json)
|
218
218
|
```
|
219
219
|
|
220
|
-
|
220
|
+
The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it. You can save it to a file, database, or any other storage system, or use a tool like [Trove](https://github.com/ankane/trove). Also, user and item IDs should be integers or strings, since the recommender is serialized as JSON and other ID types (such as symbols) may not be restored as the same type.
|
221
221
|
|
222
222
|
Load a recommender
|
223
223
|
|
224
224
|
```ruby
|
225
|
-
|
226
|
-
recommender =
|
225
|
+
json = File.read("recommender.json")
|
226
|
+
recommender = Cmfrec::Recommender.load_json(json)
|
227
227
|
```
|
228
228
|
|
229
229
|
Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples) for Disco, which has a similar API. For explicit feedback, you should [disable the bias](#explicit-feedback) with this approach.
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -249,6 +249,68 @@ module Cmfrec
|
|
249
249
|
similar(user_id, @user_map, user_factors, count, user_index)
|
250
250
|
end
|
251
251
|
|
252
|
+
def to_json
|
253
|
+
require "base64"
|
254
|
+
require "json"
|
255
|
+
|
256
|
+
obj = {
|
257
|
+
implicit: @implicit
|
258
|
+
}
|
259
|
+
|
260
|
+
# options
|
261
|
+
obj[:factors] = @k
|
262
|
+
obj[:epochs] = @niter
|
263
|
+
obj[:verbose] = @verbose
|
264
|
+
|
265
|
+
# factors
|
266
|
+
obj[:user_ids] = @user_map.keys
|
267
|
+
obj[:item_ids] = @item_map.keys
|
268
|
+
obj[:rated] = @user_map.map { |_, u| (@rated[u] || {}).keys }
|
269
|
+
obj[:user_factors] = json_dump_ptr(@a)
|
270
|
+
obj[:item_factors] = json_dump_ptr(@b)
|
271
|
+
|
272
|
+
# bias
|
273
|
+
obj[:user_bias] = json_dump_ptr(@bias_a)
|
274
|
+
obj[:item_bias] = json_dump_ptr(@bias_b)
|
275
|
+
|
276
|
+
# mean
|
277
|
+
obj[:global_mean] = @global_mean
|
278
|
+
|
279
|
+
unless (@user_info_map.keys + @item_info_map.keys).all? { |v| v.is_a?(Symbol) }
|
280
|
+
raise "Side info keys must be symbols to save"
|
281
|
+
end
|
282
|
+
|
283
|
+
# side info
|
284
|
+
obj[:user_info_ids] = @user_info_map.keys
|
285
|
+
obj[:item_info_ids] = @item_info_map.keys
|
286
|
+
obj[:user_info_factors] = json_dump_ptr(@c)
|
287
|
+
obj[:item_info_factors] = json_dump_ptr(@d)
|
288
|
+
|
289
|
+
# implicit features
|
290
|
+
obj[:add_implicit_features] = @add_implicit_features
|
291
|
+
obj[:user_factors_implicit] = json_dump_ptr(@ai)
|
292
|
+
obj[:item_factors_implicit] = json_dump_ptr(@bi)
|
293
|
+
|
294
|
+
unless @implicit
|
295
|
+
obj[:min_rating] = @min_rating
|
296
|
+
obj[:max_rating] = @max_rating
|
297
|
+
end
|
298
|
+
|
299
|
+
obj[:user_means] = json_dump_ptr(@u_colmeans)
|
300
|
+
|
301
|
+
JSON.generate(obj)
|
302
|
+
end
|
303
|
+
|
304
|
+
def self.load_json(json)
|
305
|
+
require "json"
|
306
|
+
|
307
|
+
obj = JSON.parse(json)
|
308
|
+
|
309
|
+
recommender = new
|
310
|
+
recommender.send(:json_load, obj)
|
311
|
+
recommender
|
312
|
+
end
|
313
|
+
|
252
314
|
private
|
253
315
|
|
254
316
|
def user_index
|
@@ -856,5 +918,70 @@ module Cmfrec
|
|
856
918
|
|
857
919
|
@fit = @m > 0
|
858
920
|
end
|
921
|
+
|
922
|
+
def json_dump_ptr(ptr)
|
923
|
+
Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
|
924
|
+
end
|
925
|
+
|
926
|
+
def json_load_ptr(str)
|
927
|
+
Fiddle::Pointer[Base64.strict_decode64(str)] if str
|
928
|
+
end
|
929
|
+
|
930
|
+
def json_load(obj)
|
931
|
+
require "base64"
|
932
|
+
|
933
|
+
@implicit = obj["implicit"]
|
934
|
+
|
935
|
+
# options
|
936
|
+
set_params(
|
937
|
+
k: obj["factors"],
|
938
|
+
niter: obj["epochs"],
|
939
|
+
verbose: obj["verbose"],
|
940
|
+
user_bias: !obj["user_bias"].nil?,
|
941
|
+
item_bias: !obj["item_bias"].nil?,
|
942
|
+
add_implicit_features: obj["add_implicit_features"]
|
943
|
+
)
|
944
|
+
|
945
|
+
# factors
|
946
|
+
@user_map = obj["user_ids"].map.with_index.to_h
|
947
|
+
@item_map = obj["item_ids"].map.with_index.to_h
|
948
|
+
@rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
|
949
|
+
@a = json_load_ptr(obj["user_factors"])
|
950
|
+
@b = json_load_ptr(obj["item_factors"])
|
951
|
+
|
952
|
+
# bias
|
953
|
+
@bias_a = json_load_ptr(obj["user_bias"])
|
954
|
+
@bias_b = json_load_ptr(obj["item_bias"])
|
955
|
+
|
956
|
+
# mean
|
957
|
+
@global_mean = obj["global_mean"]
|
958
|
+
|
959
|
+
# side info
|
960
|
+
@user_info_map = obj["user_info_ids"].map(&:to_sym).map.with_index.to_h
|
961
|
+
@item_info_map = obj["item_info_ids"].map(&:to_sym).map.with_index.to_h
|
962
|
+
@c = json_load_ptr(obj["user_info_factors"])
|
963
|
+
@d = json_load_ptr(obj["item_info_factors"])
|
964
|
+
|
965
|
+
# implicit features
|
966
|
+
@add_implicit_features = obj["add_implicit_features"]
|
967
|
+
@ai = json_load_ptr(obj["user_factors_implicit"])
|
968
|
+
@bi = json_load_ptr(obj["item_factors_implicit"])
|
969
|
+
|
970
|
+
unless @implicit
|
971
|
+
@min_rating = obj["min_rating"]
|
972
|
+
@max_rating = obj["max_rating"]
|
973
|
+
end
|
974
|
+
|
975
|
+
@u_colmeans = json_load_ptr(obj["user_means"])
|
976
|
+
|
977
|
+
@m = @user_map.size
|
978
|
+
@n = @item_map.size
|
979
|
+
@m_u = @user_info_map.size
|
980
|
+
@n_i = @item_info_map.size
|
981
|
+
|
982
|
+
set_implicit_vars if @implicit
|
983
|
+
|
984
|
+
@fit = @m > 0
|
985
|
+
end
|
859
986
|
end
|
860
987
|
end
|
data/lib/cmfrec/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|