cmfrec 0.2.0 → 0.2.1
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/lib/cmfrec/recommender.rb +127 -0
- data/lib/cmfrec/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
|
4
|
+
data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
|
7
|
+
data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -82,11 +82,11 @@ Add side information about users, items, or both
|
|
82
82
|
```ruby
|
83
83
|
user_info = [
|
84
84
|
{user_id: 1, cats: 1, dogs: 0},
|
85
|
-
{user_id: 2, cats: 2, dogs: 1}
|
85
|
+
{user_id: 2, cats: 2, dogs: 1}
|
86
86
|
]
|
87
87
|
item_info = [
|
88
88
|
{item_id: 1, genre_comedy: 1, genre_drama: 0},
|
89
|
-
{item_id: 2, genre_comedy: 0, genre_drama: 1}
|
89
|
+
{item_id: 2, genre_comedy: 0, genre_drama: 1}
|
90
90
|
]
|
91
91
|
recommender.fit(ratings, user_info: user_info, item_info: item_info)
|
92
92
|
```
|
@@ -213,17 +213,17 @@ Rover.read_csv("ratings.csv")
|
|
213
213
|
Store the recommender
|
214
214
|
|
215
215
|
```ruby
|
216
|
-
|
217
|
-
File.
|
216
|
+
json = recommender.to_json
|
217
|
+
File.write("recommender.json", json)
|
218
218
|
```
|
219
219
|
|
220
|
-
|
220
|
+
The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it. You can save it to a file, database, or any other storage system, or use a tool like [Trove](https://github.com/ankane/trove). Also, user and item IDs should be integers or strings for this.
|
221
221
|
|
222
222
|
Load a recommender
|
223
223
|
|
224
224
|
```ruby
|
225
|
-
|
226
|
-
recommender =
|
225
|
+
json = File.read("recommender.json")
|
226
|
+
recommender = Cmfrec::Recommender.load_json(json)
|
227
227
|
```
|
228
228
|
|
229
229
|
Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples) for Disco, which has a similar API. For explicit feedback, you should [disable the bias](#explicit-feedback) with this approach.
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -249,6 +249,68 @@ module Cmfrec
|
|
249
249
|
similar(user_id, @user_map, user_factors, count, user_index)
|
250
250
|
end
|
251
251
|
|
252
|
+
def to_json
|
253
|
+
require "base64"
|
254
|
+
require "json"
|
255
|
+
|
256
|
+
obj = {
|
257
|
+
implicit: @implicit
|
258
|
+
}
|
259
|
+
|
260
|
+
# options
|
261
|
+
obj[:factors] = @k
|
262
|
+
obj[:epochs] = @niter
|
263
|
+
obj[:verbose] = @verbose
|
264
|
+
|
265
|
+
# factors
|
266
|
+
obj[:user_ids] = @user_map.keys
|
267
|
+
obj[:item_ids] = @item_map.keys
|
268
|
+
obj[:rated] = @user_map.map { |_, u| (@rated[u] || {}).keys }
|
269
|
+
obj[:user_factors] = json_dump_ptr(@a)
|
270
|
+
obj[:item_factors] = json_dump_ptr(@b)
|
271
|
+
|
272
|
+
# bias
|
273
|
+
obj[:user_bias] = json_dump_ptr(@bias_a)
|
274
|
+
obj[:item_bias] = json_dump_ptr(@bias_b)
|
275
|
+
|
276
|
+
# mean
|
277
|
+
obj[:global_mean] = @global_mean
|
278
|
+
|
279
|
+
unless (@user_info_map.keys + @item_info_map.keys).all? { |v| v.is_a?(Symbol) }
|
280
|
+
raise "Side info keys must be symbols to save"
|
281
|
+
end
|
282
|
+
|
283
|
+
# side info
|
284
|
+
obj[:user_info_ids] = @user_info_map.keys
|
285
|
+
obj[:item_info_ids] = @item_info_map.keys
|
286
|
+
obj[:user_info_factors] = json_dump_ptr(@c)
|
287
|
+
obj[:item_info_factors] = json_dump_ptr(@d)
|
288
|
+
|
289
|
+
# implicit features
|
290
|
+
obj[:add_implicit_features] = @add_implicit_features
|
291
|
+
obj[:user_factors_implicit] = json_dump_ptr(@ai)
|
292
|
+
obj[:item_factors_implicit] = json_dump_ptr(@bi)
|
293
|
+
|
294
|
+
unless @implicit
|
295
|
+
obj[:min_rating] = @min_rating
|
296
|
+
obj[:max_rating] = @max_rating
|
297
|
+
end
|
298
|
+
|
299
|
+
obj[:user_means] = json_dump_ptr(@u_colmeans)
|
300
|
+
|
301
|
+
JSON.generate(obj)
|
302
|
+
end
|
303
|
+
|
304
|
+
def self.load_json(json)
|
305
|
+
require "json"
|
306
|
+
|
307
|
+
obj = JSON.parse(json)
|
308
|
+
|
309
|
+
recommender = new
|
310
|
+
recommender.send(:json_load, obj)
|
311
|
+
recommender
|
312
|
+
end
|
313
|
+
|
252
314
|
private
|
253
315
|
|
254
316
|
def user_index
|
@@ -856,5 +918,70 @@ module Cmfrec
|
|
856
918
|
|
857
919
|
@fit = @m > 0
|
858
920
|
end
|
921
|
+
|
922
|
+
def json_dump_ptr(ptr)
|
923
|
+
Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
|
924
|
+
end
|
925
|
+
|
926
|
+
def json_load_ptr(str)
|
927
|
+
Fiddle::Pointer[Base64.strict_decode64(str)] if str
|
928
|
+
end
|
929
|
+
|
930
|
+
def json_load(obj)
|
931
|
+
require "base64"
|
932
|
+
|
933
|
+
@implicit = obj["implicit"]
|
934
|
+
|
935
|
+
# options
|
936
|
+
set_params(
|
937
|
+
k: obj["factors"],
|
938
|
+
niter: obj["epochs"],
|
939
|
+
verbose: obj["verbose"],
|
940
|
+
user_bias: !obj["user_bias"].nil?,
|
941
|
+
item_bias: !obj["item_bias"].nil?,
|
942
|
+
add_implicit_features: obj["add_implicit_features"]
|
943
|
+
)
|
944
|
+
|
945
|
+
# factors
|
946
|
+
@user_map = obj["user_ids"].map.with_index.to_h
|
947
|
+
@item_map = obj["item_ids"].map.with_index.to_h
|
948
|
+
@rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
|
949
|
+
@a = json_load_ptr(obj["user_factors"])
|
950
|
+
@b = json_load_ptr(obj["item_factors"])
|
951
|
+
|
952
|
+
# bias
|
953
|
+
@bias_a = json_load_ptr(obj["user_bias"])
|
954
|
+
@bias_b = json_load_ptr(obj["item_bias"])
|
955
|
+
|
956
|
+
# mean
|
957
|
+
@global_mean = obj["global_mean"]
|
958
|
+
|
959
|
+
# side info
|
960
|
+
@user_info_map = obj["user_info_ids"].map(&:to_sym).map.with_index.to_h
|
961
|
+
@item_info_map = obj["item_info_ids"].map(&:to_sym).map.with_index.to_h
|
962
|
+
@c = json_load_ptr(obj["user_info_factors"])
|
963
|
+
@d = json_load_ptr(obj["item_info_factors"])
|
964
|
+
|
965
|
+
# implicit features
|
966
|
+
@add_implicit_features = obj["add_implicit_features"]
|
967
|
+
@ai = json_load_ptr(obj["user_factors_implicit"])
|
968
|
+
@bi = json_load_ptr(obj["item_factors_implicit"])
|
969
|
+
|
970
|
+
unless @implicit
|
971
|
+
@min_rating = obj["min_rating"]
|
972
|
+
@max_rating = obj["max_rating"]
|
973
|
+
end
|
974
|
+
|
975
|
+
@u_colmeans = json_load_ptr(obj["user_means"])
|
976
|
+
|
977
|
+
@m = @user_map.size
|
978
|
+
@n = @item_map.size
|
979
|
+
@m_u = @user_info_map.size
|
980
|
+
@n_i = @item_info_map.size
|
981
|
+
|
982
|
+
set_implicit_vars if @implicit
|
983
|
+
|
984
|
+
@fit = @m > 0
|
985
|
+
end
|
859
986
|
end
|
860
987
|
end
|
data/lib/cmfrec/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|