cmfrec 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6dbbc801e415a4f505ffc436be23ccf066d144da072669e782b88c02e14b0f8
4
- data.tar.gz: 3851230f0a4dc4be9fbc24fe81681de0758bdbb583803780f8e07b10741f4bd1
3
+ metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
4
+ data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
5
5
  SHA512:
6
- metadata.gz: 103c09a7c0e13fca3cb81dc68c667e776fc965a485cf358e0fc8f350a97474b54bdfc0910f4472051020dbe34b8c097908c1024d5fc036ad77d0444372885109
7
- data.tar.gz: b107b36333f714106d981168f24fda48a2a211f288a2dbe01f570adb607b7d6a5215c18df4c67d159bb367d652f3d10faeb4dbc66688eaf117c5b7a1432ee951
6
+ metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
7
+ data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.1 (2022-07-11)
2
+
3
+ - Added support for JSON serialization
4
+
1
5
  ## 0.2.0 (2022-06-14)
2
6
 
3
7
  - Updated cmfrec to 3.4.2
data/README.md CHANGED
@@ -82,11 +82,11 @@ Add side information about users, items, or both
82
82
  ```ruby
83
83
  user_info = [
84
84
  {user_id: 1, cats: 1, dogs: 0},
85
- {user_id: 2, cats: 2, dogs: 1},
85
+ {user_id: 2, cats: 2, dogs: 1}
86
86
  ]
87
87
  item_info = [
88
88
  {item_id: 1, genre_comedy: 1, genre_drama: 0},
89
- {item_id: 2, genre_comedy: 0, genre_drama: 1},
89
+ {item_id: 2, genre_comedy: 0, genre_drama: 1}
90
90
  ]
91
91
  recommender.fit(ratings, user_info: user_info, item_info: item_info)
92
92
  ```
@@ -213,17 +213,17 @@ Rover.read_csv("ratings.csv")
213
213
  Store the recommender
214
214
 
215
215
  ```ruby
216
- bin = Marshal.dump(recommender)
217
- File.binwrite("recommender.bin", bin)
216
+ json = recommender.to_json
217
+ File.write("recommender.json", json)
218
218
  ```
219
219
 
220
- > You can save it to a file, database, or any other storage system
220
+ The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it. You can save it to a file, database, or any other storage system, or use a tool like [Trove](https://github.com/ankane/trove). Also, user and item IDs should be integers or strings so that they can be written to JSON and restored unchanged.
221
221
 
222
222
  Load a recommender
223
223
 
224
224
  ```ruby
225
- bin = File.binread("recommender.bin")
226
- recommender = Marshal.load(bin)
225
+ json = File.read("recommender.json")
226
+ recommender = Cmfrec::Recommender.load_json(json)
227
227
  ```
228
228
 
229
229
  Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples) for Disco, which has a similar API. For explicit feedback, you should [disable the bias](#explicit-feedback) with this approach.
@@ -249,6 +249,68 @@ module Cmfrec
249
249
  similar(user_id, @user_map, user_factors, count, user_index)
250
250
  end
251
251
 
252
+ def to_json
253
+ require "base64"
254
+ require "json"
255
+
256
+ obj = {
257
+ implicit: @implicit
258
+ }
259
+
260
+ # options
261
+ obj[:factors] = @k
262
+ obj[:epochs] = @niter
263
+ obj[:verbose] = @verbose
264
+
265
+ # factors
266
+ obj[:user_ids] = @user_map.keys
267
+ obj[:item_ids] = @item_map.keys
268
+ obj[:rated] = @user_map.map { |_, u| (@rated[u] || {}).keys }
269
+ obj[:user_factors] = json_dump_ptr(@a)
270
+ obj[:item_factors] = json_dump_ptr(@b)
271
+
272
+ # bias
273
+ obj[:user_bias] = json_dump_ptr(@bias_a)
274
+ obj[:item_bias] = json_dump_ptr(@bias_b)
275
+
276
+ # mean
277
+ obj[:global_mean] = @global_mean
278
+
279
+ unless (@user_info_map.keys + @item_info_map.keys).all? { |v| v.is_a?(Symbol) }
280
+ raise "Side info keys must be symbols to save"
281
+ end
282
+
283
+ # side info
284
+ obj[:user_info_ids] = @user_info_map.keys
285
+ obj[:item_info_ids] = @item_info_map.keys
286
+ obj[:user_info_factors] = json_dump_ptr(@c)
287
+ obj[:item_info_factors] = json_dump_ptr(@d)
288
+
289
+ # implicit features
290
+ obj[:add_implicit_features] = @add_implicit_features
291
+ obj[:user_factors_implicit] = json_dump_ptr(@ai)
292
+ obj[:item_factors_implicit] = json_dump_ptr(@bi)
293
+
294
+ unless @implicit
295
+ obj[:min_rating] = @min_rating
296
+ obj[:max_rating] = @max_rating
297
+ end
298
+
299
+ obj[:user_means] = json_dump_ptr(@u_colmeans)
300
+
301
+ JSON.generate(obj)
302
+ end
303
+
304
+ def self.load_json(json)
305
+ require "json"
306
+
307
+ obj = JSON.parse(json)
308
+
309
+ recommender = new
310
+ recommender.send(:json_load, obj)
311
+ recommender
312
+ end
313
+
252
314
  private
253
315
 
254
316
  def user_index
@@ -856,5 +918,70 @@ module Cmfrec
856
918
 
857
919
  @fit = @m > 0
858
920
  end
921
+
922
+ def json_dump_ptr(ptr)
923
+ Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
924
+ end
925
+
926
+ def json_load_ptr(str)
927
+ Fiddle::Pointer[Base64.strict_decode64(str)] if str
928
+ end
929
+
930
+ def json_load(obj)
931
+ require "base64"
932
+
933
+ @implicit = obj["implicit"]
934
+
935
+ # options
936
+ set_params(
937
+ k: obj["factors"],
938
+ niter: obj["epochs"],
939
+ verbose: obj["verbose"],
940
+ user_bias: !obj["user_bias"].nil?,
941
+ item_bias: !obj["item_bias"].nil?,
942
+ add_implicit_features: obj["add_implicit_features"]
943
+ )
944
+
945
+ # factors
946
+ @user_map = obj["user_ids"].map.with_index.to_h
947
+ @item_map = obj["item_ids"].map.with_index.to_h
948
+ @rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
949
+ @a = json_load_ptr(obj["user_factors"])
950
+ @b = json_load_ptr(obj["item_factors"])
951
+
952
+ # bias
953
+ @bias_a = json_load_ptr(obj["user_bias"])
954
+ @bias_b = json_load_ptr(obj["item_bias"])
955
+
956
+ # mean
957
+ @global_mean = obj["global_mean"]
958
+
959
+ # side info
960
+ @user_info_map = obj["user_info_ids"].map(&:to_sym).map.with_index.to_h
961
+ @item_info_map = obj["item_info_ids"].map(&:to_sym).map.with_index.to_h
962
+ @c = json_load_ptr(obj["user_info_factors"])
963
+ @d = json_load_ptr(obj["item_info_factors"])
964
+
965
+ # implicit features
966
+ @add_implicit_features = obj["add_implicit_features"]
967
+ @ai = json_load_ptr(obj["user_factors_implicit"])
968
+ @bi = json_load_ptr(obj["item_factors_implicit"])
969
+
970
+ unless @implicit
971
+ @min_rating = obj["min_rating"]
972
+ @max_rating = obj["max_rating"]
973
+ end
974
+
975
+ @u_colmeans = json_load_ptr(obj["user_means"])
976
+
977
+ @m = @user_map.size
978
+ @n = @item_map.size
979
+ @m_u = @user_info_map.size
980
+ @n_i = @item_info_map.size
981
+
982
+ set_implicit_vars if @implicit
983
+
984
+ @fit = @m > 0
985
+ end
859
986
  end
860
987
  end
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-15 00:00:00.000000000 Z
11
+ date: 2022-07-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org