cmfrec 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6dbbc801e415a4f505ffc436be23ccf066d144da072669e782b88c02e14b0f8
4
- data.tar.gz: 3851230f0a4dc4be9fbc24fe81681de0758bdbb583803780f8e07b10741f4bd1
3
+ metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
4
+ data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
5
5
  SHA512:
6
- metadata.gz: 103c09a7c0e13fca3cb81dc68c667e776fc965a485cf358e0fc8f350a97474b54bdfc0910f4472051020dbe34b8c097908c1024d5fc036ad77d0444372885109
7
- data.tar.gz: b107b36333f714106d981168f24fda48a2a211f288a2dbe01f570adb607b7d6a5215c18df4c67d159bb367d652f3d10faeb4dbc66688eaf117c5b7a1432ee951
6
+ metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
7
+ data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.1 (2022-07-11)
2
+
3
+ - Added support for JSON serialization
4
+
1
5
  ## 0.2.0 (2022-06-14)
2
6
 
3
7
  - Updated cmfrec to 3.4.2
data/README.md CHANGED
@@ -82,11 +82,11 @@ Add side information about users, items, or both
82
82
  ```ruby
83
83
  user_info = [
84
84
  {user_id: 1, cats: 1, dogs: 0},
85
- {user_id: 2, cats: 2, dogs: 1},
85
+ {user_id: 2, cats: 2, dogs: 1}
86
86
  ]
87
87
  item_info = [
88
88
  {item_id: 1, genre_comedy: 1, genre_drama: 0},
89
- {item_id: 2, genre_comedy: 0, genre_drama: 1},
89
+ {item_id: 2, genre_comedy: 0, genre_drama: 1}
90
90
  ]
91
91
  recommender.fit(ratings, user_info: user_info, item_info: item_info)
92
92
  ```
@@ -213,17 +213,17 @@ Rover.read_csv("ratings.csv")
213
213
  Store the recommender
214
214
 
215
215
  ```ruby
216
- bin = Marshal.dump(recommender)
217
- File.binwrite("recommender.bin", bin)
216
+ json = recommender.to_json
217
+ File.write("recommender.json", json)
218
218
  ```
219
219
 
220
- > You can save it to a file, database, or any other storage system
220
+ The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it. You can save it to a file, database, or any other storage system, or use a tool like [Trove](https://github.com/ankane/trove). Also, user and item IDs should be integers or strings for this.
221
221
 
222
222
  Load a recommender
223
223
 
224
224
  ```ruby
225
- bin = File.binread("recommender.bin")
226
- recommender = Marshal.load(bin)
225
+ json = File.read("recommender.json")
226
+ recommender = Cmfrec::Recommender.load_json(json)
227
227
  ```
228
228
 
229
229
  Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples) for Disco, which has a similar API. For explicit feedback, you should [disable the bias](#explicit-feedback) with this approach.
@@ -249,6 +249,68 @@ module Cmfrec
249
249
  similar(user_id, @user_map, user_factors, count, user_index)
250
250
  end
251
251
 
252
# Serializes the recommender to a JSON string.
#
# Options, user/item IDs, the rated item keys for each user, and the global
# mean are written as plain JSON values. Factor, bias, and mean pointers are
# written as whatever json_dump_ptr returns for them. min_rating/max_rating
# are only written when the model is not implicit.
#
# Any arguments are accepted and ignored. The json library invokes
# to_json(state) on objects it does not serialize natively, so a
# zero-parameter definition raises ArgumentError when a recommender is nested
# inside a structure passed to JSON.generate.
#
# Raises RuntimeError if any side info key is not a Symbol (json_load
# converts the stored keys back with to_sym).
def to_json(*_args)
  require "base64"
  require "json"

  obj = {
    implicit: @implicit
  }

  # options
  obj[:factors] = @k
  obj[:epochs] = @niter
  obj[:verbose] = @verbose

  # factors
  obj[:user_ids] = @user_map.keys
  obj[:item_ids] = @item_map.keys
  # one array per user, in the same order as user_ids
  obj[:rated] = @user_map.map { |_, u| (@rated[u] || {}).keys }
  obj[:user_factors] = json_dump_ptr(@a)
  obj[:item_factors] = json_dump_ptr(@b)

  # bias
  obj[:user_bias] = json_dump_ptr(@bias_a)
  obj[:item_bias] = json_dump_ptr(@bias_b)

  # mean
  obj[:global_mean] = @global_mean

  side_info_keys = @user_info_map.keys + @item_info_map.keys
  unless side_info_keys.all? { |v| v.is_a?(Symbol) }
    raise "Side info keys must be symbols to save"
  end

  # side info
  obj[:user_info_ids] = @user_info_map.keys
  obj[:item_info_ids] = @item_info_map.keys
  obj[:user_info_factors] = json_dump_ptr(@c)
  obj[:item_info_factors] = json_dump_ptr(@d)

  # implicit features
  obj[:add_implicit_features] = @add_implicit_features
  obj[:user_factors_implicit] = json_dump_ptr(@ai)
  obj[:item_factors_implicit] = json_dump_ptr(@bi)

  unless @implicit
    obj[:min_rating] = @min_rating
    obj[:max_rating] = @max_rating
  end

  obj[:user_means] = json_dump_ptr(@u_colmeans)

  JSON.generate(obj)
end
303
+
304
# Builds a recommender from a JSON string: parses the JSON, creates a new
# instance, and hands the parsed data to the private json_load method.
# Returns the new recommender.
def self.load_json(json)
  require "json"

  parsed = JSON.parse(json)
  new.tap { |instance| instance.send(:json_load, parsed) }
end
313
+
252
314
  private
253
315
 
254
316
  def user_index
@@ -856,5 +918,70 @@ module Cmfrec
856
918
 
857
919
  @fit = @m > 0
858
920
  end
921
+
922
# Encodes the first ptr.size bytes of ptr as a single-line Base64 string
# suitable for embedding in JSON. Returns nil when ptr is nil.
def json_dump_ptr(ptr)
  # pack("m0") yields the same output as Base64.strict_encode64 but does not
  # rely on a caller having loaded the base64 library beforehand
  [ptr.to_s(ptr.size)].pack("m0") if ptr
end
925
+
926
# Decodes a Base64 string and wraps the resulting bytes in a Fiddle::Pointer.
# Returns nil when str is nil. Raises ArgumentError on malformed Base64.
def json_load_ptr(str)
  # unpack1("m0") is the strict decoder behind Base64.strict_decode64, used
  # directly so this does not rely on a caller having loaded base64 beforehand
  Fiddle::Pointer[str.unpack1("m0")] if str
end
929
+
930
# Restores this recommender's state from a hash with the string keys that
# to_json writes (i.e. the result of JSON.parse on its output).
def json_load(obj)
  require "base64"

  # maps each entry to its position: ["a", "b"] => {"a" => 0, "b" => 1}
  index_by_position = ->(entries) { entries.each_with_index.to_h }

  @implicit = obj["implicit"]

  # options; the bias flags are inferred from whether bias data was stored
  set_params(
    k: obj["factors"],
    niter: obj["epochs"],
    verbose: obj["verbose"],
    user_bias: !obj["user_bias"].nil?,
    item_bias: !obj["item_bias"].nil?,
    add_implicit_features: obj["add_implicit_features"]
  )

  # factors
  @user_map = index_by_position.call(obj["user_ids"])
  @item_map = index_by_position.call(obj["item_ids"])
  # position in the "rated" array => lookup hash of that entry's values
  @rated = obj["rated"].each_with_index.to_h do |entries, position|
    [position, entries.to_h { |entry| [entry, true] }]
  end
  @a = json_load_ptr(obj["user_factors"])
  @b = json_load_ptr(obj["item_factors"])

  # bias
  @bias_a = json_load_ptr(obj["user_bias"])
  @bias_b = json_load_ptr(obj["item_bias"])

  # mean
  @global_mean = obj["global_mean"]

  # side info; keys come back from JSON as strings and are symbolized again
  @user_info_map = index_by_position.call(obj["user_info_ids"].map(&:to_sym))
  @item_info_map = index_by_position.call(obj["item_info_ids"].map(&:to_sym))
  @c = json_load_ptr(obj["user_info_factors"])
  @d = json_load_ptr(obj["item_info_factors"])

  # implicit features
  @add_implicit_features = obj["add_implicit_features"]
  @ai = json_load_ptr(obj["user_factors_implicit"])
  @bi = json_load_ptr(obj["item_factors_implicit"])

  # the rating range is only read for non-implicit models
  @min_rating, @max_rating = obj.values_at("min_rating", "max_rating") unless @implicit

  @u_colmeans = json_load_ptr(obj["user_means"])

  # dimensions follow from the restored maps
  @m = @user_map.size
  @n = @item_map.size
  @m_u = @user_info_map.size
  @n_i = @item_info_map.size

  set_implicit_vars if @implicit

  @fit = @m.positive?
end
859
986
  end
860
987
  end
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-15 00:00:00.000000000 Z
11
+ date: 2022-07-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org