disco 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 815bc7de802959be7093d9e0478d83a0cf49a522e72a2df928de86223799d83d
4
- data.tar.gz: cbfacf86f1e0507abe4df07b45f20bc3d06d682617c482419a05935186a61c15
3
+ metadata.gz: a4af4d7df56f884618557fd98f97da2686cecbfaf3ce1f1f52b6ba1a3a9155f5
4
+ data.tar.gz: ddbc7551c3534c41284e958042a94d988cdd5c52248f3ca7e4a8d8a72c6b168e
5
5
  SHA512:
6
- metadata.gz: d0f3285b53cb8fe7e7d5ef30a970c632c52112c6b0503b8c81155f6cdb37583f036107b052c37019671355d0838512f904a61aeff5b69b7f6f8a2c1f4fabe785
7
- data.tar.gz: f1b9c5759d77c1f497a0ac09ccf455beda29417024c4cd8ba6c0f8fcbac3347ab233c9e8c558a75382ef3b41495b5b693495ab0533c3a084a416c1f75a38313b
6
+ metadata.gz: 5f40f125fe4096dcf09eaf1d1295f23e68fef9bbe1e5651a1ecfa1ed06748df7a0f4c9ea048b767b429b67b4d0add1f32663388707616571a6936a55ffd1d6b7
7
+ data.tar.gz: baf3caa4deec5422bd85e9372bc717f683d4cbb067f468eb7d4c964556885f7bb273fa0b6339201aef917c8f046346c20356358462fc73103243608a5245ebe6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.1 (2022-07-10)
2
+
3
+ - Added support for JSON serialization
4
+
1
5
  ## 0.3.0 (2022-03-22)
2
6
 
3
7
  - Changed `item_id` to `user_id` for `similar_users`
data/README.md CHANGED
@@ -183,17 +183,19 @@ For Rails < 6, speed up inserts by adding [activerecord-import](https://github.c
183
183
  If you’d prefer to perform recommendations on-the-fly, store the recommender
184
184
 
185
185
  ```ruby
186
- bin = Marshal.dump(recommender)
187
- File.binwrite("recommender.bin", bin)
186
+ json = recommender.to_json
187
+ File.write("recommender.json", json)
188
188
  ```
189
189
 
190
- > You can save it to a file, database, or any other storage system
190
+ > You can save it to a file, database, or any other storage system. Also, user and item IDs should be integers or strings for this.
191
+
192
+ The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it.
191
193
 
192
194
  Load a recommender
193
195
 
194
196
  ```ruby
195
- bin = File.binread("recommender.bin")
196
- recommender = Marshal.load(bin)
197
+ json = File.read("recommender.json")
198
+ recommender = Disco::Recommender.load_json(json)
197
199
  ```
198
200
 
199
201
  Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples).
@@ -255,6 +255,46 @@ module Disco
255
255
  to_s # for now
256
256
  end
257
257
 
258
# Serializes the recommender's state to a JSON string.
#
# The payload contains the implicit flag, the user and item IDs (keys of
# the ID maps, in map order), one list of rated keys per user, the global
# mean, both factor matrices (binary dump, strict Base64), and the
# factors/epochs/verbose settings. Rating bounds are included only for
# explicit feedback; item count/sum only when @top_items is set.
#
# Any arguments are accepted and ignored so the method also works when a
# JSON generator invokes it as to_json(state), e.g. for a recommender
# nested inside an array or hash. The output is the same either way.
#
# Returns a String of JSON.
def to_json(*)
  require "json"

  obj = {
    implicit: @implicit,
    user_ids: @user_map.keys,
    item_ids: @item_map.keys,
    # one entry per user, in the same order as user_ids
    rated: @user_map.map { |_, u| (@rated[u] || {}).keys },
    global_mean: @global_mean,
    # pack("m0") is strict Base64 (no line feeds), same output as
    # Base64.strict_encode64 but without needing the base64 gem
    user_factors: [@user_factors.to_binary].pack("m0"),
    item_factors: [@item_factors.to_binary].pack("m0"),
    factors: @factors,
    epochs: @epochs,
    verbose: @verbose
  }

  unless @implicit
    obj[:min_rating] = @min_rating
    obj[:max_rating] = @max_rating
  end

  if @top_items
    obj[:item_count] = @item_count
    obj[:item_sum] = @item_sum
  end

  JSON.generate(obj)
end
287
+
288
# Builds a recommender from a JSON string.
#
# The string is parsed first, then a fresh instance is created and handed
# the parsed payload through its private json_load method.
#
# Returns the populated instance.
def self.load_json(json)
  require "json"

  payload = JSON.parse(json)
  new.tap { |instance| instance.send(:json_load, payload) }
end
297
+
258
298
  private
259
299
 
260
300
  # factors should already be normalized for similar users/items
@@ -434,5 +474,31 @@ module Disco
434
474
  @item_sum = obj[:item_sum]
435
475
  end
436
476
  end
477
+
478
# Restores instance state from a parsed JSON payload (a Hash with string
# keys, as returned by JSON.parse).
#
# ID maps are rebuilt by assigning each ID its position in the stored
# list, and each user's rated list becomes a hash of key => true. Factor
# matrices are decoded from strict Base64 and reshaped to
# [number of IDs, factors]. Rating bounds are read only for explicit
# feedback; item count/sum only when the payload has "item_count".
def json_load(obj)
  @implicit = obj["implicit"]
  # position in the stored list becomes the index
  @user_map = obj["user_ids"].each_with_index.to_h
  @item_map = obj["item_ids"].each_with_index.to_h
  @rated = obj["rated"].each_with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
  @global_mean = obj["global_mean"].to_f
  # must be set before the factor matrices, which use it for their shape
  @factors = obj["factors"].to_i
  # unpack1("m0") is strict Base64 decoding (raises ArgumentError on
  # malformed input), same as Base64.strict_decode64 but without needing
  # the base64 gem
  @user_factors = Numo::SFloat.from_binary(obj["user_factors"].unpack1("m0"), [@user_map.size, @factors])
  @item_factors = Numo::SFloat.from_binary(obj["item_factors"].unpack1("m0"), [@item_map.size, @factors])
  @epochs = obj["epochs"].to_i
  @verbose = obj["verbose"]

  unless @implicit
    @min_rating = obj["min_rating"]
    @max_rating = obj["max_rating"]
  end

  # presence of the key is what marks top items as available
  @top_items = obj.key?("item_count")
  if @top_items
    @item_count = obj["item_count"]
    @item_sum = obj["item_sum"]
  end
end
437
503
  end
438
504
  end
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-22 00:00:00.000000000 Z
11
+ date: 2022-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: libmf