disco 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 815bc7de802959be7093d9e0478d83a0cf49a522e72a2df928de86223799d83d
4
- data.tar.gz: cbfacf86f1e0507abe4df07b45f20bc3d06d682617c482419a05935186a61c15
3
+ metadata.gz: a4af4d7df56f884618557fd98f97da2686cecbfaf3ce1f1f52b6ba1a3a9155f5
4
+ data.tar.gz: ddbc7551c3534c41284e958042a94d988cdd5c52248f3ca7e4a8d8a72c6b168e
5
5
  SHA512:
6
- metadata.gz: d0f3285b53cb8fe7e7d5ef30a970c632c52112c6b0503b8c81155f6cdb37583f036107b052c37019671355d0838512f904a61aeff5b69b7f6f8a2c1f4fabe785
7
- data.tar.gz: f1b9c5759d77c1f497a0ac09ccf455beda29417024c4cd8ba6c0f8fcbac3347ab233c9e8c558a75382ef3b41495b5b693495ab0533c3a084a416c1f75a38313b
6
+ metadata.gz: 5f40f125fe4096dcf09eaf1d1295f23e68fef9bbe1e5651a1ecfa1ed06748df7a0f4c9ea048b767b429b67b4d0add1f32663388707616571a6936a55ffd1d6b7
7
+ data.tar.gz: baf3caa4deec5422bd85e9372bc717f683d4cbb067f468eb7d4c964556885f7bb273fa0b6339201aef917c8f046346c20356358462fc73103243608a5245ebe6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.1 (2022-07-10)
2
+
3
+ - Added support for JSON serialization
4
+
1
5
  ## 0.3.0 (2022-03-22)
2
6
 
3
7
  - Changed `item_id` to `user_id` for `similar_users`
data/README.md CHANGED
@@ -183,17 +183,19 @@ For Rails < 6, speed up inserts by adding [activerecord-import](https://github.c
183
183
  If you’d prefer to perform recommendations on-the-fly, store the recommender
184
184
 
185
185
  ```ruby
186
- bin = Marshal.dump(recommender)
187
- File.binwrite("recommender.bin", bin)
186
+ json = recommender.to_json
187
+ File.write("recommender.json", json)
188
188
  ```
189
189
 
190
- > You can save it to a file, database, or any other storage system
190
+ > You can save it to a file, database, or any other storage system. Also, user and item IDs should be integers or strings for this.
191
+
192
+ The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it.
191
193
 
192
194
  Load a recommender
193
195
 
194
196
  ```ruby
195
- bin = File.binread("recommender.bin")
196
- recommender = Marshal.load(bin)
197
+ json = File.read("recommender.json")
198
+ recommender = Disco::Recommender.load_json(json)
197
199
  ```
198
200
 
199
201
  Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples).
data/lib/disco/recommender.rb CHANGED
@@ -255,6 +255,46 @@ module Disco
255
255
  to_s # for now
256
256
  end
257
257
 
258
+ def to_json
259
+ require "base64"
260
+ require "json"
261
+
262
+ obj = {
263
+ implicit: @implicit,
264
+ user_ids: @user_map.keys,
265
+ item_ids: @item_map.keys,
266
+ rated: @user_map.map { |_, u| (@rated[u] || {}).keys },
267
+ global_mean: @global_mean,
268
+ user_factors: Base64.strict_encode64(@user_factors.to_binary),
269
+ item_factors: Base64.strict_encode64(@item_factors.to_binary),
270
+ factors: @factors,
271
+ epochs: @epochs,
272
+ verbose: @verbose
273
+ }
274
+
275
+ unless @implicit
276
+ obj[:min_rating] = @min_rating
277
+ obj[:max_rating] = @max_rating
278
+ end
279
+
280
+ if @top_items
281
+ obj[:item_count] = @item_count
282
+ obj[:item_sum] = @item_sum
283
+ end
284
+
285
+ JSON.generate(obj)
286
+ end
287
+
288
+ def self.load_json(json)
289
+ require "json"
290
+
291
+ obj = JSON.parse(json)
292
+
293
+ recommender = new
294
+ recommender.send(:json_load, obj)
295
+ recommender
296
+ end
297
+
258
298
  private
259
299
 
260
300
  # factors should already be normalized for similar users/items
@@ -434,5 +474,31 @@ module Disco
434
474
  @item_sum = obj[:item_sum]
435
475
  end
436
476
  end
477
+
478
+ def json_load(obj)
479
+ require "base64"
480
+
481
+ @implicit = obj["implicit"]
482
+ @user_map = obj["user_ids"].map.with_index.to_h
483
+ @item_map = obj["item_ids"].map.with_index.to_h
484
+ @rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
485
+ @global_mean = obj["global_mean"].to_f
486
+ @factors = obj["factors"].to_i
487
+ @user_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["user_factors"]), [@user_map.size, @factors])
488
+ @item_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["item_factors"]), [@item_map.size, @factors])
489
+ @epochs = obj["epochs"].to_i
490
+ @verbose = obj["verbose"]
491
+
492
+ unless @implicit
493
+ @min_rating = obj["min_rating"]
494
+ @max_rating = obj["max_rating"]
495
+ end
496
+
497
+ @top_items = obj.key?("item_count")
498
+ if @top_items
499
+ @item_count = obj["item_count"]
500
+ @item_sum = obj["item_sum"]
501
+ end
502
+ end
437
503
  end
438
504
  end
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-22 00:00:00.000000000 Z
11
+ date: 2022-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: libmf