disco 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +6 -6
- data/lib/disco/recommender.rb +71 -0
- data/lib/disco/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 645a5f169c78e36ee6394a71ba61ac611cb333176adff84812c8a25549e2ea28
|
4
|
+
data.tar.gz: 68caa44554bff09a39a68522ad5c8008164840186c08e0fdbad8f9e465855c89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 104a016c693c0256cae13d4545df4112538a1f7a66c8790c38a1b434f144c8d2fb173d2428bbc9168e2581e0e9399d761083500fca69919c07f86b2f1e6582ee
|
7
|
+
data.tar.gz: 3d06f45fdcf63ea26fa48c18ec5304d22f3306da5bb3fa0a2bcd107455002dbfda56e0df22f61270c12374cb693ad6da2d747bd363c0ec001048f23690b3efe1
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -183,17 +183,17 @@ For Rails < 6, speed up inserts by adding [activerecord-import](https://github.c
|
|
183
183
|
If you’d prefer to perform recommendations on-the-fly, store the recommender
|
184
184
|
|
185
185
|
```ruby
|
186
|
-
|
187
|
-
File.
|
186
|
+
json = recommender.to_json
|
187
|
+
File.write("recommender.json", json)
|
188
188
|
```
|
189
189
|
|
190
|
-
|
190
|
+
The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it. You can save it to a file, database, or any other storage system, or use a tool like [Trove](https://github.com/ankane/trove). Also, user and item IDs should be integers or strings for this.
|
191
191
|
|
192
192
|
Load a recommender
|
193
193
|
|
194
194
|
```ruby
|
195
|
-
|
196
|
-
recommender =
|
195
|
+
json = File.read("recommender.json")
|
196
|
+
recommender = Disco::Recommender.load_json(json)
|
197
197
|
```
|
198
198
|
|
199
199
|
Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples).
|
@@ -223,7 +223,7 @@ recommender.fit(data, validation_set: validation_set)
|
|
223
223
|
|
224
224
|
## Cold Start
|
225
225
|
|
226
|
-
Collaborative filtering suffers from the [cold start problem](https://
|
226
|
+
Collaborative filtering suffers from the [cold start problem](https://en.wikipedia.org/wiki/Cold_start_(recommender_systems)). It’s unable to make good recommendations without data on a user or item, which is problematic for new users and items.
|
227
227
|
|
228
228
|
```ruby
|
229
229
|
recommender.user_recs(new_user_id) # returns empty array
|
data/lib/disco/recommender.rb
CHANGED
@@ -36,6 +36,8 @@ module Disco
|
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
+
@user_map = {}
|
40
|
+
@item_map = {}
|
39
41
|
@rated = Hash.new { |hash, key| hash[key] = {} }
|
40
42
|
input = []
|
41
43
|
train_set.each do |v|
|
@@ -56,6 +58,9 @@ module Disco
|
|
56
58
|
# TODO improve performance
|
57
59
|
unless @implicit
|
58
60
|
@min_rating, @max_rating = train_set.minmax_by { |o| o[:rating] }.map { |o| o[:rating] }
|
61
|
+
else
|
62
|
+
@min_rating = nil
|
63
|
+
@max_rating = nil
|
59
64
|
end
|
60
65
|
|
61
66
|
if @top_items
|
@@ -255,6 +260,46 @@ module Disco
|
|
255
260
|
to_s # for now
|
256
261
|
end
|
257
262
|
|
263
|
+
# Serializes the recommender's state to a JSON string.
#
# The payload holds the implicit flag, the user and item IDs (the keys of
# @user_map / @item_map), one array per user with the keys of that user's
# @rated entry, the global mean, both factor matrices as Base64-encoded
# binary, and the factors / epochs / verbose settings. min_rating and
# max_rating are included only for explicit feedback; item_count and
# item_sum only when @top_items is set.
#
# Extra positional arguments are accepted and ignored: Ruby's JSON
# generators invoke to_json(state) on objects nested inside arrays or
# hashes, which would raise ArgumentError with a zero-arity definition.
#
# Returns a String.
def to_json(*)
  require "base64"
  require "json"

  obj = {
    implicit: @implicit,
    user_ids: @user_map.keys,
    item_ids: @item_map.keys,
    # one entry per user, in @user_map order; users without an entry get []
    rated: @user_map.map { |_, u| (@rated[u] || {}).keys },
    global_mean: @global_mean,
    # raw matrix bytes are not valid JSON text, so Base64-encode them
    user_factors: Base64.strict_encode64(@user_factors.to_binary),
    item_factors: Base64.strict_encode64(@item_factors.to_binary),
    factors: @factors,
    epochs: @epochs,
    verbose: @verbose
  }

  unless @implicit
    obj[:min_rating] = @min_rating
    obj[:max_rating] = @max_rating
  end

  if @top_items
    obj[:item_count] = @item_count
    obj[:item_sum] = @item_sum
  end

  JSON.generate(obj)
end
|
292
|
+
|
293
|
+
# Builds a recommender from a JSON string: parses the string, creates a new
# instance, and hands the parsed object to the private json_load method.
# Returns the populated instance.
def self.load_json(json)
  require "json"

  # parse first so invalid JSON raises before an instance is created
  parsed = JSON.parse(json)
  new.tap { |instance| instance.send(:json_load, parsed) }
end
|
302
|
+
|
258
303
|
private
|
259
304
|
|
260
305
|
# factors should already be normalized for similar users/items
|
@@ -434,5 +479,31 @@ module Disco
|
|
434
479
|
@item_sum = obj[:item_sum]
|
435
480
|
end
|
436
481
|
end
|
482
|
+
|
483
|
+
# Restores this recommender's state from a parsed JSON object with string
# keys (the structure written by to_json).
def json_load(obj)
  require "base64"

  # decodes a Base64 factor matrix with one row per mapped ID and @factors columns
  decode_factors = lambda do |key, rows|
    Numo::SFloat.from_binary(Base64.strict_decode64(obj[key]), [rows, @factors])
  end

  @implicit = obj["implicit"]
  # IDs are stored in index order, so each ID's position is its index
  @user_map = obj["user_ids"].each_with_index.to_h
  @item_map = obj["item_ids"].each_with_index.to_h
  # the n-th array in "rated" becomes a lookup hash stored under key n
  @rated = {}
  obj["rated"].each_with_index do |entries, position|
    @rated[position] = entries.to_h { |entry| [entry, true] }
  end
  @global_mean = obj["global_mean"].to_f
  @factors = obj["factors"].to_i
  @user_factors = decode_factors.call("user_factors", @user_map.size)
  @item_factors = decode_factors.call("item_factors", @item_map.size)
  @epochs = obj["epochs"].to_i
  @verbose = obj["verbose"]

  # rating bounds are only restored for explicit feedback
  @min_rating, @max_rating = obj.values_at("min_rating", "max_rating") unless @implicit

  # presence of "item_count" marks that top-items data was serialized
  @top_items = obj.key?("item_count")
  if @top_items
    @item_count = obj["item_count"]
    @item_sum = obj["item_sum"]
  end
end
|
437
508
|
end
|
438
509
|
end
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|