disco 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +6 -6
- data/lib/disco/recommender.rb +71 -0
- data/lib/disco/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 645a5f169c78e36ee6394a71ba61ac611cb333176adff84812c8a25549e2ea28
|
4
|
+
data.tar.gz: 68caa44554bff09a39a68522ad5c8008164840186c08e0fdbad8f9e465855c89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 104a016c693c0256cae13d4545df4112538a1f7a66c8790c38a1b434f144c8d2fb173d2428bbc9168e2581e0e9399d761083500fca69919c07f86b2f1e6582ee
|
7
|
+
data.tar.gz: 3d06f45fdcf63ea26fa48c18ec5304d22f3306da5bb3fa0a2bcd107455002dbfda56e0df22f61270c12374cb693ad6da2d747bd363c0ec001048f23690b3efe1
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -183,17 +183,17 @@ For Rails < 6, speed up inserts by adding [activerecord-import](https://github.c
|
|
183
183
|
If you’d prefer to perform recommendations on-the-fly, store the recommender
|
184
184
|
|
185
185
|
```ruby
|
186
|
-
|
187
|
-
File.
|
186
|
+
json = recommender.to_json
|
187
|
+
File.write("recommender.json", json)
|
188
188
|
```
|
189
189
|
|
190
|
-
|
190
|
+
The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it. You can save it to a file, database, or any other storage system, or use a tool like [Trove](https://github.com/ankane/trove). Also, user and item IDs should be integers or strings for this.
|
191
191
|
|
192
192
|
Load a recommender
|
193
193
|
|
194
194
|
```ruby
|
195
|
-
|
196
|
-
recommender =
|
195
|
+
json = File.read("recommender.json")
|
196
|
+
recommender = Disco::Recommender.load_json(json)
|
197
197
|
```
|
198
198
|
|
199
199
|
Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples).
|
@@ -223,7 +223,7 @@ recommender.fit(data, validation_set: validation_set)
|
|
223
223
|
|
224
224
|
## Cold Start
|
225
225
|
|
226
|
-
Collaborative filtering suffers from the [cold start problem](https://
|
226
|
+
Collaborative filtering suffers from the [cold start problem](https://en.wikipedia.org/wiki/Cold_start_(recommender_systems)). It’s unable to make good recommendations without data on a user or item, which is problematic for new users and items.
|
227
227
|
|
228
228
|
```ruby
|
229
229
|
recommender.user_recs(new_user_id) # returns empty array
|
data/lib/disco/recommender.rb
CHANGED
@@ -36,6 +36,8 @@ module Disco
|
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
+
@user_map = {}
|
40
|
+
@item_map = {}
|
39
41
|
@rated = Hash.new { |hash, key| hash[key] = {} }
|
40
42
|
input = []
|
41
43
|
train_set.each do |v|
|
@@ -56,6 +58,9 @@ module Disco
|
|
56
58
|
# TODO improve performance
|
57
59
|
unless @implicit
|
58
60
|
@min_rating, @max_rating = train_set.minmax_by { |o| o[:rating] }.map { |o| o[:rating] }
|
61
|
+
else
|
62
|
+
@min_rating = nil
|
63
|
+
@max_rating = nil
|
59
64
|
end
|
60
65
|
|
61
66
|
if @top_items
|
@@ -255,6 +260,46 @@ module Disco
|
|
255
260
|
to_s # for now
|
256
261
|
end
|
257
262
|
|
263
|
+
def to_json
|
264
|
+
require "base64"
|
265
|
+
require "json"
|
266
|
+
|
267
|
+
obj = {
|
268
|
+
implicit: @implicit,
|
269
|
+
user_ids: @user_map.keys,
|
270
|
+
item_ids: @item_map.keys,
|
271
|
+
rated: @user_map.map { |_, u| (@rated[u] || {}).keys },
|
272
|
+
global_mean: @global_mean,
|
273
|
+
user_factors: Base64.strict_encode64(@user_factors.to_binary),
|
274
|
+
item_factors: Base64.strict_encode64(@item_factors.to_binary),
|
275
|
+
factors: @factors,
|
276
|
+
epochs: @epochs,
|
277
|
+
verbose: @verbose
|
278
|
+
}
|
279
|
+
|
280
|
+
unless @implicit
|
281
|
+
obj[:min_rating] = @min_rating
|
282
|
+
obj[:max_rating] = @max_rating
|
283
|
+
end
|
284
|
+
|
285
|
+
if @top_items
|
286
|
+
obj[:item_count] = @item_count
|
287
|
+
obj[:item_sum] = @item_sum
|
288
|
+
end
|
289
|
+
|
290
|
+
JSON.generate(obj)
|
291
|
+
end
|
292
|
+
|
293
|
+
def self.load_json(json)
|
294
|
+
require "json"
|
295
|
+
|
296
|
+
obj = JSON.parse(json)
|
297
|
+
|
298
|
+
recommender = new
|
299
|
+
recommender.send(:json_load, obj)
|
300
|
+
recommender
|
301
|
+
end
|
302
|
+
|
258
303
|
private
|
259
304
|
|
260
305
|
# factors should already be normalized for similar users/items
|
@@ -434,5 +479,31 @@ module Disco
|
|
434
479
|
@item_sum = obj[:item_sum]
|
435
480
|
end
|
436
481
|
end
|
482
|
+
|
483
|
+
def json_load(obj)
|
484
|
+
require "base64"
|
485
|
+
|
486
|
+
@implicit = obj["implicit"]
|
487
|
+
@user_map = obj["user_ids"].map.with_index.to_h
|
488
|
+
@item_map = obj["item_ids"].map.with_index.to_h
|
489
|
+
@rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
|
490
|
+
@global_mean = obj["global_mean"].to_f
|
491
|
+
@factors = obj["factors"].to_i
|
492
|
+
@user_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["user_factors"]), [@user_map.size, @factors])
|
493
|
+
@item_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["item_factors"]), [@item_map.size, @factors])
|
494
|
+
@epochs = obj["epochs"].to_i
|
495
|
+
@verbose = obj["verbose"]
|
496
|
+
|
497
|
+
unless @implicit
|
498
|
+
@min_rating = obj["min_rating"]
|
499
|
+
@max_rating = obj["max_rating"]
|
500
|
+
end
|
501
|
+
|
502
|
+
@top_items = obj.key?("item_count")
|
503
|
+
if @top_items
|
504
|
+
@item_count = obj["item_count"]
|
505
|
+
@item_sum = obj["item_sum"]
|
506
|
+
end
|
507
|
+
end
|
437
508
|
end
|
438
509
|
end
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|