disco 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/lib/disco/recommender.rb +31 -25
- data/lib/disco/version.rb +1 -1
- metadata +3 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ca6da20099f6ec36c5f07547c61bb368066f2f480fba03c775287f48407a1f8
|
4
|
+
data.tar.gz: 3fbd73bb42398534d31a7b574d5d8dd6e40a1fe867e7cf4fe0ede4bdad5cd0c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 570275aaf7887c98318040efde2440f5b6e8e04be25faa7a78b73453e05dec845963067f0808eff09e28621dbd7b3277e98da83521e067b6170a585f717462f5
|
7
|
+
data.tar.gz: 57b86344de0656aa8e4bb0f130d0f3bcd928d05365b7f39479e4c6a69cd49d3a2f2a10082abedaa351d5febb212681236548520a01c9b321132e6f798e74a690
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.5.2 (2025-09-12)
|
2
|
+
|
3
|
+
- Fixed recommendations when numo-narray compiled with GCC 13+
|
4
|
+
- Fixed error with Rover 0.5+
|
5
|
+
|
6
|
+
## 0.5.1 (2024-12-29)
|
7
|
+
|
8
|
+
- Removed dependency on `base64` gem for serialization
|
9
|
+
|
1
10
|
## 0.5.0 (2024-10-22)
|
2
11
|
|
3
12
|
- Changed dataset directory to match XDG Base Directory Specification
|
data/lib/disco/recommender.rb
CHANGED
@@ -138,17 +138,13 @@ module Disco
|
|
138
138
|
return [] if ids.size == 0
|
139
139
|
|
140
140
|
predictions = @item_factors[ids, true].inner(@user_factors[u, true])
|
141
|
-
indexes = predictions.sort_index.reverse
|
142
|
-
indexes = indexes[0...[count + rated.size, indexes.size].min] if count
|
143
|
-
predictions = predictions[indexes]
|
141
|
+
predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
|
144
142
|
ids = ids[indexes]
|
145
143
|
elsif @user_recs_index && count
|
146
144
|
predictions, ids = @user_recs_index.search(@user_factors[u, true].expand_dims(0), count + rated.size).map { |v| v[0, true] }
|
147
145
|
else
|
148
146
|
predictions = @item_factors.inner(@user_factors[u, true])
|
149
|
-
indexes = predictions.sort_index.reverse
|
150
|
-
indexes = indexes[0...[count + rated.size, indexes.size].min] if count
|
151
|
-
predictions = predictions[indexes]
|
147
|
+
predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
|
152
148
|
ids = indexes
|
153
149
|
end
|
154
150
|
|
@@ -196,7 +192,7 @@ module Disco
|
|
196
192
|
# wilson score with continuity correction
|
197
193
|
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
|
198
194
|
z = 1.96 # 95% confidence
|
199
|
-
range = @max_rating - @min_rating
|
195
|
+
range = @max_rating - min_rating
|
200
196
|
n = Numo::DFloat.cast(@item_count)
|
201
197
|
phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
|
202
198
|
phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
|
@@ -204,9 +200,7 @@ module Disco
|
|
204
200
|
scores = scores * range + min_rating
|
205
201
|
end
|
206
202
|
|
207
|
-
indexes = scores.sort_index.reverse
|
208
|
-
indexes = indexes[0...[count, indexes.size].min] if count
|
209
|
-
scores = scores[indexes]
|
203
|
+
scores, indexes = top_k(scores, count)
|
210
204
|
|
211
205
|
keys = @item_map.keys
|
212
206
|
indexes.size.times.map do |i|
|
@@ -261,7 +255,6 @@ module Disco
|
|
261
255
|
end
|
262
256
|
|
263
257
|
def to_json
|
264
|
-
require "base64"
|
265
258
|
require "json"
|
266
259
|
|
267
260
|
obj = {
|
@@ -270,8 +263,8 @@ module Disco
|
|
270
263
|
item_ids: @item_map.keys,
|
271
264
|
rated: @user_map.map { |_, u| (@rated[u] || {}).keys },
|
272
265
|
global_mean: @global_mean,
|
273
|
-
user_factors: Base64.strict_encode64(@user_factors.to_binary),
|
274
|
-
item_factors: Base64.strict_encode64(@item_factors.to_binary),
|
266
|
+
user_factors: [@user_factors.to_binary].pack("m0"),
|
267
|
+
item_factors: [@item_factors.to_binary].pack("m0"),
|
275
268
|
factors: @factors,
|
276
269
|
epochs: @epochs,
|
277
270
|
verbose: @verbose
|
@@ -372,9 +365,7 @@ module Disco
|
|
372
365
|
end
|
373
366
|
else
|
374
367
|
predictions = factors.inner(factors[i, true]) / (norms * norms[i])
|
375
|
-
indexes = predictions.sort_index.reverse
|
376
|
-
indexes = indexes[0...[count + 1, indexes.size].min] if count
|
377
|
-
predictions = predictions[indexes]
|
368
|
+
predictions, indexes = top_k(predictions, count ? count + 1 : nil)
|
378
369
|
ids = indexes
|
379
370
|
end
|
380
371
|
|
@@ -395,6 +386,27 @@ module Disco
|
|
395
386
|
end
|
396
387
|
end
|
397
388
|
|
389
|
+
def top_k(values, count)
|
390
|
+
if self.class.sort_index?
|
391
|
+
indexes = values.sort_index.reverse
|
392
|
+
indexes = indexes[0...[count, indexes.size].min] if count
|
393
|
+
else
|
394
|
+
indexes = values.to_a.each_with_index.sort_by { |v, _| -v }
|
395
|
+
indexes = indexes.first(count) if count
|
396
|
+
indexes = indexes.map(&:last)
|
397
|
+
end
|
398
|
+
[values[indexes], indexes]
|
399
|
+
end
|
400
|
+
|
401
|
+
# https://github.com/ruby-numo/numo-narray/issues/243
|
402
|
+
def self.sort_index?
|
403
|
+
unless defined?(@sort_index)
|
404
|
+
arr = Numo::SFloat.new(100).rand
|
405
|
+
@sort_index = arr[arr.sort_index].to_a == arr.to_a.sort
|
406
|
+
end
|
407
|
+
@sort_index
|
408
|
+
end
|
409
|
+
|
398
410
|
def check_ratings(ratings)
|
399
411
|
unless ratings.all? { |r| !r[:rating].nil? }
|
400
412
|
raise ArgumentError, "Missing rating"
|
@@ -415,11 +427,7 @@ module Disco
|
|
415
427
|
def to_dataset(dataset)
|
416
428
|
if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
|
417
429
|
# convert keys to symbols
|
418
|
-
dataset
|
419
|
-
dataset.keys.each do |k, v|
|
420
|
-
dataset[k.to_sym] ||= dataset.delete(k)
|
421
|
-
end
|
422
|
-
dataset.to_a
|
430
|
+
dataset.each_row.map { |v| v.transform_keys(&:to_sym) }
|
423
431
|
elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
|
424
432
|
# convert keys to symbols
|
425
433
|
dataset = dataset.dup
|
@@ -432,16 +440,14 @@ module Disco
|
|
432
440
|
end
|
433
441
|
|
434
442
|
def json_load(obj)
|
435
|
-
require "base64"
|
436
|
-
|
437
443
|
@implicit = obj["implicit"]
|
438
444
|
@user_map = obj["user_ids"].map.with_index.to_h
|
439
445
|
@item_map = obj["item_ids"].map.with_index.to_h
|
440
446
|
@rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
|
441
447
|
@global_mean = obj["global_mean"].to_f
|
442
448
|
@factors = obj["factors"].to_i
|
443
|
-
@user_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["user_factors"]), [@user_map.size, @factors])
|
444
|
-
@item_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["item_factors"]), [@item_map.size, @factors])
|
449
|
+
@user_factors = Numo::SFloat.from_binary(obj["user_factors"].unpack1("m0"), [@user_map.size, @factors])
|
450
|
+
@item_factors = Numo::SFloat.from_binary(obj["item_factors"].unpack1("m0"), [@item_map.size, @factors])
|
445
451
|
@epochs = obj["epochs"].to_i
|
446
452
|
@verbose = obj["verbose"]
|
447
453
|
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: libmf
|
@@ -38,7 +37,6 @@ dependencies:
|
|
38
37
|
- - ">="
|
39
38
|
- !ruby/object:Gem::Version
|
40
39
|
version: 0.9.2
|
41
|
-
description:
|
42
40
|
email: andrew@ankane.org
|
43
41
|
executables: []
|
44
42
|
extensions: []
|
@@ -61,7 +59,6 @@ homepage: https://github.com/ankane/disco
|
|
61
59
|
licenses:
|
62
60
|
- MIT
|
63
61
|
metadata: {}
|
64
|
-
post_install_message:
|
65
62
|
rdoc_options: []
|
66
63
|
require_paths:
|
67
64
|
- lib
|
@@ -76,8 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
73
|
- !ruby/object:Gem::Version
|
77
74
|
version: '0'
|
78
75
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
80
|
-
signing_key:
|
76
|
+
rubygems_version: 3.6.9
|
81
77
|
specification_version: 4
|
82
78
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|
83
79
|
test_files: []
|