disco 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/disco/recommender.rb +27 -18
- data/lib/disco/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ca6da20099f6ec36c5f07547c61bb368066f2f480fba03c775287f48407a1f8
|
4
|
+
data.tar.gz: 3fbd73bb42398534d31a7b574d5d8dd6e40a1fe867e7cf4fe0ede4bdad5cd0c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 570275aaf7887c98318040efde2440f5b6e8e04be25faa7a78b73453e05dec845963067f0808eff09e28621dbd7b3277e98da83521e067b6170a585f717462f5
|
7
|
+
data.tar.gz: 57b86344de0656aa8e4bb0f130d0f3bcd928d05365b7f39479e4c6a69cd49d3a2f2a10082abedaa351d5febb212681236548520a01c9b321132e6f798e74a690
|
data/CHANGELOG.md
CHANGED
data/lib/disco/recommender.rb
CHANGED
@@ -138,17 +138,13 @@ module Disco
|
|
138
138
|
return [] if ids.size == 0
|
139
139
|
|
140
140
|
predictions = @item_factors[ids, true].inner(@user_factors[u, true])
|
141
|
-
indexes = predictions.sort_index.reverse
|
142
|
-
indexes = indexes[0...[count + rated.size, indexes.size].min] if count
|
143
|
-
predictions = predictions[indexes]
|
141
|
+
predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
|
144
142
|
ids = ids[indexes]
|
145
143
|
elsif @user_recs_index && count
|
146
144
|
predictions, ids = @user_recs_index.search(@user_factors[u, true].expand_dims(0), count + rated.size).map { |v| v[0, true] }
|
147
145
|
else
|
148
146
|
predictions = @item_factors.inner(@user_factors[u, true])
|
149
|
-
indexes = predictions.sort_index.reverse
|
150
|
-
indexes = indexes[0...[count + rated.size, indexes.size].min] if count
|
151
|
-
predictions = predictions[indexes]
|
147
|
+
predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
|
152
148
|
ids = indexes
|
153
149
|
end
|
154
150
|
|
@@ -196,7 +192,7 @@ module Disco
|
|
196
192
|
# wilson score with continuity correction
|
197
193
|
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
|
198
194
|
z = 1.96 # 95% confidence
|
199
|
-
range = @max_rating - @min_rating
|
195
|
+
range = @max_rating - min_rating
|
200
196
|
n = Numo::DFloat.cast(@item_count)
|
201
197
|
phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
|
202
198
|
phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
|
@@ -204,9 +200,7 @@ module Disco
|
|
204
200
|
scores = scores * range + min_rating
|
205
201
|
end
|
206
202
|
|
207
|
-
indexes = scores.sort_index.reverse
|
208
|
-
indexes = indexes[0...[count, indexes.size].min] if count
|
209
|
-
scores = scores[indexes]
|
203
|
+
scores, indexes = top_k(scores, count)
|
210
204
|
|
211
205
|
keys = @item_map.keys
|
212
206
|
indexes.size.times.map do |i|
|
@@ -371,9 +365,7 @@ module Disco
|
|
371
365
|
end
|
372
366
|
else
|
373
367
|
predictions = factors.inner(factors[i, true]) / (norms * norms[i])
|
374
|
-
indexes = predictions.sort_index.reverse
|
375
|
-
indexes = indexes[0...[count + 1, indexes.size].min] if count
|
376
|
-
predictions = predictions[indexes]
|
368
|
+
predictions, indexes = top_k(predictions, count ? count + 1 : nil)
|
377
369
|
ids = indexes
|
378
370
|
end
|
379
371
|
|
@@ -394,6 +386,27 @@ module Disco
|
|
394
386
|
end
|
395
387
|
end
|
396
388
|
|
389
|
+
def top_k(values, count)
|
390
|
+
if self.class.sort_index?
|
391
|
+
indexes = values.sort_index.reverse
|
392
|
+
indexes = indexes[0...[count, indexes.size].min] if count
|
393
|
+
else
|
394
|
+
indexes = values.to_a.each_with_index.sort_by { |v, _| -v }
|
395
|
+
indexes = indexes.first(count) if count
|
396
|
+
indexes = indexes.map(&:last)
|
397
|
+
end
|
398
|
+
[values[indexes], indexes]
|
399
|
+
end
|
400
|
+
|
401
|
+
# https://github.com/ruby-numo/numo-narray/issues/243
|
402
|
+
def self.sort_index?
|
403
|
+
unless defined?(@sort_index)
|
404
|
+
arr = Numo::SFloat.new(100).rand
|
405
|
+
@sort_index = arr[arr.sort_index].to_a == arr.to_a.sort
|
406
|
+
end
|
407
|
+
@sort_index
|
408
|
+
end
|
409
|
+
|
397
410
|
def check_ratings(ratings)
|
398
411
|
unless ratings.all? { |r| !r[:rating].nil? }
|
399
412
|
raise ArgumentError, "Missing rating"
|
@@ -414,11 +427,7 @@ module Disco
|
|
414
427
|
def to_dataset(dataset)
|
415
428
|
if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
|
416
429
|
# convert keys to symbols
|
417
|
-
dataset
|
418
|
-
dataset.keys.each do |k, v|
|
419
|
-
dataset[k.to_sym] ||= dataset.delete(k)
|
420
|
-
end
|
421
|
-
dataset.to_a
|
430
|
+
dataset.each_row.map { |v| v.transform_keys(&:to_sym) }
|
422
431
|
elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
|
423
432
|
# convert keys to symbols
|
424
433
|
dataset = dataset.dup
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: libmf
|
@@ -73,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '0'
|
75
75
|
requirements: []
|
76
|
-
rubygems_version: 3.6.
|
76
|
+
rubygems_version: 3.6.9
|
77
77
|
specification_version: 4
|
78
78
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|
79
79
|
test_files: []
|