disco 0.5.1 → 0.5.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b1d7b56a9c40088874bbd79ff7fa5bf8783a4406a9e5dadf1dddb04303ca7fee
4
- data.tar.gz: 0d843da83fc246b5da2aa097eba3534d729b51d9b03c368226b884fe33e61795
3
+ metadata.gz: 8ca6da20099f6ec36c5f07547c61bb368066f2f480fba03c775287f48407a1f8
4
+ data.tar.gz: 3fbd73bb42398534d31a7b574d5d8dd6e40a1fe867e7cf4fe0ede4bdad5cd0c9
5
5
  SHA512:
6
- metadata.gz: 49341b30e2ffa7348c19af2c0073f8f9e15afe3dcd856922fb5ae3fd3cd6e5ab555731c38bf3678f5f4efb064ecfce994d24b6238aa93fbc62e4621d7de036ab
7
- data.tar.gz: 9bb387230927cf55e94c3614c4a68881031206c9ee2dee2d48c06f6053134cdff0af4c4d24eba825acf3429394556368378bd51860f4dfcddf0dd1742309794f
6
+ metadata.gz: 570275aaf7887c98318040efde2440f5b6e8e04be25faa7a78b73453e05dec845963067f0808eff09e28621dbd7b3277e98da83521e067b6170a585f717462f5
7
+ data.tar.gz: 57b86344de0656aa8e4bb0f130d0f3bcd928d05365b7f39479e4c6a69cd49d3a2f2a10082abedaa351d5febb212681236548520a01c9b321132e6f798e74a690
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.2 (2025-09-12)
2
+
3
+ - Fixed recommendations when numo-narray compiled with GCC 13+
4
+ - Fixed error with Rover 0.5+
5
+
1
6
  ## 0.5.1 (2024-12-29)
2
7
 
3
8
  - Removed dependency on `base64` gem for serialization
@@ -138,17 +138,13 @@ module Disco
138
138
  return [] if ids.size == 0
139
139
 
140
140
  predictions = @item_factors[ids, true].inner(@user_factors[u, true])
141
- indexes = predictions.sort_index.reverse
142
- indexes = indexes[0...[count + rated.size, indexes.size].min] if count
143
- predictions = predictions[indexes]
141
+ predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
144
142
  ids = ids[indexes]
145
143
  elsif @user_recs_index && count
146
144
  predictions, ids = @user_recs_index.search(@user_factors[u, true].expand_dims(0), count + rated.size).map { |v| v[0, true] }
147
145
  else
148
146
  predictions = @item_factors.inner(@user_factors[u, true])
149
- indexes = predictions.sort_index.reverse # reverse just creates view
150
- indexes = indexes[0...[count + rated.size, indexes.size].min] if count
151
- predictions = predictions[indexes]
147
+ predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
152
148
  ids = indexes
153
149
  end
154
150
 
@@ -196,7 +192,7 @@ module Disco
196
192
  # wilson score with continuity correction
197
193
  # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
198
194
  z = 1.96 # 95% confidence
199
- range = @max_rating - @min_rating
195
+ range = @max_rating - min_rating
200
196
  n = Numo::DFloat.cast(@item_count)
201
197
  phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
202
198
  phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
@@ -204,9 +200,7 @@ module Disco
204
200
  scores = scores * range + min_rating
205
201
  end
206
202
 
207
- indexes = scores.sort_index.reverse
208
- indexes = indexes[0...[count, indexes.size].min] if count
209
- scores = scores[indexes]
203
+ scores, indexes = top_k(scores, count)
210
204
 
211
205
  keys = @item_map.keys
212
206
  indexes.size.times.map do |i|
@@ -371,9 +365,7 @@ module Disco
371
365
  end
372
366
  else
373
367
  predictions = factors.inner(factors[i, true]) / (norms * norms[i])
374
- indexes = predictions.sort_index.reverse
375
- indexes = indexes[0...[count + 1, indexes.size].min] if count
376
- predictions = predictions[indexes]
368
+ predictions, indexes = top_k(predictions, count ? count + 1 : nil)
377
369
  ids = indexes
378
370
  end
379
371
 
@@ -394,6 +386,27 @@ module Disco
394
386
  end
395
387
  end
396
388
 
389
+ def top_k(values, count)
390
+ if self.class.sort_index?
391
+ indexes = values.sort_index.reverse
392
+ indexes = indexes[0...[count, indexes.size].min] if count
393
+ else
394
+ indexes = values.to_a.each_with_index.sort_by { |v, _| -v }
395
+ indexes = indexes.first(count) if count
396
+ indexes = indexes.map(&:last)
397
+ end
398
+ [values[indexes], indexes]
399
+ end
400
+
401
+ # https://github.com/ruby-numo/numo-narray/issues/243
402
+ def self.sort_index?
403
+ unless defined?(@sort_index)
404
+ arr = Numo::SFloat.new(100).rand
405
+ @sort_index = arr[arr.sort_index].to_a == arr.to_a.sort
406
+ end
407
+ @sort_index
408
+ end
409
+
397
410
  def check_ratings(ratings)
398
411
  unless ratings.all? { |r| !r[:rating].nil? }
399
412
  raise ArgumentError, "Missing rating"
@@ -414,11 +427,7 @@ module Disco
414
427
  def to_dataset(dataset)
415
428
  if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
416
429
  # convert keys to symbols
417
- dataset = dataset.dup
418
- dataset.keys.each do |k, v|
419
- dataset[k.to_sym] ||= dataset.delete(k)
420
- end
421
- dataset.to_a
430
+ dataset.each_row.map { |v| v.transform_keys(&:to_sym) }
422
431
  elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
423
432
  # convert keys to symbols
424
433
  dataset = dataset.dup
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-12-30 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: libmf
@@ -73,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
73
73
  - !ruby/object:Gem::Version
74
74
  version: '0'
75
75
  requirements: []
76
- rubygems_version: 3.6.2
76
+ rubygems_version: 3.6.9
77
77
  specification_version: 4
78
78
  summary: Recommendations for Ruby and Rails using collaborative filtering
79
79
  test_files: []