disco 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ffa944ca55d7da5a4dc4cced181d013a9fdc0f1fbb76af6003ba3049bb299682
4
- data.tar.gz: 64f389a3a93173b3d24cad85e5072b4d3f8965a729387a5845ed9c86098a9496
3
+ metadata.gz: 8ca6da20099f6ec36c5f07547c61bb368066f2f480fba03c775287f48407a1f8
4
+ data.tar.gz: 3fbd73bb42398534d31a7b574d5d8dd6e40a1fe867e7cf4fe0ede4bdad5cd0c9
5
5
  SHA512:
6
- metadata.gz: f167237c7553d5a506467844d6acf860e81efca11da4e8facb8988cc994fbe70a52b38ff87b0d1157bcf9312b172c8794b49e6e8607748f77b129bcac26742d2
7
- data.tar.gz: f5feab5c5d88dac8cc5a01cbab437110d0fe41c4fa98283464174d253996d8c8efa9dc92724f30bbae334c7fe2c7cb932b2b6335bd37f44aa645ef33fa7ec7ea
6
+ metadata.gz: 570275aaf7887c98318040efde2440f5b6e8e04be25faa7a78b73453e05dec845963067f0808eff09e28621dbd7b3277e98da83521e067b6170a585f717462f5
7
+ data.tar.gz: 57b86344de0656aa8e4bb0f130d0f3bcd928d05365b7f39479e4c6a69cd49d3a2f2a10082abedaa351d5febb212681236548520a01c9b321132e6f798e74a690
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.5.2 (2025-09-12)
2
+
3
+ - Fixed recommendations when numo-narray compiled with GCC 13+
4
+ - Fixed error with Rover 0.5+
5
+
6
+ ## 0.5.1 (2024-12-29)
7
+
8
+ - Removed dependency on `base64` gem for serialization
9
+
1
10
  ## 0.5.0 (2024-10-22)
2
11
 
3
12
  - Changed dataset directory to match XDG Base Directory Specification
@@ -138,17 +138,13 @@ module Disco
138
138
  return [] if ids.size == 0
139
139
 
140
140
  predictions = @item_factors[ids, true].inner(@user_factors[u, true])
141
- indexes = predictions.sort_index.reverse
142
- indexes = indexes[0...[count + rated.size, indexes.size].min] if count
143
- predictions = predictions[indexes]
141
+ predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
144
142
  ids = ids[indexes]
145
143
  elsif @user_recs_index && count
146
144
  predictions, ids = @user_recs_index.search(@user_factors[u, true].expand_dims(0), count + rated.size).map { |v| v[0, true] }
147
145
  else
148
146
  predictions = @item_factors.inner(@user_factors[u, true])
149
- indexes = predictions.sort_index.reverse # reverse just creates view
150
- indexes = indexes[0...[count + rated.size, indexes.size].min] if count
151
- predictions = predictions[indexes]
147
+ predictions, indexes = top_k(predictions, count ? count + rated.size : nil)
152
148
  ids = indexes
153
149
  end
154
150
 
@@ -196,7 +192,7 @@ module Disco
196
192
  # wilson score with continuity correction
197
193
  # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
198
194
  z = 1.96 # 95% confidence
199
- range = @max_rating - @min_rating
195
+ range = @max_rating - min_rating
200
196
  n = Numo::DFloat.cast(@item_count)
201
197
  phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
202
198
  phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
@@ -204,9 +200,7 @@ module Disco
204
200
  scores = scores * range + min_rating
205
201
  end
206
202
 
207
- indexes = scores.sort_index.reverse
208
- indexes = indexes[0...[count, indexes.size].min] if count
209
- scores = scores[indexes]
203
+ scores, indexes = top_k(scores, count)
210
204
 
211
205
  keys = @item_map.keys
212
206
  indexes.size.times.map do |i|
@@ -261,7 +255,6 @@ module Disco
261
255
  end
262
256
 
263
257
  def to_json
264
- require "base64"
265
258
  require "json"
266
259
 
267
260
  obj = {
@@ -270,8 +263,8 @@ module Disco
270
263
  item_ids: @item_map.keys,
271
264
  rated: @user_map.map { |_, u| (@rated[u] || {}).keys },
272
265
  global_mean: @global_mean,
273
- user_factors: Base64.strict_encode64(@user_factors.to_binary),
274
- item_factors: Base64.strict_encode64(@item_factors.to_binary),
266
+ user_factors: [@user_factors.to_binary].pack("m0"),
267
+ item_factors: [@item_factors.to_binary].pack("m0"),
275
268
  factors: @factors,
276
269
  epochs: @epochs,
277
270
  verbose: @verbose
@@ -372,9 +365,7 @@ module Disco
372
365
  end
373
366
  else
374
367
  predictions = factors.inner(factors[i, true]) / (norms * norms[i])
375
- indexes = predictions.sort_index.reverse
376
- indexes = indexes[0...[count + 1, indexes.size].min] if count
377
- predictions = predictions[indexes]
368
+ predictions, indexes = top_k(predictions, count ? count + 1 : nil)
378
369
  ids = indexes
379
370
  end
380
371
 
@@ -395,6 +386,27 @@ module Disco
395
386
  end
396
387
  end
397
388
 
389
+ def top_k(values, count)
390
+ if self.class.sort_index?
391
+ indexes = values.sort_index.reverse
392
+ indexes = indexes[0...[count, indexes.size].min] if count
393
+ else
394
+ indexes = values.to_a.each_with_index.sort_by { |v, _| -v }
395
+ indexes = indexes.first(count) if count
396
+ indexes = indexes.map(&:last)
397
+ end
398
+ [values[indexes], indexes]
399
+ end
400
+
401
+ # https://github.com/ruby-numo/numo-narray/issues/243
402
+ def self.sort_index?
403
+ unless defined?(@sort_index)
404
+ arr = Numo::SFloat.new(100).rand
405
+ @sort_index = arr[arr.sort_index].to_a == arr.to_a.sort
406
+ end
407
+ @sort_index
408
+ end
409
+
398
410
  def check_ratings(ratings)
399
411
  unless ratings.all? { |r| !r[:rating].nil? }
400
412
  raise ArgumentError, "Missing rating"
@@ -415,11 +427,7 @@ module Disco
415
427
  def to_dataset(dataset)
416
428
  if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
417
429
  # convert keys to symbols
418
- dataset = dataset.dup
419
- dataset.keys.each do |k, v|
420
- dataset[k.to_sym] ||= dataset.delete(k)
421
- end
422
- dataset.to_a
430
+ dataset.each_row.map { |v| v.transform_keys(&:to_sym) }
423
431
  elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
424
432
  # convert keys to symbols
425
433
  dataset = dataset.dup
@@ -432,16 +440,14 @@ module Disco
432
440
  end
433
441
 
434
442
  def json_load(obj)
435
- require "base64"
436
-
437
443
  @implicit = obj["implicit"]
438
444
  @user_map = obj["user_ids"].map.with_index.to_h
439
445
  @item_map = obj["item_ids"].map.with_index.to_h
440
446
  @rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
441
447
  @global_mean = obj["global_mean"].to_f
442
448
  @factors = obj["factors"].to_i
443
- @user_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["user_factors"]), [@user_map.size, @factors])
444
- @item_factors = Numo::SFloat.from_binary(Base64.strict_decode64(obj["item_factors"]), [@item_map.size, @factors])
449
+ @user_factors = Numo::SFloat.from_binary(obj["user_factors"].unpack1("m0"), [@user_map.size, @factors])
450
+ @item_factors = Numo::SFloat.from_binary(obj["item_factors"].unpack1("m0"), [@item_map.size, @factors])
445
451
  @epochs = obj["epochs"].to_i
446
452
  @verbose = obj["verbose"]
447
453
 
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-10-23 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: libmf
@@ -38,7 +37,6 @@ dependencies:
38
37
  - - ">="
39
38
  - !ruby/object:Gem::Version
40
39
  version: 0.9.2
41
- description:
42
40
  email: andrew@ankane.org
43
41
  executables: []
44
42
  extensions: []
@@ -61,7 +59,6 @@ homepage: https://github.com/ankane/disco
61
59
  licenses:
62
60
  - MIT
63
61
  metadata: {}
64
- post_install_message:
65
62
  rdoc_options: []
66
63
  require_paths:
67
64
  - lib
@@ -76,8 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
76
73
  - !ruby/object:Gem::Version
77
74
  version: '0'
78
75
  requirements: []
79
- rubygems_version: 3.5.16
80
- signing_key:
76
+ rubygems_version: 3.6.9
81
77
  specification_version: 4
82
78
  summary: Recommendations for Ruby and Rails using collaborative filtering
83
79
  test_files: []