disco 0.5.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8ca6da20099f6ec36c5f07547c61bb368066f2f480fba03c775287f48407a1f8
4
- data.tar.gz: 3fbd73bb42398534d31a7b574d5d8dd6e40a1fe867e7cf4fe0ede4bdad5cd0c9
3
+ metadata.gz: bb67e295f1ad63fed7441947934c3bf4c3b527cfddb1f1d9620d551c9e409223
4
+ data.tar.gz: 6c86546e3890792c89008fe9850a242e894b097eed2621bb9dba3bef6a213296
5
5
  SHA512:
6
- metadata.gz: 570275aaf7887c98318040efde2440f5b6e8e04be25faa7a78b73453e05dec845963067f0808eff09e28621dbd7b3277e98da83521e067b6170a585f717462f5
7
- data.tar.gz: 57b86344de0656aa8e4bb0f130d0f3bcd928d05365b7f39479e4c6a69cd49d3a2f2a10082abedaa351d5febb212681236548520a01c9b321132e6f798e74a690
6
+ metadata.gz: f277e589e5a833a834874c5cf25dba32d7908b197047fe0b6e44ae6b8950a55204dc235de8d5c36c6b5ec6a78c53c4ba63c1607c64b8da4c1555eba4b0dcd7bd
7
+ data.tar.gz: 2abee5543ba9a600dc4692e15af30d63505dc87eb6ccc3dd42b530c4760aacef562c597882b26a9e1c586b813ae1f91c79307627d7511321298e879205d1a631
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 1.0.0 (2026-04-17)
2
+
3
+ - Switched to `numo-narray-alt`
4
+ - Fixed handling of new users and items in validation set
5
+ - Dropped support for Daru
6
+ - Dropped support for Ruby < 3.3 and Rails < 7.2
7
+
1
8
  ## 0.5.2 (2025-09-12)
2
9
 
3
10
  - Fixed recommendations when numo-narray compiled with GCC 13+
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2019-2024 Andrew Kane
1
+ Copyright (c) 2019-2026 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -256,12 +256,6 @@ Or a Rover data frame
256
256
  Rover.read_csv("ratings.csv")
257
257
  ```
258
258
 
259
- Or a Daru data frame
260
-
261
- ```ruby
262
- Daru::DataFrame.from_csv("ratings.csv")
263
- ```
264
-
265
259
  ## Performance
266
260
 
267
261
  If you have a large number of users or items, you can use an approximate nearest neighbors library like [Faiss](https://github.com/ankane/faiss) to improve the performance of certain methods.
data/lib/disco/data.rb CHANGED
@@ -29,9 +29,7 @@ module Disco
29
29
 
30
30
  def download_file(fname, origin, file_hash:)
31
31
  require "digest"
32
- require "fileutils"
33
- require "net/http"
34
- require "tmpdir"
32
+ require "open-uri"
35
33
 
36
34
  cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
37
35
  dest = "#{cache_home}/disco/#{fname}"
@@ -39,38 +37,24 @@ module Disco
39
37
 
40
38
  return dest if File.exist?(dest)
41
39
 
42
- temp_path = "#{Dir.tmpdir}/disco-#{Time.now.to_f}" # TODO better name
43
-
44
- digest = Digest::SHA2.new
45
-
46
- uri = URI(origin)
47
-
48
- # Net::HTTP automatically adds Accept-Encoding for compression
49
- # of response bodies and automatically decompresses gzip
50
- # and deflate responses unless a Range header was sent.
51
- # https://ruby-doc.org/stdlib-2.6.4/libdoc/net/http/rdoc/Net/HTTP.html
52
- Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
53
- request = Net::HTTP::Get.new(uri)
54
-
55
- puts "Downloading data from #{origin}"
56
- File.open(temp_path, "wb") do |f|
57
- http.request(request) do |response|
58
- response.read_body do |chunk|
59
- f.write(chunk)
60
- digest.update(chunk)
61
- end
40
+ puts "Downloading data from #{origin}"
41
+ URI.parse(origin).open(redirect: false) do |download|
42
+ digest =
43
+ if download.respond_to?(:path)
44
+ download.flush
45
+ Digest::SHA256.file(download.path).hexdigest
46
+ else
47
+ Digest::SHA256.hexdigest(download.string)
62
48
  end
49
+
50
+ if digest != file_hash
51
+ raise Error, "Bad hash: #{digest}"
63
52
  end
64
- end
53
+ puts "Hash verified: #{file_hash}"
65
54
 
66
- if digest.hexdigest != file_hash
67
- raise Error, "Bad hash: #{digest.hexdigest}"
55
+ IO.copy_stream(download, dest)
68
56
  end
69
57
 
70
- puts "Hash verified: #{file_hash}"
71
-
72
- FileUtils.mv(temp_path, dest)
73
-
74
58
  dest
75
59
  end
76
60
  end
data/lib/disco/model.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Disco
2
2
  module Model
3
3
  def has_recommended(name, class_name: nil)
4
- if ActiveRecord::VERSION::MAJOR < 7
5
- raise Disco::Error, "Requires Active Record 7+"
4
+ if ActiveRecord::VERSION::STRING.to_f < 7.2
5
+ raise Disco::Error, "Requires Active Record 7.2+"
6
6
  end
7
7
 
8
8
  class_name ||= name.to_s.singularize.camelize
@@ -38,18 +38,17 @@ module Disco
38
38
 
39
39
  @user_map = {}
40
40
  @item_map = {}
41
- @rated = Hash.new { |hash, key| hash[key] = {} }
41
+ @rated = []
42
42
  input = []
43
43
  train_set.each do |v|
44
44
  # update maps and build matrix in single pass
45
45
  u = (@user_map[v[:user_id]] ||= @user_map.size)
46
46
  i = (@item_map[v[:item_id]] ||= @item_map.size)
47
- @rated[u][i] = true
47
+ (@rated[u] ||= Set.new) << i
48
48
 
49
49
  # explicit will always have a value due to check_ratings
50
50
  input << [u, i, @implicit ? 1 : v[:rating]]
51
51
  end
52
- @rated.default = nil
53
52
 
54
53
  # much more efficient than checking every value in another pass
55
54
  raise ArgumentError, "Missing user_id" if @user_map.key?(nil)
@@ -74,15 +73,24 @@ module Disco
74
73
  end
75
74
 
76
75
  eval_set = nil
77
- if validation_set
76
+ if validation_set&.any?
78
77
  eval_set = []
79
78
  validation_set.each do |v|
80
79
  u = @user_map[v[:user_id]]
81
80
  i = @item_map[v[:item_id]]
82
81
 
83
- # set to non-existent item
84
- u ||= -1
85
- i ||= -1
82
+ if @implicit
83
+ if u.nil?
84
+ raise ArgumentError, "Validation set cannot have new users for implicit feedback"
85
+ end
86
+
87
+ if i.nil?
88
+ raise ArgumentError, "Validation set cannot have new items for implicit feedback"
89
+ end
90
+ else
91
+ u ||= @user_map.size
92
+ i ||= @item_map.size
93
+ end
86
94
 
87
95
  eval_set << [u, i, @implicit ? 1 : v[:rating]]
88
96
  end
@@ -134,7 +142,7 @@ module Disco
134
142
  rated = item_ids ? {} : @rated[u]
135
143
 
136
144
  if item_ids
137
- ids = Numo::NArray.cast(item_ids.map { |i| @item_map[i] }.compact)
145
+ ids = Numo::NArray.cast(item_ids.filter_map { |i| @item_map[i] })
138
146
  return [] if ids.size == 0
139
147
 
140
148
  predictions = @item_factors[ids, true].inner(@user_factors[u, true])
@@ -153,7 +161,7 @@ module Disco
153
161
  keys = @item_map.keys
154
162
  result = []
155
163
  ids.each_with_index do |item_id, i|
156
- next if rated[item_id]
164
+ next if rated.include?(item_id)
157
165
 
158
166
  result << {item_id: keys[item_id], score: predictions[i]}
159
167
  break if result.size == count
@@ -261,7 +269,7 @@ module Disco
261
269
  implicit: @implicit,
262
270
  user_ids: @user_map.keys,
263
271
  item_ids: @item_map.keys,
264
- rated: @user_map.map { |_, u| (@rated[u] || {}).keys },
272
+ rated: @rated.map { |v| v.to_a.sort },
265
273
  global_mean: @global_mean,
266
274
  user_factors: [@user_factors.to_binary].pack("m0"),
267
275
  item_factors: [@item_factors.to_binary].pack("m0"),
@@ -387,26 +395,11 @@ module Disco
387
395
  end
388
396
 
389
397
  def top_k(values, count)
390
- if self.class.sort_index?
391
- indexes = values.sort_index.reverse
392
- indexes = indexes[0...[count, indexes.size].min] if count
393
- else
394
- indexes = values.to_a.each_with_index.sort_by { |v, _| -v }
395
- indexes = indexes.first(count) if count
396
- indexes = indexes.map(&:last)
397
- end
398
+ indexes = values.sort_index.reverse
399
+ indexes = indexes[0...[count, indexes.size].min] if count
398
400
  [values[indexes], indexes]
399
401
  end
400
402
 
401
- # https://github.com/ruby-numo/numo-narray/issues/243
402
- def self.sort_index?
403
- unless defined?(@sort_index)
404
- arr = Numo::SFloat.new(100).rand
405
- @sort_index = arr[arr.sort_index].to_a == arr.to_a.sort
406
- end
407
- @sort_index
408
- end
409
-
410
403
  def check_ratings(ratings)
411
404
  unless ratings.all? { |r| !r[:rating].nil? }
412
405
  raise ArgumentError, "Missing rating"
@@ -428,12 +421,6 @@ module Disco
428
421
  if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
429
422
  # convert keys to symbols
430
423
  dataset.each_row.map { |v| v.transform_keys(&:to_sym) }
431
- elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
432
- # convert keys to symbols
433
- dataset = dataset.dup
434
- new_names = dataset.vectors.to_a.map { |k| [k, k.to_sym] }.to_h
435
- dataset.rename_vectors!(new_names)
436
- dataset.to_a[0]
437
424
  else
438
425
  dataset
439
426
  end
@@ -443,7 +430,7 @@ module Disco
443
430
  @implicit = obj["implicit"]
444
431
  @user_map = obj["user_ids"].map.with_index.to_h
445
432
  @item_map = obj["item_ids"].map.with_index.to_h
446
- @rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
433
+ @rated = obj["rated"].map { |r| Set.new(r) }
447
434
  @global_mean = obj["global_mean"].to_f
448
435
  @factors = obj["factors"].to_i
449
436
  @user_factors = Numo::SFloat.from_binary(obj["user_factors"].unpack1("m0"), [@user_map.size, @factors])
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.5.2"
2
+ VERSION = "1.0.0"
3
3
  end
data/lib/disco.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  # dependencies
2
2
  require "libmf"
3
- require "numo/narray"
3
+ require "numo/narray/alt"
4
+
5
+ # stdlib
6
+ require "set"
4
7
 
5
8
  # modules
6
9
  require_relative "disco/data"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -15,28 +15,28 @@ dependencies:
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: '0.4'
18
+ version: '0.5'
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: '0.4'
25
+ version: '0.5'
26
26
  - !ruby/object:Gem::Dependency
27
- name: numo-narray
27
+ name: numo-narray-alt
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: 0.9.2
32
+ version: '0.10'
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: 0.9.2
39
+ version: '0.10'
40
40
  email: andrew@ankane.org
41
41
  executables: []
42
42
  extensions: []
@@ -66,14 +66,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - ">="
68
68
  - !ruby/object:Gem::Version
69
- version: '3.1'
69
+ version: '3.3'
70
70
  required_rubygems_version: !ruby/object:Gem::Requirement
71
71
  requirements:
72
72
  - - ">="
73
73
  - !ruby/object:Gem::Version
74
74
  version: '0'
75
75
  requirements: []
76
- rubygems_version: 3.6.9
76
+ rubygems_version: 4.0.6
77
77
  specification_version: 4
78
78
  summary: Recommendations for Ruby and Rails using collaborative filtering
79
79
  test_files: []