disco 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb58ae5c2579ef03fc0089f9f9e0421cd38bd8996472bf04ef9383b601759e5a
4
- data.tar.gz: 9bb8c5aa186ab1e6364d0ed446dc61e69a0f0ee730cee4c8367586dac69b666a
3
+ metadata.gz: ffa944ca55d7da5a4dc4cced181d013a9fdc0f1fbb76af6003ba3049bb299682
4
+ data.tar.gz: 64f389a3a93173b3d24cad85e5072b4d3f8965a729387a5845ed9c86098a9496
5
5
  SHA512:
6
- metadata.gz: ecee89ddb2db25a9ba697ec637f16a2408079b8b3d7d287df0cfdb30d9b7c0a996d6cb38075766d7e3fecf529be6ad90187a1ec95d2d141076ecfe3e2236209a
7
- data.tar.gz: 31d7259ac86779530b468392b611f0920b24682f8017e3b50f9a3ca41d30ad08454219881e26eae67d1add7a47d645f7a8b77d253ada4a3d0b5db0f5dbf5e33d
6
+ metadata.gz: f167237c7553d5a506467844d6acf860e81efca11da4e8facb8988cc994fbe70a52b38ff87b0d1157bcf9312b172c8794b49e6e8607748f77b129bcac26742d2
7
+ data.tar.gz: f5feab5c5d88dac8cc5a01cbab437110d0fe41c4fa98283464174d253996d8c8efa9dc92724f30bbae334c7fe2c7cb932b2b6335bd37f44aa645ef33fa7ec7ea
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## 0.5.0 (2024-10-22)
2
+
3
+ - Changed dataset directory to match XDG Base Directory Specification
4
+ - Dropped support for marshal serialization
5
+ - Dropped support for Ruby < 3.1 and Rails < 7
6
+
7
+ ## 0.4.2 (2024-06-24)
8
+
9
+ - Removed dependency on `csv` gem for `load_movielens`
10
+
1
11
  ## 0.4.1 (2024-05-23)
2
12
 
3
13
  - Reduced memory for `item_recs` and `similar_users`
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2019-2022 Andrew Kane
1
+ Copyright (c) 2019-2024 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -194,7 +194,7 @@ json = File.read("recommender.json")
194
194
  recommender = Disco::Recommender.load_json(json)
195
195
  ```
196
196
 
197
- Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples).
197
+ Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples/disco).
198
198
 
199
199
  ## Algorithms
200
200
 
data/lib/disco/data.rb CHANGED
@@ -1,23 +1,20 @@
1
1
  module Disco
2
2
  module Data
3
3
  def load_movielens
4
- require "csv"
5
-
6
4
  item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
7
5
  file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
8
6
  data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
9
7
  file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
10
8
 
11
- # convert u.item to utf-8
12
- movies_str = File.read(item_path).encode("UTF-8", "ISO-8859-1")
13
-
14
9
  movies = {}
15
- CSV.parse(movies_str, col_sep: "|") do |row|
10
+ File.foreach(item_path) do |line|
11
+ row = line.encode("UTF-8", "ISO-8859-1").split("|")
16
12
  movies[row[0]] = row[1]
17
13
  end
18
14
 
19
15
  data = []
20
- CSV.foreach(data_path, col_sep: "\t") do |row|
16
+ File.foreach(data_path) do |line|
17
+ row = line.split("\t")
21
18
  data << {
22
19
  user_id: row[0].to_i,
23
20
  item_id: movies[row[1]],
@@ -36,9 +33,8 @@ module Disco
36
33
  require "net/http"
37
34
  require "tmpdir"
38
35
 
39
- # TODO handle this better
40
- raise "No HOME" unless ENV["HOME"]
41
- dest = "#{ENV["HOME"]}/.disco/#{fname}"
36
+ cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
37
+ dest = "#{cache_home}/disco/#{fname}"
42
38
  FileUtils.mkdir_p(File.dirname(dest))
43
39
 
44
40
  return dest if File.exist?(dest)
data/lib/disco/model.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Disco
2
2
  module Model
3
3
  def has_recommended(name, class_name: nil)
4
- if ActiveRecord::VERSION::MAJOR < 6
5
- raise Disco::Error, "Requires Active Record 6+"
4
+ if ActiveRecord::VERSION::MAJOR < 7
5
+ raise Disco::Error, "Requires Active Record 7+"
6
6
  end
7
7
 
8
8
  class_name ||= name.to_s.singularize.camelize
@@ -64,8 +64,8 @@ module Disco
64
64
  end
65
65
 
66
66
  if @top_items
67
- @item_count = [0] * @item_map.size
68
- @item_sum = [0.0] * @item_map.size
67
+ @item_count = Array.new(@item_map.size, 0)
68
+ @item_sum = Array.new(@item_map.size, 0.0)
69
69
  train_set.each do |v|
70
70
  i = @item_map[v[:item_id]]
71
71
  @item_count[i] += 1
@@ -431,61 +431,6 @@ module Disco
431
431
  end
432
432
  end
433
433
 
434
- def marshal_dump
435
- warn "[disco] Marshal serialization is deprecated - use JSON instead"
436
-
437
- obj = {
438
- implicit: @implicit,
439
- user_map: @user_map,
440
- item_map: @item_map,
441
- rated: @rated,
442
- global_mean: @global_mean,
443
- user_factors: @user_factors,
444
- item_factors: @item_factors,
445
- factors: @factors,
446
- epochs: @epochs,
447
- verbose: @verbose
448
- }
449
-
450
- unless @implicit
451
- obj[:min_rating] = @min_rating
452
- obj[:max_rating] = @max_rating
453
- end
454
-
455
- if @top_items
456
- obj[:item_count] = @item_count
457
- obj[:item_sum] = @item_sum
458
- end
459
-
460
- obj
461
- end
462
-
463
- def marshal_load(obj)
464
- warn "[disco] Marshal serialization is deprecated - use JSON instead"
465
-
466
- @implicit = obj[:implicit]
467
- @user_map = obj[:user_map]
468
- @item_map = obj[:item_map]
469
- @rated = obj[:rated]
470
- @global_mean = obj[:global_mean]
471
- @user_factors = obj[:user_factors]
472
- @item_factors = obj[:item_factors]
473
- @factors = obj[:factors]
474
- @epochs = obj[:epochs]
475
- @verbose = obj[:verbose]
476
-
477
- unless @implicit
478
- @min_rating = obj[:min_rating]
479
- @max_rating = obj[:max_rating]
480
- end
481
-
482
- @top_items = obj.key?(:item_count)
483
- if @top_items
484
- @item_count = obj[:item_count]
485
- @item_sum = obj[:item_sum]
486
- end
487
- end
488
-
489
434
  def json_load(obj)
490
435
  require "base64"
491
436
 
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-23 00:00:00.000000000 Z
11
+ date: 2024-10-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: libmf
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0.2'
19
+ version: '0.4'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0.2'
26
+ version: '0.4'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: numo-narray
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 0.9.2
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 0.9.2
41
41
  description:
42
42
  email: andrew@ankane.org
43
43
  executables: []
@@ -69,14 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
69
69
  requirements:
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
- version: '2.7'
72
+ version: '3.1'
73
73
  required_rubygems_version: !ruby/object:Gem::Requirement
74
74
  requirements:
75
75
  - - ">="
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
78
  requirements: []
79
- rubygems_version: 3.5.9
79
+ rubygems_version: 3.5.16
80
80
  signing_key:
81
81
  specification_version: 4
82
82
  summary: Recommendations for Ruby and Rails using collaborative filtering