cmfrec 0.2.1 → 0.3.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
4
- data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
3
+ metadata.gz: d3cc7d70530eefc7b13324753b454d03573da7d832c46cb4dee2ab9213eafcdd
4
+ data.tar.gz: 662af2ec4ab1a1bd33a39c18773d0c48967422286a2fa137960772b7e67d437a
5
5
  SHA512:
6
- metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
7
- data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
6
+ metadata.gz: 8fd6f1f8f0bd7d7c870c28fb57a0cec89aacf2d27aed53b5d68fb6935f5071dbe73931a5ff776f4a864f0cc91a17c793eabfe2a2b21f9b368a4c36ada5cb929d
7
+ data.tar.gz: 116d26ddafeeb439ef0895e30805afa0d2d2a453aeb369cf7122f13f5bf3ad457dac65c974d26debc84f353baf5f0c889c559e56620ef7458af4968ee9f5262a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 0.3.0 (2024-10-23)
2
+
3
+ - Changed dataset directory to match XDG Base Directory Specification
4
+ - Removed dependency on `csv` gem for `load_movielens`
5
+ - Dropped support for marshal serialization
6
+ - Dropped support for Ruby < 3.1
7
+
1
8
  ## 0.2.1 (2022-07-11)
2
9
 
3
10
  - Added support for JSON serialization
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2020 David Cortes
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  All rights reserved.
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  - Works with explicit and implicit feedback
7
7
  - Uses high-performance matrix factorization
8
8
 
9
- [![Build Status](https://github.com/ankane/cmfrec-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/cmfrec-ruby/actions)
9
+ [![Build Status](https://github.com/ankane/cmfrec-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/cmfrec-ruby/actions)
10
10
 
11
11
  ## Installation
12
12
 
data/lib/cmfrec/data.rb CHANGED
@@ -1,8 +1,6 @@
1
1
  module Cmfrec
2
2
  module Data
3
3
  def load_movielens
4
- require "csv"
5
-
6
4
  data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
7
5
  file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
8
6
  user_path = download_file("ml-100k/u.user", "https://files.grouplens.org/datasets/movielens/ml-100k/u.user",
@@ -10,11 +8,9 @@ module Cmfrec
10
8
  item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
11
9
  file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
12
10
 
13
- # convert u.item to utf-8
14
- movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
15
-
16
11
  user_info = []
17
- CSV.foreach(user_path, col_sep: "|") do |row|
12
+ File.foreach(user_path) do |line|
13
+ row = line.split("|")
18
14
  user = {user_id: row[0].to_i}
19
15
  10.times do |i|
20
16
  user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
@@ -26,26 +22,28 @@ module Cmfrec
26
22
  movies = {}
27
23
  movie_names = {}
28
24
  genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
29
- CSV.parse(movies_str, col_sep: "|", converters: [:numeric]) do |row|
25
+ File.foreach(item_path) do |line|
26
+ row = line.encode("UTF-8", "ISO-8859-1").split("|")
30
27
  movies[row[0]] = row[1]
31
28
 
32
29
  # filter duplicates
33
30
  next if movie_names[row[1]]
34
31
  movie_names[row[1]] = true
35
32
 
36
- item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
33
+ item = {item_id: row[1], year: !row[2].empty? ? Date.parse(row[2]).year : 1970}
37
34
  genres.each_with_index do |genre, i|
38
- item[:"genre_#{genre}"] = row[i + 5]
35
+ item[:"genre_#{genre}"] = row[i + 5].to_i
39
36
  end
40
37
  item_info << item
41
38
  end
42
39
 
43
40
  data = []
44
- CSV.foreach(data_path, col_sep: "\t", converters: [:numeric]) do |row|
41
+ File.foreach(data_path) do |line|
42
+ row = line.split("\t")
45
43
  data << {
46
- user_id: row[0],
44
+ user_id: row[0].to_i,
47
45
  item_id: movies[row[1]],
48
- rating: row[2]
46
+ rating: row[2].to_i
49
47
  }
50
48
  end
51
49
 
@@ -60,9 +58,8 @@ module Cmfrec
60
58
  require "net/http"
61
59
  require "tmpdir"
62
60
 
63
- # TODO handle this better
64
- raise "No HOME" unless ENV["HOME"]
65
- dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
61
+ cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
62
+ dest = "#{cache_home}/cmfrec/#{fname}"
66
63
  FileUtils.mkdir_p(File.dirname(dest))
67
64
 
68
65
  return dest if File.exist?(dest)
@@ -514,7 +514,7 @@ module Cmfrec
514
514
  nil, #precomputedBiTBi,
515
515
  nil, #precomputedTransCtCinvCt,
516
516
  nil, #precomputedCtCw
517
- nil, #precomputedCtUbias
517
+ nil #precomputedCtUbias
518
518
  ]
519
519
  check_status FFI.fit_collective_explicit_als(*fiddle_args(args))
520
520
 
@@ -811,114 +811,6 @@ module Cmfrec
811
811
  @finalize_chol = false
812
812
  end
813
813
 
814
- def dump_ptr(ptr)
815
- ptr.to_s(ptr.size) if ptr
816
- end
817
-
818
- def load_ptr(str)
819
- Fiddle::Pointer[str] if str
820
- end
821
-
822
- def marshal_dump
823
- obj = {
824
- implicit: @implicit
825
- }
826
-
827
- # options
828
- obj[:factors] = @k
829
- obj[:epochs] = @niter
830
- obj[:verbose] = @verbose
831
-
832
- # factors
833
- obj[:user_map] = @user_map
834
- obj[:item_map] = @item_map
835
- obj[:rated] = @rated
836
- obj[:user_factors] = dump_ptr(@a)
837
- obj[:item_factors] = dump_ptr(@b)
838
-
839
- # bias
840
- obj[:user_bias] = dump_ptr(@bias_a)
841
- obj[:item_bias] = dump_ptr(@bias_b)
842
-
843
- # mean
844
- obj[:global_mean] = @global_mean
845
-
846
- # side info
847
- obj[:user_info_map] = @user_info_map
848
- obj[:item_info_map] = @item_info_map
849
- obj[:user_info_factors] = dump_ptr(@c)
850
- obj[:item_info_factors] = dump_ptr(@d)
851
-
852
- # implicit features
853
- obj[:add_implicit_features] = @add_implicit_features
854
- obj[:user_factors_implicit] = dump_ptr(@ai)
855
- obj[:item_factors_implicit] = dump_ptr(@bi)
856
-
857
- unless @implicit
858
- obj[:min_rating] = @min_rating
859
- obj[:max_rating] = @max_rating
860
- end
861
-
862
- obj[:user_means] = dump_ptr(@u_colmeans)
863
-
864
- obj
865
- end
866
-
867
- def marshal_load(obj)
868
- @implicit = obj[:implicit]
869
-
870
- # options
871
- set_params(
872
- k: obj[:factors],
873
- niter: obj[:epochs],
874
- verbose: obj[:verbose],
875
- user_bias: !obj[:user_bias].nil?,
876
- item_bias: !obj[:item_bias].nil?,
877
- add_implicit_features: obj[:add_implicit_features]
878
- )
879
-
880
- # factors
881
- @user_map = obj[:user_map]
882
- @item_map = obj[:item_map]
883
- @rated = obj[:rated] || {}
884
- @a = load_ptr(obj[:user_factors])
885
- @b = load_ptr(obj[:item_factors])
886
-
887
- # bias
888
- @bias_a = load_ptr(obj[:user_bias])
889
- @bias_b = load_ptr(obj[:item_bias])
890
-
891
- # mean
892
- @global_mean = obj[:global_mean]
893
-
894
- # side info
895
- @user_info_map = obj[:user_info_map]
896
- @item_info_map = obj[:item_info_map]
897
- @c = load_ptr(obj[:user_info_factors])
898
- @d = load_ptr(obj[:item_info_factors])
899
-
900
- # implicit features
901
- @add_implicit_features = obj[:add_implicit_features]
902
- @ai = load_ptr(obj[:user_factors_implicit])
903
- @bi = load_ptr(obj[:item_factors_implicit])
904
-
905
- unless @implicit
906
- @min_rating = obj[:min_rating]
907
- @max_rating = obj[:max_rating]
908
- end
909
-
910
- @u_colmeans = load_ptr(obj[:user_means])
911
-
912
- @m = @user_map.size
913
- @n = @item_map.size
914
- @m_u = @user_info_map.size
915
- @n_i = @item_info_map.size
916
-
917
- set_implicit_vars if @implicit
918
-
919
- @fit = @m > 0
920
- end
921
-
922
814
  def json_dump_ptr(ptr)
923
815
  Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
924
816
  end
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-11 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2024-10-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fiddle
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  description:
14
28
  email: andrew@ankane.org
15
29
  executables: []
@@ -45,14 +59,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
45
59
  requirements:
46
60
  - - ">="
47
61
  - !ruby/object:Gem::Version
48
- version: '2.7'
62
+ version: '3.1'
49
63
  required_rubygems_version: !ruby/object:Gem::Requirement
50
64
  requirements:
51
65
  - - ">="
52
66
  - !ruby/object:Gem::Version
53
67
  version: '0'
54
68
  requirements: []
55
- rubygems_version: 3.3.7
69
+ rubygems_version: 3.5.16
56
70
  signing_key:
57
71
  specification_version: 4
58
72
  summary: Recommendations for Ruby using collective matrix factorization