cmfrec 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
4
- data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
3
+ metadata.gz: d3cc7d70530eefc7b13324753b454d03573da7d832c46cb4dee2ab9213eafcdd
4
+ data.tar.gz: 662af2ec4ab1a1bd33a39c18773d0c48967422286a2fa137960772b7e67d437a
5
5
  SHA512:
6
- metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
7
- data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
6
+ metadata.gz: 8fd6f1f8f0bd7d7c870c28fb57a0cec89aacf2d27aed53b5d68fb6935f5071dbe73931a5ff776f4a864f0cc91a17c793eabfe2a2b21f9b368a4c36ada5cb929d
7
+ data.tar.gz: 116d26ddafeeb439ef0895e30805afa0d2d2a453aeb369cf7122f13f5bf3ad457dac65c974d26debc84f353baf5f0c889c559e56620ef7458af4968ee9f5262a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 0.3.0 (2024-10-23)
2
+
3
+ - Changed dataset directory to match XDG Base Directory Specification
4
+ - Removed dependency on `csv` gem for `load_movielens`
5
+ - Dropped support for marshal serialization
6
+ - Dropped support for Ruby < 3.1
7
+
1
8
  ## 0.2.1 (2022-07-11)
2
9
 
3
10
  - Added support for JSON serialization
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2020 David Cortes
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  All rights reserved.
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  - Works with explicit and implicit feedback
7
7
  - Uses high-performance matrix factorization
8
8
 
9
- [![Build Status](https://github.com/ankane/cmfrec-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/cmfrec-ruby/actions)
9
+ [![Build Status](https://github.com/ankane/cmfrec-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/cmfrec-ruby/actions)
10
10
 
11
11
  ## Installation
12
12
 
data/lib/cmfrec/data.rb CHANGED
@@ -1,8 +1,6 @@
1
1
  module Cmfrec
2
2
  module Data
3
3
  def load_movielens
4
- require "csv"
5
-
6
4
  data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
7
5
  file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
8
6
  user_path = download_file("ml-100k/u.user", "https://files.grouplens.org/datasets/movielens/ml-100k/u.user",
@@ -10,11 +8,9 @@ module Cmfrec
10
8
  item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
11
9
  file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
12
10
 
13
- # convert u.item to utf-8
14
- movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
15
-
16
11
  user_info = []
17
- CSV.foreach(user_path, col_sep: "|") do |row|
12
+ File.foreach(user_path) do |line|
13
+ row = line.split("|")
18
14
  user = {user_id: row[0].to_i}
19
15
  10.times do |i|
20
16
  user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
@@ -26,26 +22,28 @@ module Cmfrec
26
22
  movies = {}
27
23
  movie_names = {}
28
24
  genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
29
- CSV.parse(movies_str, col_sep: "|", converters: [:numeric]) do |row|
25
+ File.foreach(item_path) do |line|
26
+ row = line.encode("UTF-8", "ISO-8859-1").split("|")
30
27
  movies[row[0]] = row[1]
31
28
 
32
29
  # filter duplicates
33
30
  next if movie_names[row[1]]
34
31
  movie_names[row[1]] = true
35
32
 
36
- item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
33
+ item = {item_id: row[1], year: !row[2].empty? ? Date.parse(row[2]).year : 1970}
37
34
  genres.each_with_index do |genre, i|
38
- item[:"genre_#{genre}"] = row[i + 5]
35
+ item[:"genre_#{genre}"] = row[i + 5].to_i
39
36
  end
40
37
  item_info << item
41
38
  end
42
39
 
43
40
  data = []
44
- CSV.foreach(data_path, col_sep: "\t", converters: [:numeric]) do |row|
41
+ File.foreach(data_path) do |line|
42
+ row = line.split("\t")
45
43
  data << {
46
- user_id: row[0],
44
+ user_id: row[0].to_i,
47
45
  item_id: movies[row[1]],
48
- rating: row[2]
46
+ rating: row[2].to_i
49
47
  }
50
48
  end
51
49
 
@@ -60,9 +58,8 @@ module Cmfrec
60
58
  require "net/http"
61
59
  require "tmpdir"
62
60
 
63
- # TODO handle this better
64
- raise "No HOME" unless ENV["HOME"]
65
- dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
61
+ cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
62
+ dest = "#{cache_home}/cmfrec/#{fname}"
66
63
  FileUtils.mkdir_p(File.dirname(dest))
67
64
 
68
65
  return dest if File.exist?(dest)
@@ -514,7 +514,7 @@ module Cmfrec
514
514
  nil, #precomputedBiTBi,
515
515
  nil, #precomputedTransCtCinvCt,
516
516
  nil, #precomputedCtCw
517
- nil, #precomputedCtUbias
517
+ nil #precomputedCtUbias
518
518
  ]
519
519
  check_status FFI.fit_collective_explicit_als(*fiddle_args(args))
520
520
 
@@ -811,114 +811,6 @@ module Cmfrec
811
811
  @finalize_chol = false
812
812
  end
813
813
 
814
- def dump_ptr(ptr)
815
- ptr.to_s(ptr.size) if ptr
816
- end
817
-
818
- def load_ptr(str)
819
- Fiddle::Pointer[str] if str
820
- end
821
-
822
- def marshal_dump
823
- obj = {
824
- implicit: @implicit
825
- }
826
-
827
- # options
828
- obj[:factors] = @k
829
- obj[:epochs] = @niter
830
- obj[:verbose] = @verbose
831
-
832
- # factors
833
- obj[:user_map] = @user_map
834
- obj[:item_map] = @item_map
835
- obj[:rated] = @rated
836
- obj[:user_factors] = dump_ptr(@a)
837
- obj[:item_factors] = dump_ptr(@b)
838
-
839
- # bias
840
- obj[:user_bias] = dump_ptr(@bias_a)
841
- obj[:item_bias] = dump_ptr(@bias_b)
842
-
843
- # mean
844
- obj[:global_mean] = @global_mean
845
-
846
- # side info
847
- obj[:user_info_map] = @user_info_map
848
- obj[:item_info_map] = @item_info_map
849
- obj[:user_info_factors] = dump_ptr(@c)
850
- obj[:item_info_factors] = dump_ptr(@d)
851
-
852
- # implicit features
853
- obj[:add_implicit_features] = @add_implicit_features
854
- obj[:user_factors_implicit] = dump_ptr(@ai)
855
- obj[:item_factors_implicit] = dump_ptr(@bi)
856
-
857
- unless @implicit
858
- obj[:min_rating] = @min_rating
859
- obj[:max_rating] = @max_rating
860
- end
861
-
862
- obj[:user_means] = dump_ptr(@u_colmeans)
863
-
864
- obj
865
- end
866
-
867
- def marshal_load(obj)
868
- @implicit = obj[:implicit]
869
-
870
- # options
871
- set_params(
872
- k: obj[:factors],
873
- niter: obj[:epochs],
874
- verbose: obj[:verbose],
875
- user_bias: !obj[:user_bias].nil?,
876
- item_bias: !obj[:item_bias].nil?,
877
- add_implicit_features: obj[:add_implicit_features]
878
- )
879
-
880
- # factors
881
- @user_map = obj[:user_map]
882
- @item_map = obj[:item_map]
883
- @rated = obj[:rated] || {}
884
- @a = load_ptr(obj[:user_factors])
885
- @b = load_ptr(obj[:item_factors])
886
-
887
- # bias
888
- @bias_a = load_ptr(obj[:user_bias])
889
- @bias_b = load_ptr(obj[:item_bias])
890
-
891
- # mean
892
- @global_mean = obj[:global_mean]
893
-
894
- # side info
895
- @user_info_map = obj[:user_info_map]
896
- @item_info_map = obj[:item_info_map]
897
- @c = load_ptr(obj[:user_info_factors])
898
- @d = load_ptr(obj[:item_info_factors])
899
-
900
- # implicit features
901
- @add_implicit_features = obj[:add_implicit_features]
902
- @ai = load_ptr(obj[:user_factors_implicit])
903
- @bi = load_ptr(obj[:item_factors_implicit])
904
-
905
- unless @implicit
906
- @min_rating = obj[:min_rating]
907
- @max_rating = obj[:max_rating]
908
- end
909
-
910
- @u_colmeans = load_ptr(obj[:user_means])
911
-
912
- @m = @user_map.size
913
- @n = @item_map.size
914
- @m_u = @user_info_map.size
915
- @n_i = @item_info_map.size
916
-
917
- set_implicit_vars if @implicit
918
-
919
- @fit = @m > 0
920
- end
921
-
922
814
  def json_dump_ptr(ptr)
923
815
  Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
924
816
  end
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-11 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2024-10-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fiddle
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  description:
14
28
  email: andrew@ankane.org
15
29
  executables: []
@@ -45,14 +59,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
45
59
  requirements:
46
60
  - - ">="
47
61
  - !ruby/object:Gem::Version
48
- version: '2.7'
62
+ version: '3.1'
49
63
  required_rubygems_version: !ruby/object:Gem::Requirement
50
64
  requirements:
51
65
  - - ">="
52
66
  - !ruby/object:Gem::Version
53
67
  version: '0'
54
68
  requirements: []
55
- rubygems_version: 3.3.7
69
+ rubygems_version: 3.5.16
56
70
  signing_key:
57
71
  specification_version: 4
58
72
  summary: Recommendations for Ruby using collective matrix factorization