cmfrec 0.2.1 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
4
- data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
3
+ metadata.gz: c549b3c94d2b80e94938f61694636fc35b85bb0217bec39d5234f81dbba1684b
4
+ data.tar.gz: e953ddaa00c5ba4b13838300aad880175db5d2c1531bf284866348b69ff2075b
5
5
  SHA512:
6
- metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
7
- data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
6
+ metadata.gz: 53e65f7b6ab488238713ff468c438cf45ecdbe85608c505fe4a66f140486b279785355fbf4aaedd72f6c4d6bc5fad502c97d7d592c24c4952520186db83adef1
7
+ data.tar.gz: 4dd240635c221c6eada3c66f9fa244da90c342655ebcdf98731fbd805bec1015a51c72fdb87a1c6423b0994ccd9d4729a784d6af603a2f6154aa1178ef37bf6b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.3.1 (2024-12-29)
2
+
3
+ - Removed dependency on `base64` gem for serialization
4
+
5
+ ## 0.3.0 (2024-10-23)
6
+
7
+ - Changed dataset directory to match XDG Base Directory Specification
8
+ - Removed dependency on `csv` gem for `load_movielens`
9
+ - Dropped support for marshal serialization
10
+ - Dropped support for Ruby < 3.1
11
+
1
12
  ## 0.2.1 (2022-07-11)
2
13
 
3
14
  - Added support for JSON serialization
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2020 David Cortes
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  All rights reserved.
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  - Works with explicit and implicit feedback
7
7
  - Uses high-performance matrix factorization
8
8
 
9
- [![Build Status](https://github.com/ankane/cmfrec-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/cmfrec-ruby/actions)
9
+ [![Build Status](https://github.com/ankane/cmfrec-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/cmfrec-ruby/actions)
10
10
 
11
11
  ## Installation
12
12
 
data/lib/cmfrec/data.rb CHANGED
@@ -1,8 +1,6 @@
1
1
  module Cmfrec
2
2
  module Data
3
3
  def load_movielens
4
- require "csv"
5
-
6
4
  data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
7
5
  file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
8
6
  user_path = download_file("ml-100k/u.user", "https://files.grouplens.org/datasets/movielens/ml-100k/u.user",
@@ -10,11 +8,9 @@ module Cmfrec
10
8
  item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
11
9
  file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
12
10
 
13
- # convert u.item to utf-8
14
- movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
15
-
16
11
  user_info = []
17
- CSV.foreach(user_path, col_sep: "|") do |row|
12
+ File.foreach(user_path) do |line|
13
+ row = line.split("|")
18
14
  user = {user_id: row[0].to_i}
19
15
  10.times do |i|
20
16
  user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
@@ -26,26 +22,28 @@ module Cmfrec
26
22
  movies = {}
27
23
  movie_names = {}
28
24
  genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
29
- CSV.parse(movies_str, col_sep: "|", converters: [:numeric]) do |row|
25
+ File.foreach(item_path) do |line|
26
+ row = line.encode("UTF-8", "ISO-8859-1").split("|")
30
27
  movies[row[0]] = row[1]
31
28
 
32
29
  # filter duplicates
33
30
  next if movie_names[row[1]]
34
31
  movie_names[row[1]] = true
35
32
 
36
- item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
33
+ item = {item_id: row[1], year: !row[2].empty? ? Date.parse(row[2]).year : 1970}
37
34
  genres.each_with_index do |genre, i|
38
- item[:"genre_#{genre}"] = row[i + 5]
35
+ item[:"genre_#{genre}"] = row[i + 5].to_i
39
36
  end
40
37
  item_info << item
41
38
  end
42
39
 
43
40
  data = []
44
- CSV.foreach(data_path, col_sep: "\t", converters: [:numeric]) do |row|
41
+ File.foreach(data_path) do |line|
42
+ row = line.split("\t")
45
43
  data << {
46
- user_id: row[0],
44
+ user_id: row[0].to_i,
47
45
  item_id: movies[row[1]],
48
- rating: row[2]
46
+ rating: row[2].to_i
49
47
  }
50
48
  end
51
49
 
@@ -60,9 +58,8 @@ module Cmfrec
60
58
  require "net/http"
61
59
  require "tmpdir"
62
60
 
63
- # TODO handle this better
64
- raise "No HOME" unless ENV["HOME"]
65
- dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
61
+ cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
62
+ dest = "#{cache_home}/cmfrec/#{fname}"
66
63
  FileUtils.mkdir_p(File.dirname(dest))
67
64
 
68
65
  return dest if File.exist?(dest)
data/lib/cmfrec/recommender.rb CHANGED
@@ -250,7 +250,6 @@ module Cmfrec
250
250
  end
251
251
 
252
252
  def to_json
253
- require "base64"
254
253
  require "json"
255
254
 
256
255
  obj = {
@@ -514,7 +513,7 @@ module Cmfrec
514
513
  nil, #precomputedBiTBi,
515
514
  nil, #precomputedTransCtCinvCt,
516
515
  nil, #precomputedCtCw
517
- nil, #precomputedCtUbias
516
+ nil #precomputedCtUbias
518
517
  ]
519
518
  check_status FFI.fit_collective_explicit_als(*fiddle_args(args))
520
519
 
@@ -811,125 +810,15 @@ module Cmfrec
811
810
  @finalize_chol = false
812
811
  end
813
812
 
814
- def dump_ptr(ptr)
815
- ptr.to_s(ptr.size) if ptr
816
- end
817
-
818
- def load_ptr(str)
819
- Fiddle::Pointer[str] if str
820
- end
821
-
822
- def marshal_dump
823
- obj = {
824
- implicit: @implicit
825
- }
826
-
827
- # options
828
- obj[:factors] = @k
829
- obj[:epochs] = @niter
830
- obj[:verbose] = @verbose
831
-
832
- # factors
833
- obj[:user_map] = @user_map
834
- obj[:item_map] = @item_map
835
- obj[:rated] = @rated
836
- obj[:user_factors] = dump_ptr(@a)
837
- obj[:item_factors] = dump_ptr(@b)
838
-
839
- # bias
840
- obj[:user_bias] = dump_ptr(@bias_a)
841
- obj[:item_bias] = dump_ptr(@bias_b)
842
-
843
- # mean
844
- obj[:global_mean] = @global_mean
845
-
846
- # side info
847
- obj[:user_info_map] = @user_info_map
848
- obj[:item_info_map] = @item_info_map
849
- obj[:user_info_factors] = dump_ptr(@c)
850
- obj[:item_info_factors] = dump_ptr(@d)
851
-
852
- # implicit features
853
- obj[:add_implicit_features] = @add_implicit_features
854
- obj[:user_factors_implicit] = dump_ptr(@ai)
855
- obj[:item_factors_implicit] = dump_ptr(@bi)
856
-
857
- unless @implicit
858
- obj[:min_rating] = @min_rating
859
- obj[:max_rating] = @max_rating
860
- end
861
-
862
- obj[:user_means] = dump_ptr(@u_colmeans)
863
-
864
- obj
865
- end
866
-
867
- def marshal_load(obj)
868
- @implicit = obj[:implicit]
869
-
870
- # options
871
- set_params(
872
- k: obj[:factors],
873
- niter: obj[:epochs],
874
- verbose: obj[:verbose],
875
- user_bias: !obj[:user_bias].nil?,
876
- item_bias: !obj[:item_bias].nil?,
877
- add_implicit_features: obj[:add_implicit_features]
878
- )
879
-
880
- # factors
881
- @user_map = obj[:user_map]
882
- @item_map = obj[:item_map]
883
- @rated = obj[:rated] || {}
884
- @a = load_ptr(obj[:user_factors])
885
- @b = load_ptr(obj[:item_factors])
886
-
887
- # bias
888
- @bias_a = load_ptr(obj[:user_bias])
889
- @bias_b = load_ptr(obj[:item_bias])
890
-
891
- # mean
892
- @global_mean = obj[:global_mean]
893
-
894
- # side info
895
- @user_info_map = obj[:user_info_map]
896
- @item_info_map = obj[:item_info_map]
897
- @c = load_ptr(obj[:user_info_factors])
898
- @d = load_ptr(obj[:item_info_factors])
899
-
900
- # implicit features
901
- @add_implicit_features = obj[:add_implicit_features]
902
- @ai = load_ptr(obj[:user_factors_implicit])
903
- @bi = load_ptr(obj[:item_factors_implicit])
904
-
905
- unless @implicit
906
- @min_rating = obj[:min_rating]
907
- @max_rating = obj[:max_rating]
908
- end
909
-
910
- @u_colmeans = load_ptr(obj[:user_means])
911
-
912
- @m = @user_map.size
913
- @n = @item_map.size
914
- @m_u = @user_info_map.size
915
- @n_i = @item_info_map.size
916
-
917
- set_implicit_vars if @implicit
918
-
919
- @fit = @m > 0
920
- end
921
-
922
813
  def json_dump_ptr(ptr)
923
- Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
814
+ [ptr.to_s(ptr.size)].pack("m0") if ptr
924
815
  end
925
816
 
926
817
  def json_load_ptr(str)
927
- Fiddle::Pointer[Base64.strict_decode64(str)] if str
818
+ Fiddle::Pointer[str.unpack1("m0")] if str
928
819
  end
929
820
 
930
821
  def json_load(obj)
931
- require "base64"
932
-
933
822
  @implicit = obj["implicit"]
934
823
 
935
824
  # options
data/lib/cmfrec/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,16 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2022-07-11 00:00:00.000000000 Z
12
- dependencies: []
13
- description:
10
+ date: 2024-12-30 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: fiddle
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
14
26
  email: andrew@ankane.org
15
27
  executables: []
16
28
  extensions: []
@@ -37,7 +49,6 @@ homepage: https://github.com/ankane/cmfrec-ruby
37
49
  licenses:
38
50
  - MIT
39
51
  metadata: {}
40
- post_install_message:
41
52
  rdoc_options: []
42
53
  require_paths:
43
54
  - lib
@@ -45,15 +56,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
45
56
  requirements:
46
57
  - - ">="
47
58
  - !ruby/object:Gem::Version
48
- version: '2.7'
59
+ version: '3.1'
49
60
  required_rubygems_version: !ruby/object:Gem::Requirement
50
61
  requirements:
51
62
  - - ">="
52
63
  - !ruby/object:Gem::Version
53
64
  version: '0'
54
65
  requirements: []
55
- rubygems_version: 3.3.7
56
- signing_key:
66
+ rubygems_version: 3.6.2
57
67
  specification_version: 4
58
68
  summary: Recommendations for Ruby using collective matrix factorization
59
69
  test_files: []