cmfrec 0.2.1 → 0.3.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
4
- data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
3
+ metadata.gz: c549b3c94d2b80e94938f61694636fc35b85bb0217bec39d5234f81dbba1684b
4
+ data.tar.gz: e953ddaa00c5ba4b13838300aad880175db5d2c1531bf284866348b69ff2075b
5
5
  SHA512:
6
- metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
7
- data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
6
+ metadata.gz: 53e65f7b6ab488238713ff468c438cf45ecdbe85608c505fe4a66f140486b279785355fbf4aaedd72f6c4d6bc5fad502c97d7d592c24c4952520186db83adef1
7
+ data.tar.gz: 4dd240635c221c6eada3c66f9fa244da90c342655ebcdf98731fbd805bec1015a51c72fdb87a1c6423b0994ccd9d4729a784d6af603a2f6154aa1178ef37bf6b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.3.1 (2024-12-29)
2
+
3
+ - Removed dependency on `base64` gem for serialization
4
+
5
+ ## 0.3.0 (2024-10-23)
6
+
7
+ - Changed dataset directory to match XDG Base Directory Specification
8
+ - Removed dependency on `csv` gem for `load_movielens`
9
+ - Dropped support for marshal serialization
10
+ - Dropped support for Ruby < 3.1
11
+
1
12
  ## 0.2.1 (2022-07-11)
2
13
 
3
14
  - Added support for JSON serialization
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2020 David Cortes
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  All rights reserved.
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  - Works with explicit and implicit feedback
7
7
  - Uses high-performance matrix factorization
8
8
 
9
- [![Build Status](https://github.com/ankane/cmfrec-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/cmfrec-ruby/actions)
9
+ [![Build Status](https://github.com/ankane/cmfrec-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/cmfrec-ruby/actions)
10
10
 
11
11
  ## Installation
12
12
 
data/lib/cmfrec/data.rb CHANGED
@@ -1,8 +1,6 @@
1
1
  module Cmfrec
2
2
  module Data
3
3
  def load_movielens
4
- require "csv"
5
-
6
4
  data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
7
5
  file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
8
6
  user_path = download_file("ml-100k/u.user", "https://files.grouplens.org/datasets/movielens/ml-100k/u.user",
@@ -10,11 +8,9 @@ module Cmfrec
10
8
  item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
11
9
  file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
12
10
 
13
- # convert u.item to utf-8
14
- movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
15
-
16
11
  user_info = []
17
- CSV.foreach(user_path, col_sep: "|") do |row|
12
+ File.foreach(user_path) do |line|
13
+ row = line.split("|")
18
14
  user = {user_id: row[0].to_i}
19
15
  10.times do |i|
20
16
  user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
@@ -26,26 +22,28 @@ module Cmfrec
26
22
  movies = {}
27
23
  movie_names = {}
28
24
  genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
29
- CSV.parse(movies_str, col_sep: "|", converters: [:numeric]) do |row|
25
+ File.foreach(item_path) do |line|
26
+ row = line.encode("UTF-8", "ISO-8859-1").split("|")
30
27
  movies[row[0]] = row[1]
31
28
 
32
29
  # filter duplicates
33
30
  next if movie_names[row[1]]
34
31
  movie_names[row[1]] = true
35
32
 
36
- item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
33
+ item = {item_id: row[1], year: !row[2].empty? ? Date.parse(row[2]).year : 1970}
37
34
  genres.each_with_index do |genre, i|
38
- item[:"genre_#{genre}"] = row[i + 5]
35
+ item[:"genre_#{genre}"] = row[i + 5].to_i
39
36
  end
40
37
  item_info << item
41
38
  end
42
39
 
43
40
  data = []
44
- CSV.foreach(data_path, col_sep: "\t", converters: [:numeric]) do |row|
41
+ File.foreach(data_path) do |line|
42
+ row = line.split("\t")
45
43
  data << {
46
- user_id: row[0],
44
+ user_id: row[0].to_i,
47
45
  item_id: movies[row[1]],
48
- rating: row[2]
46
+ rating: row[2].to_i
49
47
  }
50
48
  end
51
49
 
@@ -60,9 +58,8 @@ module Cmfrec
60
58
  require "net/http"
61
59
  require "tmpdir"
62
60
 
63
- # TODO handle this better
64
- raise "No HOME" unless ENV["HOME"]
65
- dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
61
+ cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
62
+ dest = "#{cache_home}/cmfrec/#{fname}"
66
63
  FileUtils.mkdir_p(File.dirname(dest))
67
64
 
68
65
  return dest if File.exist?(dest)
@@ -250,7 +250,6 @@ module Cmfrec
250
250
  end
251
251
 
252
252
  def to_json
253
- require "base64"
254
253
  require "json"
255
254
 
256
255
  obj = {
@@ -514,7 +513,7 @@ module Cmfrec
514
513
  nil, #precomputedBiTBi,
515
514
  nil, #precomputedTransCtCinvCt,
516
515
  nil, #precomputedCtCw
517
- nil, #precomputedCtUbias
516
+ nil #precomputedCtUbias
518
517
  ]
519
518
  check_status FFI.fit_collective_explicit_als(*fiddle_args(args))
520
519
 
@@ -811,125 +810,15 @@ module Cmfrec
811
810
  @finalize_chol = false
812
811
  end
813
812
 
814
- def dump_ptr(ptr)
815
- ptr.to_s(ptr.size) if ptr
816
- end
817
-
818
- def load_ptr(str)
819
- Fiddle::Pointer[str] if str
820
- end
821
-
822
- def marshal_dump
823
- obj = {
824
- implicit: @implicit
825
- }
826
-
827
- # options
828
- obj[:factors] = @k
829
- obj[:epochs] = @niter
830
- obj[:verbose] = @verbose
831
-
832
- # factors
833
- obj[:user_map] = @user_map
834
- obj[:item_map] = @item_map
835
- obj[:rated] = @rated
836
- obj[:user_factors] = dump_ptr(@a)
837
- obj[:item_factors] = dump_ptr(@b)
838
-
839
- # bias
840
- obj[:user_bias] = dump_ptr(@bias_a)
841
- obj[:item_bias] = dump_ptr(@bias_b)
842
-
843
- # mean
844
- obj[:global_mean] = @global_mean
845
-
846
- # side info
847
- obj[:user_info_map] = @user_info_map
848
- obj[:item_info_map] = @item_info_map
849
- obj[:user_info_factors] = dump_ptr(@c)
850
- obj[:item_info_factors] = dump_ptr(@d)
851
-
852
- # implicit features
853
- obj[:add_implicit_features] = @add_implicit_features
854
- obj[:user_factors_implicit] = dump_ptr(@ai)
855
- obj[:item_factors_implicit] = dump_ptr(@bi)
856
-
857
- unless @implicit
858
- obj[:min_rating] = @min_rating
859
- obj[:max_rating] = @max_rating
860
- end
861
-
862
- obj[:user_means] = dump_ptr(@u_colmeans)
863
-
864
- obj
865
- end
866
-
867
- def marshal_load(obj)
868
- @implicit = obj[:implicit]
869
-
870
- # options
871
- set_params(
872
- k: obj[:factors],
873
- niter: obj[:epochs],
874
- verbose: obj[:verbose],
875
- user_bias: !obj[:user_bias].nil?,
876
- item_bias: !obj[:item_bias].nil?,
877
- add_implicit_features: obj[:add_implicit_features]
878
- )
879
-
880
- # factors
881
- @user_map = obj[:user_map]
882
- @item_map = obj[:item_map]
883
- @rated = obj[:rated] || {}
884
- @a = load_ptr(obj[:user_factors])
885
- @b = load_ptr(obj[:item_factors])
886
-
887
- # bias
888
- @bias_a = load_ptr(obj[:user_bias])
889
- @bias_b = load_ptr(obj[:item_bias])
890
-
891
- # mean
892
- @global_mean = obj[:global_mean]
893
-
894
- # side info
895
- @user_info_map = obj[:user_info_map]
896
- @item_info_map = obj[:item_info_map]
897
- @c = load_ptr(obj[:user_info_factors])
898
- @d = load_ptr(obj[:item_info_factors])
899
-
900
- # implicit features
901
- @add_implicit_features = obj[:add_implicit_features]
902
- @ai = load_ptr(obj[:user_factors_implicit])
903
- @bi = load_ptr(obj[:item_factors_implicit])
904
-
905
- unless @implicit
906
- @min_rating = obj[:min_rating]
907
- @max_rating = obj[:max_rating]
908
- end
909
-
910
- @u_colmeans = load_ptr(obj[:user_means])
911
-
912
- @m = @user_map.size
913
- @n = @item_map.size
914
- @m_u = @user_info_map.size
915
- @n_i = @item_info_map.size
916
-
917
- set_implicit_vars if @implicit
918
-
919
- @fit = @m > 0
920
- end
921
-
922
813
  def json_dump_ptr(ptr)
923
- Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
814
+ [ptr.to_s(ptr.size)].pack("m0") if ptr
924
815
  end
925
816
 
926
817
  def json_load_ptr(str)
927
- Fiddle::Pointer[Base64.strict_decode64(str)] if str
818
+ Fiddle::Pointer[str.unpack1("m0")] if str
928
819
  end
929
820
 
930
821
  def json_load(obj)
931
- require "base64"
932
-
933
822
  @implicit = obj["implicit"]
934
823
 
935
824
  # options
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,16 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2022-07-11 00:00:00.000000000 Z
12
- dependencies: []
13
- description:
10
+ date: 2024-12-30 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: fiddle
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
14
26
  email: andrew@ankane.org
15
27
  executables: []
16
28
  extensions: []
@@ -37,7 +49,6 @@ homepage: https://github.com/ankane/cmfrec-ruby
37
49
  licenses:
38
50
  - MIT
39
51
  metadata: {}
40
- post_install_message:
41
52
  rdoc_options: []
42
53
  require_paths:
43
54
  - lib
@@ -45,15 +56,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
45
56
  requirements:
46
57
  - - ">="
47
58
  - !ruby/object:Gem::Version
48
- version: '2.7'
59
+ version: '3.1'
49
60
  required_rubygems_version: !ruby/object:Gem::Requirement
50
61
  requirements:
51
62
  - - ">="
52
63
  - !ruby/object:Gem::Version
53
64
  version: '0'
54
65
  requirements: []
55
- rubygems_version: 3.3.7
56
- signing_key:
66
+ rubygems_version: 3.6.2
57
67
  specification_version: 4
58
68
  summary: Recommendations for Ruby using collective matrix factorization
59
69
  test_files: []