cmfrec 0.2.1 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/cmfrec/data.rb +12 -15
- data/lib/cmfrec/recommender.rb +3 -114
- data/lib/cmfrec/version.rb +1 -1
- metadata +19 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c549b3c94d2b80e94938f61694636fc35b85bb0217bec39d5234f81dbba1684b
|
4
|
+
data.tar.gz: e953ddaa00c5ba4b13838300aad880175db5d2c1531bf284866348b69ff2075b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53e65f7b6ab488238713ff468c438cf45ecdbe85608c505fe4a66f140486b279785355fbf4aaedd72f6c4d6bc5fad502c97d7d592c24c4952520186db83adef1
|
7
|
+
data.tar.gz: 4dd240635c221c6eada3c66f9fa244da90c342655ebcdf98731fbd805bec1015a51c72fdb87a1c6423b0994ccd9d4729a784d6af603a2f6154aa1178ef37bf6b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.3.1 (2024-12-29)
|
2
|
+
|
3
|
+
- Removed dependency on `base64` gem for serialization
|
4
|
+
|
5
|
+
## 0.3.0 (2024-10-23)
|
6
|
+
|
7
|
+
- Changed dataset directory to match XDG Base Directory Specification
|
8
|
+
- Removed dependency on `csv` gem for `load_movielens`
|
9
|
+
- Dropped support for marshal serialization
|
10
|
+
- Dropped support for Ruby < 3.1
|
11
|
+
|
1
12
|
## 0.2.1 (2022-07-11)
|
2
13
|
|
3
14
|
- Added support for JSON serialization
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
- Works with explicit and implicit feedback
|
7
7
|
- Uses high-performance matrix factorization
|
8
8
|
|
9
|
-
[![Build Status](https://github.com/ankane/cmfrec-ruby/workflows/build/badge.svg)](https://github.com/ankane/cmfrec-ruby/actions)
|
9
|
+
[![Build Status](https://github.com/ankane/cmfrec-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/cmfrec-ruby/actions)
|
10
10
|
|
11
11
|
## Installation
|
12
12
|
|
data/lib/cmfrec/data.rb
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
module Cmfrec
|
2
2
|
module Data
|
3
3
|
def load_movielens
|
4
|
-
require "csv"
|
5
|
-
|
6
4
|
data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
7
5
|
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
8
6
|
user_path = download_file("ml-100k/u.user", "https://files.grouplens.org/datasets/movielens/ml-100k/u.user",
|
@@ -10,11 +8,9 @@ module Cmfrec
|
|
10
8
|
item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
11
9
|
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
12
10
|
|
13
|
-
# convert u.item to utf-8
|
14
|
-
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
|
15
|
-
|
16
11
|
user_info = []
|
17
|
-
|
12
|
+
File.foreach(user_path) do |line|
|
13
|
+
row = line.split("|")
|
18
14
|
user = {user_id: row[0].to_i}
|
19
15
|
10.times do |i|
|
20
16
|
user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
|
@@ -26,26 +22,28 @@ module Cmfrec
|
|
26
22
|
movies = {}
|
27
23
|
movie_names = {}
|
28
24
|
genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
|
29
|
-
|
25
|
+
File.foreach(item_path) do |line|
|
26
|
+
row = line.encode("UTF-8", "ISO-8859-1").split("|")
|
30
27
|
movies[row[0]] = row[1]
|
31
28
|
|
32
29
|
# filter duplicates
|
33
30
|
next if movie_names[row[1]]
|
34
31
|
movie_names[row[1]] = true
|
35
32
|
|
36
|
-
item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
|
33
|
+
item = {item_id: row[1], year: !row[2].empty? ? Date.parse(row[2]).year : 1970}
|
37
34
|
genres.each_with_index do |genre, i|
|
38
|
-
item[:"genre_#{genre}"] = row[i + 5]
|
35
|
+
item[:"genre_#{genre}"] = row[i + 5].to_i
|
39
36
|
end
|
40
37
|
item_info << item
|
41
38
|
end
|
42
39
|
|
43
40
|
data = []
|
44
|
-
|
41
|
+
File.foreach(data_path) do |line|
|
42
|
+
row = line.split("\t")
|
45
43
|
data << {
|
46
|
-
user_id: row[0],
|
44
|
+
user_id: row[0].to_i,
|
47
45
|
item_id: movies[row[1]],
|
48
|
-
rating: row[2]
|
46
|
+
rating: row[2].to_i
|
49
47
|
}
|
50
48
|
end
|
51
49
|
|
@@ -60,9 +58,8 @@ module Cmfrec
|
|
60
58
|
require "net/http"
|
61
59
|
require "tmpdir"
|
62
60
|
|
63
|
-
|
64
|
-
|
65
|
-
dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
|
61
|
+
cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
|
62
|
+
dest = "#{cache_home}/cmfrec/#{fname}"
|
66
63
|
FileUtils.mkdir_p(File.dirname(dest))
|
67
64
|
|
68
65
|
return dest if File.exist?(dest)
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -250,7 +250,6 @@ module Cmfrec
|
|
250
250
|
end
|
251
251
|
|
252
252
|
def to_json
|
253
|
-
require "base64"
|
254
253
|
require "json"
|
255
254
|
|
256
255
|
obj = {
|
@@ -514,7 +513,7 @@ module Cmfrec
|
|
514
513
|
nil, #precomputedBiTBi,
|
515
514
|
nil, #precomputedTransCtCinvCt,
|
516
515
|
nil, #precomputedCtCw
|
517
|
-
nil
|
516
|
+
nil #precomputedCtUbias
|
518
517
|
]
|
519
518
|
check_status FFI.fit_collective_explicit_als(*fiddle_args(args))
|
520
519
|
|
@@ -811,125 +810,15 @@ module Cmfrec
|
|
811
810
|
@finalize_chol = false
|
812
811
|
end
|
813
812
|
|
814
|
-
def dump_ptr(ptr)
|
815
|
-
ptr.to_s(ptr.size) if ptr
|
816
|
-
end
|
817
|
-
|
818
|
-
def load_ptr(str)
|
819
|
-
Fiddle::Pointer[str] if str
|
820
|
-
end
|
821
|
-
|
822
|
-
def marshal_dump
|
823
|
-
obj = {
|
824
|
-
implicit: @implicit
|
825
|
-
}
|
826
|
-
|
827
|
-
# options
|
828
|
-
obj[:factors] = @k
|
829
|
-
obj[:epochs] = @niter
|
830
|
-
obj[:verbose] = @verbose
|
831
|
-
|
832
|
-
# factors
|
833
|
-
obj[:user_map] = @user_map
|
834
|
-
obj[:item_map] = @item_map
|
835
|
-
obj[:rated] = @rated
|
836
|
-
obj[:user_factors] = dump_ptr(@a)
|
837
|
-
obj[:item_factors] = dump_ptr(@b)
|
838
|
-
|
839
|
-
# bias
|
840
|
-
obj[:user_bias] = dump_ptr(@bias_a)
|
841
|
-
obj[:item_bias] = dump_ptr(@bias_b)
|
842
|
-
|
843
|
-
# mean
|
844
|
-
obj[:global_mean] = @global_mean
|
845
|
-
|
846
|
-
# side info
|
847
|
-
obj[:user_info_map] = @user_info_map
|
848
|
-
obj[:item_info_map] = @item_info_map
|
849
|
-
obj[:user_info_factors] = dump_ptr(@c)
|
850
|
-
obj[:item_info_factors] = dump_ptr(@d)
|
851
|
-
|
852
|
-
# implicit features
|
853
|
-
obj[:add_implicit_features] = @add_implicit_features
|
854
|
-
obj[:user_factors_implicit] = dump_ptr(@ai)
|
855
|
-
obj[:item_factors_implicit] = dump_ptr(@bi)
|
856
|
-
|
857
|
-
unless @implicit
|
858
|
-
obj[:min_rating] = @min_rating
|
859
|
-
obj[:max_rating] = @max_rating
|
860
|
-
end
|
861
|
-
|
862
|
-
obj[:user_means] = dump_ptr(@u_colmeans)
|
863
|
-
|
864
|
-
obj
|
865
|
-
end
|
866
|
-
|
867
|
-
def marshal_load(obj)
|
868
|
-
@implicit = obj[:implicit]
|
869
|
-
|
870
|
-
# options
|
871
|
-
set_params(
|
872
|
-
k: obj[:factors],
|
873
|
-
niter: obj[:epochs],
|
874
|
-
verbose: obj[:verbose],
|
875
|
-
user_bias: !obj[:user_bias].nil?,
|
876
|
-
item_bias: !obj[:item_bias].nil?,
|
877
|
-
add_implicit_features: obj[:add_implicit_features]
|
878
|
-
)
|
879
|
-
|
880
|
-
# factors
|
881
|
-
@user_map = obj[:user_map]
|
882
|
-
@item_map = obj[:item_map]
|
883
|
-
@rated = obj[:rated] || {}
|
884
|
-
@a = load_ptr(obj[:user_factors])
|
885
|
-
@b = load_ptr(obj[:item_factors])
|
886
|
-
|
887
|
-
# bias
|
888
|
-
@bias_a = load_ptr(obj[:user_bias])
|
889
|
-
@bias_b = load_ptr(obj[:item_bias])
|
890
|
-
|
891
|
-
# mean
|
892
|
-
@global_mean = obj[:global_mean]
|
893
|
-
|
894
|
-
# side info
|
895
|
-
@user_info_map = obj[:user_info_map]
|
896
|
-
@item_info_map = obj[:item_info_map]
|
897
|
-
@c = load_ptr(obj[:user_info_factors])
|
898
|
-
@d = load_ptr(obj[:item_info_factors])
|
899
|
-
|
900
|
-
# implicit features
|
901
|
-
@add_implicit_features = obj[:add_implicit_features]
|
902
|
-
@ai = load_ptr(obj[:user_factors_implicit])
|
903
|
-
@bi = load_ptr(obj[:item_factors_implicit])
|
904
|
-
|
905
|
-
unless @implicit
|
906
|
-
@min_rating = obj[:min_rating]
|
907
|
-
@max_rating = obj[:max_rating]
|
908
|
-
end
|
909
|
-
|
910
|
-
@u_colmeans = load_ptr(obj[:user_means])
|
911
|
-
|
912
|
-
@m = @user_map.size
|
913
|
-
@n = @item_map.size
|
914
|
-
@m_u = @user_info_map.size
|
915
|
-
@n_i = @item_info_map.size
|
916
|
-
|
917
|
-
set_implicit_vars if @implicit
|
918
|
-
|
919
|
-
@fit = @m > 0
|
920
|
-
end
|
921
|
-
|
922
813
|
def json_dump_ptr(ptr)
|
923
|
-
|
814
|
+
[ptr.to_s(ptr.size)].pack("m0") if ptr
|
924
815
|
end
|
925
816
|
|
926
817
|
def json_load_ptr(str)
|
927
|
-
Fiddle::Pointer[Base64.strict_decode64(str)] if str
|
818
|
+
Fiddle::Pointer[str.unpack1("m0")] if str
|
928
819
|
end
|
929
820
|
|
930
821
|
def json_load(obj)
|
931
|
-
require "base64"
|
932
|
-
|
933
822
|
@implicit = obj["implicit"]
|
934
823
|
|
935
824
|
# options
|
data/lib/cmfrec/version.rb
CHANGED
metadata
CHANGED
@@ -1,16 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
|
10
|
+
date: 2024-12-30 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: fiddle
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '0'
|
14
26
|
email: andrew@ankane.org
|
15
27
|
executables: []
|
16
28
|
extensions: []
|
@@ -37,7 +49,6 @@ homepage: https://github.com/ankane/cmfrec-ruby
|
|
37
49
|
licenses:
|
38
50
|
- MIT
|
39
51
|
metadata: {}
|
40
|
-
post_install_message:
|
41
52
|
rdoc_options: []
|
42
53
|
require_paths:
|
43
54
|
- lib
|
@@ -45,15 +56,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
45
56
|
requirements:
|
46
57
|
- - ">="
|
47
58
|
- !ruby/object:Gem::Version
|
48
|
-
version: '
|
59
|
+
version: '3.1'
|
49
60
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
61
|
requirements:
|
51
62
|
- - ">="
|
52
63
|
- !ruby/object:Gem::Version
|
53
64
|
version: '0'
|
54
65
|
requirements: []
|
55
|
-
rubygems_version: 3.
|
56
|
-
signing_key:
|
66
|
+
rubygems_version: 3.6.2
|
57
67
|
specification_version: 4
|
58
68
|
summary: Recommendations for Ruby using collective matrix factorization
|
59
69
|
test_files: []
|