cmfrec 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/cmfrec/data.rb +12 -15
- data/lib/cmfrec/recommender.rb +3 -114
- data/lib/cmfrec/version.rb +1 -1
- metadata +19 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c549b3c94d2b80e94938f61694636fc35b85bb0217bec39d5234f81dbba1684b
|
4
|
+
data.tar.gz: e953ddaa00c5ba4b13838300aad880175db5d2c1531bf284866348b69ff2075b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53e65f7b6ab488238713ff468c438cf45ecdbe85608c505fe4a66f140486b279785355fbf4aaedd72f6c4d6bc5fad502c97d7d592c24c4952520186db83adef1
|
7
|
+
data.tar.gz: 4dd240635c221c6eada3c66f9fa244da90c342655ebcdf98731fbd805bec1015a51c72fdb87a1c6423b0994ccd9d4729a784d6af603a2f6154aa1178ef37bf6b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.3.1 (2024-12-29)
|
2
|
+
|
3
|
+
- Removed dependency on `base64` gem for serialization
|
4
|
+
|
5
|
+
## 0.3.0 (2024-10-23)
|
6
|
+
|
7
|
+
- Changed dataset directory to match XDG Base Directory Specification
|
8
|
+
- Removed dependency on `csv` gem for `load_movielens`
|
9
|
+
- Dropped support for marshal serialization
|
10
|
+
- Dropped support for Ruby < 3.1
|
11
|
+
|
1
12
|
## 0.2.1 (2022-07-11)
|
2
13
|
|
3
14
|
- Added support for JSON serialization
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
- Works with explicit and implicit feedback
|
7
7
|
- Uses high-performance matrix factorization
|
8
8
|
|
9
|
-
[Build Status](https://github.com/ankane/cmfrec-ruby/actions)
|
10
10
|
|
11
11
|
## Installation
|
12
12
|
|
data/lib/cmfrec/data.rb
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
module Cmfrec
|
2
2
|
module Data
|
3
3
|
def load_movielens
|
4
|
-
require "csv"
|
5
|
-
|
6
4
|
data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
7
5
|
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
8
6
|
user_path = download_file("ml-100k/u.user", "https://files.grouplens.org/datasets/movielens/ml-100k/u.user",
|
@@ -10,11 +8,9 @@ module Cmfrec
|
|
10
8
|
item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
11
9
|
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
12
10
|
|
13
|
-
# convert u.item to utf-8
|
14
|
-
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
|
15
|
-
|
16
11
|
user_info = []
|
17
|
-
|
12
|
+
File.foreach(user_path) do |line|
|
13
|
+
row = line.split("|")
|
18
14
|
user = {user_id: row[0].to_i}
|
19
15
|
10.times do |i|
|
20
16
|
user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
|
@@ -26,26 +22,28 @@ module Cmfrec
|
|
26
22
|
movies = {}
|
27
23
|
movie_names = {}
|
28
24
|
genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
|
29
|
-
|
25
|
+
File.foreach(item_path) do |line|
|
26
|
+
row = line.encode("UTF-8", "ISO-8859-1").split("|")
|
30
27
|
movies[row[0]] = row[1]
|
31
28
|
|
32
29
|
# filter duplicates
|
33
30
|
next if movie_names[row[1]]
|
34
31
|
movie_names[row[1]] = true
|
35
32
|
|
36
|
-
item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
|
33
|
+
item = {item_id: row[1], year: !row[2].empty? ? Date.parse(row[2]).year : 1970}
|
37
34
|
genres.each_with_index do |genre, i|
|
38
|
-
item[:"genre_#{genre}"] = row[i + 5]
|
35
|
+
item[:"genre_#{genre}"] = row[i + 5].to_i
|
39
36
|
end
|
40
37
|
item_info << item
|
41
38
|
end
|
42
39
|
|
43
40
|
data = []
|
44
|
-
|
41
|
+
File.foreach(data_path) do |line|
|
42
|
+
row = line.split("\t")
|
45
43
|
data << {
|
46
|
-
user_id: row[0],
|
44
|
+
user_id: row[0].to_i,
|
47
45
|
item_id: movies[row[1]],
|
48
|
-
rating: row[2]
|
46
|
+
rating: row[2].to_i
|
49
47
|
}
|
50
48
|
end
|
51
49
|
|
@@ -60,9 +58,8 @@ module Cmfrec
|
|
60
58
|
require "net/http"
|
61
59
|
require "tmpdir"
|
62
60
|
|
63
|
-
|
64
|
-
|
65
|
-
dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
|
61
|
+
cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
|
62
|
+
dest = "#{cache_home}/cmfrec/#{fname}"
|
66
63
|
FileUtils.mkdir_p(File.dirname(dest))
|
67
64
|
|
68
65
|
return dest if File.exist?(dest)
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -250,7 +250,6 @@ module Cmfrec
|
|
250
250
|
end
|
251
251
|
|
252
252
|
def to_json
|
253
|
-
require "base64"
|
254
253
|
require "json"
|
255
254
|
|
256
255
|
obj = {
|
@@ -514,7 +513,7 @@ module Cmfrec
|
|
514
513
|
nil, #precomputedBiTBi,
|
515
514
|
nil, #precomputedTransCtCinvCt,
|
516
515
|
nil, #precomputedCtCw
|
517
|
-
nil
|
516
|
+
nil #precomputedCtUbias
|
518
517
|
]
|
519
518
|
check_status FFI.fit_collective_explicit_als(*fiddle_args(args))
|
520
519
|
|
@@ -811,125 +810,15 @@ module Cmfrec
|
|
811
810
|
@finalize_chol = false
|
812
811
|
end
|
813
812
|
|
814
|
-
def dump_ptr(ptr)
|
815
|
-
ptr.to_s(ptr.size) if ptr
|
816
|
-
end
|
817
|
-
|
818
|
-
def load_ptr(str)
|
819
|
-
Fiddle::Pointer[str] if str
|
820
|
-
end
|
821
|
-
|
822
|
-
def marshal_dump
|
823
|
-
obj = {
|
824
|
-
implicit: @implicit
|
825
|
-
}
|
826
|
-
|
827
|
-
# options
|
828
|
-
obj[:factors] = @k
|
829
|
-
obj[:epochs] = @niter
|
830
|
-
obj[:verbose] = @verbose
|
831
|
-
|
832
|
-
# factors
|
833
|
-
obj[:user_map] = @user_map
|
834
|
-
obj[:item_map] = @item_map
|
835
|
-
obj[:rated] = @rated
|
836
|
-
obj[:user_factors] = dump_ptr(@a)
|
837
|
-
obj[:item_factors] = dump_ptr(@b)
|
838
|
-
|
839
|
-
# bias
|
840
|
-
obj[:user_bias] = dump_ptr(@bias_a)
|
841
|
-
obj[:item_bias] = dump_ptr(@bias_b)
|
842
|
-
|
843
|
-
# mean
|
844
|
-
obj[:global_mean] = @global_mean
|
845
|
-
|
846
|
-
# side info
|
847
|
-
obj[:user_info_map] = @user_info_map
|
848
|
-
obj[:item_info_map] = @item_info_map
|
849
|
-
obj[:user_info_factors] = dump_ptr(@c)
|
850
|
-
obj[:item_info_factors] = dump_ptr(@d)
|
851
|
-
|
852
|
-
# implicit features
|
853
|
-
obj[:add_implicit_features] = @add_implicit_features
|
854
|
-
obj[:user_factors_implicit] = dump_ptr(@ai)
|
855
|
-
obj[:item_factors_implicit] = dump_ptr(@bi)
|
856
|
-
|
857
|
-
unless @implicit
|
858
|
-
obj[:min_rating] = @min_rating
|
859
|
-
obj[:max_rating] = @max_rating
|
860
|
-
end
|
861
|
-
|
862
|
-
obj[:user_means] = dump_ptr(@u_colmeans)
|
863
|
-
|
864
|
-
obj
|
865
|
-
end
|
866
|
-
|
867
|
-
def marshal_load(obj)
|
868
|
-
@implicit = obj[:implicit]
|
869
|
-
|
870
|
-
# options
|
871
|
-
set_params(
|
872
|
-
k: obj[:factors],
|
873
|
-
niter: obj[:epochs],
|
874
|
-
verbose: obj[:verbose],
|
875
|
-
user_bias: !obj[:user_bias].nil?,
|
876
|
-
item_bias: !obj[:item_bias].nil?,
|
877
|
-
add_implicit_features: obj[:add_implicit_features]
|
878
|
-
)
|
879
|
-
|
880
|
-
# factors
|
881
|
-
@user_map = obj[:user_map]
|
882
|
-
@item_map = obj[:item_map]
|
883
|
-
@rated = obj[:rated] || {}
|
884
|
-
@a = load_ptr(obj[:user_factors])
|
885
|
-
@b = load_ptr(obj[:item_factors])
|
886
|
-
|
887
|
-
# bias
|
888
|
-
@bias_a = load_ptr(obj[:user_bias])
|
889
|
-
@bias_b = load_ptr(obj[:item_bias])
|
890
|
-
|
891
|
-
# mean
|
892
|
-
@global_mean = obj[:global_mean]
|
893
|
-
|
894
|
-
# side info
|
895
|
-
@user_info_map = obj[:user_info_map]
|
896
|
-
@item_info_map = obj[:item_info_map]
|
897
|
-
@c = load_ptr(obj[:user_info_factors])
|
898
|
-
@d = load_ptr(obj[:item_info_factors])
|
899
|
-
|
900
|
-
# implicit features
|
901
|
-
@add_implicit_features = obj[:add_implicit_features]
|
902
|
-
@ai = load_ptr(obj[:user_factors_implicit])
|
903
|
-
@bi = load_ptr(obj[:item_factors_implicit])
|
904
|
-
|
905
|
-
unless @implicit
|
906
|
-
@min_rating = obj[:min_rating]
|
907
|
-
@max_rating = obj[:max_rating]
|
908
|
-
end
|
909
|
-
|
910
|
-
@u_colmeans = load_ptr(obj[:user_means])
|
911
|
-
|
912
|
-
@m = @user_map.size
|
913
|
-
@n = @item_map.size
|
914
|
-
@m_u = @user_info_map.size
|
915
|
-
@n_i = @item_info_map.size
|
916
|
-
|
917
|
-
set_implicit_vars if @implicit
|
918
|
-
|
919
|
-
@fit = @m > 0
|
920
|
-
end
|
921
|
-
|
922
813
|
def json_dump_ptr(ptr)
|
923
|
-
|
814
|
+
[ptr.to_s(ptr.size)].pack("m0") if ptr
|
924
815
|
end
|
925
816
|
|
926
817
|
def json_load_ptr(str)
|
927
|
-
Fiddle::Pointer[
|
818
|
+
Fiddle::Pointer[str.unpack1("m0")] if str
|
928
819
|
end
|
929
820
|
|
930
821
|
def json_load(obj)
|
931
|
-
require "base64"
|
932
|
-
|
933
822
|
@implicit = obj["implicit"]
|
934
823
|
|
935
824
|
# options
|
data/lib/cmfrec/version.rb
CHANGED
metadata
CHANGED
@@ -1,16 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
|
10
|
+
date: 2024-12-30 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: fiddle
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '0'
|
14
26
|
email: andrew@ankane.org
|
15
27
|
executables: []
|
16
28
|
extensions: []
|
@@ -37,7 +49,6 @@ homepage: https://github.com/ankane/cmfrec-ruby
|
|
37
49
|
licenses:
|
38
50
|
- MIT
|
39
51
|
metadata: {}
|
40
|
-
post_install_message:
|
41
52
|
rdoc_options: []
|
42
53
|
require_paths:
|
43
54
|
- lib
|
@@ -45,15 +56,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
45
56
|
requirements:
|
46
57
|
- - ">="
|
47
58
|
- !ruby/object:Gem::Version
|
48
|
-
version: '
|
59
|
+
version: '3.1'
|
49
60
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
61
|
requirements:
|
51
62
|
- - ">="
|
52
63
|
- !ruby/object:Gem::Version
|
53
64
|
version: '0'
|
54
65
|
requirements: []
|
55
|
-
rubygems_version: 3.
|
56
|
-
signing_key:
|
66
|
+
rubygems_version: 3.6.2
|
57
67
|
specification_version: 4
|
58
68
|
summary: Recommendations for Ruby using collective matrix factorization
|
59
69
|
test_files: []
|