cmfrec 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/cmfrec/data.rb +12 -15
- data/lib/cmfrec/recommender.rb +1 -109
- data/lib/cmfrec/version.rb +1 -1
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3cc7d70530eefc7b13324753b454d03573da7d832c46cb4dee2ab9213eafcdd
|
4
|
+
data.tar.gz: 662af2ec4ab1a1bd33a39c18773d0c48967422286a2fa137960772b7e67d437a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8fd6f1f8f0bd7d7c870c28fb57a0cec89aacf2d27aed53b5d68fb6935f5071dbe73931a5ff776f4a864f0cc91a17c793eabfe2a2b21f9b368a4c36ada5cb929d
|
7
|
+
data.tar.gz: 116d26ddafeeb439ef0895e30805afa0d2d2a453aeb369cf7122f13f5bf3ad457dac65c974d26debc84f353baf5f0c889c559e56620ef7458af4968ee9f5262a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.3.0 (2024-10-23)
|
2
|
+
|
3
|
+
- Changed dataset directory to match XDG Base Directory Specification
|
4
|
+
- Removed dependency on `csv` gem for `load_movielens`
|
5
|
+
- Dropped support for marshal serialization
|
6
|
+
- Dropped support for Ruby < 3.1
|
7
|
+
|
1
8
|
## 0.2.1 (2022-07-11)
|
2
9
|
|
3
10
|
- Added support for JSON serialization
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
- Works with explicit and implicit feedback
|
7
7
|
- Uses high-performance matrix factorization
|
8
8
|
|
9
|
-
[Build Status](https://github.com/ankane/cmfrec-ruby/actions)
|
10
10
|
|
11
11
|
## Installation
|
12
12
|
|
data/lib/cmfrec/data.rb
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
module Cmfrec
|
2
2
|
module Data
|
3
3
|
def load_movielens
|
4
|
-
require "csv"
|
5
|
-
|
6
4
|
data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
7
5
|
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
8
6
|
user_path = download_file("ml-100k/u.user", "https://files.grouplens.org/datasets/movielens/ml-100k/u.user",
|
@@ -10,11 +8,9 @@ module Cmfrec
|
|
10
8
|
item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
11
9
|
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
12
10
|
|
13
|
-
# convert u.item to utf-8
|
14
|
-
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
|
15
|
-
|
16
11
|
user_info = []
|
17
|
-
|
12
|
+
File.foreach(user_path) do |line|
|
13
|
+
row = line.split("|")
|
18
14
|
user = {user_id: row[0].to_i}
|
19
15
|
10.times do |i|
|
20
16
|
user[:"region#{i}"] = row[4][0] == i.to_s ? 1 : 0
|
@@ -26,26 +22,28 @@ module Cmfrec
|
|
26
22
|
movies = {}
|
27
23
|
movie_names = {}
|
28
24
|
genres = %w(unknown action adventure animation childrens comedy crime documentary drama fantasy filmnoir horror musical mystery romance scifi thriller war western)
|
29
|
-
|
25
|
+
File.foreach(item_path) do |line|
|
26
|
+
row = line.encode("UTF-8", "ISO-8859-1").split("|")
|
30
27
|
movies[row[0]] = row[1]
|
31
28
|
|
32
29
|
# filter duplicates
|
33
30
|
next if movie_names[row[1]]
|
34
31
|
movie_names[row[1]] = true
|
35
32
|
|
36
|
-
item = {item_id: row[1], year: row[2] ? Date.parse(row[2]).year : 1970}
|
33
|
+
item = {item_id: row[1], year: !row[2].empty? ? Date.parse(row[2]).year : 1970}
|
37
34
|
genres.each_with_index do |genre, i|
|
38
|
-
item[:"genre_#{genre}"] = row[i + 5]
|
35
|
+
item[:"genre_#{genre}"] = row[i + 5].to_i
|
39
36
|
end
|
40
37
|
item_info << item
|
41
38
|
end
|
42
39
|
|
43
40
|
data = []
|
44
|
-
|
41
|
+
File.foreach(data_path) do |line|
|
42
|
+
row = line.split("\t")
|
45
43
|
data << {
|
46
|
-
user_id: row[0],
|
44
|
+
user_id: row[0].to_i,
|
47
45
|
item_id: movies[row[1]],
|
48
|
-
rating: row[2]
|
46
|
+
rating: row[2].to_i
|
49
47
|
}
|
50
48
|
end
|
51
49
|
|
@@ -60,9 +58,8 @@ module Cmfrec
|
|
60
58
|
require "net/http"
|
61
59
|
require "tmpdir"
|
62
60
|
|
63
|
-
|
64
|
-
|
65
|
-
dest = "#{ENV["HOME"]}/.cmfrec/#{fname}"
|
61
|
+
cache_home = ENV["XDG_CACHE_HOME"] || "#{ENV.fetch("HOME")}/.cache"
|
62
|
+
dest = "#{cache_home}/cmfrec/#{fname}"
|
66
63
|
FileUtils.mkdir_p(File.dirname(dest))
|
67
64
|
|
68
65
|
return dest if File.exist?(dest)
|
data/lib/cmfrec/recommender.rb
CHANGED
@@ -514,7 +514,7 @@ module Cmfrec
|
|
514
514
|
nil, #precomputedBiTBi,
|
515
515
|
nil, #precomputedTransCtCinvCt,
|
516
516
|
nil, #precomputedCtCw
|
517
|
-
nil
|
517
|
+
nil #precomputedCtUbias
|
518
518
|
]
|
519
519
|
check_status FFI.fit_collective_explicit_als(*fiddle_args(args))
|
520
520
|
|
@@ -811,114 +811,6 @@ module Cmfrec
|
|
811
811
|
@finalize_chol = false
|
812
812
|
end
|
813
813
|
|
814
|
-
def dump_ptr(ptr)
|
815
|
-
ptr.to_s(ptr.size) if ptr
|
816
|
-
end
|
817
|
-
|
818
|
-
def load_ptr(str)
|
819
|
-
Fiddle::Pointer[str] if str
|
820
|
-
end
|
821
|
-
|
822
|
-
def marshal_dump
|
823
|
-
obj = {
|
824
|
-
implicit: @implicit
|
825
|
-
}
|
826
|
-
|
827
|
-
# options
|
828
|
-
obj[:factors] = @k
|
829
|
-
obj[:epochs] = @niter
|
830
|
-
obj[:verbose] = @verbose
|
831
|
-
|
832
|
-
# factors
|
833
|
-
obj[:user_map] = @user_map
|
834
|
-
obj[:item_map] = @item_map
|
835
|
-
obj[:rated] = @rated
|
836
|
-
obj[:user_factors] = dump_ptr(@a)
|
837
|
-
obj[:item_factors] = dump_ptr(@b)
|
838
|
-
|
839
|
-
# bias
|
840
|
-
obj[:user_bias] = dump_ptr(@bias_a)
|
841
|
-
obj[:item_bias] = dump_ptr(@bias_b)
|
842
|
-
|
843
|
-
# mean
|
844
|
-
obj[:global_mean] = @global_mean
|
845
|
-
|
846
|
-
# side info
|
847
|
-
obj[:user_info_map] = @user_info_map
|
848
|
-
obj[:item_info_map] = @item_info_map
|
849
|
-
obj[:user_info_factors] = dump_ptr(@c)
|
850
|
-
obj[:item_info_factors] = dump_ptr(@d)
|
851
|
-
|
852
|
-
# implicit features
|
853
|
-
obj[:add_implicit_features] = @add_implicit_features
|
854
|
-
obj[:user_factors_implicit] = dump_ptr(@ai)
|
855
|
-
obj[:item_factors_implicit] = dump_ptr(@bi)
|
856
|
-
|
857
|
-
unless @implicit
|
858
|
-
obj[:min_rating] = @min_rating
|
859
|
-
obj[:max_rating] = @max_rating
|
860
|
-
end
|
861
|
-
|
862
|
-
obj[:user_means] = dump_ptr(@u_colmeans)
|
863
|
-
|
864
|
-
obj
|
865
|
-
end
|
866
|
-
|
867
|
-
def marshal_load(obj)
|
868
|
-
@implicit = obj[:implicit]
|
869
|
-
|
870
|
-
# options
|
871
|
-
set_params(
|
872
|
-
k: obj[:factors],
|
873
|
-
niter: obj[:epochs],
|
874
|
-
verbose: obj[:verbose],
|
875
|
-
user_bias: !obj[:user_bias].nil?,
|
876
|
-
item_bias: !obj[:item_bias].nil?,
|
877
|
-
add_implicit_features: obj[:add_implicit_features]
|
878
|
-
)
|
879
|
-
|
880
|
-
# factors
|
881
|
-
@user_map = obj[:user_map]
|
882
|
-
@item_map = obj[:item_map]
|
883
|
-
@rated = obj[:rated] || {}
|
884
|
-
@a = load_ptr(obj[:user_factors])
|
885
|
-
@b = load_ptr(obj[:item_factors])
|
886
|
-
|
887
|
-
# bias
|
888
|
-
@bias_a = load_ptr(obj[:user_bias])
|
889
|
-
@bias_b = load_ptr(obj[:item_bias])
|
890
|
-
|
891
|
-
# mean
|
892
|
-
@global_mean = obj[:global_mean]
|
893
|
-
|
894
|
-
# side info
|
895
|
-
@user_info_map = obj[:user_info_map]
|
896
|
-
@item_info_map = obj[:item_info_map]
|
897
|
-
@c = load_ptr(obj[:user_info_factors])
|
898
|
-
@d = load_ptr(obj[:item_info_factors])
|
899
|
-
|
900
|
-
# implicit features
|
901
|
-
@add_implicit_features = obj[:add_implicit_features]
|
902
|
-
@ai = load_ptr(obj[:user_factors_implicit])
|
903
|
-
@bi = load_ptr(obj[:item_factors_implicit])
|
904
|
-
|
905
|
-
unless @implicit
|
906
|
-
@min_rating = obj[:min_rating]
|
907
|
-
@max_rating = obj[:max_rating]
|
908
|
-
end
|
909
|
-
|
910
|
-
@u_colmeans = load_ptr(obj[:user_means])
|
911
|
-
|
912
|
-
@m = @user_map.size
|
913
|
-
@n = @item_map.size
|
914
|
-
@m_u = @user_info_map.size
|
915
|
-
@n_i = @item_info_map.size
|
916
|
-
|
917
|
-
set_implicit_vars if @implicit
|
918
|
-
|
919
|
-
@fit = @m > 0
|
920
|
-
end
|
921
|
-
|
922
814
|
def json_dump_ptr(ptr)
|
923
815
|
Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
|
924
816
|
end
|
data/lib/cmfrec/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
11
|
+
date: 2024-10-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: fiddle
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
description:
|
14
28
|
email: andrew@ankane.org
|
15
29
|
executables: []
|
@@ -45,14 +59,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
45
59
|
requirements:
|
46
60
|
- - ">="
|
47
61
|
- !ruby/object:Gem::Version
|
48
|
-
version: '
|
62
|
+
version: '3.1'
|
49
63
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
64
|
requirements:
|
51
65
|
- - ">="
|
52
66
|
- !ruby/object:Gem::Version
|
53
67
|
version: '0'
|
54
68
|
requirements: []
|
55
|
-
rubygems_version: 3.
|
69
|
+
rubygems_version: 3.5.16
|
56
70
|
signing_key:
|
57
71
|
specification_version: 4
|
58
72
|
summary: Recommendations for Ruby using collective matrix factorization
|