libmf 0.1.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -2
- data/LICENSE.txt +26 -18
- data/README.md +87 -33
- data/lib/libmf.rb +11 -6
- data/lib/libmf/ffi.rb +2 -6
- data/lib/libmf/model.rb +52 -25
- data/lib/libmf/version.rb +1 -1
- data/vendor/{libmf/COPYRIGHT → COPYRIGHT} +0 -0
- data/vendor/{libmf/demo → demo}/real_matrix.te.txt +0 -0
- data/vendor/{libmf/demo → demo}/real_matrix.tr.txt +0 -0
- data/vendor/libmf.arm64.dylib +0 -0
- data/vendor/libmf.dylib +0 -0
- data/vendor/libmf.so +0 -0
- data/vendor/mf.dll +0 -0
- metadata +18 -89
- data/ext/libmf/extconf.rb +0 -18
- data/vendor/libmf/Makefile +0 -34
- data/vendor/libmf/Makefile.win +0 -36
- data/vendor/libmf/README +0 -637
- data/vendor/libmf/demo/all_one_matrix.te.txt +0 -1382
- data/vendor/libmf/demo/all_one_matrix.tr.txt +0 -5172
- data/vendor/libmf/demo/binary_matrix.te.txt +0 -1312
- data/vendor/libmf/demo/binary_matrix.tr.txt +0 -4937
- data/vendor/libmf/demo/demo.bat +0 -40
- data/vendor/libmf/demo/demo.sh +0 -58
- data/vendor/libmf/mf-predict.cpp +0 -207
- data/vendor/libmf/mf-train.cpp +0 -378
- data/vendor/libmf/mf.cpp +0 -4683
- data/vendor/libmf/mf.def +0 -21
- data/vendor/libmf/mf.h +0 -130
- data/vendor/libmf/windows/mf-predict.exe +0 -0
- data/vendor/libmf/windows/mf-train.exe +0 -0
- data/vendor/libmf/windows/mf.dll +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e654382e818f1b59bc4437e71bc3e0ae6f4d4d79b9c85aeb53fb3494b2baf888
|
4
|
+
data.tar.gz: 560fa519794c7cd8b29c27b9ac9f4247e0485e9f13f229c18a4562c8bee62868
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d16fb17f9b58cea5c53814a68fbf9de91440fd77e1669b52bbe5cb3a3837c94449d8bbc348405c0c117f219d2a513f1fc2e813b330ea127c93ac04a0c4101d07
|
7
|
+
data.tar.gz: 3d5d3962d3878fe992f76ef9d2fa5e30866d007cd5ae3ab97a666fc53f97f9f3f2487bb425e15b59becde6118c61ecac6c8511692742d64efa2b9e26fe0d2872
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,32 @@
|
|
1
|
-
## 0.
|
1
|
+
## 0.2.2 (2021-02-04)
|
2
|
+
|
3
|
+
- Reduced allocations
|
4
|
+
- Improved ARM detection
|
5
|
+
|
6
|
+
## 0.2.1 (2020-12-28)
|
7
|
+
|
8
|
+
- Added ARM shared library for Mac
|
9
|
+
|
10
|
+
## 0.2.0 (2020-03-26)
|
11
|
+
|
12
|
+
- Changed to BSD 3-Clause license to match LIBMF
|
13
|
+
- Added support for reading data directly from files
|
14
|
+
- Added `format: :numo` option to `p_factors` and `q_factors`
|
15
|
+
- Improved performance of loading data by 5x
|
16
|
+
|
17
|
+
## 0.1.3 (2019-11-07)
|
18
|
+
|
19
|
+
- Made parameter names more Ruby-like
|
20
|
+
- No need to set `do_nmf` with generalized KL-divergence
|
21
|
+
|
22
|
+
## 0.1.2 (2019-11-06)
|
23
|
+
|
24
|
+
- Fixed bug in `p_factors` and `q_factors` methods
|
25
|
+
|
26
|
+
## 0.1.1 (2019-11-05)
|
2
27
|
|
3
28
|
- Fixed errors on Linux and Windows
|
4
29
|
|
5
|
-
## 0.1.0
|
30
|
+
## 0.1.0 (2019-11-04)
|
6
31
|
|
7
32
|
- First release
|
data/LICENSE.txt
CHANGED
@@ -1,22 +1,30 @@
|
|
1
|
-
|
1
|
+
BSD 3-Clause License
|
2
2
|
|
3
|
-
|
3
|
+
Copyright (c) 2014-2015, The LIBMF Project
|
4
|
+
Copyright (c) 2019-2021, Andrew Kane
|
5
|
+
All rights reserved.
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
"Software"), to deal in the Software without restriction, including
|
8
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
the following conditions:
|
7
|
+
Redistribution and use in source and binary forms, with or without
|
8
|
+
modification, are permitted provided that the following conditions are met:
|
12
9
|
|
13
|
-
|
14
|
-
|
10
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
11
|
+
list of conditions and the following disclaimer.
|
15
12
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
13
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
14
|
+
this list of conditions and the following disclaimer in the documentation
|
15
|
+
and/or other materials provided with the distribution.
|
16
|
+
|
17
|
+
3. Neither the name of the copyright holder nor the names of its
|
18
|
+
contributors may be used to endorse or promote products derived from
|
19
|
+
this software without specific prior written permission.
|
20
|
+
|
21
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
22
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
23
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
24
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
25
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
26
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
27
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
28
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
29
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
[LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
|
4
4
|
|
5
|
-
|
5
|
+
Check out [Disco](https://github.com/ankane/disco) for higher-level collaborative filtering
|
6
6
|
|
7
|
-
[![Build Status](https://
|
7
|
+
[![Build Status](https://github.com/ankane/libmf/workflows/build/badge.svg?branch=master)](https://github.com/ankane/libmf/actions)
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
@@ -39,14 +39,19 @@ Make predictions
|
|
39
39
|
model.predict(row_index, column_index)
|
40
40
|
```
|
41
41
|
|
42
|
-
Get the
|
42
|
+
Get the latent factors (these approximate the training matrix)
|
43
43
|
|
44
44
|
```ruby
|
45
|
-
model.bias
|
46
45
|
model.p_factors
|
47
46
|
model.q_factors
|
48
47
|
```
|
49
48
|
|
49
|
+
Get the bias (average of all elements in the training matrix)
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
model.bias
|
53
|
+
```
|
54
|
+
|
50
55
|
Save the model to a file
|
51
56
|
|
52
57
|
```ruby
|
@@ -65,48 +70,87 @@ Pass a validation set
|
|
65
70
|
model.fit(data, eval_set: eval_set)
|
66
71
|
```
|
67
72
|
|
73
|
+
## Cross-Validation
|
74
|
+
|
75
|
+
Perform cross-validation
|
76
|
+
|
77
|
+
```ruby
|
78
|
+
model.cv(data)
|
79
|
+
```
|
80
|
+
|
81
|
+
Specify the number of folds
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
model.cv(data, folds: 5)
|
85
|
+
```
|
86
|
+
|
68
87
|
## Parameters
|
69
88
|
|
70
|
-
Pass parameters
|
89
|
+
Pass parameters - default values below
|
71
90
|
|
72
91
|
```ruby
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
lambda_q2 coefficient of L2-norm regularization on Q 0.1
|
90
|
-
eta learning rate 0.1
|
91
|
-
alpha importance of negative entries 0.1
|
92
|
-
c desired value of negative entries 0.0001
|
93
|
-
do_nmf perform non-negative MF (NMF) false
|
94
|
-
quiet no outputs to stdout false
|
95
|
-
copy_data copy data in training procedure true
|
92
|
+
Libmf::Model.new(
|
93
|
+
loss: 0, # loss function
|
94
|
+
factors: 8, # number of latent factors
|
95
|
+
threads: 12, # number of threads used
|
96
|
+
bins: 25, # number of bins
|
97
|
+
iterations: 20, # number of iterations
|
98
|
+
lambda_p1: 0, # coefficient of L1-norm regularization on P
|
99
|
+
lambda_p2: 0.1, # coefficient of L2-norm regularization on P
|
100
|
+
lambda_q1: 0, # coefficient of L1-norm regularization on Q
|
101
|
+
lambda_q2: 0.1, # coefficient of L2-norm regularization on Q
|
102
|
+
learning_rate: 0.1, # learning rate
|
103
|
+
alpha: 0.1, # importance of negative entries
|
104
|
+
c: 0.0001, # desired value of negative entries
|
105
|
+
nmf: false, # perform non-negative MF (NMF)
|
106
|
+
quiet: false # no outputs to stdout
|
107
|
+
)
|
96
108
|
```
|
97
109
|
|
98
|
-
|
110
|
+
### Loss Functions
|
99
111
|
|
100
|
-
|
112
|
+
For real-valued matrix factorization
|
113
|
+
|
114
|
+
- 0 - squared error (L2-norm)
|
115
|
+
- 1 - absolute error (L1-norm)
|
116
|
+
- 2 - generalized KL-divergence
|
117
|
+
|
118
|
+
For binary matrix factorization
|
119
|
+
|
120
|
+
- 5 - logarithmic error
|
121
|
+
- 6 - squared hinge loss
|
122
|
+
- 7 - hinge loss
|
123
|
+
|
124
|
+
For one-class matrix factorization
|
125
|
+
|
126
|
+
- 10 - row-oriented pair-wise logarithmic loss
|
127
|
+
- 11 - column-oriented pair-wise logarithmic loss
|
128
|
+
- 12 - squared error (L2-norm)
|
129
|
+
|
130
|
+
## Performance
|
131
|
+
|
132
|
+
For performance, read data directly from files
|
101
133
|
|
102
134
|
```ruby
|
103
|
-
model.
|
135
|
+
model.fit("train.txt", eval_set: "validate.txt")
|
136
|
+
model.cv("train.txt")
|
104
137
|
```
|
105
138
|
|
106
|
-
|
139
|
+
Data should be in the format `row_index column_index value`:
|
140
|
+
|
141
|
+
```txt
|
142
|
+
0 0 5.0
|
143
|
+
0 2 3.5
|
144
|
+
1 1 4.0
|
145
|
+
```
|
146
|
+
|
147
|
+
## Numo
|
148
|
+
|
149
|
+
Get latent factors as Numo arrays
|
107
150
|
|
108
151
|
```ruby
|
109
|
-
model.
|
152
|
+
model.p_factors(format: :numo)
|
153
|
+
model.q_factors(format: :numo)
|
110
154
|
```
|
111
155
|
|
112
156
|
## Resources
|
@@ -125,3 +169,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
125
169
|
- Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
|
126
170
|
- Write, clarify, or fix documentation
|
127
171
|
- Suggest or add new features
|
172
|
+
|
173
|
+
To get started with development:
|
174
|
+
|
175
|
+
```sh
|
176
|
+
git clone --recursive https://github.com/ankane/libmf.git
|
177
|
+
cd libmf
|
178
|
+
bundle install
|
179
|
+
bundle exec rake vendor:all
|
180
|
+
bundle exec rake test
|
181
|
+
```
|
data/lib/libmf.rb
CHANGED
@@ -11,15 +11,20 @@ module Libmf
|
|
11
11
|
class << self
|
12
12
|
attr_accessor :ffi_lib
|
13
13
|
end
|
14
|
-
|
15
|
-
if
|
16
|
-
"
|
17
|
-
elsif ::
|
18
|
-
"
|
14
|
+
lib_name =
|
15
|
+
if Gem.win_platform?
|
16
|
+
"mf.dll"
|
17
|
+
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
18
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm/i
|
19
|
+
"libmf.arm64.dylib"
|
20
|
+
else
|
21
|
+
"libmf.dylib"
|
22
|
+
end
|
19
23
|
else
|
20
24
|
"libmf.so"
|
21
25
|
end
|
22
|
-
|
26
|
+
vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
|
27
|
+
self.ffi_lib = [vendor_lib]
|
23
28
|
|
24
29
|
# friendlier error message
|
25
30
|
autoload :FFI, "libmf/ffi"
|
data/lib/libmf/ffi.rb
CHANGED
@@ -2,12 +2,7 @@ module Libmf
|
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
4
|
|
5
|
-
|
6
|
-
ffi_lib Libmf.ffi_lib
|
7
|
-
rescue LoadError => e
|
8
|
-
raise e if ENV["LIBMF_DEBUG"]
|
9
|
-
raise LoadError, "Could not find LIBMF"
|
10
|
-
end
|
5
|
+
ffi_lib Libmf.ffi_lib
|
11
6
|
|
12
7
|
class Node < ::FFI::Struct
|
13
8
|
layout :u, :int,
|
@@ -51,6 +46,7 @@ module Libmf
|
|
51
46
|
end
|
52
47
|
|
53
48
|
attach_function :mf_get_default_param, [], Parameter.by_value
|
49
|
+
attach_function :mf_read_problem, [:string], Problem.by_value
|
54
50
|
attach_function :mf_save_model, [Model.by_ref, :string], :int
|
55
51
|
attach_function :mf_load_model, [:string], Model.by_ref
|
56
52
|
attach_function :mf_destroy_model, [Model.by_ref], :void
|
data/lib/libmf/model.rb
CHANGED
@@ -51,16 +51,27 @@ module Libmf
|
|
51
51
|
model[:b]
|
52
52
|
end
|
53
53
|
|
54
|
-
def p_factors
|
55
|
-
|
54
|
+
def p_factors(format: nil)
|
55
|
+
_factors(model[:p], rows, format)
|
56
56
|
end
|
57
57
|
|
58
|
-
def q_factors
|
59
|
-
|
58
|
+
def q_factors(format: nil)
|
59
|
+
_factors(model[:q], columns, format)
|
60
60
|
end
|
61
61
|
|
62
62
|
private
|
63
63
|
|
64
|
+
def _factors(ptr, n, format)
|
65
|
+
case format
|
66
|
+
when :numo
|
67
|
+
Numo::SFloat.from_string(ptr.read_bytes(n * factors * 4)).reshape(n, factors)
|
68
|
+
when nil
|
69
|
+
ptr.read_array_of_float(n * factors).each_slice(factors).to_a
|
70
|
+
else
|
71
|
+
raise ArgumentError, "Invalid format"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
64
75
|
def model
|
65
76
|
raise Error, "Not fit" unless @model
|
66
77
|
@model
|
@@ -68,45 +79,61 @@ module Libmf
|
|
68
79
|
|
69
80
|
def param
|
70
81
|
param = FFI.mf_get_default_param
|
82
|
+
options = @options.dup
|
71
83
|
# silence insufficient blocks warning with default params
|
72
|
-
options
|
84
|
+
options[:bins] ||= 25 unless options[:nr_bins]
|
85
|
+
options[:copy_data] = false unless options.key?(:copy_data)
|
86
|
+
options_map = {
|
87
|
+
:loss => :fun,
|
88
|
+
:factors => :k,
|
89
|
+
:threads => :nr_threads,
|
90
|
+
:bins => :nr_bins,
|
91
|
+
:iterations => :nr_iters,
|
92
|
+
:learning_rate => :eta,
|
93
|
+
:nmf => :do_nmf
|
94
|
+
}
|
73
95
|
options.each do |k, v|
|
96
|
+
k = options_map[k] if options_map[k]
|
74
97
|
param[k] = v
|
75
98
|
end
|
99
|
+
# do_nmf must be true for generalized KL-divergence
|
100
|
+
param[:do_nmf] = true if param[:fun] == 2
|
76
101
|
param
|
77
102
|
end
|
78
103
|
|
79
104
|
def create_problem(data)
|
105
|
+
if data.is_a?(String)
|
106
|
+
# need to expand path so it's absolute
|
107
|
+
return FFI.mf_read_problem(File.expand_path(data))
|
108
|
+
end
|
109
|
+
|
80
110
|
raise Error, "No data" if data.empty?
|
81
111
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
nodes << n
|
112
|
+
# TODO do in C for better performance
|
113
|
+
# can use FIX2INT() and RFLOAT_VALUE() instead of pack
|
114
|
+
# and write directly to C string
|
115
|
+
buffer = String.new
|
116
|
+
pack_format = "iif"
|
117
|
+
data.each do |row|
|
118
|
+
row.pack(pack_format, buffer: buffer)
|
90
119
|
end
|
91
120
|
|
92
|
-
|
93
|
-
|
121
|
+
r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
|
122
|
+
r.write_bytes(buffer)
|
123
|
+
|
124
|
+
# double check size is what we expect
|
125
|
+
# FFI will throw an error above if too long
|
126
|
+
raise Error, "Bad buffer size" if r.size != buffer.bytesize
|
127
|
+
|
128
|
+
m = data.max_by { |r| r[0] }[0] + 1
|
129
|
+
n = data.max_by { |r| r[1] }[1] + 1
|
94
130
|
|
95
131
|
prob = FFI::Problem.new
|
96
132
|
prob[:m] = m
|
97
133
|
prob[:n] = n
|
98
|
-
prob[:nnz] =
|
134
|
+
prob[:nnz] = data.size
|
99
135
|
prob[:r] = r
|
100
136
|
prob
|
101
137
|
end
|
102
|
-
|
103
|
-
def reshape(arr, dims)
|
104
|
-
rows = dims.first
|
105
|
-
new_arr = rows.times.map { [] }
|
106
|
-
arr.each_with_index do |v, i|
|
107
|
-
new_arr[i % rows] << v
|
108
|
-
end
|
109
|
-
new_arr
|
110
|
-
end
|
111
138
|
end
|
112
139
|
end
|
data/lib/libmf/version.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
Binary file
|
data/vendor/libmf.dylib
ADDED
Binary file
|
data/vendor/libmf.so
ADDED
Binary file
|
data/vendor/mf.dll
ADDED
Binary file
|