libmf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +125 -0
- data/ext/libmf/extconf.rb +18 -0
- data/lib/libmf.bundle +0 -0
- data/lib/libmf.rb +26 -0
- data/lib/libmf/ffi.rb +62 -0
- data/lib/libmf/model.rb +112 -0
- data/lib/libmf/version.rb +3 -0
- data/vendor/libmf/COPYRIGHT +31 -0
- data/vendor/libmf/Makefile +34 -0
- data/vendor/libmf/Makefile.win +36 -0
- data/vendor/libmf/README +637 -0
- data/vendor/libmf/demo/all_one_matrix.te.txt +1382 -0
- data/vendor/libmf/demo/all_one_matrix.tr.txt +5172 -0
- data/vendor/libmf/demo/binary_matrix.te.txt +1312 -0
- data/vendor/libmf/demo/binary_matrix.tr.txt +4937 -0
- data/vendor/libmf/demo/demo.bat +40 -0
- data/vendor/libmf/demo/demo.sh +58 -0
- data/vendor/libmf/demo/real_matrix.te.txt +794 -0
- data/vendor/libmf/demo/real_matrix.tr.txt +5000 -0
- data/vendor/libmf/mf-predict.cpp +207 -0
- data/vendor/libmf/mf-train.cpp +378 -0
- data/vendor/libmf/mf.cpp +4683 -0
- data/vendor/libmf/mf.def +21 -0
- data/vendor/libmf/mf.h +130 -0
- data/vendor/libmf/windows/mf-predict.exe +0 -0
- data/vendor/libmf/windows/mf-train.exe +0 -0
- data/vendor/libmf/windows/mf.dll +0 -0
- metadata +142 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 85fc60af42649286b87cf23130c0efafd0d8951423d31b187d13097b2418e7d1
|
4
|
+
data.tar.gz: ab568af8e036b6d38fcc604746eeda4fa29e2e6e7541e6af53b77f9979e9fe82
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: efcbffd9ed9e6f66911a63e74d694da77d798d0fde04cb1490c1ee4eaf8d9e1e93b1af13ab8e23a4a858b74503d28a616ad6499a590f4a1568df2a9dcb65d85f
|
7
|
+
data.tar.gz: 671b306cad36c2ea5da6633de6703892e19fbb5774cf22a0d458ba3d967ab50dc5ae56ed271771e4ed1a0483346d8d9f34ec5db9c8424d16f369e81c5f467857
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
# LIBMF
|
2
|
+
|
3
|
+
[LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
|
4
|
+
|
5
|
+
:fire: Uses the C API for blazing performance
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application’s Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'libmf'
|
13
|
+
```
|
14
|
+
|
15
|
+
## Getting Started
|
16
|
+
|
17
|
+
Prep your data in the format `[row_index, column_index, value]`
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
data = [
|
21
|
+
[0, 0, 5.0],
|
22
|
+
[0, 2, 3.5],
|
23
|
+
[1, 1, 4.0]
|
24
|
+
]
|
25
|
+
```
|
26
|
+
|
27
|
+
Create a model
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
model = Libmf::Model.new
|
31
|
+
model.fit(data)
|
32
|
+
```
|
33
|
+
|
34
|
+
Make predictions
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
model.predict(row_index, column_index)
|
38
|
+
```
|
39
|
+
|
40
|
+
Get the bias and latent factors
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
model.bias
|
44
|
+
model.p_factors
|
45
|
+
model.q_factors
|
46
|
+
```
|
47
|
+
|
48
|
+
Save the model to a file
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
model.save_model("model.txt")
|
52
|
+
```
|
53
|
+
|
54
|
+
Load the model from a file
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
model.load_model("model.txt")
|
58
|
+
```
|
59
|
+
|
60
|
+
Pass a validation set
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
model.fit(data, eval_set: eval_set)
|
64
|
+
```
|
65
|
+
|
66
|
+
## Parameters
|
67
|
+
|
68
|
+
Pass parameters
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
model = Libmf::Model.new(k: 20, nr_iters: 50)
|
72
|
+
```
|
73
|
+
|
74
|
+
Supports the same parameters as LIBMF
|
75
|
+
|
76
|
+
```text
|
77
|
+
variable meaning default
|
78
|
+
================================================================
|
79
|
+
fun loss function 0
|
80
|
+
k number of latent factors 8
|
81
|
+
nr_threads number of threads used 12
|
82
|
+
nr_bins number of bins 25
|
83
|
+
nr_iters number of iterations 20
|
84
|
+
lambda_p1 coefficient of L1-norm regularization on P 0
|
85
|
+
lambda_p2 coefficient of L2-norm regularization on P 0.1
|
86
|
+
lambda_q1 coefficient of L1-norm regularization on Q 0
|
87
|
+
lambda_q2 coefficient of L2-norm regularization on Q 0.1
|
88
|
+
eta learning rate 0.1
|
89
|
+
alpha importance of negative entries 0.1
|
90
|
+
c desired value of negative entries 0.0001
|
91
|
+
do_nmf perform non-negative MF (NMF) false
|
92
|
+
quiet no outputs to stdout false
|
93
|
+
copy_data copy data in training procedure true
|
94
|
+
```
|
95
|
+
|
96
|
+
## Cross-Validation
|
97
|
+
|
98
|
+
Perform cross-validation
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
model.cv(data)
|
102
|
+
```
|
103
|
+
|
104
|
+
Specify the number of folds
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
model.cv(data, folds: 5)
|
108
|
+
```
|
109
|
+
|
110
|
+
## Resources
|
111
|
+
|
112
|
+
- [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf)
|
113
|
+
|
114
|
+
## History
|
115
|
+
|
116
|
+
View the [changelog](https://github.com/ankane/libmf/blob/master/CHANGELOG.md)
|
117
|
+
|
118
|
+
## Contributing
|
119
|
+
|
120
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
121
|
+
|
122
|
+
- [Report bugs](https://github.com/ankane/libmf/issues)
|
123
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
|
124
|
+
- Write, clarify, or fix documentation
|
125
|
+
- Suggest or add new features
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Generates the Makefile used to build the native LIBMF extension.
require "mkmf"

if RbConfig::CONFIG["arch"] =~ /mingw/
  # On MinGW, write out the Makefile text returned by mkmf's dummy_makefile.
  File.write("Makefile", dummy_makefile("libmf").join)
else
  # The vendored sources are C++, so the C++ runtime must be linkable.
  have_library("stdc++") || abort("Missing stdc++")
  $CXXFLAGS << " -std=c++11"

  # TODO
  # if have_library("libomp")
  # end

  # Only mf.o goes into the extension; sources are looked up in vendor/libmf.
  $objs = %w[mf.o]
  create_makefile("libmf", File.expand_path("../../vendor/libmf", __dir__))
end
|
data/lib/libmf.bundle
ADDED
Binary file
|
data/lib/libmf.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# dependencies
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "libmf/model"
|
6
|
+
require "libmf/version"
|
7
|
+
|
8
|
+
# Top-level namespace for the LIBMF bindings.
#
# Builds the list of shared-library candidates (+Libmf.ffi_lib+) and defers
# loading of the FFI bindings until Libmf::FFI is first referenced.
module Libmf
  # Error raised by this library (e.g. "Not fit", "No data").
  class Error < StandardError; end

  class << self
    # Array of library names/paths to try when loading LIBMF.
    # Can be modified before Libmf::FFI is first used.
    attr_accessor :ffi_lib
  end
  self.ffi_lib = ["mf"]

  lib_path =
    if ::FFI::Platform.windows?
      # The prebuilt DLL is packaged at vendor/libmf/windows/mf.dll
      # (path is relative to this file's directory, lib/).
      "../vendor/libmf/windows/mf.dll"
    else
      # The compiled extension is named with the platform's extension suffix
      # ("bundle" on macOS, "so" on Linux), so do not hard-code "bundle".
      require "rbconfig"
      "libmf.#{RbConfig::CONFIG["DLEXT"]}"
    end
  self.ffi_lib << File.expand_path(lib_path, __dir__)

  # friendlier error message
  # (autoload postpones loading libmf/ffi until Libmf::FFI is referenced)
  autoload :FFI, "libmf/ffi"
end
|
data/lib/libmf/ffi.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Libmf
  # Raw FFI bindings for the LIBMF shared library: struct layouts plus the
  # C functions attached as module methods.
  module FFI
    extend ::FFI::Library

    # Load the library from the candidates in Libmf.ffi_lib. Unless the
    # LIBMF_DEBUG environment variable is set, the loader's own error is
    # replaced by a shorter message.
    begin
      ffi_lib Libmf.ffi_lib
    rescue LoadError => e
      raise(ENV["LIBMF_DEBUG"] ? e : LoadError.new("Could not find LIBMF"))
    end

    # Struct with two int fields (u, v) and one float field (r).
    class Node < ::FFI::Struct
      layout(
        :u, :int,
        :v, :int,
        :r, :float
      )
    end

    # Struct with dimensions m and n, entry count nnz and a pointer r.
    class Problem < ::FFI::Struct
      layout(
        :m, :int,
        :n, :int,
        :nnz, :long_long,
        :r, :pointer
      )
    end

    # Training parameters passed by value to the training functions.
    class Parameter < ::FFI::Struct
      layout(
        :fun, :int,
        :k, :int,
        :nr_threads, :int,
        :nr_bins, :int,
        :nr_iters, :int,
        :lambda_p1, :float,
        :lambda_p2, :float,
        :lambda_q1, :float,
        :lambda_q2, :float,
        :eta, :float,
        :alpha, :float,
        :c, :float,
        :do_nmf, :bool,
        :quiet, :bool,
        :copy_data, :bool
      )
    end

    # Model struct: fun, sizes m/n/k, float b and pointers p and q.
    class Model < ::FFI::Struct
      layout(
        :fun, :int,
        :m, :int,
        :n, :int,
        :k, :int,
        :b, :float,
        :p, :pointer,
        :q, :pointer
      )
    end

    # C function name => [argument types, return type]; attached in this order.
    {
      mf_get_default_param: [[], Parameter.by_value],
      mf_save_model: [[Model.by_ref, :string], :int],
      mf_load_model: [[:string], Model.by_ref],
      mf_destroy_model: [[Model.by_ref], :void],
      mf_train: [[Problem.by_ref, Parameter.by_value], Model.by_ref],
      mf_train_with_validation: [[Problem.by_ref, Problem.by_ref, Parameter.by_value], Model.by_ref],
      mf_predict: [[Model.by_ref, :int, :int], :float],
      mf_cross_validation: [[Problem.by_ref, :int, Parameter.by_value], :double]
    }.each do |name, (arg_types, return_type)|
      attach_function name, arg_types, return_type
    end
  end
end
|
data/lib/libmf/model.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
module Libmf
  # Matrix-factorization model backed by the LIBMF C API (via Libmf::FFI).
  #
  # Training data is an array of +[row_index, column_index, value]+ triples.
  class Model
    # @param options [Hash] parameter overrides, keyed by the field names of
    #   FFI::Parameter (e.g. +k:+, +nr_iters:+); applied on top of the
    #   library defaults each time parameters are built.
    def initialize(**options)
      @options = options
    end

    # Trains the model on +data+, optionally with a validation set.
    #
    # @param data [Array<Array>] training triples
    # @param eval_set [Array<Array>, nil] validation triples
    # @return [nil]
    # @raise [Libmf::Error] if +data+ (or a given +eval_set+) is empty
    def fit(data, eval_set: nil)
      train_set = create_problem(data)

      @model =
        if eval_set
          eval_set = create_problem(eval_set)
          FFI.mf_train_with_validation(train_set, eval_set, param)
        else
          FFI.mf_train(train_set, param)
        end

      nil
    end

    # Returns the value of FFI.mf_predict for the given row/column indices.
    #
    # @raise [Libmf::Error] if the model has not been fit or loaded
    def predict(row, column)
      FFI.mf_predict(model, row, column)
    end

    # Runs cross-validation on +data+ and returns the value reported by
    # FFI.mf_cross_validation.
    #
    # @param folds [Integer] number of folds
    # @raise [Libmf::Error] if +data+ is empty
    def cv(data, folds: 5)
      problem = create_problem(data)
      FFI.mf_cross_validation(problem, folds, param)
    end

    # Writes the model to +path+; returns the integer status from
    # FFI.mf_save_model.
    #
    # @raise [Libmf::Error] if the model has not been fit or loaded
    def save_model(path)
      FFI.mf_save_model(model, path)
    end

    # Replaces the current model with the one read from +path+.
    def load_model(path)
      @model = FFI.mf_load_model(path)
    end

    # Number of rows (the model's +m+ field).
    def rows
      model[:m]
    end

    # Number of columns (the model's +n+ field).
    def columns
      model[:n]
    end

    # Number of latent factors (the model's +k+ field).
    def factors
      model[:k]
    end

    # The model's +b+ field.
    def bias
      model[:b]
    end

    # Latent factors for rows as a +rows x factors+ nested array; element
    # +i+ holds the factors of row index +i+.
    def p_factors
      reshape(model[:p].read_array_of_float(factors * rows), [rows, factors])
    end

    # Latent factors for columns as a +columns x factors+ nested array;
    # element +j+ holds the factors of column index +j+.
    def q_factors
      reshape(model[:q].read_array_of_float(factors * columns), [columns, factors])
    end

    private

    # Returns the underlying FFI model, raising if none is present yet.
    def model
      raise Error, "Not fit" unless @model
      @model
    end

    # Builds an FFI::Parameter from the library defaults plus user options.
    def param
      param = FFI.mf_get_default_param
      # silence insufficient blocks warning with default params
      options = {nr_bins: 25}.merge(@options)
      options.each do |k, v|
        param[k] = v
      end
      param
    end

    # Converts an array of +[row, column, value]+ triples into an
    # FFI::Problem. The matrix dimensions are the largest row/column index
    # seen plus one.
    #
    # @raise [Libmf::Error] if +data+ is empty
    def create_problem(data)
      raise Error, "No data" if data.empty?

      nodes = []
      r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
      data.each_with_index do |row, i|
        node = FFI::Node.new(r[i])
        node[:u] = row[0]
        node[:v] = row[1]
        node[:r] = row[2]
        nodes << node
      end

      m = nodes.map { |node| node[:u] }.max + 1
      n = nodes.map { |node| node[:v] }.max + 1

      prob = FFI::Problem.new
      prob[:m] = m
      prob[:n] = n
      prob[:nnz] = nodes.size
      prob[:r] = r
      prob
    end

    # Splits a flat, row-major array into +dims.first+ rows of +dims.last+
    # elements each.
    #
    # LIBMF stores each row's (or column's) factors contiguously, so
    # consecutive runs of +dims.last+ values form one output row. The
    # previous implementation assigned element +i+ to row +i % rows+, which
    # interleaved values from different rows.
    def reshape(arr, dims)
      row_count, width = dims
      # each_slice rejects a slice size of 0; keep the old degenerate result
      return Array.new(row_count) { [] } if width.zero?

      arr.each_slice(width).to_a
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
Copyright (c) 2014-2015 The LIBMF Project.
|
3
|
+
All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions
|
7
|
+
are met:
|
8
|
+
|
9
|
+
1. Redistributions of source code must retain the above copyright
|
10
|
+
notice, this list of conditions and the following disclaimer.
|
11
|
+
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright
|
13
|
+
notice, this list of conditions and the following disclaimer in the
|
14
|
+
documentation and/or other materials provided with the distribution.
|
15
|
+
|
16
|
+
3. Neither name of copyright holders nor the names of its contributors
|
17
|
+
may be used to endorse or promote products derived from this software
|
18
|
+
without specific prior written permission.
|
19
|
+
|
20
|
+
|
21
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
22
|
+
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
23
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
24
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
|
25
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
26
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
27
|
+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
28
|
+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
29
|
+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
30
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
31
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Build file for the LIBMF command-line tools (mf-train, mf-predict) and the
# optional shared library (make lib).

CXX = g++
CXXFLAGS = -Wall -O3 -pthread -std=c++0x -march=native
OMPFLAG = -fopenmp
SHVER = 2

# run `make clean all' if you change the following flags.

# comment out the following flag if you want to disable SSE or enable AVX
DFLAG = -DUSESSE

# uncomment the following flags if you want to use AVX
#DFLAG = -DUSEAVX
#CXXFLAGS += -mavx

# comment out the following flags if you do not want to use OpenMP
DFLAG += -DUSEOMP
CXXFLAGS += $(OMPFLAG)

# these targets are commands, not files
.PHONY: all lib clean

all: mf-train mf-predict

# mf.o must be listed so `make lib' works from a clean tree
lib: mf.o
	$(CXX) -shared -Wl,-soname,libmf.so.$(SHVER) -o libmf.so.$(SHVER) $^

mf-train: mf-train.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

mf-predict: mf-predict.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

# -fPIC so the object can also be linked into the shared library
mf.o: mf.cpp mf.h
	$(CXX) $(CXXFLAGS) $(DFLAG) -c -fPIC -o $@ $<

clean:
	rm -f mf-train mf-predict mf.o libmf.so.$(SHVER)
|