libmf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +125 -0
- data/ext/libmf/extconf.rb +18 -0
- data/lib/libmf.bundle +0 -0
- data/lib/libmf.rb +26 -0
- data/lib/libmf/ffi.rb +62 -0
- data/lib/libmf/model.rb +112 -0
- data/lib/libmf/version.rb +3 -0
- data/vendor/libmf/COPYRIGHT +31 -0
- data/vendor/libmf/Makefile +34 -0
- data/vendor/libmf/Makefile.win +36 -0
- data/vendor/libmf/README +637 -0
- data/vendor/libmf/demo/all_one_matrix.te.txt +1382 -0
- data/vendor/libmf/demo/all_one_matrix.tr.txt +5172 -0
- data/vendor/libmf/demo/binary_matrix.te.txt +1312 -0
- data/vendor/libmf/demo/binary_matrix.tr.txt +4937 -0
- data/vendor/libmf/demo/demo.bat +40 -0
- data/vendor/libmf/demo/demo.sh +58 -0
- data/vendor/libmf/demo/real_matrix.te.txt +794 -0
- data/vendor/libmf/demo/real_matrix.tr.txt +5000 -0
- data/vendor/libmf/mf-predict.cpp +207 -0
- data/vendor/libmf/mf-train.cpp +378 -0
- data/vendor/libmf/mf.cpp +4683 -0
- data/vendor/libmf/mf.def +21 -0
- data/vendor/libmf/mf.h +130 -0
- data/vendor/libmf/windows/mf-predict.exe +0 -0
- data/vendor/libmf/windows/mf-train.exe +0 -0
- data/vendor/libmf/windows/mf.dll +0 -0
- metadata +142 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 85fc60af42649286b87cf23130c0efafd0d8951423d31b187d13097b2418e7d1
|
4
|
+
data.tar.gz: ab568af8e036b6d38fcc604746eeda4fa29e2e6e7541e6af53b77f9979e9fe82
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: efcbffd9ed9e6f66911a63e74d694da77d798d0fde04cb1490c1ee4eaf8d9e1e93b1af13ab8e23a4a858b74503d28a616ad6499a590f4a1568df2a9dcb65d85f
|
7
|
+
data.tar.gz: 671b306cad36c2ea5da6633de6703892e19fbb5774cf22a0d458ba3d967ab50dc5ae56ed271771e4ed1a0483346d8d9f34ec5db9c8424d16f369e81c5f467857
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
# LIBMF
|
2
|
+
|
3
|
+
[LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
|
4
|
+
|
5
|
+
:fire: Uses the C API for blazing performance
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application’s Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'libmf'
|
13
|
+
```
|
14
|
+
|
15
|
+
## Getting Started
|
16
|
+
|
17
|
+
Prep your data in the format `[row_index, column_index, value]`
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
data = [
|
21
|
+
[0, 0, 5.0],
|
22
|
+
[0, 2, 3.5],
|
23
|
+
[1, 1, 4.0]
|
24
|
+
]
|
25
|
+
```
|
26
|
+
|
27
|
+
Create a model
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
model = Libmf::Model.new
|
31
|
+
model.fit(data)
|
32
|
+
```
|
33
|
+
|
34
|
+
Make predictions
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
model.predict(row_index, column_index)
|
38
|
+
```
|
39
|
+
|
40
|
+
Get the bias and latent factors
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
model.bias
|
44
|
+
model.p_factors
|
45
|
+
model.q_factors
|
46
|
+
```
|
47
|
+
|
48
|
+
Save the model to a file
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
model.save_model("model.txt")
|
52
|
+
```
|
53
|
+
|
54
|
+
Load the model from a file
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
model.load_model("model.txt")
|
58
|
+
```
|
59
|
+
|
60
|
+
Pass a validation set
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
model.fit(data, eval_set: eval_set)
|
64
|
+
```
|
65
|
+
|
66
|
+
## Parameters
|
67
|
+
|
68
|
+
Pass parameters
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
model = Libmf::Model.new(k: 20, nr_iters: 50)
|
72
|
+
```
|
73
|
+
|
74
|
+
Supports the same parameters as LIBMF
|
75
|
+
|
76
|
+
```text
|
77
|
+
variable meaning default
|
78
|
+
================================================================
|
79
|
+
fun loss function 0
|
80
|
+
k number of latent factors 8
|
81
|
+
nr_threads number of threads used 12
|
82
|
+
nr_bins number of bins 25
|
83
|
+
nr_iters number of iterations 20
|
84
|
+
lambda_p1 coefficient of L1-norm regularization on P 0
|
85
|
+
lambda_p2 coefficient of L2-norm regularization on P 0.1
|
86
|
+
lambda_q1 coefficient of L1-norm regularization on Q 0
|
87
|
+
lambda_q2 coefficient of L2-norm regularization on Q 0.1
|
88
|
+
eta learning rate 0.1
|
89
|
+
alpha importance of negative entries 0.1
|
90
|
+
c desired value of negative entries 0.0001
|
91
|
+
do_nmf perform non-negative MF (NMF) false
|
92
|
+
quiet no outputs to stdout false
|
93
|
+
copy_data copy data in training procedure true
|
94
|
+
```
|
95
|
+
|
96
|
+
## Cross-Validation
|
97
|
+
|
98
|
+
Perform cross-validation
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
model.cv(data)
|
102
|
+
```
|
103
|
+
|
104
|
+
Specify the number of folds
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
model.cv(data, folds: 5)
|
108
|
+
```
|
109
|
+
|
110
|
+
## Resources
|
111
|
+
|
112
|
+
- [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf)
|
113
|
+
|
114
|
+
## History
|
115
|
+
|
116
|
+
View the [changelog](https://github.com/ankane/libmf/blob/master/CHANGELOG.md)
|
117
|
+
|
118
|
+
## Contributing
|
119
|
+
|
120
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
121
|
+
|
122
|
+
- [Report bugs](https://github.com/ankane/libmf/issues)
|
123
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
|
124
|
+
- Write, clarify, or fix documentation
|
125
|
+
- Suggest or add new features
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require "mkmf"
|
2
|
+
|
3
|
+
# Build configuration for the native LIBMF library.
#
# MinGW builds get a dummy Makefile (nothing is compiled); every other
# platform compiles mf.cpp from the vendored LIBMF sources.
target_arch = RbConfig::CONFIG["arch"]

if target_arch =~ /mingw/
  # Nothing to compile here: write a no-op Makefile so installation succeeds.
  File.write("Makefile", dummy_makefile("libmf").join)
else
  unless have_library("stdc++")
    abort "Missing stdc++"
  end

  # mf.cpp is compiled as C++11
  $CXXFLAGS << " -std=c++11"

  # TODO
  # if have_library("libomp")
  # end

  # Only mf.o goes into the extension; the source directory passed to
  # create_makefile is the vendored LIBMF tree.
  $objs = ["mf.o"]
  libmf_source_dir = File.expand_path("../../vendor/libmf", __dir__)
  create_makefile("libmf", libmf_source_dir)
end
|
data/lib/libmf.bundle
ADDED
Binary file
|
data/lib/libmf.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# dependencies
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "libmf/model"
|
6
|
+
require "libmf/version"
|
7
|
+
|
8
|
+
module Libmf
  # Error class raised by this library (e.g. "Not fit", "No data").
  class Error < StandardError; end

  class << self
    # List of library names/paths that Libmf::FFI hands to `ffi_lib`.
    # Callers may replace or extend it before Libmf::FFI is first referenced.
    attr_accessor :ffi_lib
  end
  self.ffi_lib = ["mf"]

  # Besides a system-wide "mf" library, also offer the copy shipped with the gem.
  lib_path =
    if ::FFI::Platform.windows?
      # The prebuilt DLL is packaged at vendor/libmf/windows/mf.dll.
      "../vendor/libmf/windows/mf.dll"
    else
      # The compiled extension uses the platform's extension suffix
      # ("bundle" on macOS, "so" on Linux), so do not hard-code it.
      require "rbconfig"
      "libmf.#{RbConfig::CONFIG["DLEXT"]}"
    end
  self.ffi_lib << File.expand_path(lib_path, __dir__)

  # friendlier error message
  # (autoload defers loading the bindings until Libmf::FFI is first used)
  autoload :FFI, "libmf/ffi"
end
|
data/lib/libmf/ffi.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Libmf
  # Low-level FFI bindings: struct layouts and function signatures for the
  # native LIBMF library. Field order in every layout must stay exactly as
  # declared here, since it mirrors the native struct definitions.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib Libmf.ffi_lib
    rescue LoadError => load_error
      # With LIBMF_DEBUG set, surface the loader's original error;
      # otherwise raise a short message.
      raise load_error if ENV["LIBMF_DEBUG"]
      raise LoadError, "Could not find LIBMF"
    end

    # One matrix entry: row index, column index, value.
    class Node < ::FFI::Struct
      layout :u, :int,
        :v, :int,
        :r, :float
    end

    # A sparse matrix: dimensions, entry count, and a pointer to the entries.
    class Problem < ::FFI::Struct
      layout :m, :int,
        :n, :int,
        :nnz, :long_long,
        :r, :pointer
    end

    # Training parameters (see the README's parameter table).
    class Parameter < ::FFI::Struct
      layout :fun, :int,
        :k, :int,
        :nr_threads, :int,
        :nr_bins, :int,
        :nr_iters, :int,
        :lambda_p1, :float,
        :lambda_p2, :float,
        :lambda_q1, :float,
        :lambda_q2, :float,
        :eta, :float,
        :alpha, :float,
        :c, :float,
        :do_nmf, :bool,
        :quiet, :bool,
        :copy_data, :bool
    end

    # A trained model: dimensions, bias, and pointers to the P and Q factors.
    class Model < ::FFI::Struct
      layout :fun, :int,
        :m, :int,
        :n, :int,
        :k, :int,
        :b, :float,
        :p, :pointer,
        :q, :pointer
    end

    # Type handles shared by the signatures below.
    model_ref = Model.by_ref
    problem_ref = Problem.by_ref
    parameter_value = Parameter.by_value

    attach_function :mf_get_default_param, [], parameter_value
    attach_function :mf_save_model, [model_ref, :string], :int
    attach_function :mf_load_model, [:string], model_ref
    attach_function :mf_destroy_model, [model_ref], :void
    attach_function :mf_train, [problem_ref, parameter_value], model_ref
    attach_function :mf_train_with_validation, [problem_ref, problem_ref, parameter_value], model_ref
    attach_function :mf_predict, [model_ref, :int, :int], :float
    attach_function :mf_cross_validation, [problem_ref, :int, parameter_value], :double
  end
end
|
data/lib/libmf/model.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
module Libmf
  # High-level wrapper around a LIBMF model: training, prediction,
  # cross-validation, persistence, and access to the learned factors.
  class Model
    # options - LIBMF parameters (e.g. k:, nr_iters:) applied on top of the
    #           library defaults each time a parameter struct is built.
    def initialize(**options)
      @options = options
    end

    # Trains the model on data, an array of [row_index, column_index, value].
    # eval_set, when given, has the same format and is used for validation.
    # Returns nil. Raises Error if data is empty or training yields no model.
    def fit(data, eval_set: nil)
      train_set = create_problem(data)

      trained =
        if eval_set
          eval_set = create_problem(eval_set)
          FFI.mf_train_with_validation(train_set, eval_set, param)
        else
          FFI.mf_train(train_set, param)
        end

      # A NULL result would otherwise pass the "Not fit" guard and be
      # dereferenced by later calls.
      raise Error, "fit failed" if trained.null?
      @model = trained

      nil
    end

    # Returns the predicted value for the given row and column index.
    def predict(row, column)
      FFI.mf_predict(model, row, column)
    end

    # Runs cross-validation over data with the given number of folds and
    # returns the value reported by mf_cross_validation.
    def cv(data, folds: 5)
      problem = create_problem(data)
      FFI.mf_cross_validation(problem, folds, param)
    end

    # Writes the model to path; returns the integer status from mf_save_model.
    def save_model(path)
      FFI.mf_save_model(model, path)
    end

    # Loads a model from path and returns it.
    # Raises Error when the file cannot be loaded; a previously loaded or
    # trained model is kept in that case.
    def load_model(path)
      loaded = FFI.mf_load_model(path)
      raise Error, "Cannot open model" if loaded.null?
      @model = loaded
    end

    # Number of rows (m) in the model.
    def rows
      model[:m]
    end

    # Number of columns (n) in the model.
    def columns
      model[:n]
    end

    # Number of latent factors (k) in the model.
    def factors
      model[:k]
    end

    # Bias term (b) of the model.
    def bias
      model[:b]
    end

    # Row factors as a nested array with `rows` entries of `factors` floats.
    def p_factors
      reshape(model[:p].read_array_of_float(factors * rows), [rows, factors])
    end

    # Column factors as a nested array with `columns` entries of `factors` floats.
    def q_factors
      reshape(model[:q].read_array_of_float(factors * columns), [columns, factors])
    end

    private

    # Returns the underlying model struct, raising Error before fit/load_model.
    def model
      raise Error, "Not fit" unless @model
      @model
    end

    # Builds a parameter struct from the library defaults plus user options.
    def param
      param = FFI.mf_get_default_param
      # silence insufficient blocks warning with default params
      options = {nr_bins: 25}.merge(@options)
      options.each do |k, v|
        param[k] = v
      end
      param
    end

    # Converts an array of [row_index, column_index, value] into a Problem
    # struct. Dimensions are taken as the largest index seen plus one.
    def create_problem(data)
      raise Error, "No data" if data.empty?

      nodes = []
      r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
      data.each_with_index do |row, i|
        node = FFI::Node.new(r[i])
        node[:u] = row[0]
        node[:v] = row[1]
        node[:r] = row[2]
        nodes << node
      end

      m = nodes.map { |node| node[:u] }.max + 1
      n = nodes.map { |node| node[:v] }.max + 1

      prob = FFI::Problem.new
      prob[:m] = m
      prob[:n] = n
      prob[:nnz] = nodes.size
      prob[:r] = r
      prob
    end

    # Splits the flat array arr into dims.first rows of dims.last values each.
    # LIBMF lays each row's factors out contiguously (row-major), so
    # consecutive values belong to the same row.
    def reshape(arr, dims)
      rows, cols = dims.first, dims.last
      # each_slice rejects a slice size of zero; zero-width rows are empty
      return Array.new(rows) { [] } if cols.zero?

      arr.each_slice(cols).to_a
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
Copyright (c) 2014-2015 The LIBMF Project.
|
3
|
+
All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions
|
7
|
+
are met:
|
8
|
+
|
9
|
+
1. Redistributions of source code must retain the above copyright
|
10
|
+
notice, this list of conditions and the following disclaimer.
|
11
|
+
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright
|
13
|
+
notice, this list of conditions and the following disclaimer in the
|
14
|
+
documentation and/or other materials provided with the distribution.
|
15
|
+
|
16
|
+
3. Neither name of copyright holders nor the names of its contributors
|
17
|
+
may be used to endorse or promote products derived from this software
|
18
|
+
without specific prior written permission.
|
19
|
+
|
20
|
+
|
21
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
22
|
+
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
23
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
24
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
|
25
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
26
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
27
|
+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
28
|
+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
29
|
+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
30
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
31
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
CXX = g++
CXXFLAGS = -Wall -O3 -pthread -std=c++0x -march=native
OMPFLAG = -fopenmp
SHVER = 2

# run `make clean all' if you change the following flags.

# comment the following flag if you want to disable SSE or enable AVX
DFLAG = -DUSESSE

# uncomment the following flags if you want to use AVX
#DFLAG = -DUSEAVX
#CXXFLAGS += -mavx

# comment out the following flags if you do not want to use OpenMP
DFLAG += -DUSEOMP
CXXFLAGS += $(OMPFLAG)

# these targets name actions, not files
.PHONY: all lib clean

all: mf-train mf-predict

# build the shared library; mf.o is a prerequisite so `make lib' works
# from a clean tree
lib: mf.o
	$(CXX) -shared -Wl,-soname,libmf.so.$(SHVER) -o libmf.so.$(SHVER) $^

mf-train: mf-train.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

mf-predict: mf-predict.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

# -fPIC so the same object can be linked into the shared library
mf.o: mf.cpp mf.h
	$(CXX) $(CXXFLAGS) $(DFLAG) -c -fPIC -o $@ $<

clean:
	rm -f mf-train mf-predict mf.o libmf.so.$(SHVER)
|