libmf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 85fc60af42649286b87cf23130c0efafd0d8951423d31b187d13097b2418e7d1
4
+ data.tar.gz: ab568af8e036b6d38fcc604746eeda4fa29e2e6e7541e6af53b77f9979e9fe82
5
+ SHA512:
6
+ metadata.gz: efcbffd9ed9e6f66911a63e74d694da77d798d0fde04cb1490c1ee4eaf8d9e1e93b1af13ab8e23a4a858b74503d28a616ad6499a590f4a1568df2a9dcb65d85f
7
+ data.tar.gz: 671b306cad36c2ea5da6633de6703892e19fbb5774cf22a0d458ba3d967ab50dc5ae56ed271771e4ed1a0483346d8d9f34ec5db9c8424d16f369e81c5f467857
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0
2
+
3
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,125 @@
1
+ # LIBMF
2
+
3
+ [LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
4
+
5
+ :fire: Uses the C API for blazing performance
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem 'libmf'
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Prep your data in the format `[row_index, column_index, value]`
18
+
19
+ ```ruby
20
+ data = [
21
+ [0, 0, 5.0],
22
+ [0, 2, 3.5],
23
+ [1, 1, 4.0]
24
+ ]
25
+ ```
26
+
27
+ Create a model
28
+
29
+ ```ruby
30
+ model = Libmf::Model.new
31
+ model.fit(data)
32
+ ```
33
+
34
+ Make predictions
35
+
36
+ ```ruby
37
+ model.predict(row_index, column_index)
38
+ ```
39
+
40
+ Get the bias and latent factors
41
+
42
+ ```ruby
43
+ model.bias
44
+ model.p_factors
45
+ model.q_factors
46
+ ```
47
+
48
+ Save the model to a file
49
+
50
+ ```ruby
51
+ model.save_model("model.txt")
52
+ ```
53
+
54
+ Load the model from a file
55
+
56
+ ```ruby
57
+ model.load_model("model.txt")
58
+ ```
59
+
60
+ Pass a validation set
61
+
62
+ ```ruby
63
+ model.fit(data, eval_set: eval_set)
64
+ ```
65
+
66
+ ## Parameters
67
+
68
+ Pass parameters
69
+
70
+ ```ruby
71
+ model = Libmf::Model.new(k: 20, nr_iters: 50)
72
+ ```
73
+
74
+ Supports the same parameters as LIBMF
75
+
76
+ ```text
77
+ variable meaning default
78
+ ================================================================
79
+ fun loss function 0
80
+ k number of latent factors 8
81
+ nr_threads number of threads used 12
82
+ nr_bins number of bins 25
83
+ nr_iters number of iterations 20
84
+ lambda_p1 coefficient of L1-norm regularization on P 0
85
+ lambda_p2 coefficient of L2-norm regularization on P 0.1
86
+ lambda_q1 coefficient of L1-norm regularization on Q 0
87
+ lambda_q2 coefficient of L2-norm regularization on Q 0.1
88
+ eta learning rate 0.1
89
+ alpha importance of negative entries 0.1
90
+ c desired value of negative entries 0.0001
91
+ do_nmf perform non-negative MF (NMF) false
92
+ quiet no outputs to stdout false
93
+ copy_data copy data in training procedure true
94
+ ```
95
+
96
+ ## Cross-Validation
97
+
98
+ Perform cross-validation
99
+
100
+ ```ruby
101
+ model.cv(data)
102
+ ```
103
+
104
+ Specify the number of folds
105
+
106
+ ```ruby
107
+ model.cv(data, folds: 5)
108
+ ```
109
+
110
+ ## Resources
111
+
112
+ - [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf)
113
+
114
+ ## History
115
+
116
+ View the [changelog](https://github.com/ankane/libmf/blob/master/CHANGELOG.md)
117
+
118
+ ## Contributing
119
+
120
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
121
+
122
+ - [Report bugs](https://github.com/ankane/libmf/issues)
123
+ - Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
124
+ - Write, clarify, or fix documentation
125
+ - Suggest or add new features
@@ -0,0 +1,18 @@
1
# Extension build script: generates the Makefile that compiles the vendored
# LIBMF sources into the "libmf" native library.
require "mkmf"

if RbConfig::CONFIG["arch"] =~ /mingw/
  # Nothing is compiled on MinGW; write a no-op Makefile so that
  # installation still succeeds.
  File.write("Makefile", dummy_makefile("libmf").join)
else
  abort "Missing stdc++" unless have_library("stdc++")
  $CXXFLAGS << " -std=c++11"

  # TODO
  # if have_library("libomp")
  # end

  # Link only mf.o from the vendored source directory.
  $objs = ["mf.o"]
  create_makefile("libmf", File.expand_path("../../vendor/libmf", __dir__))
end
data/lib/libmf.bundle ADDED
Binary file
data/lib/libmf.rb ADDED
@@ -0,0 +1,26 @@
1
# dependencies
require "ffi"

# modules
require "libmf/model"
require "libmf/version"

# Top-level namespace of the gem.
module Libmf
  # Error type raised by this library.
  class Error < StandardError; end

  # Libmf.ffi_lib / Libmf.ffi_lib= : list of library names and paths that
  # libmf/ffi hands to FFI's `ffi_lib` when the bindings are loaded.
  singleton_class.class_eval { attr_accessor :ffi_lib }

  # Library file shipped with the gem, relative to this directory.
  bundled_lib = ::FFI::Platform.windows? ? "../vendor/windows/mf.dll" : "libmf.bundle"

  # Try a library named "mf" first, then the bundled file.
  self.ffi_lib = ["mf", File.expand_path(bundled_lib, __dir__)]

  # friendlier error message
  # (the bindings are only loaded when Libmf::FFI is first referenced)
  autoload :FFI, "libmf/ffi"
end
data/lib/libmf/ffi.rb ADDED
@@ -0,0 +1,62 @@
1
module Libmf
  # Low-level FFI bindings to the LIBMF C API. Struct layouts must match the
  # field order and types of the corresponding C structs.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib Libmf.ffi_lib
    rescue LoadError => e
      # Set LIBMF_DEBUG to see the loader's original error instead of the
      # short message below.
      raise e if ENV["LIBMF_DEBUG"]
      raise LoadError, "Could not find LIBMF"
    end

    # One matrix entry: row index (u), column index (v) and value (r).
    class Node < ::FFI::Struct
      layout :u, :int,
             :v, :int,
             :r, :float
    end

    # A sparse matrix: m rows, n columns, nnz entries, r -> array of Node.
    class Problem < ::FFI::Struct
      layout :m, :int,
             :n, :int,
             :nnz, :long_long,
             :r, :pointer
    end

    # Training parameters (see the parameter table in the README).
    class Parameter < ::FFI::Struct
      layout :fun, :int,
             :k, :int,
             :nr_threads, :int,
             :nr_bins, :int,
             :nr_iters, :int,
             :lambda_p1, :float,
             :lambda_p2, :float,
             :lambda_q1, :float,
             :lambda_q2, :float,
             :eta, :float,
             :alpha, :float,
             :c, :float,
             :do_nmf, :bool,
             :quiet, :bool,
             :copy_data, :bool
    end

    # A trained model: m rows, n columns, k latent factors, bias b, and
    # pointers to the P and Q factor buffers.
    class Model < ::FFI::Struct
      layout :fun, :int,
             :m, :int,
             :n, :int,
             :k, :int,
             :b, :float,
             :p, :pointer,
             :q, :pointer
    end

    attach_function :mf_get_default_param, [], Parameter.by_value
    attach_function :mf_save_model, [Model.by_ref, :string], :int
    attach_function :mf_load_model, [:string], Model.by_ref
    # LIBMF declares mf_destroy_model(mf_model **model): it expects the
    # address of a model pointer, not the model pointer itself, so the
    # argument is bound as a raw :pointer. Callers must pass a pointer that
    # holds the model's address (e.g. a MemoryPointer of type :pointer).
    attach_function :mf_destroy_model, [:pointer], :void
    attach_function :mf_train, [Problem.by_ref, Parameter.by_value], Model.by_ref
    attach_function :mf_train_with_validation, [Problem.by_ref, Problem.by_ref, Parameter.by_value], Model.by_ref
    attach_function :mf_predict, [Model.by_ref, :int, :int], :float
    attach_function :mf_cross_validation, [Problem.by_ref, :int, Parameter.by_value], :double
  end
end
@@ -0,0 +1,112 @@
1
module Libmf
  # Matrix factorization model trained and queried through the LIBMF C API.
  #
  # Data is an array of [row_index, column_index, value] triples.
  class Model
    # @param options [Hash] LIBMF parameters (e.g. k:, nr_iters:) that
    #   override the library defaults; see Libmf::FFI::Parameter for names.
    def initialize(**options)
      @options = options
    end

    # Trains the model.
    #
    # @param data [Array<Array>] training triples
    # @param eval_set [Array<Array>, nil] optional validation triples
    # @return [nil]
    # @raise [Libmf::Error] if data is empty or training returns no model
    def fit(data, eval_set: nil)
      train_set = create_problem(data)

      trained =
        if eval_set
          eval_set = create_problem(eval_set)
          FFI.mf_train_with_validation(train_set, eval_set, param)
        else
          FFI.mf_train(train_set, param)
        end

      # Only replace the current model once training produced a usable one.
      raise Error, "fit failed" if missing?(trained)
      @model = trained

      nil
    end

    # @return [Float] predicted value for the given row and column indexes
    def predict(row, column)
      FFI.mf_predict(model, row, column)
    end

    # Runs cross-validation on data with the given number of folds and
    # returns the value reported by LIBMF.
    def cv(data, folds: 5)
      problem = create_problem(data)
      FFI.mf_cross_validation(problem, folds, param)
    end

    # Writes the model to path.
    #
    # @return [Integer] the C status code (0 on success)
    # @raise [Libmf::Error] if the model is not fit or LIBMF reports failure
    def save_model(path)
      status = FFI.mf_save_model(model, path)
      # A non-zero status means the file could not be written.
      raise Error, "Cannot save model" unless status.zero?
      status
    end

    # Loads a model from path, replacing the current one.
    #
    # @return [Libmf::FFI::Model] the loaded model struct
    # @raise [Libmf::Error] if the file cannot be loaded
    def load_model(path)
      loaded = FFI.mf_load_model(path)
      # LIBMF hands back a NULL pointer when the file cannot be opened.
      raise Error, "Cannot open model" if missing?(loaded)
      @model = loaded
    end

    # @return [Integer] number of rows in the fitted model
    def rows
      model[:m]
    end

    # @return [Integer] number of columns in the fitted model
    def columns
      model[:n]
    end

    # @return [Integer] number of latent factors
    def factors
      model[:k]
    end

    # @return [Float] model bias
    def bias
      model[:b]
    end

    # @return [Array<Array<Float>>] one array of latent factors per row
    def p_factors
      reshape(model[:p].read_array_of_float(factors * rows), [rows, factors])
    end

    # @return [Array<Array<Float>>] one array of latent factors per column
    def q_factors
      reshape(model[:q].read_array_of_float(factors * columns), [columns, factors])
    end

    private

    # Returns the fitted model struct or raises if there is none yet.
    def model
      raise Error, "Not fit" unless @model
      @model
    end

    # True when the native call returned no model (nil or a NULL pointer).
    def missing?(struct)
      struct.nil? || struct.null?
    end

    # Builds the parameter struct: library defaults overlaid with the
    # options given to the constructor.
    def param
      param = FFI.mf_get_default_param
      # silence insufficient blocks warning with default params
      options = {nr_bins: 25}.merge(@options)
      options.each do |key, value|
        param[key] = value
      end
      param
    end

    # Copies the triples into a native Node array and wraps it in a Problem
    # whose dimensions are the largest row/column index plus one.
    def create_problem(data)
      raise Error, "No data" if data.empty?

      buffer = ::FFI::MemoryPointer.new(FFI::Node, data.size)
      nodes =
        data.each_with_index.map do |row, i|
          node = FFI::Node.new(buffer[i])
          node[:u] = row[0]
          node[:v] = row[1]
          node[:r] = row[2]
          node
        end

      prob = FFI::Problem.new
      prob[:m] = nodes.map { |node| node[:u] }.max + 1
      prob[:n] = nodes.map { |node| node[:v] }.max + 1
      prob[:nnz] = nodes.size
      prob[:r] = buffer
      prob
    end

    # Splits a flat, row-major array into dims[0] rows of dims[1] values.
    # The factor buffers store each row's values contiguously, so row i is
    # arr[i * cols, cols].
    def reshape(arr, dims)
      row_count, col_count = dims
      # each_slice rejects a zero width; keep returning one empty array per row.
      return Array.new(row_count) { [] } if col_count.nil? || col_count.zero?

      arr.each_slice(col_count).to_a
    end
  end
end
@@ -0,0 +1,3 @@
1
module Libmf
  # Version string of this gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,31 @@
1
+
2
+ Copyright (c) 2014-2015 The LIBMF Project.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions
7
+ are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright
10
+ notice, this list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright
13
+ notice, this list of conditions and the following disclaimer in the
14
+ documentation and/or other materials provided with the distribution.
15
+
16
+ 3. Neither name of copyright holders nor the names of its contributors
17
+ may be used to endorse or promote products derived from this software
18
+ without specific prior written permission.
19
+
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,34 @@
1
# Builds the LIBMF command-line tools (mf-train, mf-predict) and, via the
# `lib' target, the shared library libmf.so.$(SHVER).

CXX = g++
CXXFLAGS = -Wall -O3 -pthread -std=c++0x -march=native
OMPFLAG = -fopenmp
# version suffix of the shared library
SHVER = 2

# run `make clean all' if you change the following flags.

# comment the following flag if you want to disable SSE or enable AVX
DFLAG = -DUSESSE

# uncomment the following flags if you want to use AVX
#DFLAG = -DUSEAVX
#CXXFLAGS += -mavx

# comment the following flags if you do not want to use OpenMP
DFLAG += -DUSEOMP
CXXFLAGS += $(OMPFLAG)

# these targets name commands, not files
.PHONY: all lib clean

all: mf-train mf-predict

# Depends on mf.o so that `make lib' works from a clean tree. CXXFLAGS is
# passed at link time so that -pthread and the OpenMP flag reach the linker.
lib: mf.o
	$(CXX) $(CXXFLAGS) -shared -Wl,-soname,libmf.so.$(SHVER) -o libmf.so.$(SHVER) $^

mf-train: mf-train.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

mf-predict: mf-predict.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

# -fPIC so the same object can be linked into the shared library
mf.o: mf.cpp mf.h
	$(CXX) $(CXXFLAGS) $(DFLAG) -c -fPIC -o $@ $<

clean:
	rm -f mf-train mf-predict mf.o libmf.so.$(SHVER)