libmf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 85fc60af42649286b87cf23130c0efafd0d8951423d31b187d13097b2418e7d1
4
+ data.tar.gz: ab568af8e036b6d38fcc604746eeda4fa29e2e6e7541e6af53b77f9979e9fe82
5
+ SHA512:
6
+ metadata.gz: efcbffd9ed9e6f66911a63e74d694da77d798d0fde04cb1490c1ee4eaf8d9e1e93b1af13ab8e23a4a858b74503d28a616ad6499a590f4a1568df2a9dcb65d85f
7
+ data.tar.gz: 671b306cad36c2ea5da6633de6703892e19fbb5774cf22a0d458ba3d967ab50dc5ae56ed271771e4ed1a0483346d8d9f34ec5db9c8424d16f369e81c5f467857
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0
2
+
3
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,125 @@
1
+ # LIBMF
2
+
3
+ [LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
4
+
5
+ :fire: Uses the C API for blazing performance
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem 'libmf'
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Prep your data in the format `[row_index, column_index, value]`
18
+
19
+ ```ruby
20
+ data = [
21
+ [0, 0, 5.0],
22
+ [0, 2, 3.5],
23
+ [1, 1, 4.0]
24
+ ]
25
+ ```
26
+
27
+ Create a model
28
+
29
+ ```ruby
30
+ model = Libmf::Model.new
31
+ model.fit(data)
32
+ ```
33
+
34
+ Make predictions
35
+
36
+ ```ruby
37
+ model.predict(row_index, column_index)
38
+ ```
39
+
40
+ Get the bias and latent factors
41
+
42
+ ```ruby
43
+ model.bias
44
+ model.p_factors
45
+ model.q_factors
46
+ ```
47
+
48
+ Save the model to a file
49
+
50
+ ```ruby
51
+ model.save_model("model.txt")
52
+ ```
53
+
54
+ Load the model from a file
55
+
56
+ ```ruby
57
+ model.load_model("model.txt")
58
+ ```
59
+
60
+ Pass a validation set
61
+
62
+ ```ruby
63
+ model.fit(data, eval_set: eval_set)
64
+ ```
65
+
66
+ ## Parameters
67
+
68
+ Pass parameters
69
+
70
+ ```ruby
71
+ model = Libmf::Model.new(k: 20, nr_iters: 50)
72
+ ```
73
+
74
+ Supports the same parameters as LIBMF
75
+
76
+ ```text
77
+ variable meaning default
78
+ ================================================================
79
+ fun loss function 0
80
+ k number of latent factors 8
81
+ nr_threads number of threads used 12
82
+ nr_bins number of bins 25
83
+ nr_iters number of iterations 20
84
+ lambda_p1 coefficient of L1-norm regularization on P 0
85
+ lambda_p2 coefficient of L2-norm regularization on P 0.1
86
+ lambda_q1 coefficient of L1-norm regularization on Q 0
87
+ lambda_q2 coefficient of L2-norm regularization on Q 0.1
88
+ eta learning rate 0.1
89
+ alpha importance of negative entries 0.1
90
+ c desired value of negative entries 0.0001
91
+ do_nmf perform non-negative MF (NMF) false
92
+ quiet no outputs to stdout false
93
+ copy_data copy data in training procedure true
94
+ ```
95
+
96
+ ## Cross-Validation
97
+
98
+ Perform cross-validation
99
+
100
+ ```ruby
101
+ model.cv(data)
102
+ ```
103
+
104
+ Specify the number of folds
105
+
106
+ ```ruby
107
+ model.cv(data, folds: 5)
108
+ ```
109
+
110
+ ## Resources
111
+
112
+ - [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf)
113
+
114
+ ## History
115
+
116
+ View the [changelog](https://github.com/ankane/libmf/blob/master/CHANGELOG.md)
117
+
118
+ ## Contributing
119
+
120
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
121
+
122
+ - [Report bugs](https://github.com/ankane/libmf/issues)
123
+ - Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
124
+ - Write, clarify, or fix documentation
125
+ - Suggest or add new features
@@ -0,0 +1,18 @@
1
require "mkmf"

# Extension build script: decides how the native LIBMF library is produced
# for the platform Ruby itself was built for (RbConfig "arch").
if RbConfig::CONFIG["arch"] =~ /mingw/
  # MinGW (Windows): write out the Makefile text returned by mkmf's
  # dummy_makefile instead of configuring a real build. lib/libmf.rb points
  # at ../vendor/windows/mf.dll on Windows.
  File.write("Makefile", dummy_makefile("libmf").join)
else
  # Everything else: compile the vendored sources as C++11, linking stdc++.
  abort "Missing stdc++" unless have_library("stdc++")
  $CXXFLAGS << " -std=c++11"

  # TODO
  # if have_library("libomp")
  # end

  # Only mf.o goes into the extension; the vendored directory is handed to
  # create_makefile as its second argument.
  $objs = ["mf.o"]
  create_makefile("libmf", File.expand_path("../../vendor/libmf", __dir__))
end
data/lib/libmf.bundle ADDED
Binary file
data/lib/libmf.rb ADDED
@@ -0,0 +1,26 @@
1
+ # dependencies
2
+ require "ffi"
3
+
4
+ # modules
5
+ require "libmf/model"
6
+ require "libmf/version"
7
+
8
# Top-level namespace for the LIBMF bindings.
module Libmf
  # Raised for user-facing failures (e.g. using a model before it is fit).
  class Error < StandardError; end

  class << self
    # Array of library names/paths handed to FFI's ffi_lib, tried in order.
    # Callers may replace it before Libmf::FFI is first referenced.
    attr_accessor :ffi_lib
  end

  # Location of the bundled shared library, relative to this directory.
  # The compiled extension's suffix differs per platform ("bundle" on macOS,
  # "so" on Linux, ...), so take it from RbConfig instead of hard-coding the
  # macOS name; on macOS this still resolves to "libmf.bundle".
  bundled_lib =
    if ::FFI::Platform.windows?
      "../vendor/windows/mf.dll"
    else
      "libmf.#{RbConfig::CONFIG["DLEXT"]}"
    end

  # Try a system-wide "mf" library first, then the bundled copy.
  self.ffi_lib = ["mf", File.expand_path(bundled_lib, __dir__)]

  # friendlier error message
  # (libmf/ffi is only loaded - and the shared library only opened - when
  # Libmf::FFI is first referenced)
  autoload :FFI, "libmf/ffi"
end
data/lib/libmf/ffi.rb ADDED
@@ -0,0 +1,62 @@
1
module Libmf
  # Raw bindings to the LIBMF shared library: struct layouts plus the mf_*
  # functions used by Libmf::Model.
  module FFI
    extend ::FFI::Library

    # Open the first loadable entry of Libmf.ffi_lib. The underlying
    # LoadError is replaced by a short message unless LIBMF_DEBUG is set.
    begin
      ffi_lib Libmf.ffi_lib
    rescue LoadError => load_error
      raise load_error if ENV["LIBMF_DEBUG"]

      raise LoadError, "Could not find LIBMF"
    end

    # One matrix entry; Model#create_problem stores the row index in :u,
    # the column index in :v and the value in :r.
    class Node < ::FFI::Struct
      layout(
        :u, :int,
        :v, :int,
        :r, :float
      )
    end

    # A data set: :m and :n are set to max index + 1 by Model#create_problem,
    # :nnz to the number of entries, :r to the array of Node structs.
    class Problem < ::FFI::Struct
      layout(
        :m,   :int,
        :n,   :int,
        :nnz, :long_long,
        :r,   :pointer
      )
    end

    # Training parameters; field names match the table in the README.
    class Parameter < ::FFI::Struct
      layout(
        :fun,        :int,
        :k,          :int,
        :nr_threads, :int,
        :nr_bins,    :int,
        :nr_iters,   :int,
        :lambda_p1,  :float,
        :lambda_p2,  :float,
        :lambda_q1,  :float,
        :lambda_q2,  :float,
        :eta,        :float,
        :alpha,      :float,
        :c,          :float,
        :do_nmf,     :bool,
        :quiet,      :bool,
        :copy_data,  :bool
      )
    end

    # A model: dimensions (:m, :n, :k), bias (:b) and pointers to the
    # float arrays read by Model#p_factors / Model#q_factors.
    class Model < ::FFI::Struct
      layout(
        :fun, :int,
        :m,   :int,
        :n,   :int,
        :k,   :int,
        :b,   :float,
        :p,   :pointer,
        :q,   :pointer
      )
    end

    # Parameters
    attach_function(:mf_get_default_param, [], Parameter.by_value)

    # Model persistence and cleanup
    attach_function(:mf_save_model, [Model.by_ref, :string], :int)
    attach_function(:mf_load_model, [:string], Model.by_ref)
    attach_function(:mf_destroy_model, [Model.by_ref], :void)

    # Training
    attach_function(:mf_train, [Problem.by_ref, Parameter.by_value], Model.by_ref)
    attach_function(
      :mf_train_with_validation,
      [Problem.by_ref, Problem.by_ref, Parameter.by_value],
      Model.by_ref
    )

    # Prediction and evaluation
    attach_function(:mf_predict, [Model.by_ref, :int, :int], :float)
    attach_function(:mf_cross_validation, [Problem.by_ref, :int, Parameter.by_value], :double)
  end
end
@@ -0,0 +1,112 @@
1
module Libmf
  # Matrix-factorization model backed by the LIBMF C API (via Libmf::FFI).
  #
  # Data sets are arrays of [row_index, column_index, value] triples.
  class Model
    # @param options [Hash] parameter overrides (k:, nr_iters:, ...); each
    #   key is written into the struct returned by mf_get_default_param.
    def initialize(**options)
      @options = options
    end

    # Trains the model.
    #
    # @param data [Array<Array>] training triples
    # @param eval_set [Array<Array>, nil] optional validation triples
    # @return [nil]
    # @raise [Libmf::Error] if a data set is empty or training yields no model
    def fit(data, eval_set: nil)
      train_set = create_problem(data)

      trained =
        if eval_set
          eval_set = create_problem(eval_set)
          FFI.mf_train_with_validation(train_set, eval_set, param)
        else
          FFI.mf_train(train_set, param)
        end

      # only replace the current model once the new one is known to be usable
      @model = check_model(trained, "Fit failed")

      nil
    end

    # @return [Float] predicted value for the given row/column indices
    # @raise [Libmf::Error] if the model has not been fit or loaded
    def predict(row, column)
      FFI.mf_predict(model, row, column)
    end

    # Runs cross-validation on data.
    #
    # @param folds [Integer] number of folds
    # @return [Float] the value returned by mf_cross_validation
    def cv(data, folds: 5)
      problem = create_problem(data)
      FFI.mf_cross_validation(problem, folds, param)
    end

    # Writes the model to path.
    #
    # @return [Integer] status returned by mf_save_model (0)
    # @raise [Libmf::Error] if mf_save_model reports a non-zero status
    def save_model(path)
      status = FFI.mf_save_model(model, path)
      raise Error, "Cannot save model" unless status.zero?
      status
    end

    # Reads a model previously written by #save_model.
    #
    # @raise [Libmf::Error] if the file cannot be loaded; the previously
    #   held model (if any) is left untouched in that case
    def load_model(path)
      @model = check_model(FFI.mf_load_model(path), "Cannot open model")
    end

    # @return [Integer] number of rows (model field :m)
    def rows
      model[:m]
    end

    # @return [Integer] number of columns (model field :n)
    def columns
      model[:n]
    end

    # @return [Integer] number of latent factors (model field :k)
    def factors
      model[:k]
    end

    # @return [Float] model bias (model field :b)
    def bias
      model[:b]
    end

    # @return [Array<Array<Float>>] one array of #factors values per row
    def p_factors
      reshape(model[:p].read_array_of_float(factors * rows), [rows, factors])
    end

    # @return [Array<Array<Float>>] one array of #factors values per column
    def q_factors
      reshape(model[:q].read_array_of_float(factors * columns), [columns, factors])
    end

    private

    # The fitted/loaded model struct; guards every accessor above.
    def model
      raise Error, "Not fit" unless @model
      @model
    end

    # Returns candidate unless the C call handed back NULL (surfaced either
    # as nil or as a struct wrapping a null pointer), in which case raises.
    def check_model(candidate, message)
      raise Error, message if candidate.nil? || candidate.null?
      candidate
    end

    # Default LIBMF parameters with the user's overrides applied.
    def param
      param = FFI.mf_get_default_param
      # silence insufficient blocks warning with default params
      options = {nr_bins: 25}.merge(@options)
      options.each do |k, v|
        param[k] = v
      end
      param
    end

    # Packs triples into a Problem struct backed by a contiguous Node array.
    # Dimensions are inferred as (largest index + 1).
    def create_problem(data)
      raise Error, "No data" if data.empty?

      nodes = []
      r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
      data.each_with_index do |row, i|
        node = FFI::Node.new(r[i])
        node[:u] = row[0]
        node[:v] = row[1]
        node[:r] = row[2]
        nodes << node
      end

      prob = FFI::Problem.new
      prob[:m] = nodes.map { |node| node[:u] }.max + 1
      prob[:n] = nodes.map { |node| node[:v] }.max + 1
      prob[:nnz] = nodes.size
      prob[:r] = r
      prob
    end

    # Splits a flat factor buffer into dims.first arrays of dims.last values.
    #
    # LIBMF keeps the k factors of one row/column next to each other (see
    # the LIBMF README on mf_model), so consecutive slices - not every
    # rows-th element - belong together.
    def reshape(arr, dims)
      count, width = dims
      # nothing to slice (also avoids each_slice(0) when width is zero)
      return Array.new(count) { [] } if arr.empty?

      arr.each_slice(width).to_a
    end
  end
end
@@ -0,0 +1,3 @@
1
module Libmf
  # Gem version; matches the 0.1.0 entry in CHANGELOG.md.
  # Frozen so the constant's string cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,31 @@
1
+
2
+ Copyright (c) 2014-2015 The LIBMF Project.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions
7
+ are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright
10
+ notice, this list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright
13
+ notice, this list of conditions and the following disclaimer in the
14
+ documentation and/or other materials provided with the distribution.
15
+
16
+ 3. Neither name of copyright holders nor the names of its contributors
17
+ may be used to endorse or promote products derived from this software
18
+ without specific prior written permission.
19
+
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,34 @@
1
CXX = g++
CXXFLAGS = -Wall -O3 -pthread -std=c++0x -march=native
OMPFLAG = -fopenmp
SHVER = 2
# file produced by `make lib'
SHLIB = libmf.so.$(SHVER)

# run `make clean all' if you change the following flags.

# comment the following flag if you want to disable SSE or enable AVX
DFLAG = -DUSESSE

# uncomment the following flags if you want to use AVX
#DFLAG = -DUSEAVX
#CXXFLAGS += -mavx

# comment the following flags if you do not want to use OpenMP
DFLAG += -DUSEOMP
CXXFLAGS += $(OMPFLAG)

# these targets name commands, not files; without this a stray file called
# `all', `lib' or `clean' would silently disable them
.PHONY: all lib clean

# do not leave a half-written target behind when a recipe fails
.DELETE_ON_ERROR:

all: mf-train mf-predict

# `lib' stays as the user-facing name; the real work hangs off the file it
# produces so that mf.o is built first (also under -j) and relinking only
# happens when mf.o changes
lib: $(SHLIB)

$(SHLIB): mf.o
	$(CXX) -shared -Wl,-soname,$@ -o $@ $^

mf-train: mf-train.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

mf-predict: mf-predict.cpp mf.o
	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^

mf.o: mf.cpp mf.h
	$(CXX) $(CXXFLAGS) $(DFLAG) -c -fPIC -o $@ $<

clean:
	$(RM) mf-train mf-predict mf.o $(SHLIB)