libmf 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -4
- data/LICENSE.txt +25 -18
- data/README.md +40 -3
- data/lib/libmf.rb +7 -6
- data/lib/libmf/ffi.rb +2 -6
- data/lib/libmf/model.rb +37 -19
- data/lib/libmf/version.rb +1 -1
- data/vendor/{libmf/COPYRIGHT → COPYRIGHT} +0 -0
- data/vendor/{libmf/demo → demo}/real_matrix.te.txt +0 -0
- data/vendor/{libmf/demo → demo}/real_matrix.tr.txt +0 -0
- data/vendor/libmf.dylib +0 -0
- data/vendor/libmf.so +0 -0
- data/vendor/mf.dll +0 -0
- metadata +27 -30
- data/ext/libmf/extconf.rb +0 -18
- data/lib/libmf.bundle +0 -0
- data/vendor/libmf/Makefile +0 -34
- data/vendor/libmf/Makefile.win +0 -36
- data/vendor/libmf/README +0 -637
- data/vendor/libmf/demo/all_one_matrix.te.txt +0 -1382
- data/vendor/libmf/demo/all_one_matrix.tr.txt +0 -5172
- data/vendor/libmf/demo/binary_matrix.te.txt +0 -1312
- data/vendor/libmf/demo/binary_matrix.tr.txt +0 -4937
- data/vendor/libmf/demo/demo.bat +0 -40
- data/vendor/libmf/demo/demo.sh +0 -58
- data/vendor/libmf/mf-predict.cpp +0 -207
- data/vendor/libmf/mf-train.cpp +0 -378
- data/vendor/libmf/mf.cpp +0 -4683
- data/vendor/libmf/mf.def +0 -21
- data/vendor/libmf/mf.h +0 -130
- data/vendor/libmf/windows/mf-predict.exe +0 -0
- data/vendor/libmf/windows/mf-train.exe +0 -0
- data/vendor/libmf/windows/mf.dll +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58c3cf0afbb36ff976cb7641104f363795499023ce04c859beedf17bccaaffa1
|
4
|
+
data.tar.gz: '09a6b70c631ae252b24bd83c3cbb08ff3a9468d6b174b0255cf8223aaf77ea4d'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad1aad8cdb571fd5fa99aa121a67cf172f6686b793a89a82b555005116378a07772c6675e20308f8575419d618edfa368c0adef738da06847f0c06e860ab650e
|
7
|
+
data.tar.gz: 87f4260c2bccc5f5da8c44497d656fd367467b1d403de6584d802700359f455e55e7f9cbf71db515a5252012e27ace4cf52f5e284528b05a1a6b0be72c693a0d
|
data/CHANGELOG.md
CHANGED
@@ -1,16 +1,23 @@
|
|
1
|
-
## 0.1.3
|
1
|
+
## 0.2.0 (2020-03-26)
|
2
|
+
|
3
|
+
- Changed to BSD 3-Clause license to match LIBMF
|
4
|
+
- Added support for reading data directly from files
|
5
|
+
- Added `format: :numo` option to `p_factors` and `q_factors`
|
6
|
+
- Improved performance of loading data by 5x
|
7
|
+
|
8
|
+
## 0.1.3 (2019-11-07)
|
2
9
|
|
3
10
|
- Made parameter names more Ruby-like
|
4
11
|
- No need to set `do_nmf` with generalized KL-divergence
|
5
12
|
|
6
|
-
## 0.1.2
|
13
|
+
## 0.1.2 (2019-11-06)
|
7
14
|
|
8
15
|
- Fixed bug in `p_factors` and `q_factors` methods
|
9
16
|
|
10
|
-
## 0.1.1
|
17
|
+
## 0.1.1 (2019-11-05)
|
11
18
|
|
12
19
|
- Fixed errors on Linux and Windows
|
13
20
|
|
14
|
-
## 0.1.0
|
21
|
+
## 0.1.0 (2019-11-04)
|
15
22
|
|
16
23
|
- First release
|
data/LICENSE.txt
CHANGED
@@ -1,22 +1,29 @@
|
|
1
|
-
|
1
|
+
BSD 3-Clause License
|
2
2
|
|
3
|
-
|
3
|
+
Copyright (c) 2019-2020, Andrew Kane
|
4
|
+
All rights reserved.
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
"Software"), to deal in the Software without restriction, including
|
8
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
the following conditions:
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
12
8
|
|
13
|
-
|
14
|
-
|
9
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
10
|
+
list of conditions and the following disclaimer.
|
15
11
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
14
|
+
and/or other materials provided with the distribution.
|
15
|
+
|
16
|
+
3. Neither the name of the copyright holder nor the names of its
|
17
|
+
contributors may be used to endorse or promote products derived from
|
18
|
+
this software without specific prior written permission.
|
19
|
+
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
21
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
22
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
23
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
24
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
25
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
27
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
28
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
[LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
|
4
4
|
|
5
|
-
[
|
5
|
+
Check out [Disco](https://github.com/ankane/disco) for higher-level collaborative filtering
|
6
|
+
|
7
|
+
[Build Status (Travis CI)](https://travis-ci.org/ankane/libmf) [Build Status (AppVeyor)](https://ci.appveyor.com/project/ankane/libmf/branch/master)
|
6
8
|
|
7
9
|
## Installation
|
8
10
|
|
@@ -96,8 +98,7 @@ Libmf::Model.new(
|
|
96
98
|
alpha: 0.1, # importance of negative entries
|
97
99
|
c: 0.0001, # desired value of negative entries
|
98
100
|
nmf: false, # perform non-negative MF (NMF)
|
99
|
-
quiet: false
|
100
|
-
copy_data: true # copy data in training procedure
|
101
|
+
quiet: false # no outputs to stdout
|
101
102
|
)
|
102
103
|
```
|
103
104
|
|
@@ -121,6 +122,32 @@ For one-class matrix factorization
|
|
121
122
|
- 11 - column-oriented pair-wise logarithmic loss
|
122
123
|
- 12 - squared error (L2-norm)
|
123
124
|
|
125
|
+
## Performance
|
126
|
+
|
127
|
+
For performance, read data directly from files
|
128
|
+
|
129
|
+
```ruby
|
130
|
+
model.fit("train.txt", eval_set: "validate.txt")
|
131
|
+
model.cv("train.txt")
|
132
|
+
```
|
133
|
+
|
134
|
+
Data should be in the format `row_index column_index value`:
|
135
|
+
|
136
|
+
```txt
|
137
|
+
0 0 5.0
|
138
|
+
0 2 3.5
|
139
|
+
1 1 4.0
|
140
|
+
```
|
141
|
+
|
142
|
+
## Numo
|
143
|
+
|
144
|
+
Get latent factors as Numo arrays
|
145
|
+
|
146
|
+
```ruby
|
147
|
+
model.p_factors(format: :numo)
|
148
|
+
model.q_factors(format: :numo)
|
149
|
+
```
|
150
|
+
|
124
151
|
## Resources
|
125
152
|
|
126
153
|
- [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf)
|
@@ -137,3 +164,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
137
164
|
- Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
|
138
165
|
- Write, clarify, or fix documentation
|
139
166
|
- Suggest or add new features
|
167
|
+
|
168
|
+
To get started with development:
|
169
|
+
|
170
|
+
```sh
|
171
|
+
git clone --recursive https://github.com/ankane/libmf.git
|
172
|
+
cd libmf
|
173
|
+
bundle install
|
174
|
+
bundle exec rake vendor:all
|
175
|
+
bundle exec rake test
|
176
|
+
```
|
data/lib/libmf.rb
CHANGED
@@ -11,15 +11,16 @@ module Libmf
|
|
11
11
|
class << self
|
12
12
|
attr_accessor :ffi_lib
|
13
13
|
end
|
14
|
-
|
15
|
-
if
|
16
|
-
"
|
17
|
-
elsif ::
|
18
|
-
"libmf.
|
14
|
+
lib_name =
|
15
|
+
if Gem.win_platform?
|
16
|
+
"mf.dll"
|
17
|
+
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
18
|
+
"libmf.dylib"
|
19
19
|
else
|
20
20
|
"libmf.so"
|
21
21
|
end
|
22
|
-
|
22
|
+
vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
|
23
|
+
self.ffi_lib = [vendor_lib]
|
23
24
|
|
24
25
|
# friendlier error message
|
25
26
|
autoload :FFI, "libmf/ffi"
|
data/lib/libmf/ffi.rb
CHANGED
@@ -2,12 +2,7 @@ module Libmf
|
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
4
|
|
5
|
-
|
6
|
-
ffi_lib Libmf.ffi_lib
|
7
|
-
rescue LoadError => e
|
8
|
-
raise e if ENV["LIBMF_DEBUG"]
|
9
|
-
raise LoadError, "Could not find LIBMF"
|
10
|
-
end
|
5
|
+
ffi_lib Libmf.ffi_lib
|
11
6
|
|
12
7
|
class Node < ::FFI::Struct
|
13
8
|
layout :u, :int,
|
@@ -51,6 +46,7 @@ module Libmf
|
|
51
46
|
end
|
52
47
|
|
53
48
|
attach_function :mf_get_default_param, [], Parameter.by_value
|
49
|
+
attach_function :mf_read_problem, [:string], Problem.by_value
|
54
50
|
attach_function :mf_save_model, [Model.by_ref, :string], :int
|
55
51
|
attach_function :mf_load_model, [:string], Model.by_ref
|
56
52
|
attach_function :mf_destroy_model, [Model.by_ref], :void
|
data/lib/libmf/model.rb
CHANGED
@@ -51,16 +51,27 @@ module Libmf
|
|
51
51
|
model[:b]
|
52
52
|
end
|
53
53
|
|
54
|
-
def p_factors
|
55
|
-
|
54
|
+
def p_factors(format: nil)
|
55
|
+
_factors(model[:p], rows, format)
|
56
56
|
end
|
57
57
|
|
58
|
-
def q_factors
|
59
|
-
|
58
|
+
def q_factors(format: nil)
|
59
|
+
_factors(model[:q], columns, format)
|
60
60
|
end
|
61
61
|
|
62
62
|
private
|
63
63
|
|
64
|
+
def _factors(ptr, n, format)
|
65
|
+
case format
|
66
|
+
when :numo
|
67
|
+
Numo::SFloat.from_string(ptr.read_bytes(n * factors * 4)).reshape(n, factors)
|
68
|
+
when nil
|
69
|
+
ptr.read_array_of_float(n * factors).each_slice(factors).to_a
|
70
|
+
else
|
71
|
+
raise ArgumentError, "Invalid format"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
64
75
|
def model
|
65
76
|
raise Error, "Not fit" unless @model
|
66
77
|
@model
|
@@ -71,6 +82,7 @@ module Libmf
|
|
71
82
|
options = @options.dup
|
72
83
|
# silence insufficient blocks warning with default params
|
73
84
|
options[:bins] ||= 25 unless options[:nr_bins]
|
85
|
+
options[:copy_data] = false unless options.key?(:copy_data)
|
74
86
|
options_map = {
|
75
87
|
:loss => :fun,
|
76
88
|
:factors => :k,
|
@@ -90,31 +102,37 @@ module Libmf
|
|
90
102
|
end
|
91
103
|
|
92
104
|
def create_problem(data)
|
105
|
+
if data.is_a?(String)
|
106
|
+
# need to expand path so it's absolute
|
107
|
+
return FFI.mf_read_problem(File.expand_path(data))
|
108
|
+
end
|
109
|
+
|
93
110
|
raise Error, "No data" if data.empty?
|
94
111
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
n[:r] = row[2]
|
102
|
-
nodes << n
|
112
|
+
# TODO do in C for better performance
|
113
|
+
# can use FIX2INT() and RFLOAT_VALUE() instead of pack
|
114
|
+
buffer = String.new
|
115
|
+
data.each do |row|
|
116
|
+
row[0, 2].pack("i*".freeze, buffer: buffer)
|
117
|
+
row[2, 1].pack("f".freeze, buffer: buffer)
|
103
118
|
end
|
104
119
|
|
105
|
-
|
106
|
-
|
120
|
+
r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
|
121
|
+
r.write_bytes(buffer)
|
122
|
+
|
123
|
+
# double check size is what we expect
|
124
|
+
# FFI will throw an error above if too long
|
125
|
+
raise Error, "Bad buffer size" if r.size != buffer.bytesize
|
126
|
+
|
127
|
+
m = data.max_by { |r| r[0] }[0] + 1
|
128
|
+
n = data.max_by { |r| r[1] }[1] + 1
|
107
129
|
|
108
130
|
prob = FFI::Problem.new
|
109
131
|
prob[:m] = m
|
110
132
|
prob[:n] = n
|
111
|
-
prob[:nnz] =
|
133
|
+
prob[:nnz] = data.size
|
112
134
|
prob[:r] = r
|
113
135
|
prob
|
114
136
|
end
|
115
|
-
|
116
|
-
def reshape(arr, factors)
|
117
|
-
arr.each_slice(factors).to_a
|
118
|
-
end
|
119
137
|
end
|
120
138
|
end
|
data/lib/libmf/version.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/vendor/libmf.dylib
ADDED
Binary file
|
data/vendor/libmf.so
ADDED
Binary file
|
data/vendor/mf.dll
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libmf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -67,7 +67,21 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '5'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: benchmark-ips
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: numo-narray
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
72
86
|
requirements:
|
73
87
|
- - ">="
|
@@ -83,42 +97,25 @@ dependencies:
|
|
83
97
|
description:
|
84
98
|
email: andrew@chartkick.com
|
85
99
|
executables: []
|
86
|
-
extensions:
|
87
|
-
- ext/libmf/extconf.rb
|
100
|
+
extensions: []
|
88
101
|
extra_rdoc_files: []
|
89
102
|
files:
|
90
103
|
- CHANGELOG.md
|
91
104
|
- LICENSE.txt
|
92
105
|
- README.md
|
93
|
-
- ext/libmf/extconf.rb
|
94
|
-
- lib/libmf.bundle
|
95
106
|
- lib/libmf.rb
|
96
107
|
- lib/libmf/ffi.rb
|
97
108
|
- lib/libmf/model.rb
|
98
109
|
- lib/libmf/version.rb
|
99
|
-
- vendor/
|
100
|
-
- vendor/
|
101
|
-
- vendor/
|
102
|
-
- vendor/libmf
|
103
|
-
- vendor/libmf
|
104
|
-
- vendor/
|
105
|
-
- vendor/libmf/demo/binary_matrix.te.txt
|
106
|
-
- vendor/libmf/demo/binary_matrix.tr.txt
|
107
|
-
- vendor/libmf/demo/demo.bat
|
108
|
-
- vendor/libmf/demo/demo.sh
|
109
|
-
- vendor/libmf/demo/real_matrix.te.txt
|
110
|
-
- vendor/libmf/demo/real_matrix.tr.txt
|
111
|
-
- vendor/libmf/mf-predict.cpp
|
112
|
-
- vendor/libmf/mf-train.cpp
|
113
|
-
- vendor/libmf/mf.cpp
|
114
|
-
- vendor/libmf/mf.def
|
115
|
-
- vendor/libmf/mf.h
|
116
|
-
- vendor/libmf/windows/mf-predict.exe
|
117
|
-
- vendor/libmf/windows/mf-train.exe
|
118
|
-
- vendor/libmf/windows/mf.dll
|
110
|
+
- vendor/COPYRIGHT
|
111
|
+
- vendor/demo/real_matrix.te.txt
|
112
|
+
- vendor/demo/real_matrix.tr.txt
|
113
|
+
- vendor/libmf.dylib
|
114
|
+
- vendor/libmf.so
|
115
|
+
- vendor/mf.dll
|
119
116
|
homepage: https://github.com/ankane/libmf
|
120
117
|
licenses:
|
121
|
-
- MIT
|
118
|
+
- BSD-3-Clause
|
122
119
|
metadata: {}
|
123
120
|
post_install_message:
|
124
121
|
rdoc_options: []
|
@@ -135,8 +132,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
132
|
- !ruby/object:Gem::Version
|
136
133
|
version: '0'
|
137
134
|
requirements: []
|
138
|
-
rubygems_version: 3.
|
135
|
+
rubygems_version: 3.1.2
|
139
136
|
signing_key:
|
140
137
|
specification_version: 4
|
141
|
-
summary:
|
138
|
+
summary: Large-scale sparse matrix factorization for Ruby
|
142
139
|
test_files: []
|
data/ext/libmf/extconf.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require "mkmf"
|
2
|
-
|
3
|
-
arch = RbConfig::CONFIG["arch"]
|
4
|
-
case arch
|
5
|
-
when /mingw/
|
6
|
-
File.write("Makefile", dummy_makefile("libmf").join)
|
7
|
-
else
|
8
|
-
abort "Missing stdc++" unless have_library("stdc++")
|
9
|
-
$CXXFLAGS << " -std=c++11"
|
10
|
-
|
11
|
-
# TODO
|
12
|
-
# if have_library("libomp")
|
13
|
-
# end
|
14
|
-
|
15
|
-
$objs = ["mf.o"]
|
16
|
-
vendor_path = File.expand_path("../../vendor/libmf", __dir__)
|
17
|
-
create_makefile("libmf", vendor_path)
|
18
|
-
end
|
data/lib/libmf.bundle
DELETED
Binary file
|
data/vendor/libmf/Makefile
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
CXX = g++
|
2
|
-
CXXFLAGS = -Wall -O3 -pthread -std=c++0x -march=native
|
3
|
-
OMPFLAG = -fopenmp
|
4
|
-
SHVER = 2
|
5
|
-
|
6
|
-
# run `make clean all' if you change the following flags.
|
7
|
-
|
8
|
-
# comment the following flag if you want to disable SSE or enable AVX
|
9
|
-
DFLAG = -DUSESSE
|
10
|
-
|
11
|
-
# uncomment the following flags if you want to use AVX
|
12
|
-
#DFLAG = -DUSEAVX
|
13
|
-
#CXXFLAGS += -mavx
|
14
|
-
|
15
|
-
# uncomment the following flags if you do not want to use OpenMP
|
16
|
-
DFLAG += -DUSEOMP
|
17
|
-
CXXFLAGS += $(OMPFLAG)
|
18
|
-
|
19
|
-
all: mf-train mf-predict
|
20
|
-
|
21
|
-
lib:
|
22
|
-
$(CXX) -shared -Wl,-soname,libmf.so.$(SHVER) -o libmf.so.$(SHVER) mf.o
|
23
|
-
|
24
|
-
mf-train: mf-train.cpp mf.o
|
25
|
-
$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^
|
26
|
-
|
27
|
-
mf-predict: mf-predict.cpp mf.o
|
28
|
-
$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^
|
29
|
-
|
30
|
-
mf.o: mf.cpp mf.h
|
31
|
-
$(CXX) $(CXXFLAGS) $(DFLAG) -c -fPIC -o $@ $<
|
32
|
-
|
33
|
-
clean:
|
34
|
-
rm -f mf-train mf-predict mf.o libmf.so.$(SHVER)
|