libmf 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 316a859127d3ee4a6b2af41599daf5d14e1f436479dfd8d1c8ebd739b8141367
4
- data.tar.gz: 59b64d67f90955b81630873bc2b776cbf5a463174502ccd3f9e92eb29810eac9
3
+ metadata.gz: 58c3cf0afbb36ff976cb7641104f363795499023ce04c859beedf17bccaaffa1
4
+ data.tar.gz: '09a6b70c631ae252b24bd83c3cbb08ff3a9468d6b174b0255cf8223aaf77ea4d'
5
5
  SHA512:
6
- metadata.gz: 8b2d80a014a92dd78533e31476909aac348906ab0caba74d30826e9f7057bb5f9dd649f10c49d2d436325dbdedc8b590ab031c65197cc4c1b8be6290f525ace9
7
- data.tar.gz: 9af9ef4372b7ed124bc3cbdc823e3dfbfc1e10303c914895fbaa608f5959a6de560349148cc6a83e35ef002409042705af198d5f533fbc0e3a87c6fbce0a438b
6
+ metadata.gz: ad1aad8cdb571fd5fa99aa121a67cf172f6686b793a89a82b555005116378a07772c6675e20308f8575419d618edfa368c0adef738da06847f0c06e860ab650e
7
+ data.tar.gz: 87f4260c2bccc5f5da8c44497d656fd367467b1d403de6584d802700359f455e55e7f9cbf71db515a5252012e27ace4cf52f5e284528b05a1a6b0be72c693a0d
@@ -1,16 +1,23 @@
1
- ## 0.1.3
1
+ ## 0.2.0 (2020-03-26)
2
+
3
+ - Changed to BSD 3-Clause license to match LIBMF
4
+ - Added support for reading data directly from files
5
+ - Added `format: :numo` option to `p_factors` and `q_factors`
6
+ - Improved performance of loading data by 5x
7
+
8
+ ## 0.1.3 (2019-11-07)
2
9
 
3
10
  - Made parameter names more Ruby-like
4
11
  - No need to set `do_nmf` with generalized KL-divergence
5
12
 
6
- ## 0.1.2
13
+ ## 0.1.2 (2019-11-06)
7
14
 
8
15
  - Fixed bug in `p_factors` and `q_factors` methods
9
16
 
10
- ## 0.1.1
17
+ ## 0.1.1 (2019-11-05)
11
18
 
12
19
  - Fixed errors on Linux and Windows
13
20
 
14
- ## 0.1.0
21
+ ## 0.1.0 (2019-11-04)
15
22
 
16
23
  - First release
@@ -1,22 +1,29 @@
1
- Copyright (c) 2019 Andrew Kane
1
+ BSD 3-Clause License
2
2
 
3
- MIT License
3
+ Copyright (c) 2019-2020, Andrew Kane
4
+ All rights reserved.
4
5
 
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
12
8
 
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
15
11
 
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  [LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
4
4
 
5
- [![Build Status](https://travis-ci.org/ankane/libmf.svg?branch=master)](https://travis-ci.org/ankane/libmf)
5
+ Check out [Disco](https://github.com/ankane/disco) for higher-level collaborative filtering
6
+
7
+ [![Build Status](https://travis-ci.org/ankane/libmf.svg?branch=master)](https://travis-ci.org/ankane/libmf) [![Build status](https://ci.appveyor.com/api/projects/status/92fbip1bd8sjd2tj/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/libmf/branch/master)
6
8
 
7
9
  ## Installation
8
10
 
@@ -96,8 +98,7 @@ Libmf::Model.new(
96
98
  alpha: 0.1, # importance of negative entries
97
99
  c: 0.0001, # desired value of negative entries
98
100
  nmf: false, # perform non-negative MF (NMF)
99
- quiet: false, # no outputs to stdout
100
- copy_data: true # copy data in training procedure
101
+ quiet: false # no outputs to stdout
101
102
  )
102
103
  ```
103
104
 
@@ -121,6 +122,32 @@ For one-class matrix factorization
121
122
  - 11 - column-oriented pair-wise logarithmic loss
122
123
  - 12 - squared error (L2-norm)
123
124
 
125
+ ## Performance
126
+
127
+ For performance, read data directly from files
128
+
129
+ ```ruby
130
+ model.fit("train.txt", eval_set: "validate.txt")
131
+ model.cv("train.txt")
132
+ ```
133
+
134
+ Data should be in the format `row_index column_index value`:
135
+
136
+ ```txt
137
+ 0 0 5.0
138
+ 0 2 3.5
139
+ 1 1 4.0
140
+ ```
141
+
142
+ ## Numo
143
+
144
+ Get latent factors as Numo arrays
145
+
146
+ ```ruby
147
+ model.p_factors(format: :numo)
148
+ model.q_factors(format: :numo)
149
+ ```
150
+
124
151
  ## Resources
125
152
 
126
153
  - [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf)
@@ -137,3 +164,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
137
164
  - Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
138
165
  - Write, clarify, or fix documentation
139
166
  - Suggest or add new features
167
+
168
+ To get started with development:
169
+
170
+ ```sh
171
+ git clone --recursive https://github.com/ankane/libmf.git
172
+ cd libmf
173
+ bundle install
174
+ bundle exec rake vendor:all
175
+ bundle exec rake test
176
+ ```
@@ -11,15 +11,16 @@ module Libmf
11
11
  class << self
12
12
  attr_accessor :ffi_lib
13
13
  end
14
- lib_path =
15
- if ::FFI::Platform.windows?
16
- "../vendor/libmf/windows/mf.dll"
17
- elsif ::FFI::Platform.mac?
18
- "libmf.bundle"
14
+ lib_name =
15
+ if Gem.win_platform?
16
+ "mf.dll"
17
+ elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
18
+ "libmf.dylib"
19
19
  else
20
20
  "libmf.so"
21
21
  end
22
- self.ffi_lib = [File.expand_path(lib_path, __dir__)]
22
+ vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
23
+ self.ffi_lib = [vendor_lib]
23
24
 
24
25
  # friendlier error message
25
26
  autoload :FFI, "libmf/ffi"
@@ -2,12 +2,7 @@ module Libmf
2
2
  module FFI
3
3
  extend ::FFI::Library
4
4
 
5
- begin
6
- ffi_lib Libmf.ffi_lib
7
- rescue LoadError => e
8
- raise e if ENV["LIBMF_DEBUG"]
9
- raise LoadError, "Could not find LIBMF"
10
- end
5
+ ffi_lib Libmf.ffi_lib
11
6
 
12
7
  class Node < ::FFI::Struct
13
8
  layout :u, :int,
@@ -51,6 +46,7 @@ module Libmf
51
46
  end
52
47
 
53
48
  attach_function :mf_get_default_param, [], Parameter.by_value
49
+ attach_function :mf_read_problem, [:string], Problem.by_value
54
50
  attach_function :mf_save_model, [Model.by_ref, :string], :int
55
51
  attach_function :mf_load_model, [:string], Model.by_ref
56
52
  attach_function :mf_destroy_model, [Model.by_ref], :void
@@ -51,16 +51,27 @@ module Libmf
51
51
  model[:b]
52
52
  end
53
53
 
54
- def p_factors
55
- reshape(model[:p].read_array_of_float(factors * rows), factors)
54
+ def p_factors(format: nil)
55
+ _factors(model[:p], rows, format)
56
56
  end
57
57
 
58
- def q_factors
59
- reshape(model[:q].read_array_of_float(factors * columns), factors)
58
+ def q_factors(format: nil)
59
+ _factors(model[:q], columns, format)
60
60
  end
61
61
 
62
62
  private
63
63
 
64
+ def _factors(ptr, n, format)
65
+ case format
66
+ when :numo
67
+ Numo::SFloat.from_string(ptr.read_bytes(n * factors * 4)).reshape(n, factors)
68
+ when nil
69
+ ptr.read_array_of_float(n * factors).each_slice(factors).to_a
70
+ else
71
+ raise ArgumentError, "Invalid format"
72
+ end
73
+ end
74
+
64
75
  def model
65
76
  raise Error, "Not fit" unless @model
66
77
  @model
@@ -71,6 +82,7 @@ module Libmf
71
82
  options = @options.dup
72
83
  # silence insufficient blocks warning with default params
73
84
  options[:bins] ||= 25 unless options[:nr_bins]
85
+ options[:copy_data] = false unless options.key?(:copy_data)
74
86
  options_map = {
75
87
  :loss => :fun,
76
88
  :factors => :k,
@@ -90,31 +102,37 @@ module Libmf
90
102
  end
91
103
 
92
104
  def create_problem(data)
105
+ if data.is_a?(String)
106
+ # need to expand path so it's absolute
107
+ return FFI.mf_read_problem(File.expand_path(data))
108
+ end
109
+
93
110
  raise Error, "No data" if data.empty?
94
111
 
95
- nodes = []
96
- r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
97
- data.each_with_index do |row, i|
98
- n = FFI::Node.new(r[i])
99
- n[:u] = row[0]
100
- n[:v] = row[1]
101
- n[:r] = row[2]
102
- nodes << n
112
+ # TODO do in C for better performance
113
+ # can use FIX2INT() and RFLOAT_VALUE() instead of pack
114
+ buffer = String.new
115
+ data.each do |row|
116
+ row[0, 2].pack("i*".freeze, buffer: buffer)
117
+ row[2, 1].pack("f".freeze, buffer: buffer)
103
118
  end
104
119
 
105
- m = nodes.map { |n| n[:u] }.max + 1
106
- n = nodes.map { |n| n[:v] }.max + 1
120
+ r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
121
+ r.write_bytes(buffer)
122
+
123
+ # double check size is what we expect
124
+ # FFI will throw an error above if too long
125
+ raise Error, "Bad buffer size" if r.size != buffer.bytesize
126
+
127
+ m = data.max_by { |r| r[0] }[0] + 1
128
+ n = data.max_by { |r| r[1] }[1] + 1
107
129
 
108
130
  prob = FFI::Problem.new
109
131
  prob[:m] = m
110
132
  prob[:n] = n
111
- prob[:nnz] = nodes.size
133
+ prob[:nnz] = data.size
112
134
  prob[:r] = r
113
135
  prob
114
136
  end
115
-
116
- def reshape(arr, factors)
117
- arr.each_slice(factors).to_a
118
- end
119
137
  end
120
138
  end
@@ -1,3 +1,3 @@
1
1
  module Libmf
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
File without changes
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libmf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-08 00:00:00.000000000 Z
11
+ date: 2020-03-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -67,7 +67,21 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '5'
69
69
  - !ruby/object:Gem::Dependency
70
- name: rake-compiler
70
+ name: benchmark-ips
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: numo-narray
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
87
  - - ">="
@@ -83,42 +97,25 @@ dependencies:
83
97
  description:
84
98
  email: andrew@chartkick.com
85
99
  executables: []
86
- extensions:
87
- - ext/libmf/extconf.rb
100
+ extensions: []
88
101
  extra_rdoc_files: []
89
102
  files:
90
103
  - CHANGELOG.md
91
104
  - LICENSE.txt
92
105
  - README.md
93
- - ext/libmf/extconf.rb
94
- - lib/libmf.bundle
95
106
  - lib/libmf.rb
96
107
  - lib/libmf/ffi.rb
97
108
  - lib/libmf/model.rb
98
109
  - lib/libmf/version.rb
99
- - vendor/libmf/COPYRIGHT
100
- - vendor/libmf/Makefile
101
- - vendor/libmf/Makefile.win
102
- - vendor/libmf/README
103
- - vendor/libmf/demo/all_one_matrix.te.txt
104
- - vendor/libmf/demo/all_one_matrix.tr.txt
105
- - vendor/libmf/demo/binary_matrix.te.txt
106
- - vendor/libmf/demo/binary_matrix.tr.txt
107
- - vendor/libmf/demo/demo.bat
108
- - vendor/libmf/demo/demo.sh
109
- - vendor/libmf/demo/real_matrix.te.txt
110
- - vendor/libmf/demo/real_matrix.tr.txt
111
- - vendor/libmf/mf-predict.cpp
112
- - vendor/libmf/mf-train.cpp
113
- - vendor/libmf/mf.cpp
114
- - vendor/libmf/mf.def
115
- - vendor/libmf/mf.h
116
- - vendor/libmf/windows/mf-predict.exe
117
- - vendor/libmf/windows/mf-train.exe
118
- - vendor/libmf/windows/mf.dll
110
+ - vendor/COPYRIGHT
111
+ - vendor/demo/real_matrix.te.txt
112
+ - vendor/demo/real_matrix.tr.txt
113
+ - vendor/libmf.dylib
114
+ - vendor/libmf.so
115
+ - vendor/mf.dll
119
116
  homepage: https://github.com/ankane/libmf
120
117
  licenses:
121
- - MIT
118
+ - BSD-3-Clause
122
119
  metadata: {}
123
120
  post_install_message:
124
121
  rdoc_options: []
@@ -135,8 +132,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
132
  - !ruby/object:Gem::Version
136
133
  version: '0'
137
134
  requirements: []
138
- rubygems_version: 3.0.3
135
+ rubygems_version: 3.1.2
139
136
  signing_key:
140
137
  specification_version: 4
141
- summary: LIBMF - large-scale sparse matrix factorization - for Ruby
138
+ summary: Large-scale sparse matrix factorization for Ruby
142
139
  test_files: []
@@ -1,18 +0,0 @@
1
- require "mkmf"
2
-
3
- arch = RbConfig::CONFIG["arch"]
4
- case arch
5
- when /mingw/
6
- File.write("Makefile", dummy_makefile("libmf").join)
7
- else
8
- abort "Missing stdc++" unless have_library("stdc++")
9
- $CXXFLAGS << " -std=c++11"
10
-
11
- # TODO
12
- # if have_library("libomp")
13
- # end
14
-
15
- $objs = ["mf.o"]
16
- vendor_path = File.expand_path("../../vendor/libmf", __dir__)
17
- create_makefile("libmf", vendor_path)
18
- end
Binary file
@@ -1,34 +0,0 @@
1
- CXX = g++
2
- CXXFLAGS = -Wall -O3 -pthread -std=c++0x -march=native
3
- OMPFLAG = -fopenmp
4
- SHVER = 2
5
-
6
- # run `make clean all' if you change the following flags.
7
-
8
- # comment the following flag if you want to disable SSE or enable AVX
9
- DFLAG = -DUSESSE
10
-
11
- # uncomment the following flags if you want to use AVX
12
- #DFLAG = -DUSEAVX
13
- #CXXFLAGS += -mavx
14
-
15
- # uncomment the following flags if you do not want to use OpenMP
16
- DFLAG += -DUSEOMP
17
- CXXFLAGS += $(OMPFLAG)
18
-
19
- all: mf-train mf-predict
20
-
21
- lib:
22
- $(CXX) -shared -Wl,-soname,libmf.so.$(SHVER) -o libmf.so.$(SHVER) mf.o
23
-
24
- mf-train: mf-train.cpp mf.o
25
- $(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^
26
-
27
- mf-predict: mf-predict.cpp mf.o
28
- $(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^
29
-
30
- mf.o: mf.cpp mf.h
31
- $(CXX) $(CXXFLAGS) $(DFLAG) -c -fPIC -o $@ $<
32
-
33
- clean:
34
- rm -f mf-train mf-predict mf.o libmf.so.$(SHVER)