libmf 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 316a859127d3ee4a6b2af41599daf5d14e1f436479dfd8d1c8ebd739b8141367
4
- data.tar.gz: 59b64d67f90955b81630873bc2b776cbf5a463174502ccd3f9e92eb29810eac9
3
+ metadata.gz: 58c3cf0afbb36ff976cb7641104f363795499023ce04c859beedf17bccaaffa1
4
+ data.tar.gz: '09a6b70c631ae252b24bd83c3cbb08ff3a9468d6b174b0255cf8223aaf77ea4d'
5
5
  SHA512:
6
- metadata.gz: 8b2d80a014a92dd78533e31476909aac348906ab0caba74d30826e9f7057bb5f9dd649f10c49d2d436325dbdedc8b590ab031c65197cc4c1b8be6290f525ace9
7
- data.tar.gz: 9af9ef4372b7ed124bc3cbdc823e3dfbfc1e10303c914895fbaa608f5959a6de560349148cc6a83e35ef002409042705af198d5f533fbc0e3a87c6fbce0a438b
6
+ metadata.gz: ad1aad8cdb571fd5fa99aa121a67cf172f6686b793a89a82b555005116378a07772c6675e20308f8575419d618edfa368c0adef738da06847f0c06e860ab650e
7
+ data.tar.gz: 87f4260c2bccc5f5da8c44497d656fd367467b1d403de6584d802700359f455e55e7f9cbf71db515a5252012e27ace4cf52f5e284528b05a1a6b0be72c693a0d
@@ -1,16 +1,23 @@
1
- ## 0.1.3
1
+ ## 0.2.0 (2020-03-26)
2
+
3
+ - Changed to BSD 3-Clause license to match LIBMF
4
+ - Added support for reading data directly from files
5
+ - Added `format: :numo` option to `p_factors` and `q_factors`
6
+ - Improved performance of loading data by 5x
7
+
8
+ ## 0.1.3 (2019-11-07)
2
9
 
3
10
  - Made parameter names more Ruby-like
4
11
  - No need to set `do_nmf` with generalized KL-divergence
5
12
 
6
- ## 0.1.2
13
+ ## 0.1.2 (2019-11-06)
7
14
 
8
15
  - Fixed bug in `p_factors` and `q_factors` methods
9
16
 
10
- ## 0.1.1
17
+ ## 0.1.1 (2019-11-05)
11
18
 
12
19
  - Fixed errors on Linux and Windows
13
20
 
14
- ## 0.1.0
21
+ ## 0.1.0 (2019-11-04)
15
22
 
16
23
  - First release
@@ -1,22 +1,29 @@
1
- Copyright (c) 2019 Andrew Kane
1
+ BSD 3-Clause License
2
2
 
3
- MIT License
3
+ Copyright (c) 2019-2020, Andrew Kane
4
+ All rights reserved.
4
5
 
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
12
8
 
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
15
11
 
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  [LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
4
4
 
5
- [![Build Status](https://travis-ci.org/ankane/libmf.svg?branch=master)](https://travis-ci.org/ankane/libmf)
5
+ Check out [Disco](https://github.com/ankane/disco) for higher-level collaborative filtering
6
+
7
+ [![Build Status](https://travis-ci.org/ankane/libmf.svg?branch=master)](https://travis-ci.org/ankane/libmf) [![Build status](https://ci.appveyor.com/api/projects/status/92fbip1bd8sjd2tj/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/libmf/branch/master)
6
8
 
7
9
  ## Installation
8
10
 
@@ -96,8 +98,7 @@ Libmf::Model.new(
96
98
  alpha: 0.1, # importance of negative entries
97
99
  c: 0.0001, # desired value of negative entries
98
100
  nmf: false, # perform non-negative MF (NMF)
99
- quiet: false, # no outputs to stdout
100
- copy_data: true # copy data in training procedure
101
+ quiet: false # no outputs to stdout
101
102
  )
102
103
  ```
103
104
 
@@ -121,6 +122,32 @@ For one-class matrix factorization
121
122
  - 11 - column-oriented pair-wise logarithmic loss
122
123
  - 12 - squared error (L2-norm)
123
124
 
125
+ ## Performance
126
+
127
+ For performance, read data directly from files
128
+
129
+ ```ruby
130
+ model.fit("train.txt", eval_set: "validate.txt")
131
+ model.cv("train.txt")
132
+ ```
133
+
134
+ Data should be in the format `row_index column_index value`:
135
+
136
+ ```txt
137
+ 0 0 5.0
138
+ 0 2 3.5
139
+ 1 1 4.0
140
+ ```
141
+
142
+ ## Numo
143
+
144
+ Get latent factors as Numo arrays
145
+
146
+ ```ruby
147
+ model.p_factors(format: :numo)
148
+ model.q_factors(format: :numo)
149
+ ```
150
+
124
151
  ## Resources
125
152
 
126
153
  - [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf)
@@ -137,3 +164,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
137
164
  - Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
138
165
  - Write, clarify, or fix documentation
139
166
  - Suggest or add new features
167
+
168
+ To get started with development:
169
+
170
+ ```sh
171
+ git clone --recursive https://github.com/ankane/libmf.git
172
+ cd libmf
173
+ bundle install
174
+ bundle exec rake vendor:all
175
+ bundle exec rake test
176
+ ```
@@ -11,15 +11,16 @@ module Libmf
11
11
  class << self
12
12
  attr_accessor :ffi_lib
13
13
  end
14
- lib_path =
15
- if ::FFI::Platform.windows?
16
- "../vendor/libmf/windows/mf.dll"
17
- elsif ::FFI::Platform.mac?
18
- "libmf.bundle"
14
+ lib_name =
15
+ if Gem.win_platform?
16
+ "mf.dll"
17
+ elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
18
+ "libmf.dylib"
19
19
  else
20
20
  "libmf.so"
21
21
  end
22
- self.ffi_lib = [File.expand_path(lib_path, __dir__)]
22
+ vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
23
+ self.ffi_lib = [vendor_lib]
23
24
 
24
25
  # friendlier error message
25
26
  autoload :FFI, "libmf/ffi"
@@ -2,12 +2,7 @@ module Libmf
2
2
  module FFI
3
3
  extend ::FFI::Library
4
4
 
5
- begin
6
- ffi_lib Libmf.ffi_lib
7
- rescue LoadError => e
8
- raise e if ENV["LIBMF_DEBUG"]
9
- raise LoadError, "Could not find LIBMF"
10
- end
5
+ ffi_lib Libmf.ffi_lib
11
6
 
12
7
  class Node < ::FFI::Struct
13
8
  layout :u, :int,
@@ -51,6 +46,7 @@ module Libmf
51
46
  end
52
47
 
53
48
  attach_function :mf_get_default_param, [], Parameter.by_value
49
+ attach_function :mf_read_problem, [:string], Problem.by_value
54
50
  attach_function :mf_save_model, [Model.by_ref, :string], :int
55
51
  attach_function :mf_load_model, [:string], Model.by_ref
56
52
  attach_function :mf_destroy_model, [Model.by_ref], :void
@@ -51,16 +51,27 @@ module Libmf
51
51
  model[:b]
52
52
  end
53
53
 
54
- def p_factors
55
- reshape(model[:p].read_array_of_float(factors * rows), factors)
54
+ def p_factors(format: nil)
55
+ _factors(model[:p], rows, format)
56
56
  end
57
57
 
58
- def q_factors
59
- reshape(model[:q].read_array_of_float(factors * columns), factors)
58
+ def q_factors(format: nil)
59
+ _factors(model[:q], columns, format)
60
60
  end
61
61
 
62
62
  private
63
63
 
64
+ def _factors(ptr, n, format)
65
+ case format
66
+ when :numo
67
+ Numo::SFloat.from_string(ptr.read_bytes(n * factors * 4)).reshape(n, factors)
68
+ when nil
69
+ ptr.read_array_of_float(n * factors).each_slice(factors).to_a
70
+ else
71
+ raise ArgumentError, "Invalid format"
72
+ end
73
+ end
74
+
64
75
  def model
65
76
  raise Error, "Not fit" unless @model
66
77
  @model
@@ -71,6 +82,7 @@ module Libmf
71
82
  options = @options.dup
72
83
  # silence insufficient blocks warning with default params
73
84
  options[:bins] ||= 25 unless options[:nr_bins]
85
+ options[:copy_data] = false unless options.key?(:copy_data)
74
86
  options_map = {
75
87
  :loss => :fun,
76
88
  :factors => :k,
@@ -90,31 +102,37 @@ module Libmf
90
102
  end
91
103
 
92
104
  def create_problem(data)
105
+ if data.is_a?(String)
106
+ # need to expand path so it's absolute
107
+ return FFI.mf_read_problem(File.expand_path(data))
108
+ end
109
+
93
110
  raise Error, "No data" if data.empty?
94
111
 
95
- nodes = []
96
- r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
97
- data.each_with_index do |row, i|
98
- n = FFI::Node.new(r[i])
99
- n[:u] = row[0]
100
- n[:v] = row[1]
101
- n[:r] = row[2]
102
- nodes << n
112
+ # TODO do in C for better performance
113
+ # can use FIX2INT() and RFLOAT_VALUE() instead of pack
114
+ buffer = String.new
115
+ data.each do |row|
116
+ row[0, 2].pack("i*".freeze, buffer: buffer)
117
+ row[2, 1].pack("f".freeze, buffer: buffer)
103
118
  end
104
119
 
105
- m = nodes.map { |n| n[:u] }.max + 1
106
- n = nodes.map { |n| n[:v] }.max + 1
120
+ r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
121
+ r.write_bytes(buffer)
122
+
123
+ # double check size is what we expect
124
+ # FFI will throw an error above if too long
125
+ raise Error, "Bad buffer size" if r.size != buffer.bytesize
126
+
127
+ m = data.max_by { |r| r[0] }[0] + 1
128
+ n = data.max_by { |r| r[1] }[1] + 1
107
129
 
108
130
  prob = FFI::Problem.new
109
131
  prob[:m] = m
110
132
  prob[:n] = n
111
- prob[:nnz] = nodes.size
133
+ prob[:nnz] = data.size
112
134
  prob[:r] = r
113
135
  prob
114
136
  end
115
-
116
- def reshape(arr, factors)
117
- arr.each_slice(factors).to_a
118
- end
119
137
  end
120
138
  end
@@ -1,3 +1,3 @@
1
1
  module Libmf
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
File without changes
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libmf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-08 00:00:00.000000000 Z
11
+ date: 2020-03-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -67,7 +67,21 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '5'
69
69
  - !ruby/object:Gem::Dependency
70
- name: rake-compiler
70
+ name: benchmark-ips
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: numo-narray
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
87
  - - ">="
@@ -83,42 +97,25 @@ dependencies:
83
97
  description:
84
98
  email: andrew@chartkick.com
85
99
  executables: []
86
- extensions:
87
- - ext/libmf/extconf.rb
100
+ extensions: []
88
101
  extra_rdoc_files: []
89
102
  files:
90
103
  - CHANGELOG.md
91
104
  - LICENSE.txt
92
105
  - README.md
93
- - ext/libmf/extconf.rb
94
- - lib/libmf.bundle
95
106
  - lib/libmf.rb
96
107
  - lib/libmf/ffi.rb
97
108
  - lib/libmf/model.rb
98
109
  - lib/libmf/version.rb
99
- - vendor/libmf/COPYRIGHT
100
- - vendor/libmf/Makefile
101
- - vendor/libmf/Makefile.win
102
- - vendor/libmf/README
103
- - vendor/libmf/demo/all_one_matrix.te.txt
104
- - vendor/libmf/demo/all_one_matrix.tr.txt
105
- - vendor/libmf/demo/binary_matrix.te.txt
106
- - vendor/libmf/demo/binary_matrix.tr.txt
107
- - vendor/libmf/demo/demo.bat
108
- - vendor/libmf/demo/demo.sh
109
- - vendor/libmf/demo/real_matrix.te.txt
110
- - vendor/libmf/demo/real_matrix.tr.txt
111
- - vendor/libmf/mf-predict.cpp
112
- - vendor/libmf/mf-train.cpp
113
- - vendor/libmf/mf.cpp
114
- - vendor/libmf/mf.def
115
- - vendor/libmf/mf.h
116
- - vendor/libmf/windows/mf-predict.exe
117
- - vendor/libmf/windows/mf-train.exe
118
- - vendor/libmf/windows/mf.dll
110
+ - vendor/COPYRIGHT
111
+ - vendor/demo/real_matrix.te.txt
112
+ - vendor/demo/real_matrix.tr.txt
113
+ - vendor/libmf.dylib
114
+ - vendor/libmf.so
115
+ - vendor/mf.dll
119
116
  homepage: https://github.com/ankane/libmf
120
117
  licenses:
121
- - MIT
118
+ - BSD-3-Clause
122
119
  metadata: {}
123
120
  post_install_message:
124
121
  rdoc_options: []
@@ -135,8 +132,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
132
  - !ruby/object:Gem::Version
136
133
  version: '0'
137
134
  requirements: []
138
- rubygems_version: 3.0.3
135
+ rubygems_version: 3.1.2
139
136
  signing_key:
140
137
  specification_version: 4
141
- summary: LIBMF - large-scale sparse matrix factorization - for Ruby
138
+ summary: Large-scale sparse matrix factorization for Ruby
142
139
  test_files: []
@@ -1,18 +0,0 @@
1
- require "mkmf"
2
-
3
- arch = RbConfig::CONFIG["arch"]
4
- case arch
5
- when /mingw/
6
- File.write("Makefile", dummy_makefile("libmf").join)
7
- else
8
- abort "Missing stdc++" unless have_library("stdc++")
9
- $CXXFLAGS << " -std=c++11"
10
-
11
- # TODO
12
- # if have_library("libomp")
13
- # end
14
-
15
- $objs = ["mf.o"]
16
- vendor_path = File.expand_path("../../vendor/libmf", __dir__)
17
- create_makefile("libmf", vendor_path)
18
- end
Binary file
@@ -1,34 +0,0 @@
1
- CXX = g++
2
- CXXFLAGS = -Wall -O3 -pthread -std=c++0x -march=native
3
- OMPFLAG = -fopenmp
4
- SHVER = 2
5
-
6
- # run `make clean all' if you change the following flags.
7
-
8
- # comment the following flag if you want to disable SSE or enable AVX
9
- DFLAG = -DUSESSE
10
-
11
- # uncomment the following flags if you want to use AVX
12
- #DFLAG = -DUSEAVX
13
- #CXXFLAGS += -mavx
14
-
15
- # comment out the following flags if you do not want to use OpenMP
16
- DFLAG += -DUSEOMP
17
- CXXFLAGS += $(OMPFLAG)
18
-
19
- all: mf-train mf-predict
20
-
21
- lib:
22
- $(CXX) -shared -Wl,-soname,libmf.so.$(SHVER) -o libmf.so.$(SHVER) mf.o
23
-
24
- mf-train: mf-train.cpp mf.o
25
- $(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^
26
-
27
- mf-predict: mf-predict.cpp mf.o
28
- $(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $^
29
-
30
- mf.o: mf.cpp mf.h
31
- $(CXX) $(CXXFLAGS) $(DFLAG) -c -fPIC -o $@ $<
32
-
33
- clean:
34
- rm -f mf-train mf-predict mf.o libmf.so.$(SHVER)