RubyGems - libmf - Versions diffs - 0.1.1 → 0.2.2 - Mend

libmf 0.1.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

checksums.yaml +4 -4
data/CHANGELOG.md +27 -2
data/LICENSE.txt +26 -18
data/README.md +87 -33
data/lib/libmf.rb +11 -6
data/lib/libmf/ffi.rb +2 -6
data/lib/libmf/model.rb +52 -25
data/lib/libmf/version.rb +1 -1
data/vendor/{libmf/COPYRIGHT → COPYRIGHT} +0 -0
data/vendor/{libmf/demo → demo}/real_matrix.te.txt +0 -0
data/vendor/{libmf/demo → demo}/real_matrix.tr.txt +0 -0
data/vendor/libmf.arm64.dylib +0 -0
data/vendor/libmf.dylib +0 -0
data/vendor/libmf.so +0 -0
data/vendor/mf.dll +0 -0
metadata +18 -89
data/ext/libmf/extconf.rb +0 -18
data/vendor/libmf/Makefile +0 -34
data/vendor/libmf/Makefile.win +0 -36
data/vendor/libmf/README +0 -637
data/vendor/libmf/demo/all_one_matrix.te.txt +0 -1382
data/vendor/libmf/demo/all_one_matrix.tr.txt +0 -5172
data/vendor/libmf/demo/binary_matrix.te.txt +0 -1312
data/vendor/libmf/demo/binary_matrix.tr.txt +0 -4937
data/vendor/libmf/demo/demo.bat +0 -40
data/vendor/libmf/demo/demo.sh +0 -58
data/vendor/libmf/mf-predict.cpp +0 -207
data/vendor/libmf/mf-train.cpp +0 -378
data/vendor/libmf/mf.cpp +0 -4683
data/vendor/libmf/mf.def +0 -21
data/vendor/libmf/mf.h +0 -130
data/vendor/libmf/windows/mf-predict.exe +0 -0
data/vendor/libmf/windows/mf-train.exe +0 -0
data/vendor/libmf/windows/mf.dll +0 -0

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2f46be88d7d3f59b4c77ad15c4843a4f88745ec4433a09193ce28a7cc77c1b38
-  data.tar.gz: 7b39d8a0d9c9495fb91c6424903fee5f694672493c031f9107f8db49571b384d
+  metadata.gz: e654382e818f1b59bc4437e71bc3e0ae6f4d4d79b9c85aeb53fb3494b2baf888
+  data.tar.gz: 560fa519794c7cd8b29c27b9ac9f4247e0485e9f13f229c18a4562c8bee62868
 SHA512:
-  metadata.gz: 0b95f27bc77e5a6f2c8352c960d5005c5ac0011e7eaff80f63608294536af074a6fbdefbc0932ae1a13769f8264ad044a419fade2377c56a93aa8ac885bb4662
-  data.tar.gz: ed73f96ce4cd0c1bd105ad5b7b6c0f85a9f14949e4cc48f107714d791b0ec20e64398697778da87c41003cd742fe13ed789aec29296aff7518c7b01a4bc5052b
+  metadata.gz: d16fb17f9b58cea5c53814a68fbf9de91440fd77e1669b52bbe5cb3a3837c94449d8bbc348405c0c117f219d2a513f1fc2e813b330ea127c93ac04a0c4101d07
+  data.tar.gz: 3d5d3962d3878fe992f76ef9d2fa5e30866d007cd5ae3ab97a666fc53f97f9f3f2487bb425e15b59becde6118c61ecac6c8511692742d64efa2b9e26fe0d2872

data/CHANGELOG.md CHANGED Viewed

@@ -1,7 +1,32 @@
-## 0.1.1
+## 0.2.2 (2021-02-04)
+- Reduced allocations
+- Improved ARM detection
+## 0.2.1 (2020-12-28)
+- Added ARM shared library for Mac
+## 0.2.0 (2020-03-26)
+- Changed to BSD 3-Clause license to match LIBMF
+- Added support for reading data directly from files
+- Added `format: :numo` option to `p_factors` and `q_factors`
+- Improved performance of loading data by 5x
+## 0.1.3 (2019-11-07)
+- Made parameter names more Ruby-like
+- No need to set `do_nmf` with generalized KL-divergence
+## 0.1.2 (2019-11-06)
+- Fixed bug in `p_factors` and `q_factors` methods
+## 0.1.1 (2019-11-05)
 - Fixed errors on Linux and Windows
-## 0.1.0
+## 0.1.0 (2019-11-04)
 - First release

data/LICENSE.txt CHANGED Viewed

@@ -1,22 +1,30 @@
-Copyright (c) 2019 Andrew Kane
+BSD 3-Clause License
-MIT License
+Copyright (c) 2014-2015, The LIBMF Project
+Copyright (c) 2019-2021, Andrew Kane
+All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

data/README.md CHANGED Viewed

@@ -2,9 +2,9 @@
 [LIBMF](https://github.com/cjlin1/libmf) - large-scale sparse matrix factorization - for Ruby
-:fire: Uses the C API for blazing performance
+Check out [Disco](https://github.com/ankane/disco) for higher-level collaborative filtering
-[![Build Status](https://travis-ci.org/ankane/libmf.svg?branch=master)](https://travis-ci.org/ankane/libmf)
+[![Build Status](https://github.com/ankane/libmf/workflows/build/badge.svg?branch=master)](https://github.com/ankane/libmf/actions)
 ## Installation
@@ -39,14 +39,19 @@ Make predictions
 model.predict(row_index, column_index)
 ```
-Get the bias and latent factors
+Get the latent factors (these approximate the training matrix)
 ```ruby
-model.bias
 model.p_factors
 model.q_factors
 ```
+Get the bias (average of all elements in the training matrix)
+```ruby
+model.bias
+```
 Save the model to a file
 ```ruby
@@ -65,48 +70,87 @@ Pass a validation set
 model.fit(data, eval_set: eval_set)
 ```
+## Cross-Validation
+Perform cross-validation
+```ruby
+model.cv(data)
+```
+Specify the number of folds
+```ruby
+model.cv(data, folds: 5)
+```
 ## Parameters
-Pass parameters
+Pass parameters - default values below
 ```ruby
-model = Libmf::Model.new(k: 20, nr_iters: 50)
-```
-Supports the same parameters as LIBMF
-```text
-variable      meaning                                    default
-================================================================
-fun           loss function                                    0
-k             number of latent factors                         8
-nr_threads    number of threads used                          12
-nr_bins       number of bins                                  25
-nr_iters      number of iterations                            20
-lambda_p1     coefficient of L1-norm regularization on P       0
-lambda_p2     coefficient of L2-norm regularization on P     0.1
-lambda_q1     coefficient of L1-norm regularization on Q       0
-lambda_q2     coefficient of L2-norm regularization on Q     0.1
-eta           learning rate                                  0.1
-alpha         importance of negative entries                 0.1
-c             desired value of negative entries           0.0001
-do_nmf        perform non-negative MF (NMF)                false
-quiet         no outputs to stdout                         false
-copy_data     copy data in training procedure               true
+Libmf::Model.new(
+  loss: 0,                # loss function
+  factors: 8,             # number of latent factors
+  threads: 12,            # number of threads used
+  bins: 25,               # number of bins
+  iterations: 20,         # number of iterations
+  lambda_p1: 0,           # coefficient of L1-norm regularization on P
+  lambda_p2: 0.1,         # coefficient of L2-norm regularization on P
+  lambda_q1: 0,           # coefficient of L1-norm regularization on Q
+  lambda_q2: 0.1,         # coefficient of L2-norm regularization on Q
+  learning_rate: 0.1,     # learning rate
+  alpha: 0.1,             # importance of negative entries
+  c: 0.0001,              # desired value of negative entries
+  nmf: false,             # perform non-negative MF (NMF)
+  quiet: false            # no outputs to stdout
+)
 ```
-## Cross-Validation
+### Loss Functions
-Perform cross-validation
+For real-valued matrix factorization
+- 0 - squared error (L2-norm)
+- 1 - absolute error (L1-norm)
+- 2 - generalized KL-divergence
+For binary matrix factorization
+- 5 - logarithmic error
+- 6 - squared hinge loss
+- 7 - hinge loss
+For one-class matrix factorization
+- 10 - row-oriented pair-wise logarithmic loss
+- 11 - column-oriented pair-wise logarithmic loss
+- 12 - squared error (L2-norm)
+## Performance
+For performance, read data directly from files
 ```ruby
-model.cv(data)
+model.fit("train.txt", eval_set: "validate.txt")
+model.cv("train.txt")
 ```
-Specify the number of folds
+Data should be in the format `row_index column_index value`:
+```txt
+0 0 5.0
+0 2 3.5
+1 1 4.0
+```
+## Numo
+Get latent factors as Numo arrays
 ```ruby
-model.cv(data, folds: 5)
+model.p_factors(format: :numo)
+model.q_factors(format: :numo)
 ```
 ## Resources
@@ -125,3 +169,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
 - Fix bugs and [submit pull requests](https://github.com/ankane/libmf/pulls)
 - Write, clarify, or fix documentation
 - Suggest or add new features
+To get started with development:
+```sh
+git clone --recursive https://github.com/ankane/libmf.git
+cd libmf
+bundle install
+bundle exec rake vendor:all
+bundle exec rake test
+```

data/lib/libmf.rb CHANGED Viewed

@@ -11,15 +11,20 @@ module Libmf
   class << self
     attr_accessor :ffi_lib
   end
-  lib_path =
-    if ::FFI::Platform.windows?
-      "../vendor/libmf/windows/mf.dll"
-    elsif ::FFI::Platform.mac?
-      "libmf.bundle"
+  lib_name =
+    if Gem.win_platform?
+      "mf.dll"
+    elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
+      if RbConfig::CONFIG["host_cpu"] =~ /arm/i
+        "libmf.arm64.dylib"
+      else
+        "libmf.dylib"
+      end
     else
       "libmf.so"
     end
-  self.ffi_lib = [File.expand_path(lib_path, __dir__)]
+  vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
+  self.ffi_lib = [vendor_lib]
   # friendlier error message
   autoload :FFI, "libmf/ffi"

data/lib/libmf/ffi.rb CHANGED Viewed

@@ -2,12 +2,7 @@ module Libmf
   module FFI
     extend ::FFI::Library
-    begin
-      ffi_lib Libmf.ffi_lib
-    rescue LoadError => e
-      raise e if ENV["LIBMF_DEBUG"]
-      raise LoadError, "Could not find LIBMF"
-    end
+    ffi_lib Libmf.ffi_lib
     class Node < ::FFI::Struct
       layout :u, :int,
@@ -51,6 +46,7 @@ module Libmf
     end
     attach_function :mf_get_default_param, [], Parameter.by_value
+    attach_function :mf_read_problem, [:string], Problem.by_value
     attach_function :mf_save_model, [Model.by_ref, :string], :int
     attach_function :mf_load_model, [:string], Model.by_ref
     attach_function :mf_destroy_model, [Model.by_ref], :void

data/lib/libmf/model.rb CHANGED Viewed

@@ -51,16 +51,27 @@ module Libmf
       model[:b]
     end
-    def p_factors
-      reshape(model[:p].read_array_of_float(factors * rows), [rows, factors])
+    def p_factors(format: nil)
+      _factors(model[:p], rows, format)
     end
-    def q_factors
-      reshape(model[:q].read_array_of_float(factors * columns), [columns, factors])
+    def q_factors(format: nil)
+      _factors(model[:q], columns, format)
     end
     private
+    def _factors(ptr, n, format)
+      case format
+      when :numo
+        Numo::SFloat.from_string(ptr.read_bytes(n * factors * 4)).reshape(n, factors)
+      when nil
+        ptr.read_array_of_float(n * factors).each_slice(factors).to_a
+      else
+        raise ArgumentError, "Invalid format"
+      end
+    end
     def model
       raise Error, "Not fit" unless @model
       @model
@@ -68,45 +79,61 @@ module Libmf
     def param
       param = FFI.mf_get_default_param
+      options = @options.dup
       # silence insufficient blocks warning with default params
-      options = {nr_bins: 25}.merge(@options)
+      options[:bins] ||= 25 unless options[:nr_bins]
+      options[:copy_data] = false unless options.key?(:copy_data)
+      options_map = {
+        :loss => :fun,
+        :factors => :k,
+        :threads => :nr_threads,
+        :bins => :nr_bins,
+        :iterations => :nr_iters,
+        :learning_rate => :eta,
+        :nmf => :do_nmf
+      }
       options.each do |k, v|
+        k = options_map[k] if options_map[k]
         param[k] = v
       end
+      # do_nmf must be true for generalized KL-divergence
+      param[:do_nmf] = true if param[:fun] == 2
       param
     end
     def create_problem(data)
+      if data.is_a?(String)
+        # need to expand path so it's absolute
+        return FFI.mf_read_problem(File.expand_path(data))
+      end
       raise Error, "No data" if data.empty?
-      nodes = []
-      r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
-      data.each_with_index do |row, i|
-        n = FFI::Node.new(r[i])
-        n[:u] = row[0]
-        n[:v] = row[1]
-        n[:r] = row[2]
-        nodes << n
+      # TODO do in C for better performance
+      # can use FIX2INT() and RFLOAT_VALUE() instead of pack
+      # and write directly to C string
+      buffer = String.new
+      pack_format = "iif"
+      data.each do |row|
+        row.pack(pack_format, buffer: buffer)
       end
-      m = nodes.map { |n| n[:u] }.max + 1
-      n = nodes.map { |n| n[:v] }.max + 1
+      r = ::FFI::MemoryPointer.new(FFI::Node, data.size)
+      r.write_bytes(buffer)
+      # double check size is what we expect
+      # FFI will throw an error above if too long
+      raise Error, "Bad buffer size" if r.size != buffer.bytesize
+      m = data.max_by { |r| r[0] }[0] + 1
+      n = data.max_by { |r| r[1] }[1] + 1
       prob = FFI::Problem.new
       prob[:m] = m
       prob[:n] = n
-      prob[:nnz] = nodes.size
+      prob[:nnz] = data.size
       prob[:r] = r
       prob
     end
-    def reshape(arr, dims)
-      rows = dims.first
-      new_arr = rows.times.map { [] }
-      arr.each_with_index do |v, i|
-        new_arr[i % rows] << v
-      end
-      new_arr
-    end
   end
 end

data/lib/libmf/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Libmf
-  VERSION = "0.1.1"
+  VERSION = "0.2.2"
 end

data/vendor/{libmf/COPYRIGHT → COPYRIGHT} RENAMED Viewed

File without changes

data/vendor/{libmf/demo → demo}/real_matrix.te.txt RENAMED Viewed

File without changes

data/vendor/{libmf/demo → demo}/real_matrix.tr.txt RENAMED Viewed

File without changes

data/vendor/libmf.arm64.dylib ADDED Viewed

Binary file

data/vendor/libmf.dylib ADDED Viewed

Binary file

data/vendor/libmf.so ADDED Viewed

Binary file

data/vendor/mf.dll ADDED Viewed

Binary file