libmf 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 510156ab4bccb39a99441002dd80f3540b643934294bc5724dca91987b4effba
4
- data.tar.gz: 03a1f04e4679ec60cb2f1d2a96e2add0b9adba03fe5715bc4f10309ab795bcc1
3
+ metadata.gz: d82762549216fa7e42bc21e9450aaa7f9a423350915f8d1c6ed158cea7b4520d
4
+ data.tar.gz: 820677028926afecac55c270d74e572b70ce19e22c2d16710fa2c514f77e7154
5
5
  SHA512:
6
- metadata.gz: eb862c0cf91a077ba2e9a0ed3f93491297cdc9801683e3587b91125c7e0b79123d0f942aea6bf3cef42fbbb43f5e8f822758a894f25a7eedf2fd9ee7b5ea4920
7
- data.tar.gz: b4d74a6b3d4160f3b1165f90496a9f93c6b59ca99e5f3cc0094f022a0807fc8a34af3d9b0edeeff463264a60c19901ceaa171c3cea944d860103208e2d26dbf2
6
+ metadata.gz: 1278c9ad499069cd1287bda40878ad9e61c2e8d039bea27c439fcc5b1bcb32bcd5136b9ffde6bb967ba39ea49fbd97d832f765a3b6d5b90a8bd51a81d26843b1
7
+ data.tar.gz: fac4621e932e5eb07b4652ed0f7336275833a63e37814e853aa407e2f344076dec201cbff49b1a0271a115a715e2fd43ac0bdb25abdbd262ddfa5bcb14774ad8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.5.0 (2026-04-17)
2
+
3
+ - Added checks for invalid data
4
+ - Fixed memory leak in shared library
5
+ - Dropped support for reading data directly from files
6
+ - Dropped support for Ruby < 3.3
7
+
8
+ ## 0.4.0 (2024-10-22)
9
+
10
+ - Dropped support for Ruby < 3.1
11
+
1
12
  ## 0.3.0 (2022-08-07)
2
13
 
3
14
  - Added metrics
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  BSD 3-Clause License
2
2
 
3
3
  Copyright (c) 2014-2015 The LIBMF Project.
4
- Copyright (c) 2019-2022 Andrew Kane.
4
+ Copyright (c) 2019-2026 Andrew Kane.
5
5
  All rights reserved.
6
6
 
7
7
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  Check out [Disco](https://github.com/ankane/disco) for higher-level collaborative filtering
6
6
 
7
- [![Build Status](https://github.com/ankane/libmf-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/libmf-ruby/actions)
7
+ [![Build Status](https://github.com/ankane/libmf-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/libmf-ruby/actions)
8
8
 
9
9
  ## Installation
10
10
 
@@ -170,21 +170,25 @@ Calculate AUC (for one-class MF)
170
170
  model.auc(data, transpose)
171
171
  ```
172
172
 
173
- ## Performance
173
+ ## Example
174
174
 
175
- For performance, read data directly from files
175
+ Download the [MovieLens 100K dataset](https://grouplens.org/datasets/movielens/100k/) and use:
176
176
 
177
177
  ```ruby
178
- model.fit("train.txt", eval_set: "validate.txt")
179
- model.cv("train.txt")
180
- ```
178
+ require "csv"
179
+
180
+ train_set = Libmf::Matrix.new
181
+ valid_set = Libmf::Matrix.new
182
+
183
+ CSV.foreach("u.data", col_sep: "\t").with_index do |row, i|
184
+ data = i < 80000 ? train_set : valid_set
185
+ data.push(row[0].to_i, row[1].to_i, row[2].to_f)
186
+ end
181
187
 
182
- Data should be in the format `row_index column_index value`:
188
+ model = Libmf::Model.new(factors: 20)
189
+ model.fit(train_set, eval_set: valid_set)
183
190
 
184
- ```txt
185
- 0 0 5.0
186
- 0 2 3.5
187
- 1 1 4.0
191
+ puts model.rmse(valid_set)
188
192
  ```
189
193
 
190
194
  ## Numo
data/lib/libmf/ffi.rb CHANGED
@@ -53,7 +53,6 @@ module Libmf
53
53
  end
54
54
 
55
55
  attach_function :mf_get_default_param, [], Parameter.by_value
56
- attach_function :mf_read_problem, [:string], Problem.by_value
57
56
  attach_function :mf_save_model, [Model.by_ref, :string], :int
58
57
  attach_function :mf_load_model, [:string], Model.auto_ptr
59
58
  attach_function :mf_destroy_model, [:pointer], :void
data/lib/libmf/model.rb CHANGED
@@ -10,9 +10,22 @@ module Libmf
10
10
  @model =
11
11
  if eval_set
12
12
  eval_set = create_problem(eval_set)
13
+ param = self.param
14
+
15
+ # LIBMF does not handle these cases
16
+ if param[:fun] == 12
17
+ if eval_set[:m] > train_set[:m]
18
+ raise ArgumentError, "Eval set cannot have extra rows for one_class_l2 loss"
19
+ end
20
+
21
+ if eval_set[:n] > train_set[:n]
22
+ raise ArgumentError, "Eval set cannot have extra columns for one_class_l2 loss"
23
+ end
24
+ end
25
+
13
26
  FFI.mf_train_with_validation(train_set, eval_set, param)
14
27
  else
15
- FFI.mf_train(train_set, param)
28
+ FFI.mf_train(train_set, self.param)
16
29
  end
17
30
  raise Error, "fit failed" if @model.null?
18
31
 
@@ -141,13 +154,13 @@ module Libmf
141
154
  options[:bins] ||= 25 unless options[:nr_bins]
142
155
  options[:copy_data] = false unless options.key?(:copy_data)
143
156
  options_map = {
144
- :loss => :fun,
145
- :factors => :k,
146
- :threads => :nr_threads,
147
- :bins => :nr_bins,
148
- :iterations => :nr_iters,
149
- :learning_rate => :eta,
150
- :nmf => :do_nmf
157
+ loss: :fun,
158
+ factors: :k,
159
+ threads: :nr_threads,
160
+ bins: :nr_bins,
161
+ iterations: :nr_iters,
162
+ learning_rate: :eta,
163
+ nmf: :do_nmf
151
164
  }
152
165
  options.each do |k, v|
153
166
  k = options_map[k] if options_map[k]
@@ -160,8 +173,7 @@ module Libmf
160
173
 
161
174
  def create_problem(data)
162
175
  if data.is_a?(String)
163
- # need to expand path so it's absolute
164
- return FFI.mf_read_problem(File.expand_path(data))
176
+ raise Error, "Reading data directly from files is no longer supported"
165
177
  end
166
178
 
167
179
  if data.is_a?(Matrix)
@@ -175,7 +187,23 @@ module Libmf
175
187
  # and write directly to C string
176
188
  buffer = String.new
177
189
  pack_format = "iif"
190
+ int_max = 2**31 - 1
191
+ umax = -1
192
+ vmax = -1
178
193
  data.each do |row|
194
+ u = row[0]
195
+ if u < 0 || u >= int_max
196
+ raise ArgumentError, "Invalid row index"
197
+ end
198
+
199
+ v = row[1]
200
+ if v < 0 || v >= int_max
201
+ raise ArgumentError, "Invalid column index"
202
+ end
203
+
204
+ umax = u if u > umax
205
+ vmax = v if v > vmax
206
+
179
207
  row.pack(pack_format, buffer: buffer)
180
208
  end
181
209
 
@@ -186,12 +214,9 @@ module Libmf
186
214
  # FFI will throw an error above if too long
187
215
  raise Error, "Bad buffer size" if r.size != buffer.bytesize
188
216
 
189
- m = data.max_by { |r| r[0] }[0] + 1
190
- n = data.max_by { |r| r[1] }[1] + 1
191
-
192
217
  prob = FFI::Problem.new
193
- prob[:m] = m
194
- prob[:n] = n
218
+ prob[:m] = umax + 1
219
+ prob[:n] = vmax + 1
195
220
  prob[:nnz] = data.size
196
221
  prob[:r] = r
197
222
  prob
data/lib/libmf/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Libmf
2
- VERSION = "0.3.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/libmf.rb CHANGED
@@ -2,9 +2,9 @@
2
2
  require "ffi"
3
3
 
4
4
  # modules
5
- require "libmf/matrix"
6
- require "libmf/model"
7
- require "libmf/version"
5
+ require_relative "libmf/matrix"
6
+ require_relative "libmf/model"
7
+ require_relative "libmf/version"
8
8
 
9
9
  module Libmf
10
10
  class Error < StandardError; end
Binary file
Binary file
data/vendor/libmf.dylib CHANGED
Binary file
data/vendor/libmf.so CHANGED
Binary file
data/vendor/mf.dll CHANGED
Binary file
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libmf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2022-08-08 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: ffi
@@ -24,7 +23,6 @@ dependencies:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
25
  version: '0'
27
- description:
28
26
  email: andrew@ankane.org
29
27
  executables: []
30
28
  extensions: []
@@ -50,7 +48,6 @@ homepage: https://github.com/ankane/libmf-ruby
50
48
  licenses:
51
49
  - BSD-3-Clause
52
50
  metadata: {}
53
- post_install_message:
54
51
  rdoc_options: []
55
52
  require_paths:
56
53
  - lib
@@ -58,15 +55,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
58
55
  requirements:
59
56
  - - ">="
60
57
  - !ruby/object:Gem::Version
61
- version: '2.7'
58
+ version: '3.3'
62
59
  required_rubygems_version: !ruby/object:Gem::Requirement
63
60
  requirements:
64
61
  - - ">="
65
62
  - !ruby/object:Gem::Version
66
63
  version: '0'
67
64
  requirements: []
68
- rubygems_version: 3.3.7
69
- signing_key:
65
+ rubygems_version: 4.0.6
70
66
  specification_version: 4
71
67
  summary: Large-scale sparse matrix factorization for Ruby
72
68
  test_files: []