xlearn 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +107 -0
- data/lib/xlearn.rb +28 -0
- data/lib/xlearn/dmatrix.rb +35 -0
- data/lib/xlearn/ffi.rb +39 -0
- data/lib/xlearn/ffm.rb +8 -0
- data/lib/xlearn/fm.rb +8 -0
- data/lib/xlearn/linear.rb +8 -0
- data/lib/xlearn/model.rb +104 -0
- data/lib/xlearn/utils.rb +9 -0
- data/lib/xlearn/version.rb +3 -0
- metadata +110 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cbc492d4f4cb0de9c53cac0820251fc1f747de836348280dfa9d1b7e6475f745
|
4
|
+
data.tar.gz: d5f1fcbbb10b96714c38fd9c0c924c98fb01dccf1872734e1084a31ad855503c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: '048e5915264ba2749e00a91b4c59a773efcd553cf7c764e56df0107e6dd08edcb7bcb86350181b217d095351ea79f47ba6daa84407069fcd9c445683ecdc22a8'
|
7
|
+
data.tar.gz: 46831787724f8ec1d4063859445a0a9e6aefd2a38b4267032eb11ffcadf145ebbd37ab3babcf56599cf31f3f4143126b1663baf409099926860ce88c591adb75
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
# xLearn
|
2
|
+
|
3
|
+
[xLearn](https://github.com/aksnzhy/xlearn) - the high performance machine learning library - for Ruby
|
4
|
+
|
5
|
+
:fire: Uses the C API for blazing performance
|
6
|
+
|
7
|
+
Supports:
|
8
|
+
|
9
|
+
- Linear models
|
10
|
+
- Factorization machines
|
11
|
+
- Field-aware factorization machines
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
First, [install xLearn](https://xlearn-doc.readthedocs.io/en/latest/install/index.html). On Mac, copy `build/lib/libxlearn_api.dylib` to `/usr/local/lib`.
|
16
|
+
|
17
|
+
Add this line to your application’s Gemfile:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
gem 'xlearn'
|
21
|
+
```
|
22
|
+
|
23
|
+
## Getting Started
|
24
|
+
|
25
|
+
This library is modeled after the [Python Scikit-learn API](https://xlearn-doc.readthedocs.io/en/latest/python_api/index.html). Some methods are missing at the moment. PRs welcome!
|
26
|
+
|
27
|
+
Prep your data
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
|
31
|
+
y = [1, 2, 3, 4]
|
32
|
+
```
|
33
|
+
|
34
|
+
Train a model
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
model = XLearn::Linear.new(task: "reg")
|
38
|
+
model.fit(x, y)
|
39
|
+
```
|
40
|
+
|
41
|
+
Use `XLearn::FM` for factorization machines and `XLearn::FFM` for field-aware factorization machines
|
42
|
+
|
43
|
+
Make predictions
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
model.predict(x)
|
47
|
+
```
|
48
|
+
|
49
|
+
Save the model to a file
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
model.save_model("model.bin")
|
53
|
+
```
|
54
|
+
|
55
|
+
Load the model from a file
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
model.load_model("model.bin")
|
59
|
+
```
|
60
|
+
|
61
|
+
## Parameters
|
62
|
+
|
63
|
+
Specify parameters
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
model = XLearn::FM.new(k: 20, epoch: 50)
|
67
|
+
```
|
68
|
+
|
69
|
+
Supports the same parameters as [Python](https://xlearn-doc.readthedocs.io/en/latest/all_api/index.html)
|
70
|
+
|
71
|
+
## Validation
|
72
|
+
|
73
|
+
Pass a validation set when fitting
|
74
|
+
|
75
|
+
```ruby
|
76
|
+
model.fit(x_train, y_train, eval_set: [x_val, y_val])
|
77
|
+
```
|
78
|
+
|
79
|
+
## Performance
|
80
|
+
|
81
|
+
For performance, you can read data directly from files
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
model.fit("train.txt", eval_set: "validate.txt")
|
85
|
+
model.predict("test.txt")
|
86
|
+
```
|
87
|
+
|
88
|
+
[These formats](https://xlearn-doc.readthedocs.io/en/latest/python_api/index.html#choose-machine-learning-algorithm) are supported
|
89
|
+
|
90
|
+
You can also write predictions directly to a file
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
model.predict("test.txt", out_path: "predictions.txt")
|
94
|
+
```
|
95
|
+
|
96
|
+
## History
|
97
|
+
|
98
|
+
View the [changelog](https://github.com/ankane/xlearn/blob/master/CHANGELOG.md)
|
99
|
+
|
100
|
+
## Contributing
|
101
|
+
|
102
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
103
|
+
|
104
|
+
- [Report bugs](https://github.com/ankane/xlearn/issues)
|
105
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/xlearn/pulls)
|
106
|
+
- Write, clarify, or fix documentation
|
107
|
+
- Suggest or add new features
|
data/lib/xlearn.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# dependencies
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
# stdlib
|
5
|
+
require "csv"
|
6
|
+
require "fileutils"
|
7
|
+
require "tempfile"
|
8
|
+
|
9
|
+
# modules
|
10
|
+
require "xlearn/utils"
|
11
|
+
require "xlearn/dmatrix"
|
12
|
+
require "xlearn/model"
|
13
|
+
require "xlearn/ffm"
|
14
|
+
require "xlearn/fm"
|
15
|
+
require "xlearn/linear"
|
16
|
+
require "xlearn/version"
|
17
|
+
|
18
|
+
# Top-level namespace for the xLearn bindings.
module XLearn
  # Library-specific error class.
  class Error < StandardError
  end

  # Module-level reader/writer for the list of library names that
  # xlearn/ffi hands to ffi_lib when it is loaded.
  singleton_class.send(:attr_accessor, :ffi_lib)
  @ffi_lib = ["xlearn_api"]

  # friendlier error message
  # (the bindings are registered for lazy loading instead of being required
  # up front — presumably so a missing native library only surfaces once
  # XLearn::FFI is first referenced; confirm)
  autoload :FFI, "xlearn/ffi"
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module XLearn
|
2
|
+
class DMatrix
  include Utils

  # Copies +data+ (and +label+, when given) into native float buffers and
  # asks xLearn to create a data handle from them.
  #
  # data  - rows of numbers; the row count is data.count and the column
  #         count is taken from the first row
  # label - optional list of labels; the label buffer has room for one
  #         float per row
  def initialize(data, label: nil)
    @handle = ::FFI::MemoryPointer.new(:pointer)

    row_count = data.count
    col_count = data.first.count

    values = ::FFI::MemoryPointer.new(:float, row_count * col_count)
    values.put_array_of_float(0, data.flatten)

    # stays nil when no label is given, which is passed through as-is
    labels =
      if label
        ::FFI::MemoryPointer.new(:float, row_count).tap do |buffer|
          buffer.put_array_of_float(0, label)
        end
      end

    # TODO support this
    field_map = nil

    status = FFI.XlearnCreateDataFromMat(values, row_count, col_count, labels, field_map, @handle)
    check_call status
    ObjectSpace.define_finalizer(self, self.class.finalize(@handle))
  end

  # Returns the pointer that was filled in by XlearnCreateDataFromMat.
  # NOTE(review): presumably this is what lets a DMatrix be passed directly
  # as a pointer argument to the FFI functions — confirm against ffi docs.
  def to_ptr
    @handle
  end

  # Builds the finalizer callback that releases the native data behind
  # +pointer+ via XlearnDataFree.
  def self.finalize(pointer)
    # must use proc instead of stabby lambda
    proc do
      FFI.XlearnDataFree(pointer)
    end
  end
end
|
35
|
+
end
|
data/lib/xlearn/ffi.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module XLearn
|
2
|
+
# Bindings to the xLearn C API, attached through the ffi gem.
module FFI
  extend ::FFI::Library

  begin
    ffi_lib XLearn.ffi_lib
  rescue LoadError => load_error
    # with XLEARN_DEBUG set, surface the loader's own error;
    # otherwise replace it with a short message
    if ENV["XLEARN_DEBUG"]
      raise load_error
    else
      raise LoadError, "Could not find xLearn"
    end
  end

  # https://github.com/aksnzhy/xlearn/blob/master/src/c_api/c_api.h
  # keep same order

  attach_function :XLearnHello, [], :int
  attach_function :XLearnCreate, [:string, :pointer], :int
  attach_function :XlearnCreateDataFromMat, [:pointer, :uint32, :uint32, :pointer, :pointer, :pointer], :int
  attach_function :XlearnDataFree, [:pointer], :int
  attach_function :XLearnHandleFree, [:pointer], :int
  attach_function :XLearnShow, [:pointer], :int
  attach_function :XLearnSetTrain, [:pointer, :string], :int
  attach_function :XLearnSetTest, [:pointer, :string], :int
  attach_function :XLearnSetPreModel, [:pointer, :string], :int
  attach_function :XLearnSetValidate, [:pointer, :string], :int
  attach_function :XLearnSetTXTModel, [:pointer, :string], :int
  attach_function :XLearnFit, [:pointer, :string], :int
  attach_function :XLearnCV, [:pointer], :int
  attach_function :XLearnPredictForMat, [:pointer, :string, :pointer, :pointer], :int
  attach_function :XLearnPredictForFile, [:pointer, :string, :string], :int
  attach_function :XLearnSetDMatrix, [:pointer, :string, :pointer], :int
  attach_function :XLearnSetStr, [:pointer, :string, :string], :int
  attach_function :XLearnSetInt, [:pointer, :string, :int], :int
  attach_function :XLearnSetFloat, [:pointer, :string, :float], :int
  attach_function :XLearnSetBool, [:pointer, :string, :bool], :int

  # errors
  attach_function :XLearnGetLastError, [], :string
end
|
39
|
+
end
|
data/lib/xlearn/ffm.rb
ADDED
data/lib/xlearn/fm.rb
ADDED
data/lib/xlearn/model.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
module XLearn
|
2
|
+
class Model
|
3
|
+
include Utils
|
4
|
+
|
5
|
+
# Creates the native xLearn handle and applies the given parameters.
#
# Defaults to task: "binary" and quiet: true; an option passed by the caller
# overrides the default of the same name. For the "binary" task, sigmoid is
# switched on unless the caller supplied a :sigmoid option.
#
# NOTE(review): @model_type is read here but never assigned in this method —
# presumably subclasses set it before calling super; confirm.
def initialize(**options)
  @handle = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XLearnCreate(@model_type, @handle)
  ObjectSpace.define_finalizer(self, self.class.finalize(@handle))

  params = { task: "binary", quiet: true }.merge(options)
  params[:sigmoid] = true if params[:task] == "binary" && !params.key?(:sigmoid)

  set_params(params)
end
|
21
|
+
|
22
|
+
# Trains the model and writes it to a temporary model file.
#
# x        - path to a training file (String), or in-memory rows that are
#            wrapped in a DMatrix together with y
# y        - labels for in-memory training data (not used when x is a path)
# eval_set - optional validation data: a file path (String), or an
#            [x_val, y_val] pair of in-memory rows and labels
def fit(x, y = nil, eval_set: nil)
  if x.is_a?(String)
    check_call FFI.XLearnSetTrain(@handle, x)
    check_call FFI.XLearnSetBool(@handle, "from_file", true)
  else
    train_set = DMatrix.new(x, label: y)
    check_call FFI.XLearnSetDMatrix(@handle, "train", train_set)
    check_call FFI.XLearnSetBool(@handle, "from_file", false)
  end

  if eval_set
    if eval_set.is_a?(String)
      check_call FFI.XLearnSetValidate(@handle, eval_set)
    else
      # build the validation matrix from eval_set itself, not from the
      # training rows and labels
      x_val, y_val = eval_set
      valid_set = DMatrix.new(x_val, label: y_val)
      check_call FFI.XLearnSetDMatrix(@handle, "validate", valid_set)
    end
  end

  # TODO unlink in finalizer
  @model_file = Tempfile.new("xlearn")
  check_call FFI.XLearnFit(@handle, @model_file.path)
end
|
45
|
+
|
46
|
+
# Generates predictions using the model file produced by fit or load_model.
#
# x        - path to a test file (String), or in-memory rows that are
#            wrapped in a DMatrix
# out_path - when given, predictions are written to this file instead of
#            being read back
#
# Returns an Array of Floats read from the native output buffer when
# out_path is nil; otherwise the return value is that of check_call.
# Raises Error ("Not trained") when no model file exists yet.
def predict(x, out_path: nil)
  # fail early with the same error save_model uses, rather than hitting a
  # NoMethodError on nil after the test data has already been set
  raise Error, "Not trained" unless @model_file

  if x.is_a?(String)
    check_call FFI.XLearnSetTest(@handle, x)
    check_call FFI.XLearnSetBool(@handle, "from_file", true)
  else
    test_set = DMatrix.new(x)
    check_call FFI.XLearnSetDMatrix(@handle, "test", test_set)
    check_call FFI.XLearnSetBool(@handle, "from_file", false)
  end

  if out_path
    check_call FFI.XLearnPredictForFile(@handle, @model_file.path, out_path)
  else
    # out parameters: number of predictions and pointer to the float array
    length = ::FFI::MemoryPointer.new(:uint64)
    out_arr = ::FFI::MemoryPointer.new(:pointer)
    check_call FFI.XLearnPredictForMat(@handle, @model_file.path, length, out_arr)
    out_arr.read_pointer.read_array_of_float(length.read_uint64)
  end
end
|
65
|
+
|
66
|
+
# Copies the current model file to +path+.
# Raises Error ("Not trained") when no model file exists yet.
def save_model(path)
  source = @model_file || raise(Error, "Not trained")
  FileUtils.cp(source.path, path)
end
|
70
|
+
|
71
|
+
# Copies the model stored at +path+ into this instance's temporary model
# file, creating that temporary file first if there is none yet.
def load_model(path)
  # TODO ensure tempfile is still cleaned up
  @model_file = Tempfile.new("xlearn") unless @model_file
  FileUtils.cp(path, @model_file.path)
end
|
76
|
+
|
77
|
+
# Builds the finalizer callback that frees the native handle behind
# +pointer+ via XLearnHandleFree.
def self.finalize(pointer)
  # must use proc instead of stabby lambda
  # (finalizers are invoked with the object id as an argument, which a
  # zero-argument lambda would reject)
  proc do
    FFI.XLearnHandleFree(pointer)
  end
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# Applies each parameter to the native handle, choosing the typed setter
# (string, float, int or bool) by parameter name.
#
# params - Hash of parameter name (anything responding to to_s) => value
#
# Raises ArgumentError for a name that is not in any of the lists below.
def set_params(params)
  params.each do |key, value|
    name = key.to_s
    status =
      if %w[task metric opt log].include?(name)
        FFI.XLearnSetStr(@handle, name, value)
      elsif %w[lr lambda init alpha beta lambda_1 lambda_2].include?(name)
        FFI.XLearnSetFloat(@handle, name, value)
      elsif %w[k epoch fold nthread block_size stop_window seed].include?(name)
        FFI.XLearnSetInt(@handle, name, value)
      elsif %w[quiet on_disk bin_out norm lock_free early_stop sign sigmoid].include?(name)
        FFI.XLearnSetBool(@handle, name, value)
      else
        raise ArgumentError, "Invalid parameter: #{name}"
      end
    check_call status
  end
end
|
103
|
+
end
|
104
|
+
end
|
data/lib/xlearn/utils.rb
ADDED
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xlearn
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Kane
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-10-12 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5'
|
69
|
+
description:
|
70
|
+
email: andrew@chartkick.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- CHANGELOG.md
|
76
|
+
- LICENSE.txt
|
77
|
+
- README.md
|
78
|
+
- lib/xlearn.rb
|
79
|
+
- lib/xlearn/dmatrix.rb
|
80
|
+
- lib/xlearn/ffi.rb
|
81
|
+
- lib/xlearn/ffm.rb
|
82
|
+
- lib/xlearn/fm.rb
|
83
|
+
- lib/xlearn/linear.rb
|
84
|
+
- lib/xlearn/model.rb
|
85
|
+
- lib/xlearn/utils.rb
|
86
|
+
- lib/xlearn/version.rb
|
87
|
+
homepage: https://github.com/ankane/xlearn
|
88
|
+
licenses:
|
89
|
+
- MIT
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '2.4'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubygems_version: 3.0.3
|
107
|
+
signing_key:
|
108
|
+
specification_version: 4
|
109
|
+
summary: xLearn - the high performance machine learning library - for Ruby
|
110
|
+
test_files: []
|