vowpalwabbit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +125 -0
- data/lib/vowpalwabbit.rb +20 -0
- data/lib/vowpalwabbit/classifier.rb +18 -0
- data/lib/vowpalwabbit/ffi.rb +41 -0
- data/lib/vowpalwabbit/model.rb +140 -0
- data/lib/vowpalwabbit/regressor.rb +16 -0
- data/lib/vowpalwabbit/version.rb +3 -0
- metadata +107 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: fd6a68e5d6103be4f6f58bd3fb6cb9785bd47382d7787b8d0ebc2c0336d87af3
|
4
|
+
data.tar.gz: 2f290dfdfe0539b4daf3e73dfbb39d04fa97e2f5c9aec08d43b0ee81db1d49b2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7e63904cf470666bbec00e9096e1701c1824af4b7790aa9d137a8a779eefb6d6833aadf4043f7d941e2f8395dfda532349bada33ffaa5cfa8461a593b9e80088
|
7
|
+
data.tar.gz: 0d6b47d922da2cfefb525f59e718e6f3be4c7e99033647f0f8814b9efde0095399b1b87c5922143cefeb8f0b7207ca4811b8c1c29505ea7b5af8cdc5c9910acc
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
# Vowpal Wabbit
|
2
|
+
|
3
|
+
[Vowpal Wabbit](https://vowpalwabbit.org) - fast online machine learning - for Ruby
|
4
|
+
|
5
|
+
:fire: Uses the C API for blazing performance
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
First, [install Vowpal Wabbit](https://vowpalwabbit.org/start.html). For Homebrew, use:
|
10
|
+
|
11
|
+
```sh
|
12
|
+
brew install vowpal-wabbit
|
13
|
+
```
|
14
|
+
|
15
|
+
Add this line to your application’s Gemfile:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'vowpalwabbit'
|
19
|
+
```
|
20
|
+
|
21
|
+
## Getting Started
|
22
|
+
|
23
|
+
Prep your data
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
|
27
|
+
y = [1, 2, 3, 4]
|
28
|
+
```
|
29
|
+
|
30
|
+
Train a model
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
model = VowpalWabbit::Regressor.new(l: 100)
|
34
|
+
model.fit(x, y)
|
35
|
+
```
|
36
|
+
|
37
|
+
Use `VowpalWabbit::Classifier` for classification and `VowpalWabbit::Model` for other models
|
38
|
+
|
39
|
+
Make predictions
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
model.predict(x)
|
43
|
+
```
|
44
|
+
|
45
|
+
Save the model to a file
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
model.save_model("model.bin")
|
49
|
+
```
|
50
|
+
|
51
|
+
Load the model from a file
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
model.load_model("model.bin")
|
55
|
+
```
|
56
|
+
|
57
|
+
Train online
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
model.partial_fit(x, y)
|
61
|
+
```
|
62
|
+
|
63
|
+
Get the intercept and coefficients
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
model.intercept
|
67
|
+
model.coefs
|
68
|
+
```
|
69
|
+
|
70
|
+
Score - R-squared for regression and accuracy for classification
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
model.score(x, y)
|
74
|
+
```
|
75
|
+
|
76
|
+
## Parameters
|
77
|
+
|
78
|
+
Specify parameters
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
model = VowpalWabbit::Model.new(cb: 4)
|
82
|
+
```
|
83
|
+
|
84
|
+
Supports the same parameters as the [CLI](https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Command-Line-Arguments)
|
85
|
+
|
86
|
+
## Data
|
87
|
+
|
88
|
+
Data can be an array of arrays
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
[[1, 2, 3], [4, 5, 6]]
|
92
|
+
```
|
93
|
+
|
94
|
+
Or an array of strings
|
95
|
+
|
96
|
+
```ruby
|
97
|
+
[
|
98
|
+
"0 | price:.23 sqft:.25 age:.05 2006",
|
99
|
+
"1 2 'second_house | price:.18 sqft:.15 age:.35 1976",
|
100
|
+
"0 1 0.5 'third_house | price:.53 sqft:.32 age:.87 1924"
|
101
|
+
]
|
102
|
+
```
|
103
|
+
|
104
|
+
Or a path to a file
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
model.fit("train.txt")
|
108
|
+
model.predict("train.txt")
|
109
|
+
model.score("train.txt")
|
110
|
+
```
|
111
|
+
|
112
|
+
Read more about the [input format](https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Input-format)
|
113
|
+
|
114
|
+
## History
|
115
|
+
|
116
|
+
View the [changelog](https://github.com/ankane/vowpalwabbit/blob/master/CHANGELOG.md)
|
117
|
+
|
118
|
+
## Contributing
|
119
|
+
|
120
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
121
|
+
|
122
|
+
- [Report bugs](https://github.com/ankane/vowpalwabbit/issues)
|
123
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/vowpalwabbit/pulls)
|
124
|
+
- Write, clarify, or fix documentation
|
125
|
+
- Suggest or add new features
|
data/lib/vowpalwabbit.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# dependencies
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "vowpalwabbit/model"
|
6
|
+
require "vowpalwabbit/classifier"
|
7
|
+
require "vowpalwabbit/regressor"
|
8
|
+
require "vowpalwabbit/version"
|
9
|
+
|
10
|
+
# Top-level namespace for the Vowpal Wabbit Ruby bindings.
module VowpalWabbit
  # Library-specific error class.
  class Error < StandardError
  end

  # Library name(s) passed to `ffi_lib` when VowpalWabbit::FFI is loaded.
  # Reassign via VowpalWabbit.ffi_lib= before the bindings are first used
  # to point at a different shared library.
  @ffi_lib = ["vw_c_wrapper"]

  # @return [Array<String>] the configured library name(s)
  def self.ffi_lib
    @ffi_lib
  end

  # @param value [Array<String>] library name(s) or path(s) to load
  def self.ffi_lib=(value)
    @ffi_lib = value
  end

  # friendlier error message
  # (the bindings are autoloaded, so the native library is only looked up
  # when VowpalWabbit::FFI is first referenced)
  autoload :FFI, "vowpalwabbit/ffi"
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module VowpalWabbit
|
2
|
+
class Classifier < Model
|
3
|
+
# Builds a classifier that defaults to the logistic loss function.
#
# @param params [Hash] extra options forwarded to the parent initializer;
#   a caller-supplied :loss_function overrides the default.
def initialize(**params)
  # Double-splat the merged hash: the parent takes keyword arguments only,
  # and since Ruby 3.0 a positional Hash is no longer converted to
  # keywords (it would raise ArgumentError).
  super(**{loss_function: "logistic"}.merge(params))
end
|
6
|
+
|
7
|
+
# Predicts a class label for each example.
#
# @param x the examples, passed unchanged to the parent implementation
# @return [Array<Integer>] 1 where the parent's prediction is >= 0,
#   otherwise -1
def predict(x)
  scores = super(x)
  scores.map do |score|
    if score >= 0
      1
    else
      -1
    end
  end
end
|
11
|
+
|
12
|
+
# Computes accuracy: the fraction of examples whose predicted label
# equals the true label.
#
# @param x the examples (or a file path, in which case y is omitted)
# @param y the true labels, when x is not a file
# @return [Float] matches divided by the number of labels
def score(x, y = nil)
  predicted, actual = predict_for_score(x, y)
  # Threshold in place; predictions from the file path are still raw.
  predicted.map! do |value|
    if value >= 0
      1
    else
      -1
    end
  end
  matches = predicted.zip(actual).count { |guess, truth| guess == truth }
  matches / actual.count.to_f
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module VowpalWabbit
  # FFI bindings for the Vowpal Wabbit C wrapper library.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib VowpalWabbit.ffi_lib
    rescue LoadError
      # With VOWPALWABBIT_DEBUG set, re-raise the loader's own error;
      # otherwise replace it with a short message.
      raise if ENV["VOWPALWABBIT_DEBUG"]
      raise LoadError, "Could not find Vowpal Wabbit"
    end

    # https://github.com/VowpalWabbit/vowpal_wabbit/blob/master/vowpalwabbit/vwdll.h
    # keep same order
    #
    # Each row is [function name, argument types, return type].
    [
      [:VW_InitializeA, %i[string], :pointer],
      [:VW_InitializeWithModel, %i[string pointer size_t], :pointer],
      [:VW_SeedWithModel, %i[pointer string], :pointer],
      [:VW_Finish_Passes, %i[pointer], :void],
      [:VW_Finish, %i[pointer], :void],
      [:VW_ImportExample, %i[pointer string pointer size_t], :pointer],
      [:VW_ReadExampleA, %i[pointer string], :pointer],
      [:VW_StartParser, %i[pointer], :void],
      [:VW_EndParser, %i[pointer], :void],
      [:VW_GetExample, %i[pointer], :pointer],
      [:VW_FinishExample, %i[pointer pointer], :void],
      [:VW_GetLabel, %i[pointer], :float],
      [:VW_GetFeatureNumber, %i[pointer], :size_t],
      [:VW_GetFeatures, %i[pointer pointer pointer], :pointer],
      [:VW_HashSpaceA, %i[pointer string], :size_t],
      [:VW_Learn, %i[pointer pointer], :float],
      [:VW_Predict, %i[pointer pointer], :float],
      [:VW_PredictCostSensitive, %i[pointer pointer], :float],
      [:VW_Get_Weight, %i[pointer size_t size_t], :float],
      [:VW_Set_Weight, %i[pointer size_t size_t float], :void],
      [:VW_Num_Weights, %i[pointer], :size_t],
      [:VW_Get_Stride, %i[pointer], :size_t],
      [:VW_SaveModel, %i[pointer], :void],
      [:VW_CopyModelData, %i[pointer pointer pointer pointer], :void],
      [:VW_FreeIOBuf, %i[pointer], :void]
    ].each do |function_name, arg_types, return_type|
      attach_function function_name, arg_types, return_type
    end
  end
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
module VowpalWabbit
  # Wrapper around a Vowpal Wabbit instance driven through the bindings in
  # VowpalWabbit::FFI. The native instance is created lazily, the first
  # time a handle is needed, from the parameters given to the constructor.
  class Model
    # @param params [Hash] options rendered into an argument string by
    #   #param_str; `quiet: true` is applied unless overridden.
    def initialize(**params)
      # add strict parse once exceptions are handled properly
      # https://github.com/VowpalWabbit/vowpal_wabbit/issues/2004
      @params = {quiet: true}.merge(params)
    end

    # Trains from scratch: forgets the current handle so a fresh instance
    # is initialized, then learns from the data.
    #
    # @param x [Array, String] rows, example strings, or a file path
    # @param y [Array, nil] labels (not allowed when x is a file path)
    # @return [nil]
    def fit(x, y = nil)
      @handle = nil
      partial_fit(x, y)
    end

    # Learns from the data using the existing handle (created on demand),
    # so repeated calls keep updating the same instance.
    #
    # @param x [Array, String] rows, example strings, or a file path
    # @param y [Array, nil] labels (not allowed when x is a file path)
    # @return [nil]
    def partial_fit(x, y = nil)
      each_example(x, y) do |example|
        FFI.VW_Learn(handle, example)
      end
      nil
    end

    # @param x [Array, String] rows, example strings, or a file path
    # @return [Array] one prediction per example, in input order
    def predict(x)
      out = []
      each_example(x) { |example| out << predict_example(example) }
      out
    end

    # @return [Hash{Integer => Float}] non-zero weights keyed by index
    def coefs
      num_weights = FFI.VW_Num_Weights(handle)
      num_weights.times.each_with_object({}) do |i, found|
        weight = FFI.VW_Get_Weight(handle, i, 0)
        found[i] = weight if weight != 0
      end
    end

    # Copies the model data out of the native instance and writes it to
    # +filename+ in binary mode.
    #
    # @param filename [String] destination path
    # @return [nil]
    def save_model(filename)
      buffer_handle = ::FFI::MemoryPointer.new(:pointer)
      output_data = ::FFI::MemoryPointer.new(:pointer)
      output_size = ::FFI::MemoryPointer.new(:size_t)
      FFI.VW_CopyModelData(handle, buffer_handle, output_data, output_size)
      # Copy the bytes into a Ruby string before releasing the native buffer.
      bin_str = output_data.read_pointer.read_string(output_size.read(:size_t))
      FFI.VW_FreeIOBuf(buffer_handle.read_pointer)
      File.binwrite(filename, bin_str)
      nil
    end

    # Replaces the current handle with an instance initialized from the
    # model stored in +filename+, using this object's parameters.
    #
    # @param filename [String] path to a saved model
    # @return [nil]
    def load_model(filename)
      bin_str = File.binread(filename)
      model_data = ::FFI::MemoryPointer.new(:char, bin_str.bytesize)
      model_data.put_bytes(0, bin_str)
      @handle = FFI.VW_InitializeWithModel(param_str, model_data, bin_str.bytesize)
      nil
    end

    private

    # TODO clean-up handle
    # Lazily initializes and memoizes the native instance.
    def handle
      @handle ||= FFI.VW_InitializeA(param_str)
    end

    # Renders @params as an argument string: `true` becomes a bare
    # `--key`, false/nil values are omitted, single-character keys get a
    # single dash, everything else becomes `--key value`.
    #
    # @raise [ArgumentError] if any key or value contains whitespace
    def param_str
      args =
        @params.map do |k, v|
          check_param(k.to_s)
          check_param(v.to_s)

          if v == true
            "--#{k}"
          elsif !v
            nil
          elsif k.size == 1
            "-#{k} #{v}"
          else
            "--#{k} #{v}"
          end
        end
      args.compact.join(" ")
    end

    # Rejects whitespace so one parameter cannot expand into several
    # arguments once joined into the argument string.
    def check_param(v)
      raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match?(v)
    end

    # Uses the cost-sensitive prediction call when the :cb parameter is
    # set, and the plain prediction call otherwise.
    def predict_example(example)
      if @params[:cb]
        FFI.VW_PredictCostSensitive(handle, example)
      else
        FFI.VW_Predict(handle, example)
      end
    end

    # get both in one pass for efficiency
    #
    # @return [Array(Array, Array)] predictions and labels; for a file
    #   path without y the labels are read from each parsed example,
    #   otherwise y is returned as given.
    def predict_for_score(x, y)
      if x.is_a?(String) && !y
        y_pred = []
        y = []
        each_example(x) do |example|
          y_pred << predict_example(example)
          y << FFI.VW_GetLabel(example)
        end
        [y_pred, y]
      else
        [predict(x), y]
      end
    end

    # TODO support compressed files
    # Parses each input line into a native example, yields it, and then
    # hands it back to the library.
    def each_example(x, y = nil)
      each_line(x, y) do |line|
        example = FFI.VW_ReadExampleA(handle, line)
        begin
          yield example
        ensure
          # Finish the example even when the caller's block raises, so it
          # is not left outstanding.
          FFI.VW_FinishExample(handle, example)
        end
      end
    end

    # Yields one input line per example. A String x is treated as a file
    # path and read line by line; otherwise each element of x is yielded
    # as-is when it is a String, or formatted as
    # "<label> 1 | 0:<v0> 1:<v1> ..." when it is a row of values.
    #
    # @raise [ArgumentError] if y is given with a file path, or if x and
    #   y differ in size
    def each_line(x, y)
      if x.is_a?(String)
        raise ArgumentError, "Cannot pass y with file" if y

        File.foreach(x) do |line|
          yield line
        end
      else
        raise ArgumentError, "x and y must have same size" if y && x.size != y.size

        x.zip(y || []) do |xi, yi|
          if xi.is_a?(String)
            yield xi
          else
            yield "#{yi} 1 | #{xi.map.with_index { |v, i| "#{i}:#{v}" }.join(" ")}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module VowpalWabbit
|
2
|
+
class Regressor < Model
|
3
|
+
# Computes the coefficient of determination (R-squared) of the
# predictions against the true labels.
#
# @param x the examples (or a file path, in which case y is omitted)
# @param y the true labels, when x is not a file
# @return [Float] 1 - SSE / SST
def score(x, y = nil)
  y_pred, y = predict_for_score(x, y)

  # r2
  sse = y_pred.zip(y).sum { |yp, yt| (yp - yt) ** 2 }
  # Divide as floats: with Integer labels, Integer division would
  # truncate the mean term and skew the total sum of squares.
  sst = y.sum { |yi| yi ** 2 } - (y.sum ** 2) / y.size.to_f
  1 - sse / sst
end
|
11
|
+
|
12
|
+
# Returns the model intercept, read as the weight at a fixed index.
#
# @return [Float]
def intercept
  # NOTE(review): 116060 looks like VW's constant-feature hash under the
  # default 18-bit weight table; confirm, and check whether it still holds
  # when a different bit precision is passed.
  constant_index = 116060
  weight_offset = 0
  FFI.VW_Get_Weight(handle, constant_index, weight_offset)
end
|
15
|
+
end
|
16
|
+
end
|
metadata
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vowpalwabbit
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Kane
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-10-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5'
|
69
|
+
description:
|
70
|
+
email: andrew@chartkick.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- CHANGELOG.md
|
76
|
+
- LICENSE.txt
|
77
|
+
- README.md
|
78
|
+
- lib/vowpalwabbit.rb
|
79
|
+
- lib/vowpalwabbit/classifier.rb
|
80
|
+
- lib/vowpalwabbit/ffi.rb
|
81
|
+
- lib/vowpalwabbit/model.rb
|
82
|
+
- lib/vowpalwabbit/regressor.rb
|
83
|
+
- lib/vowpalwabbit/version.rb
|
84
|
+
homepage: https://github.com/ankane/vowpalwabbit
|
85
|
+
licenses:
|
86
|
+
- MIT
|
87
|
+
metadata: {}
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '2.4'
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubygems_version: 3.0.3
|
104
|
+
signing_key:
|
105
|
+
specification_version: 4
|
106
|
+
summary: Vowpal Wabbit - fast online machine learning - for Ruby
|
107
|
+
test_files: []
|