vowpalwabbit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: fd6a68e5d6103be4f6f58bd3fb6cb9785bd47382d7787b8d0ebc2c0336d87af3
4
+ data.tar.gz: 2f290dfdfe0539b4daf3e73dfbb39d04fa97e2f5c9aec08d43b0ee81db1d49b2
5
+ SHA512:
6
+ metadata.gz: 7e63904cf470666bbec00e9096e1701c1824af4b7790aa9d137a8a779eefb6d6833aadf4043f7d941e2f8395dfda532349bada33ffaa5cfa8461a593b9e80088
7
+ data.tar.gz: 0d6b47d922da2cfefb525f59e718e6f3be4c7e99033647f0f8814b9efde0095399b1b87c5922143cefeb8f0b7207ca4811b8c1c29505ea7b5af8cdc5c9910acc
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0
2
+
3
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,125 @@
1
+ # Vowpal Wabbit
2
+
3
+ [Vowpal Wabbit](https://vowpalwabbit.org) - fast online machine learning - for Ruby
4
+
5
+ :fire: Uses the C API for blazing performance
6
+
7
+ ## Installation
8
+
9
+ First, [install Vowpal Wabbit](https://vowpalwabbit.org/start.html). For Homebrew, use:
10
+
11
+ ```sh
12
+ brew install vowpal-wabbit
13
+ ```
14
+
15
+ Add this line to your application’s Gemfile:
16
+
17
+ ```ruby
18
+ gem 'vowpalwabbit'
19
+ ```
20
+
21
+ ## Getting Started
22
+
23
+ Prep your data
24
+
25
+ ```ruby
26
+ x = [[1, 2], [3, 4], [5, 6], [7, 8]]
27
+ y = [1, 2, 3, 4]
28
+ ```
29
+
30
+ Train a model
31
+
32
+ ```ruby
33
+ model = VowpalWabbit::Regressor.new(l: 100)
34
+ model.fit(x, y)
35
+ ```
36
+
37
+ Use `VowpalWabbit::Classifier` for classification and `VowpalWabbit::Model` for other models
38
+
39
+ Make predictions
40
+
41
+ ```ruby
42
+ model.predict(x)
43
+ ```
44
+
45
+ Save the model to a file
46
+
47
+ ```ruby
48
+ model.save_model("model.bin")
49
+ ```
50
+
51
+ Load the model from a file
52
+
53
+ ```ruby
54
+ model.load_model("model.bin")
55
+ ```
56
+
57
+ Train online
58
+
59
+ ```ruby
60
+ model.partial_fit(x, y)
61
+ ```
62
+
63
+ Get the intercept and coefficients
64
+
65
+ ```ruby
66
+ model.intercept
67
+ model.coefs
68
+ ```
69
+
70
+ Score - R-squared for regression and accuracy for classification
71
+
72
+ ```ruby
73
+ model.score(x, y)
74
+ ```
75
+
76
+ ## Parameters
77
+
78
+ Specify parameters
79
+
80
+ ```ruby
81
+ model = VowpalWabbit::Model.new(cb: 4)
82
+ ```
83
+
84
+ Supports the same parameters as the [CLI](https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Command-Line-Arguments)
85
+
86
+ ## Data
87
+
88
+ Data can be an array of arrays
89
+
90
+ ```ruby
91
+ [[1, 2, 3], [4, 5, 6]]
92
+ ```
93
+
94
+ Or an array of strings
95
+
96
+ ```ruby
97
+ [
98
+ "0 | price:.23 sqft:.25 age:.05 2006",
99
+ "1 2 'second_house | price:.18 sqft:.15 age:.35 1976",
100
+ "0 1 0.5 'third_house | price:.53 sqft:.32 age:.87 1924"
101
+ ]
102
+ ```
103
+
104
+ Or a path to a file
105
+
106
+ ```ruby
107
+ model.fit("train.txt")
108
+ model.predict("train.txt")
109
+ model.score("train.txt")
110
+ ```
111
+
112
+ Read more about the [input format](https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Input-format)
113
+
114
+ ## History
115
+
116
+ View the [changelog](https://github.com/ankane/vowpalwabbit/blob/master/CHANGELOG.md)
117
+
118
+ ## Contributing
119
+
120
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
121
+
122
+ - [Report bugs](https://github.com/ankane/vowpalwabbit/issues)
123
+ - Fix bugs and [submit pull requests](https://github.com/ankane/vowpalwabbit/pulls)
124
+ - Write, clarify, or fix documentation
125
+ - Suggest or add new features
@@ -0,0 +1,20 @@
1
+ # dependencies
2
+ require "ffi"
3
+
4
+ # modules
5
+ require "vowpalwabbit/model"
6
+ require "vowpalwabbit/classifier"
7
+ require "vowpalwabbit/regressor"
8
+ require "vowpalwabbit/version"
9
+
10
# Root namespace for the Vowpal Wabbit bindings.
module VowpalWabbit
  # Base error class for the library.
  Error = Class.new(StandardError)

  # Library name(s) handed to FFI when VowpalWabbit::FFI is first loaded.
  def self.ffi_lib
    @ffi_lib
  end

  # Override the library name(s) before VowpalWabbit::FFI is first referenced.
  def self.ffi_lib=(value)
    @ffi_lib = value
  end

  @ffi_lib = ["vw_c_wrapper"]

  # friendlier error message
  # (the bindings are loaded lazily, on first reference to VowpalWabbit::FFI)
  autoload :FFI, "vowpalwabbit/ffi"
end
@@ -0,0 +1,18 @@
1
module VowpalWabbit
  # Binary classifier built on Model. Trains with logistic loss unless the
  # caller overrides +loss_function+, and reports labels as 1 / -1.
  class Classifier < Model
    # @param params [Hash] Vowpal Wabbit options; an explicit +loss_function:+
    #   takes precedence over the logistic default
    def initialize(**params)
      # Forward as keywords: Model#initialize only accepts **params, and a
      # positional Hash is no longer converted to keywords on Ruby 3+.
      super(**{loss_function: "logistic"}.merge(params))
    end

    # Predicts a class label for each example.
    #
    # @param x [Array, String] examples, in any form Model#predict accepts
    # @return [Array<Integer>] 1 where the raw prediction is >= 0, else -1
    def predict(x)
      to_labels(super)
    end

    # Accuracy: the fraction of examples whose predicted label equals the
    # true label.
    #
    # @param x [Array, String] examples, or a path to a data file
    # @param y [Array, nil] true labels; when +x+ is a String and +y+ is
    #   omitted, labels are read from the file's examples
    # @return [Float]
    def score(x, y = nil)
      y_pred, y = predict_for_score(x, y)
      # Thresholding is idempotent, so it is safe whether predict_for_score
      # returned raw scores (file path) or labels from #predict (arrays).
      y_pred = to_labels(y_pred)
      y_pred.zip(y).count { |yp, yt| yp == yt } / y.count.to_f
    end

    private

    # Maps raw scores to class labels: >= 0 becomes 1, anything else -1.
    def to_labels(values)
      values.map { |v| v >= 0 ? 1 : -1 }
    end
  end
end
@@ -0,0 +1,41 @@
1
module VowpalWabbit
  # Low-level bindings to the Vowpal Wabbit C wrapper library.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib VowpalWabbit.ffi_lib
    rescue LoadError
      # Set VOWPALWABBIT_DEBUG to see the original loader error instead of
      # the friendlier message below.
      raise if ENV["VOWPALWABBIT_DEBUG"]
      raise LoadError, "Could not find Vowpal Wabbit"
    end

    # https://github.com/VowpalWabbit/vowpal_wabbit/blob/master/vowpalwabbit/vwdll.h
    # keep same order
    #
    # function name => [argument types, return type]
    {
      VW_InitializeA: [%i[string], :pointer],
      VW_InitializeWithModel: [%i[string pointer size_t], :pointer],
      VW_SeedWithModel: [%i[pointer string], :pointer],
      VW_Finish_Passes: [%i[pointer], :void],
      VW_Finish: [%i[pointer], :void],
      VW_ImportExample: [%i[pointer string pointer size_t], :pointer],
      VW_ReadExampleA: [%i[pointer string], :pointer],
      VW_StartParser: [%i[pointer], :void],
      VW_EndParser: [%i[pointer], :void],
      VW_GetExample: [%i[pointer], :pointer],
      VW_FinishExample: [%i[pointer pointer], :void],
      VW_GetLabel: [%i[pointer], :float],
      VW_GetFeatureNumber: [%i[pointer], :size_t],
      VW_GetFeatures: [%i[pointer pointer pointer], :pointer],
      VW_HashSpaceA: [%i[pointer string], :size_t],
      VW_Learn: [%i[pointer pointer], :float],
      VW_Predict: [%i[pointer pointer], :float],
      VW_PredictCostSensitive: [%i[pointer pointer], :float],
      VW_Get_Weight: [%i[pointer size_t size_t], :float],
      VW_Set_Weight: [%i[pointer size_t size_t float], :void],
      VW_Num_Weights: [%i[pointer], :size_t],
      VW_Get_Stride: [%i[pointer], :size_t],
      VW_SaveModel: [%i[pointer], :void],
      VW_CopyModelData: [%i[pointer pointer pointer pointer], :void],
      VW_FreeIOBuf: [%i[pointer], :void]
    }.each do |function_name, (arg_types, return_type)|
      attach_function function_name, arg_types, return_type
    end
  end
end
@@ -0,0 +1,140 @@
1
module VowpalWabbit
  # Generic wrapper around a native Vowpal Wabbit handle: builds the
  # command-line style parameter string, feeds examples to the C API and
  # exposes training, prediction, weights and model (de)serialization.
  class Model
    # @param params [Hash] Vowpal Wabbit options keyed by option name.
    #   +true+ emits a bare flag, +nil+/+false+ omits the option, any other
    #   value is emitted as "--name value" ("-n value" for one-letter names).
    #   +quiet+ defaults to true.
    def initialize(**params)
      # add strict parse once exceptions are handled properly
      # https://github.com/VowpalWabbit/vowpal_wabbit/issues/2004
      @params = {quiet: true}.merge(params)
    end

    # Trains from scratch: drops the current handle so a fresh one is
    # initialized, then learns from the given examples.
    #
    # @param x [Array, String] array of feature arrays, array of VW-format
    #   strings, or a path to a data file
    # @param y [Array, nil] labels; must be omitted when +x+ is a file path
    # @return [nil]
    def fit(x, y = nil)
      # NOTE(review): the previous native handle is dropped without being
      # finished (see the TODO on #handle).
      @handle = nil
      partial_fit(x, y)
    end

    # Learns from the given examples on top of the current model state.
    #
    # @param x [Array, String] see #fit
    # @param y [Array, nil] see #fit
    # @return [nil]
    def partial_fit(x, y = nil)
      each_example(x, y) do |example|
        FFI.VW_Learn(handle, example)
      end
      nil
    end

    # @param x [Array, String] see #fit
    # @return [Array<Float>] one raw prediction per example
    def predict(x)
      out = []
      each_example(x) do |example|
        out << predict_example(example)
      end
      out
    end

    # @return [Hash{Integer => Float}] non-zero weights keyed by weight index
    def coefs
      num_weights = FFI.VW_Num_Weights(handle)
      # each_with_object avoids building a throwaway array with one entry
      # per weight just to iterate.
      (0...num_weights).each_with_object({}) do |i, coefs|
        weight = FFI.VW_Get_Weight(handle, i, 0)
        coefs[i] = weight if weight != 0
      end
    end

    # Serializes the current model and writes it to +filename+ as binary.
    #
    # @param filename [String]
    # @return [nil]
    def save_model(filename)
      # out-parameters filled in by VW_CopyModelData
      buffer_handle = ::FFI::MemoryPointer.new(:pointer)
      output_data = ::FFI::MemoryPointer.new(:pointer)
      output_size = ::FFI::MemoryPointer.new(:size_t)
      FFI.VW_CopyModelData(handle, buffer_handle, output_data, output_size)
      # copy the bytes into a Ruby string before releasing the native buffer
      bin_str = output_data.read_pointer.read_string(output_size.read(:size_t))
      FFI.VW_FreeIOBuf(buffer_handle.read_pointer)
      File.binwrite(filename, bin_str)
      nil
    end

    # Replaces the current handle with one initialized from a saved model,
    # using this instance's parameters.
    #
    # @param filename [String]
    # @return [nil]
    def load_model(filename)
      bin_str = File.binread(filename)
      model_data = ::FFI::MemoryPointer.new(:char, bin_str.bytesize)
      model_data.put_bytes(0, bin_str)
      @handle = FFI.VW_InitializeWithModel(param_str, model_data, bin_str.bytesize)
      nil
    end

    private

    # Lazily initialized native handle.
    # TODO clean-up handle
    def handle
      @handle ||= FFI.VW_InitializeA(param_str)
    end

    # Builds the option string passed to the native initializers.
    #
    # @raise [ArgumentError] if any name or value contains whitespace
    def param_str
      args =
        @params.map do |k, v|
          check_param(k.to_s)
          check_param(v.to_s)

          if v == true
            "--#{k}"
          elsif !v
            nil
          elsif k.size == 1
            "-#{k} #{v}"
          else
            "--#{k} #{v}"
          end
        end
      args.compact.join(" ")
    end

    # Rejects whitespace so a single name or value cannot inject additional
    # options into the space-separated parameter string.
    def check_param(v)
      raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match?(v)
    end

    # Uses the cost-sensitive prediction call when the +cb+ option is set.
    def predict_example(example)
      if @params[:cb]
        FFI.VW_PredictCostSensitive(handle, example)
      else
        FFI.VW_Predict(handle, example)
      end
    end

    # get both in one pass for efficiency
    #
    # @return [Array(Array, Array)] predictions and true labels; for a file
    #   path without +y+ the labels are read from the parsed examples
    def predict_for_score(x, y)
      if x.is_a?(String) && !y
        y_pred = []
        y = []
        each_example(x) do |example|
          y_pred << predict_example(example)
          y << FFI.VW_GetLabel(example)
        end
        [y_pred, y]
      else
        [predict(x), y]
      end
    end

    # Parses each input line into a native example, yields it, and always
    # hands it back to Vowpal Wabbit afterwards.
    #
    # TODO support compressed files
    def each_example(x, y = nil)
      each_line(x, y) do |line|
        example = FFI.VW_ReadExampleA(handle, line)
        begin
          yield example
        ensure
          # release the example even if the block raises
          FFI.VW_FinishExample(handle, example)
        end
      end
    end

    # Yields one VW-format line per example.
    #
    # @raise [ArgumentError] if +y+ is given with a file path, or if +x+ and
    #   +y+ differ in size
    def each_line(x, y)
      if x.is_a?(String)
        raise ArgumentError, "Cannot pass y with file" if y

        File.foreach(x) do |line|
          yield line
        end
      else
        raise ArgumentError, "x and y must have same size" if y && x.size != y.size

        x.zip(y || []) do |xi, yi|
          if xi.is_a?(String)
            # already in VW input format
            yield xi
          else
            # "<label> 1 | <index>:<value> ..." with positional feature names
            yield "#{yi} 1 | #{xi.map.with_index { |v, i| "#{i}:#{v}" }.join(" ")}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module VowpalWabbit
  # Regression model built on Model.
  class Regressor < Model
    # Coefficient of determination (R-squared) of the predictions.
    #
    # @param x [Array, String] examples, or a path to a data file
    # @param y [Array, nil] true values; when +x+ is a String and +y+ is
    #   omitted, labels are read from the file's examples
    # @return [Float]
    def score(x, y = nil)
      y_pred, y = predict_for_score(x, y)

      # r2
      sse = y_pred.zip(y).sum { |yp, yt| (yp - yt) ** 2 }
      # Float division: with Integer labels, (sum ** 2) / size would truncate
      # and distort the total sum of squares.
      sst = y.sum { |yi| yi ** 2 } - (y.sum ** 2) / y.size.to_f
      1 - sse / sst
    end

    # Weight stored at index 116060.
    # NOTE(review): presumably the slot of Vowpal Wabbit's constant (bias)
    # feature under the default hashing settings - confirm before relying on
    # it with a non-default bit precision.
    #
    # @return [Float]
    def intercept
      FFI.VW_Get_Weight(handle, 116060, 0)
    end
  end
end
@@ -0,0 +1,3 @@
1
module VowpalWabbit
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vowpalwabbit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '5'
69
+ description:
70
+ email: andrew@chartkick.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - CHANGELOG.md
76
+ - LICENSE.txt
77
+ - README.md
78
+ - lib/vowpalwabbit.rb
79
+ - lib/vowpalwabbit/classifier.rb
80
+ - lib/vowpalwabbit/ffi.rb
81
+ - lib/vowpalwabbit/model.rb
82
+ - lib/vowpalwabbit/regressor.rb
83
+ - lib/vowpalwabbit/version.rb
84
+ homepage: https://github.com/ankane/vowpalwabbit
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '2.4'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubygems_version: 3.0.3
104
+ signing_key:
105
+ specification_version: 4
106
+ summary: Vowpal Wabbit - fast online machine learning - for Ruby
107
+ test_files: []