vowpalwabbit 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: fd6a68e5d6103be4f6f58bd3fb6cb9785bd47382d7787b8d0ebc2c0336d87af3
4
+ data.tar.gz: 2f290dfdfe0539b4daf3e73dfbb39d04fa97e2f5c9aec08d43b0ee81db1d49b2
5
+ SHA512:
6
+ metadata.gz: 7e63904cf470666bbec00e9096e1701c1824af4b7790aa9d137a8a779eefb6d6833aadf4043f7d941e2f8395dfda532349bada33ffaa5cfa8461a593b9e80088
7
+ data.tar.gz: 0d6b47d922da2cfefb525f59e718e6f3be4c7e99033647f0f8814b9efde0095399b1b87c5922143cefeb8f0b7207ca4811b8c1c29505ea7b5af8cdc5c9910acc
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0
2
+
3
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,125 @@
1
+ # Vowpal Wabbit
2
+
3
+ [Vowpal Wabbit](https://vowpalwabbit.org) - fast online machine learning - for Ruby
4
+
5
+ :fire: Uses the C API for blazing performance
6
+
7
+ ## Installation
8
+
9
+ First, [install Vowpal Wabbit](https://vowpalwabbit.org/start.html). For Homebrew, use:
10
+
11
+ ```sh
12
+ brew install vowpal-wabbit
13
+ ```
14
+
15
+ Add this line to your application’s Gemfile:
16
+
17
+ ```ruby
18
+ gem 'vowpalwabbit'
19
+ ```
20
+
21
+ ## Getting Started
22
+
23
+ Prep your data
24
+
25
+ ```ruby
26
+ x = [[1, 2], [3, 4], [5, 6], [7, 8]]
27
+ y = [1, 2, 3, 4]
28
+ ```
29
+
30
+ Train a model
31
+
32
+ ```ruby
33
+ model = VowpalWabbit::Regressor.new(l: 100)
34
+ model.fit(x, y)
35
+ ```
36
+
37
+ Use `VowpalWabbit::Classifier` for classification and `VowpalWabbit::Model` for other models
38
+
39
+ Make predictions
40
+
41
+ ```ruby
42
+ model.predict(x)
43
+ ```
44
+
45
+ Save the model to a file
46
+
47
+ ```ruby
48
+ model.save_model("model.bin")
49
+ ```
50
+
51
+ Load the model from a file
52
+
53
+ ```ruby
54
+ model.load_model("model.bin")
55
+ ```
56
+
57
+ Train online
58
+
59
+ ```ruby
60
+ model.partial_fit(x, y)
61
+ ```
62
+
63
+ Get the intercept and coefficients
64
+
65
+ ```ruby
66
+ model.intercept
67
+ model.coefs
68
+ ```
69
+
70
+ Score - R-squared for regression and accuracy for classification
71
+
72
+ ```ruby
73
+ model.score(x, y)
74
+ ```
75
+
76
+ ## Parameters
77
+
78
+ Specify parameters
79
+
80
+ ```ruby
81
+ model = VowpalWabbit::Model.new(cb: 4)
82
+ ```
83
+
84
+ Supports the same parameters as the [CLI](https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Command-Line-Arguments)
85
+
86
+ ## Data
87
+
88
+ Data can be an array of arrays
89
+
90
+ ```ruby
91
+ [[1, 2, 3], [4, 5, 6]]
92
+ ```
93
+
94
+ Or an array of strings
95
+
96
+ ```ruby
97
+ [
98
+ "0 | price:.23 sqft:.25 age:.05 2006",
99
+ "1 2 'second_house | price:.18 sqft:.15 age:.35 1976",
100
+ "0 1 0.5 'third_house | price:.53 sqft:.32 age:.87 1924"
101
+ ]
102
+ ```
103
+
104
+ Or a path to a file
105
+
106
+ ```ruby
107
+ model.fit("train.txt")
108
+ model.predict("train.txt")
109
+ model.score("train.txt")
110
+ ```
111
+
112
+ Read more about the [input format](https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Input-format)
113
+
114
+ ## History
115
+
116
+ View the [changelog](https://github.com/ankane/vowpalwabbit/blob/master/CHANGELOG.md)
117
+
118
+ ## Contributing
119
+
120
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
121
+
122
+ - [Report bugs](https://github.com/ankane/vowpalwabbit/issues)
123
+ - Fix bugs and [submit pull requests](https://github.com/ankane/vowpalwabbit/pulls)
124
+ - Write, clarify, or fix documentation
125
+ - Suggest or add new features
data/lib/vowpalwabbit.rb ADDED
@@ -0,0 +1,20 @@
1
+ # dependencies
2
+ require "ffi"
3
+
4
+ # modules
5
+ require "vowpalwabbit/model"
6
+ require "vowpalwabbit/classifier"
7
+ require "vowpalwabbit/regressor"
8
+ require "vowpalwabbit/version"
9
+
module VowpalWabbit
  # Base class for errors raised by this library.
  class Error < StandardError; end

  # Module-level reader/writer for the library name(s) that are handed to
  # `ffi_lib` when the native bindings are loaded.
  singleton_class.send(:attr_accessor, :ffi_lib)
  self.ffi_lib = ["vw_c_wrapper"]

  # friendlier error message
  # (the bindings are only required when VowpalWabbit::FFI is first referenced)
  autoload :FFI, "vowpalwabbit/ffi"
end
data/lib/vowpalwabbit/classifier.rb ADDED
@@ -0,0 +1,18 @@
module VowpalWabbit
  # Binary classifier built on Model. Uses logistic loss unless the caller
  # overrides `loss_function`, and reports predictions as 1 or -1.
  class Classifier < Model
    # @param params [Hash] model options, merged over the
    #   `loss_function: "logistic"` default (caller-supplied values win)
    def initialize(**params)
      # Forward as keywords: Model#initialize only takes **params, so passing
      # the merged Hash positionally raises ArgumentError on Ruby 3+.
      super(**{loss_function: "logistic"}.merge(params))
    end

    # @param x [Array, String] examples, in any form Model#predict accepts
    # @return [Array<Integer>] 1 for raw predictions >= 0, otherwise -1
    def predict(x)
      super.map { |v| label_for(v) }
    end

    # Accuracy: the fraction of examples whose predicted label equals the
    # true label.
    #
    # @param x [Array, String] examples
    # @param y [Array, nil] true labels (1 / -1); may be omitted when the
    #   labels can be read from x
    # @return [Float]
    def score(x, y = nil)
      y_pred, y = predict_for_score(x, y)
      # predict_for_score may hand back raw values or values already mapped
      # by #predict; label_for is idempotent on 1 / -1, so map unconditionally.
      hits = y_pred.zip(y).count { |yp, yt| label_for(yp) == yt }
      hits / y.count.to_f
    end

    private

    # Maps a raw prediction to its class label.
    def label_for(value)
      value >= 0 ? 1 : -1
    end
  end
end
data/lib/vowpalwabbit/ffi.rb ADDED
@@ -0,0 +1,41 @@
module VowpalWabbit
  # Native bindings. Loads the library named by VowpalWabbit.ffi_lib and
  # attaches one Ruby method per C function listed below.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib VowpalWabbit.ffi_lib
    rescue LoadError => load_error
      # Set VOWPALWABBIT_DEBUG to see the underlying loader error instead of
      # the short message.
      raise load_error if ENV["VOWPALWABBIT_DEBUG"]
      raise LoadError, "Could not find Vowpal Wabbit"
    end

    # https://github.com/VowpalWabbit/vowpal_wabbit/blob/master/vowpalwabbit/vwdll.h
    # keep same order
    #
    # Each row is [function name, argument types, return type].
    [
      [:VW_InitializeA, %i[string], :pointer],
      [:VW_InitializeWithModel, %i[string pointer size_t], :pointer],
      [:VW_SeedWithModel, %i[pointer string], :pointer],
      [:VW_Finish_Passes, %i[pointer], :void],
      [:VW_Finish, %i[pointer], :void],
      [:VW_ImportExample, %i[pointer string pointer size_t], :pointer],
      [:VW_ReadExampleA, %i[pointer string], :pointer],
      [:VW_StartParser, %i[pointer], :void],
      [:VW_EndParser, %i[pointer], :void],
      [:VW_GetExample, %i[pointer], :pointer],
      [:VW_FinishExample, %i[pointer pointer], :void],
      [:VW_GetLabel, %i[pointer], :float],
      [:VW_GetFeatureNumber, %i[pointer], :size_t],
      [:VW_GetFeatures, %i[pointer pointer pointer], :pointer],
      [:VW_HashSpaceA, %i[pointer string], :size_t],
      [:VW_Learn, %i[pointer pointer], :float],
      [:VW_Predict, %i[pointer pointer], :float],
      [:VW_PredictCostSensitive, %i[pointer pointer], :float],
      [:VW_Get_Weight, %i[pointer size_t size_t], :float],
      [:VW_Set_Weight, %i[pointer size_t size_t float], :void],
      [:VW_Num_Weights, %i[pointer], :size_t],
      [:VW_Get_Stride, %i[pointer], :size_t],
      [:VW_SaveModel, %i[pointer], :void],
      [:VW_CopyModelData, %i[pointer pointer pointer pointer], :void],
      [:VW_FreeIOBuf, %i[pointer], :void]
    ].each do |function_name, argument_types, return_type|
      attach_function function_name, argument_types, return_type
    end
  end
end
data/lib/vowpalwabbit/model.rb ADDED
@@ -0,0 +1,140 @@
module VowpalWabbit
  # Generic model that drives Vowpal Wabbit through the bindings in
  # VowpalWabbit::FFI. Constructor options are turned into a command-line
  # style argument string when the native handle is first needed.
  class Model
    # @param params [Hash] model options; `quiet: true` is the default and
    #   can be overridden. See #param_str for how values are rendered.
    def initialize(**params)
      # add strict parse once exceptions are handled properly
      # https://github.com/VowpalWabbit/vowpal_wabbit/issues/2004
      @params = {quiet: true}.merge(params)
    end

    # Trains from scratch: drops the current handle so a new one is created,
    # then learns from the given examples.
    #
    # @param x [Array<Array>, Array<String>, String] rows of feature values,
    #   pre-formatted example strings, or a path to a file of examples
    # @param y [Array, nil] labels for array rows; must be nil for a file
    # @return [nil]
    def fit(x, y = nil)
      @handle = nil
      partial_fit(x, y)
    end

    # Learns from the given examples using the existing handle (or a new one
    # if none exists yet). Arguments are the same as for #fit.
    #
    # @return [nil]
    def partial_fit(x, y = nil)
      each_example(x, y) do |example|
        FFI.VW_Learn(handle, example)
      end
      nil
    end

    # @param x [Array<Array>, Array<String>, String] examples (see #fit)
    # @return [Array<Float>] one prediction per example
    def predict(x)
      out = []
      each_example(x) { |example| out << predict_example(example) }
      out
    end

    # @return [Hash{Integer => Float}] non-zero weights keyed by weight index
    def coefs
      num_weights = FFI.VW_Num_Weights(handle)
      num_weights.times.each_with_object({}) do |i, coefs|
        weight = FFI.VW_Get_Weight(handle, i, 0)
        coefs[i] = weight if weight != 0
      end
    end

    # Copies the model data out of the native library and writes it to
    # `filename` as binary.
    #
    # @param filename [String]
    # @return [nil]
    def save_model(filename)
      buffer_handle = ::FFI::MemoryPointer.new(:pointer)
      output_data = ::FFI::MemoryPointer.new(:pointer)
      output_size = ::FFI::MemoryPointer.new(:size_t)
      FFI.VW_CopyModelData(handle, buffer_handle, output_data, output_size)
      # Copy the bytes into a Ruby string before the native buffer is freed.
      bin_str = output_data.read_pointer.read_string(output_size.read(:size_t))
      FFI.VW_FreeIOBuf(buffer_handle.read_pointer)
      File.binwrite(filename, bin_str)
      nil
    end

    # Replaces the current handle with one initialized from the model stored
    # in `filename`, using this instance's parameters.
    #
    # @param filename [String]
    # @return [nil]
    def load_model(filename)
      bin_str = File.binread(filename)
      model_data = ::FFI::MemoryPointer.new(:char, bin_str.bytesize)
      model_data.put_bytes(0, bin_str)
      @handle = FFI.VW_InitializeWithModel(param_str, model_data, bin_str.bytesize)
      nil
    end

    private

    # Native handle, created on first use.
    # TODO clean-up handle
    def handle
      @handle ||= FFI.VW_InitializeA(param_str)
    end

    # Renders @params as an argument string. Single-character names get a
    # single dash, longer names a double dash; `true` yields a bare flag,
    # `false`/`nil` yields nothing, anything else yields "flag value".
    #
    # @raise [ArgumentError] if a name or value contains whitespace
    def param_str
      args =
        @params.map do |k, v|
          check_param(k.to_s)
          check_param(v.to_s)

          next unless v

          # Use the short form for one-letter names in the boolean case too
          # (previously `t: true` produced "--t").
          flag = "#{k.to_s.size == 1 ? '-' : '--'}#{k}"
          v == true ? flag : "#{flag} #{v}"
        end
      args.compact.join(" ")
    end

    # Rejects whitespace so one option cannot smuggle in extra arguments.
    def check_param(v)
      raise ArgumentError, "Invalid parameter" if v.match?(/[[:space:]]/)
    end

    # Uses VW_PredictCostSensitive when the `cb` option is set, otherwise
    # VW_Predict.
    def predict_example(example)
      if @params[:cb]
        FFI.VW_PredictCostSensitive(handle, example)
      else
        FFI.VW_Predict(handle, example)
      end
    end

    # Returns [predictions, labels] for scoring.
    #
    # When y is given it is returned as-is next to predict(x). When y is
    # omitted, the labels are read from the parsed examples in the same pass
    # as the predictions; that only makes sense for a file path or an array
    # of pre-formatted example strings.
    #
    # @raise [ArgumentError] if y is omitted and x holds rows of raw values,
    #   which carry no labels (previously this surfaced later as a TypeError)
    def predict_for_score(x, y)
      return [predict(x), y] if y

      labeled = x.is_a?(String) || x.all? { |xi| xi.is_a?(String) }
      raise ArgumentError, "y is required unless x holds labeled examples" unless labeled

      # get both in one pass for efficiency
      y_pred = []
      y = []
      each_example(x) do |example|
        y_pred << predict_example(example)
        y << FFI.VW_GetLabel(example)
      end
      [y_pred, y]
    end

    # Parses each input line into a native example, yields it, and always
    # hands it back to the library afterwards.
    # TODO support compressed files
    def each_example(x, y = nil)
      each_line(x, y) do |line|
        example = FFI.VW_ReadExampleA(handle, line)
        begin
          yield example
        ensure
          # Release the example even if the block raises or breaks.
          FFI.VW_FinishExample(handle, example)
        end
      end
    end

    # Yields one example string per input item.
    #
    # A String x is treated as a file path and its lines are yielded. For an
    # array, String elements are yielded unchanged and other elements are
    # rendered as "<label> 1 | 0:<v0> 1:<v1> ...".
    # NOTE(review): the literal "1" after the label is presumably the example
    # importance weight in VW's input format; confirm.
    #
    # @raise [ArgumentError] if y is given with a file, or x and y differ in size
    def each_line(x, y)
      if x.is_a?(String)
        raise ArgumentError, "Cannot pass y with file" if y

        File.foreach(x) { |line| yield line }
      else
        raise ArgumentError, "x and y must have same size" if y && x.size != y.size

        x.zip(y || []) do |xi, yi|
          if xi.is_a?(String)
            yield xi
          else
            features = xi.map.with_index { |v, i| "#{i}:#{v}" }.join(" ")
            yield "#{yi} 1 | #{features}"
          end
        end
      end
    end
  end
end
data/lib/vowpalwabbit/regressor.rb ADDED
@@ -0,0 +1,16 @@
module VowpalWabbit
  # Regression model built on Model.
  class Regressor < Model
    # Weight index read by #intercept.
    # NOTE(review): looks like Vowpal Wabbit's constant-feature hash masked to
    # the default 18-bit weight table; if so it would not apply when a
    # different bit precision is configured. Confirm.
    INTERCEPT_WEIGHT_INDEX = 116_060

    # Coefficient of determination (R-squared) of the predictions.
    #
    # @param x [Array, String] examples
    # @param y [Array<Numeric>, nil] true values; may be omitted when the
    #   labels can be read from x
    # @return [Float] 1 - SSE / SST
    def score(x, y = nil)
      y_pred, y = predict_for_score(x, y)

      # r2
      sse = y_pred.zip(y).sum(0.0) { |yp, yt| (yp - yt)**2 }
      # Float mean: with Integer labels the previous (sum ** 2) / size used
      # integer division and truncated the total sum of squares.
      mean = y.sum / y.size.to_f
      sst = y.sum(0.0) { |yi| (yi - mean)**2 }
      1 - sse / sst
    end

    # @return [Float] the weight stored at INTERCEPT_WEIGHT_INDEX
    def intercept
      FFI.VW_Get_Weight(handle, INTERCEPT_WEIGHT_INDEX, 0)
    end
  end
end
data/lib/vowpalwabbit/version.rb ADDED
@@ -0,0 +1,3 @@
module VowpalWabbit
  # Release version of the gem.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vowpalwabbit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '5'
69
+ description:
70
+ email: andrew@chartkick.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - CHANGELOG.md
76
+ - LICENSE.txt
77
+ - README.md
78
+ - lib/vowpalwabbit.rb
79
+ - lib/vowpalwabbit/classifier.rb
80
+ - lib/vowpalwabbit/ffi.rb
81
+ - lib/vowpalwabbit/model.rb
82
+ - lib/vowpalwabbit/regressor.rb
83
+ - lib/vowpalwabbit/version.rb
84
+ homepage: https://github.com/ankane/vowpalwabbit
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '2.4'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubygems_version: 3.0.3
104
+ signing_key:
105
+ specification_version: 4
106
+ summary: Vowpal Wabbit - fast online machine learning - for Ruby
107
+ test_files: []