rubyml 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/rubyml.rb +85 -0
- data/lib/rubyml/linear_regression.rb +34 -0
- data/lib/rubyml/perceptron.rb +64 -0
- data/lib/rubyml/tools.rb +104 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3be8f5b08589ad4973b15a9c83bc11d6522504c5
|
4
|
+
data.tar.gz: 9f1a58d71e2932417c07634b4dd4f74a2ce3dc1c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5b9e3d4b883391569c5006283d426fa5282c7f19b66095734186c6ddbd9ac050a17339579f213c15db9b49d461d8984fee55c1b41756662456b29a3b218c2237
|
7
|
+
data.tar.gz: 28e7a2562e961ad518e6e15b6eece272dc3d156b1dff4cf833bf535de3dc7b9ac78c8af2311632376f58a7d1c780b0aa02226151438eb04a2506b287da12299c
|
data/lib/rubyml.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'rubyml/tools'
|
2
|
+
require 'rubyml/linear_regression'
|
3
|
+
require 'rubyml/perceptron'
|
4
|
+
|
5
|
+
# Monkey patching the Matrix class to
|
6
|
+
# implement matrix splicing.
|
7
|
+
class Matrix
  alias old_element element

  # Extended element access with NumPy-style splicing.
  # An Integer paired with ':' returns a whole row/column as a Vector;
  # String indices such as '1:3' select a sub-matrix; a plain pair of
  # Integers falls through to the stock element lookup.
  # FIX: the original tested `i.class == Fixnum`; Fixnum was removed in
  # Ruby 3.2, so splicing raised NameError — Integer is the supported check.
  def [](i, j)
    if i.is_a?(Integer) && j == ':'
      row(i)
    elsif j.is_a?(Integer) && i == ':'
      column(j)
    else
      redirect(i, j)
    end
  end

  # Routes String/String index pairs into the splicing pipeline; any
  # other combination is a plain element lookup.
  def redirect(i, j)
    if i.is_a?(String) && j.is_a?(String)
      redirect2(i, j)
    else
      old_element(i, j)
    end
  end

  # Splices only when at least one index contains ':'; otherwise treats
  # the strings as ordinary element coordinates.
  def redirect2(i, j)
    if i.include?(':') || j.include?(':')
      redirect3(i, j)
    else
      old_element(i, j)
    end
  end

  # Parses 'start:end' strings into integer bounds. An empty component
  # (as in ':3') becomes 0; a missing end (as in '3:') yields nil and is
  # resolved later. Integer() raises on malformed components.
  def redirect3(i, j)
    rs, re = i.split(':').map { |e| e == '' ? 0 : Integer(e) }
    cs, ce = j.split(':').map { |e| e == '' ? 0 : Integer(e) }
    redirect4(rs, re, cs, ce, [i, j])
  end

  # A bare ':' index selects the entire row/column range.
  def redirect4(rs, re, cs, ce, orig)
    if orig[0] == ':'
      rs = 0
      re = row_count
    end
    if orig[1] == ':'
      cs = 0
      ce = column_count
    end
    redirect5(rs, re, cs, ce, orig)
  end

  # An index without ':' (e.g. '2') means a single-row/column slice.
  def redirect5(rs, re, cs, ce, orig)
    re = rs + 1 unless orig[0].include?(':')
    ce = cs + 1 unless orig[1].include?(':')
    redirect6(rs, re, cs, ce)
  end

  # Fills any still-unresolved bounds with the matrix extents.
  # (Bounds are Integers or nil, never false, so ||= is equivalent to
  # the original nil? ternaries.)
  def redirect6(rs, re, cs, ce)
    rs ||= 0
    cs ||= 0
    re ||= row_count
    ce ||= column_count
    redirect7(rs, re, cs, ce)
  end

  # Degenerate (empty) ranges produce appropriately shaped empty matrices.
  def redirect7(rs, re, cs, ce)
    return Matrix.rows([]) if rs >= re && cs >= ce
    return Matrix.rows([[]] * (re - rs)) if cs == ce
    return Matrix.columns([[]] * (ce - cs)) if re == rs
    redirect8(rs, re, cs, ce)
  end

  # Materializes the sub-matrix rows[rs...re] x columns[cs...ce].
  def redirect8(rs, re, cs, ce)
    rv = row_vectors[rs..re - 1].map(&:to_a)
    nrv = rv.map { |e| e[cs..ce - 1] }
    Matrix.rows(nrv)
  end
end
|
80
|
+
|
81
|
+
# A general class to allow access to
|
82
|
+
# data manipulation tools.
|
83
|
+
class RubyML
  # Exposes the Tools::DataMethods helpers (load_data, separate_data,
  # mat_to_array, bias_trick and the plotting helpers) as instance
  # methods, so data manipulation is available without instantiating a
  # specific classifier.
  include Tools::DataMethods
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rubyml/tools'
|
2
|
+
|
3
|
+
# The linear regression class with
|
4
|
+
# customizable number of folds for
|
5
|
+
# K-fold cross validation.
|
6
|
+
class LinearRegression
  include Tools::DataMethods
  include Tools::ClassifierMethods
  attr_reader :theta, :accuracy, :precision, :folds

  # @param precision [Integer] decimal places kept on coefficients and
  #   predictions
  # @param folds [Integer] number of folds for K-fold cross validation
  def initialize(precision = 3, folds = 5)
    @precision = precision
    @epsilon = 2.0 # tolerance used by ClassifierMethods#handle_epsilon
    @folds = folds
  end

  # Fits ordinary least squares via the normal equation
  # (X^T X)^-1 X^T y and stores the rounded coefficients in @theta.
  # NOTE(review): Matrix#inv raises when X^T X is singular — verify
  # inputs are full-rank.
  def fit(x, y)
    design = bias_trick(x)
    solution = (design.t * design).inv * design.t * y
    @theta = solution.collect { |coef| coef.round(@precision) }
  end

  # Returns predicted targets for x, rounded to the configured precision.
  def predict(x)
    (bias_trick(x) * @theta).collect { |pred| pred.round(@precision) }
  end

  # Plots the data points together with the fitted line.
  def visualize(x, y)
    plot_function(mat_to_array(x), mat_to_array(y), mat_to_array(@theta))
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'rubyml/tools'
|
2
|
+
|
3
|
+
# The multiclass perceptron class with
|
4
|
+
# customizable number of iterations and folds.
|
5
|
+
class Perceptron
  include Tools::DataMethods
  include Tools::ClassifierMethods

  attr_reader :iterations, :folds, :labels, :weights

  # @param iterations [Integer] training passes over the data
  # @param folds [Integer] number of folds for K-fold cross validation
  def initialize(iterations = 100, folds = 5)
    @iterations = iterations
    @epsilon = nil # nil => exact label match in ClassifierMethods#handle_epsilon
    @folds = folds
    @labels = []
    @weights = {}
  end

  # Records the distinct labels found in y and gives each one a weight
  # hash defaulting to 0 (keys are feature-column indices).
  def setup_weights(y)
    @labels = mat_to_array(y).uniq
    @labels.each { |lbl| @weights[lbl] = Hash.new(0) }
  end

  # Moves weight mass w on feature column c from the wrongly guessed
  # label to the true one.
  def update_weights(guess, real, c, w)
    @weights[guess][c] -= w
    @weights[real][c] += w
  end

  # Trains the multiclass perceptron on feature matrix x and label
  # column y. cs: true resets labels/weights before training so repeated
  # fits start cold.
  def fit(x, y, cs = true)
    cold_start if cs
    setup_weights(y)
    @iterations.times do
      x.row_count.times do |r|
        guess = get_best_guess(x, r)
        next if y[r, 0] == guess
        x.column_count.times { |c| update_weights(guess, y[r, 0], c, x[r, c]) }
      end
    end
  end

  # Returns a one-column Matrix of predicted labels, one per row of x.
  def predict(x)
    preds = Array.new(x.row_count) { |r| get_best_guess(x, r) }
    Matrix.columns([preds])
  end

  # Scores every label against row r of x and returns the label with
  # the highest dot product (ties keep the last label examined, matching
  # the original <= comparison).
  def get_best_guess(x, r)
    clbl = nil
    cmax = nil
    @labels.each do |lbl|
      csum = 0.0
      x.column_count.times { |c| csum += @weights[lbl][c] * x[r, c] }
      if cmax.nil? || cmax <= csum
        cmax = csum
        clbl = lbl
      end
    end
    clbl
  end

  # Discards all learned state.
  def cold_start
    @labels = []
    @weights = {}
  end
end
|
data/lib/rubyml/tools.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
require 'gruff'
|
3
|
+
|
4
|
+
module Tools
  # Methods for loading and manipulating data.
  module DataMethods
    # Loads a comma-separated file into a Matrix, one row per line.
    # text: true keeps cells as chomped Strings; otherwise cells are
    # coerced with Float(), which raises on malformed numbers.
    def load_data(file, text = false)
      mat = []
      File.foreach(file) do |f|
        mat << f.split(',').map { |i| text ? String(i).chomp : Float(i) }
      end
      Matrix.rows(mat)
    end

    # Splits a data matrix into features x (all columns but the last,
    # coerced to Float) and targets y (the last column).
    def separate_data(data)
      col_vec = data.column_vectors
      y = Matrix.columns([col_vec.pop])
      x = Matrix.columns(col_vec).collect { |e| Float(e) }
      [x, y]
    end

    # Flattens a Matrix into a row-major Array of its elements.
    def mat_to_array(data)
      data.each_with_object([]) { |e, arr| arr << e }
    end

    # Prepends a column of ones (the bias/intercept term) to x.
    def bias_trick(x)
      Matrix.columns([[1] * x.row_count]).hstack(x)
    end

    # Samples the fitted line theta[0] + theta[1] * x at `steps` evenly
    # spaced points across [px[0], px[-1]] and plots it with the data.
    # steps defaults to the original hard-coded 1000 samples.
    def plot_function(px, py, theta, steps = 1000)
      fx = []
      fy = []
      steps.times do |i|
        fx << (px[0] + (px[-1] - px[0]) * Float(i) / Float(steps))
        fy << (fx[-1] * theta[1] + theta[0])
      end
      plot(fx, fy, px, py)
    end

    # Writes a scatter plot of the data (:data) and fit (:fit) series to
    # scatter.png using Gruff.
    def plot(fx, fy, px, py)
      g = Gruff::Scatter.new(800)
      g.data(:data, px, py)
      g.data(:fit, fx, fy)
      g.write('scatter.png')
    end
  end

  # Methods to test classifier accuracy via K-fold cross validation.
  # Fold bounds are passed around as Strings because the slicing below
  # relies on the String-index Matrix splicing monkey patch in rubyml.rb.
  module ClassifierMethods
    # Returns [xtrain, ytrain, xtest, ytest] for fold `num` of `folds`.
    def generate_folds(x, y, num, folds)
      sin = String(num * (x.row_count / folds))
      ein = String([(num + 1) * (x.row_count / folds), x.row_count].min)
      train = generate_train_set(x, y, sin, ein)
      test = generate_test_set(x, y, sin, ein)
      train + test
    end

    # Training split: rows before sin stacked on rows from ein onward.
    def generate_train_set(x, y, sin, ein)
      xtrain = x[':' + sin, ':'].vstack(x[ein + ':', ':'])
      ytrain = y[':' + sin, ':'].vstack(y[ein + ':', ':'])
      [xtrain, ytrain]
    end

    # Held-out split: rows in [sin, ein).
    def generate_test_set(x, y, sin, ein)
      xtest = x[sin + ':' + ein, ':']
      ytest = y[sin + ':' + ein, ':']
      [xtest, ytest]
    end

    # Scores one prediction: 1.0 when correct, 0.0 otherwise. When
    # @epsilon is set (regression), "correct" means within @epsilon of
    # the target; otherwise labels must match exactly.
    def handle_epsilon(ypred, ytest, r)
      if @epsilon
        (ypred[r, 0] - ytest[r, 0]).abs < @epsilon ? 1.0 : 0.0
      else
        ypred[r, 0] == ytest[r, 0] ? 1.0 : 0.0
      end
    end

    # Accumulates correct/total counts for one fold and reports its
    # accuracy. FIX: the original printed with Kernel#p, which wraps the
    # progress message in inspect quotes; puts emits it verbatim.
    def correct_count(ypred, ytest, c, t, n)
      count = 0.0
      ypred.row_count.times do |r|
        count += handle_epsilon(ypred, ytest, r)
      end
      puts "Fold #{n} Accuracy: #{(count / ypred.row_count * 100.0).round(3)}%"
      [c + count, t + ypred.row_count]
    end

    # K-fold cross validation. The including class must provide
    # fit(x, y) and predict(x) (see LinearRegression / Perceptron).
    # Returns the overall accuracy in [0, 1], rounded to 5 places.
    def training_accuracy(x, y)
      correct = 0.0
      total = 0.0
      @folds.times do |n|
        xtrain, ytrain, xtest, ytest = generate_folds(x, y, n, @folds)
        fit(xtrain, ytrain)
        ypred = predict(xtest)
        correct, total = correct_count(ypred, ytest, correct, total, n)
      end
      (correct / total).round(5)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rubyml
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Palimar Rao
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-04-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: gruff
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.6.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: coveralls
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.8.13
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.8.13
|
41
|
+
description: |-
|
42
|
+
This is a simple machine learning library
|
43
|
+
written in Ruby. It provides implementations of linear regression
|
44
|
+
and multiclass perceptron and visualization and validation methods
|
45
|
+
to verify results. Also included are helper methods to work with
|
46
|
+
training and testing data.
|
47
|
+
email:
|
48
|
+
executables: []
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- lib/rubyml.rb
|
53
|
+
- lib/rubyml/linear_regression.rb
|
54
|
+
- lib/rubyml/perceptron.rb
|
55
|
+
- lib/rubyml/tools.rb
|
56
|
+
homepage:
|
57
|
+
licenses:
|
58
|
+
- MIT
|
59
|
+
metadata: {}
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
requirements: []
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 2.4.8
|
77
|
+
signing_key:
|
78
|
+
specification_version: 4
|
79
|
+
summary: A simple Ruby machine learning library.
|
80
|
+
test_files: []
|