rubyml 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3be8f5b08589ad4973b15a9c83bc11d6522504c5
4
+ data.tar.gz: 9f1a58d71e2932417c07634b4dd4f74a2ce3dc1c
5
+ SHA512:
6
+ metadata.gz: 5b9e3d4b883391569c5006283d426fa5282c7f19b66095734186c6ddbd9ac050a17339579f213c15db9b49d461d8984fee55c1b41756662456b29a3b218c2237
7
+ data.tar.gz: 28e7a2562e961ad518e6e15b6eece272dc3d156b1dff4cf833bf535de3dc7b9ac78c8af2311632376f58a7d1c780b0aa02226151438eb04a2506b287da12299c
data/lib/rubyml.rb ADDED
@@ -0,0 +1,85 @@
1
+ require 'rubyml/tools'
2
+ require 'rubyml/linear_regression'
3
+ require 'rubyml/perceptron'
4
+
5
# Monkey patch of the standard-library Matrix class that adds
# string-based slicing to Matrix#[].
#
# Supported index forms (each of the two arguments):
#   Integer   - a single row/column index (plain element access when
#               both arguments are Integers).
#   ':'       - every row/column.
#   'a:b'     - rows/columns a up to, but not including, b.
#   'a:'      - from a to the end.
#   ':b'      - from the start up to, but not including, b.
#   'a'       - the single row/column a (kept as a 1-wide matrix).
#
# The redirect* methods are the internal stages of the slicing
# pipeline; they are left public so existing callers keep working.
class Matrix
  # Keep a handle on the stock element accessor before #[] is replaced.
  alias old_element element

  # Element access with optional slicing.
  #
  # Returns a Vector for (Integer, ':') and (':', Integer), a Matrix for
  # string slices, and a single element for two Integers.
  #
  # Integer is used instead of Fixnum: Fixnum was folded into Integer in
  # Ruby 2.4 and the constant no longer exists on Ruby 3.2+, where the
  # old check raised NameError on every call.
  def [](i, j)
    if i.is_a?(Integer) && j == ':'
      row(i)
    elsif j.is_a?(Integer) && i == ':'
      column(j)
    else
      redirect(i, j)
    end
  end

  # Only a pair of Strings can be a slice; anything else is handed to
  # the original element accessor.
  def redirect(i, j)
    if i.is_a?(String) && j.is_a?(String)
      redirect2(i, j)
    else
      old_element(i, j)
    end
  end

  # A pair of Strings is a slice only if at least one contains ':'.
  def redirect2(i, j)
    if i.include?(':') || j.include?(':')
      redirect3(i, j)
    else
      old_element(i, j)
    end
  end

  # Parses 'start:end' into integer bounds. An empty piece becomes 0 and
  # a missing piece stays nil (filled in by the later stages).
  # Integer() raises ArgumentError for non-numeric pieces.
  def redirect3(i, j)
    rs, re = i.split(':').map { |e| e == '' ? 0 : Integer(e) }
    cs, ce = j.split(':').map { |e| e == '' ? 0 : Integer(e) }
    redirect4(rs, re, cs, ce, [i, j])
  end

  # A bare ':' selects the whole axis.
  def redirect4(rs, re, cs, ce, orig)
    if orig[0] == ':'
      rs = 0
      re = row_count
    end
    if orig[1] == ':'
      cs = 0
      ce = column_count
    end
    redirect5(rs, re, cs, ce, orig)
  end

  # An index without ':' selects exactly one row/column.
  def redirect5(rs, re, cs, ce, orig)
    re = rs + 1 unless orig[0].include?(':')
    ce = cs + 1 unless orig[1].include?(':')
    redirect6(rs, re, cs, ce)
  end

  # Fills in bounds that are still missing: starts default to 0 and
  # ends default to the size of the axis.
  def redirect6(rs, re, cs, ce)
    rs ||= 0
    cs ||= 0
    re ||= row_count
    ce ||= column_count
    redirect7(rs, re, cs, ce)
  end

  # Handles empty selections so the result keeps the right shape
  # (0 x n or n x 0) instead of failing in the slicing step.
  def redirect7(rs, re, cs, ce)
    return Matrix.rows([]) if rs >= re && cs >= ce
    return Matrix.rows([[]] * (re - rs)) if cs == ce
    return Matrix.columns([[]] * (ce - cs)) if re == rs
    redirect8(rs, re, cs, ce)
  end

  # Builds the sub-matrix for rows rs...re and columns cs...ce.
  def redirect8(rs, re, cs, ce)
    selected_rows = row_vectors[rs..re - 1].map(&:to_a)
    Matrix.rows(selected_rows.map { |values| values[cs..ce - 1] })
  end
end
80
+
81
+ # General-purpose entry point: mixes in Tools::DataMethods so that
82
+ # its data helpers can be called on a RubyML instance.
83
+ class RubyML
84
+ include Tools::DataMethods
85
+ end
@@ -0,0 +1,34 @@
1
+ require 'rubyml/tools'
2
+
3
# Linear regression solved through the normal equation, with a
# configurable rounding precision and number of folds for K-fold
# cross validation (see Tools::ClassifierMethods).
class LinearRegression
  include Tools::DataMethods
  include Tools::ClassifierMethods

  attr_reader :theta, :accuracy, :precision, :folds

  # precision - number of decimal places kept in theta and predictions.
  # folds     - number of folds used by training_accuracy.
  def initialize(precision = 3, folds = 5)
    @folds = folds
    # Read by handle_epsilon: a prediction counts as correct when it is
    # within this absolute distance of the expected value.
    @epsilon = 2.0
    @precision = precision
  end

  # Fits theta = (X'X)^-1 X' y on the bias-augmented inputs and stores
  # it rounded to the configured precision.
  def fit(x, y)
    design = bias_trick(x)
    transposed = design.t
    projection = (transposed * design).inv * transposed
    @theta = (projection * y).map { |weight| weight.round(@precision) }
  end

  # Returns the rounded predictions for the rows of x as a column matrix.
  def predict(x)
    raw = bias_trick(x) * @theta
    raw.map { |value| value.round(@precision) }
  end

  # Plots the samples together with the fitted line (see plot_function).
  def visualize(x, y)
    xs = mat_to_array(x)
    ys = mat_to_array(y)
    plot_function(xs, ys, mat_to_array(@theta))
  end
end
@@ -0,0 +1,64 @@
1
+ require 'rubyml/tools'
2
+
3
# Multiclass perceptron with a configurable number of training
# iterations and of folds for K-fold cross validation.
#
# One weight table (feature index => weight) is kept per label; a row is
# classified as the label whose weights give the largest dot product.
class Perceptron
  include Tools::DataMethods
  include Tools::ClassifierMethods

  attr_reader :iterations, :folds, :labels, :weights

  # iterations - number of passes over the training rows in #fit.
  # folds      - number of folds used by training_accuracy.
  def initialize(iterations = 100, folds = 5)
    @iterations = iterations
    # nil makes handle_epsilon compare predictions by exact equality.
    @epsilon = nil
    @folds = folds
    @labels = []
    @weights = {}
  end

  # Registers every label found in y and makes sure each one has a
  # weight table.
  #
  # Labels and weights that already exist are kept, so a warm start
  # (fit with cs = false) continues from the learned weights instead of
  # silently resetting them. After cold_start both are empty, so the
  # result is the same as building them from scratch.
  def setup_weights(y)
    @labels |= mat_to_array(y)
    @labels.each { |lbl| @weights[lbl] ||= Hash.new(0) }
  end

  # Perceptron update for one feature: move weight w away from the wrong
  # guess and towards the real label.
  def update_weights(guess, real, c, w)
    @weights[guess][c] -= w
    @weights[real][c] += w
  end

  # Trains on the rows of x with the labels in the first column of y.
  #
  # cs - cold start: when true (default) previously learned labels and
  #      weights are discarded first; when false training continues
  #      from the current weights.
  def fit(x, y, cs = true)
    cold_start if cs
    setup_weights(y)
    @iterations.times do
      x.row_count.times do |r|
        guess = get_best_guess(x, r)
        actual = y[r, 0]
        next if actual == guess
        x.column_count.times { |c| update_weights(guess, actual, c, x[r, c]) }
      end
    end
  end

  # Returns the predicted label of every row of x as a column matrix.
  def predict(x)
    preds = Array.new(x.row_count) { |r| get_best_guess(x, r) }
    Matrix.columns([preds])
  end

  # Returns the label with the highest score for row r of x, or nil when
  # no labels are known. On ties the label registered last wins.
  def get_best_guess(x, r)
    best_label = nil
    best_score = nil
    @labels.each do |lbl|
      score = 0.0
      x.column_count.times { |c| score += @weights[lbl][c] * x[r, c] }
      next unless best_score.nil? || best_score <= score
      best_score = score
      best_label = lbl
    end
    best_label
  end

  # Forgets all learned labels and weights.
  def cold_start
    @labels = []
    @weights = {}
  end
end
@@ -0,0 +1,104 @@
1
+ require 'matrix'
2
+ require 'gruff'
3
+
4
# Shared helpers mixed into the learners.
module Tools
  # Methods for loading and manipulating data.
  module DataMethods
    # Reads a comma-separated file into a Matrix, one row per line.
    #
    # text - when true the cells are kept as Strings (trailing newline
    #        removed); otherwise every cell is converted with Float(),
    #        which raises ArgumentError on non-numeric input.
    def load_data(file, text = false)
      parsed = File.foreach(file).map do |line|
        line.split(',').map { |cell| text ? String(cell).chomp : Float(cell) }
      end
      Matrix.rows(parsed)
    end

    # Splits a data matrix into [x, y]: y is the last column (as a
    # one-column matrix), x is every other column converted to Float.
    def separate_data(data)
      columns = data.column_vectors
      y = Matrix.columns([columns.pop])
      x = Matrix.columns(columns).map { |e| Float(e) }
      [x, y]
    end

    # Returns the elements yielded by data.each as a flat Array.
    def mat_to_array(data)
      data.each.to_a
    end

    # Prepends a column of ones to x so the first weight acts as the
    # intercept term.
    def bias_trick(x)
      ones = Matrix.columns([[1] * x.row_count])
      ones.hstack(x)
    end

    # Samples the line theta[0] + theta[1] * x at 1000 evenly spaced
    # points starting at px[0] and heading towards px[-1], then plots it
    # with the data.
    # NOTE(review): the range is taken from the first and last entries
    # of px, so px is presumably expected to be sorted - confirm.
    def plot_function(px, py, theta)
      fx = Array.new(1000) { |i| px[0] + (px[-1] - px[0]) * Float(i) / 1000.0 }
      fy = fx.map { |value| value * theta[1] + theta[0] }
      plot(fx, fy, px, py)
    end

    # Writes a scatter plot of the data and the fitted line to
    # 'scatter.png' in the current directory.
    def plot(fx, fy, px, py)
      g = Gruff::Scatter.new(800)
      g.data(:data, px, py)
      g.data(:fit, fx, fy)
      g.write('scatter.png')
    end
  end

  # Methods to test classifier accuracy via
  # K-fold cross validation.
  module ClassifierMethods
    # Returns [xtrain, ytrain, xtest, ytest] for fold number num
    # (0-based) out of folds.
    #
    # The fold size is row_count / folds (integer division). The last
    # fold extends to the final row so that the remainder rows are also
    # used for testing when row_count is not a multiple of folds;
    # without this they would never appear in any test set.
    def generate_folds(x, y, num, folds)
      total_rows = x.row_count
      fold_size = total_rows / folds
      first = num * fold_size
      last = num == folds - 1 ? total_rows : [(num + 1) * fold_size, total_rows].min
      sin = String(first)
      ein = String(last)
      train = generate_train_set(x, y, sin, ein)
      test = generate_test_set(x, y, sin, ein)
      train + test
    end

    # Training rows are everything before sin plus everything from ein
    # on. Relies on the string slicing added to Matrix#[].
    def generate_train_set(x, y, sin, ein)
      xtrain = x[':' + sin, ':'].vstack(x[ein + ':', ':'])
      ytrain = y[':' + sin, ':'].vstack(y[ein + ':', ':'])
      [xtrain, ytrain]
    end

    # Test rows are sin up to, but not including, ein.
    def generate_test_set(x, y, sin, ein)
      xtest = x[sin + ':' + ein, ':']
      ytest = y[sin + ':' + ein, ':']
      [xtest, ytest]
    end

    # Scores row r as 1.0 (correct) or 0.0 (wrong). With @epsilon set a
    # prediction is correct when it is within @epsilon of the expected
    # value; otherwise the two values must be equal.
    def handle_epsilon(ypred, ytest, r)
      if @epsilon
        ((ypred[r, 0] - ytest[r, 0]).abs < @epsilon ? 1.0 : 0.0)
      else
        (ypred[r, 0] == ytest[r, 0] ? 1.0 : 0.0)
      end
    end

    # Counts the correct predictions of fold n, prints the fold accuracy
    # and returns the running totals [correct, total] updated from c, t.
    def correct_count(ypred, ytest, c, t, n)
      count = 0.0
      ypred.row_count.times do |r|
        count += handle_epsilon(ypred, ytest, r)
      end
      p "Fold #{n} Accuracy: #{(count / ypred.row_count * 100.0).round(3)}%"
      [c + count, t + ypred.row_count]
    end

    # Runs K-fold cross validation with @folds folds, using the
    # including class's fit and predict, and returns the overall
    # fraction of correct predictions rounded to 5 places.
    def training_accuracy(x, y)
      correct = 0.0
      total = 0.0
      @folds.times do |n|
        xtrain, ytrain, xtest, ytest = generate_folds(x, y, n, @folds)
        fit(xtrain, ytrain)
        ypred = predict(xtest)
        correct, total = correct_count(ypred, ytest, correct, total, n)
      end
      (correct / total).round(5)
    end
  end
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubyml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Palimar Rao
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: gruff
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.6.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: coveralls
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.8.13
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.13
41
+ description: |-
42
+ This is a simple machine learning library
43
+ written in Ruby. It provides implementations of linear regression
44
+ and multiclass perceptron and visualization and validation methods
45
+ to verify results. Also included are helper methods to work with
46
+ training and testing data.
47
+ email:
48
+ executables: []
49
+ extensions: []
50
+ extra_rdoc_files: []
51
+ files:
52
+ - lib/rubyml.rb
53
+ - lib/rubyml/linear_regression.rb
54
+ - lib/rubyml/perceptron.rb
55
+ - lib/rubyml/tools.rb
56
+ homepage:
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 2.4.8
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: A simple Ruby machine learning library.
80
+ test_files: []