rubyml 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3be8f5b08589ad4973b15a9c83bc11d6522504c5
4
+ data.tar.gz: 9f1a58d71e2932417c07634b4dd4f74a2ce3dc1c
5
+ SHA512:
6
+ metadata.gz: 5b9e3d4b883391569c5006283d426fa5282c7f19b66095734186c6ddbd9ac050a17339579f213c15db9b49d461d8984fee55c1b41756662456b29a3b218c2237
7
+ data.tar.gz: 28e7a2562e961ad518e6e15b6eece272dc3d156b1dff4cf833bf535de3dc7b9ac78c8af2311632376f58a7d1c780b0aa02226151438eb04a2506b287da12299c
data/lib/rubyml.rb ADDED
@@ -0,0 +1,85 @@
1
+ require 'rubyml/tools'
2
+ require 'rubyml/linear_regression'
3
+ require 'rubyml/perceptron'
4
+
5
# Monkey patch of the standard library Matrix class that layers
# string-based slicing ("splicing") on top of plain element access.
#
# Index forms accepted by m[i, j]:
#   m[1, 2]          single element (stock behaviour)
#   m[1, ':']        row 1, returned as a Vector
#   m[':', 2]        column 2, returned as a Vector
#   m['0:2', '1:']   sub-matrix; 'a:b' is half-open (a included, b
#                    excluded) and a missing bound means "from the
#                    start" / "up to the end"
#   m['1', ':']      a colon-free string selects that single index
class Matrix
  # Keep a handle on the stock accessor before [] is redefined below.
  alias old_element element

  # Entry point for every index form listed on the class.
  # Integer is tested instead of Fixnum: Fixnum was deprecated in
  # Ruby 2.4 and removed in Ruby 3.2, where referencing it raises
  # NameError and made every lookup fail.
  def [](i, j)
    if i.is_a?(Integer) && j == ':'
      row(i)
    elsif j.is_a?(Integer) && i == ':'
      column(j)
    else
      redirect(i, j)
    end
  end

  # Two strings may describe a slice; anything else is a plain lookup.
  def redirect(i, j)
    if i.is_a?(String) && j.is_a?(String)
      redirect2(i, j)
    else
      old_element(i, j)
    end
  end

  # A slice needs at least one ':'; otherwise defer to the stock accessor.
  def redirect2(i, j)
    if i.include?(':') || j.include?(':')
      redirect3(i, j)
    else
      old_element(i, j)
    end
  end

  # Parses both specs into start/end bounds. An empty piece (as in
  # ':3') becomes 0; a piece that is absent altogether stays nil and
  # is filled in by the later steps.
  def redirect3(i, j)
    rs, re = i.split(':').map { |e| e == '' ? 0 : Integer(e) }
    cs, ce = j.split(':').map { |e| e == '' ? 0 : Integer(e) }
    redirect4(rs, re, cs, ce, [i, j])
  end

  # A bare ':' selects the whole axis.
  def redirect4(rs, re, cs, ce, orig)
    if orig[0] == ':'
      rs = 0
      re = row_count
    end
    if orig[1] == ':'
      cs = 0
      ce = column_count
    end
    redirect5(rs, re, cs, ce, orig)
  end

  # A spec without ':' names one index, i.e. a range of length one.
  def redirect5(rs, re, cs, ce, orig)
    re = rs + 1 unless orig[0].include?(':')
    ce = cs + 1 unless orig[1].include?(':')
    redirect6(rs, re, cs, ce)
  end

  # Fills bounds that are still missing: starts default to 0 and ends
  # default to the size of the axis (e.g. '2:' runs to the last row).
  def redirect6(rs, re, cs, ce)
    rs ||= 0
    cs ||= 0
    re ||= row_count
    ce ||= column_count
    redirect7(rs, re, cs, ce)
  end

  # Builds explicitly shaped empty matrices for empty selections so the
  # result can still be stacked with non-empty pieces.
  def redirect7(rs, re, cs, ce)
    return Matrix.rows([]) if rs >= re && cs >= ce
    return Matrix.rows([[]] * (re - rs)) if cs == ce
    return Matrix.columns([[]] * (ce - cs)) if re == rs
    redirect8(rs, re, cs, ce)
  end

  # Copies rows rs...re and, within each, columns cs...ce.
  def redirect8(rs, re, cs, ce)
    picked_rows = row_vectors[rs..re - 1].map(&:to_a)
    Matrix.rows(picked_rows.map { |cells| cells[cs..ce - 1] })
  end
end
80
+
81
# Stand-alone entry point to the data helpers: a RubyML instance
# responds to everything mixed in from Tools::DataMethods, without
# needing one of the classifier classes.
class RubyML
  include Tools::DataMethods
end
@@ -0,0 +1,34 @@
1
+ require 'rubyml/tools'
2
+
3
# Least-squares linear regression solved in closed form. The number
# of decimals kept in the coefficients and predictions, and the number
# of folds used for K-fold cross validation, are configurable.
class LinearRegression
  include Tools::DataMethods
  include Tools::ClassifierMethods

  attr_reader :theta, :accuracy, :precision, :folds

  # precision - decimal places kept when rounding theta and predictions.
  # folds     - number of folds stored for cross validation.
  def initialize(precision = 3, folds = 5)
    @folds = folds
    @precision = precision
    # Absolute tolerance read by handle_epsilon when scoring predictions.
    @epsilon = 2.0
  end

  # Solves theta = (X'X)^-1 X'y on the bias-augmented inputs and stores
  # the coefficients rounded to @precision decimals.
  def fit(x, y)
    design = bias_trick(x)
    @theta = ((design.t * design).inv * design.t) * y
    @theta = @theta.map { |coefficient| coefficient.round(@precision) }
  end

  # Returns the rounded predictions for x as a matrix.
  def predict(x)
    raw = bias_trick(x) * @theta
    raw.map { |value| value.round(@precision) }
  end

  # Flattens the inputs and the fitted theta, then hands them to
  # plot_function.
  def visualize(x, y)
    plot_function(mat_to_array(x), mat_to_array(y), mat_to_array(@theta))
  end
end
@@ -0,0 +1,64 @@
1
+ require 'rubyml/tools'
2
+
3
# Multiclass perceptron with a configurable number of training
# iterations and cross-validation folds. One weight table is kept per
# label; a row is classified as the label whose table scores highest.
class Perceptron
  include Tools::DataMethods
  include Tools::ClassifierMethods

  attr_reader :iterations, :folds, :labels, :weights

  # iterations - number of passes over the training rows in fit.
  # folds      - number of folds stored for cross validation.
  def initialize(iterations = 100, folds = 5)
    @iterations = iterations
    # No tolerance: handle_epsilon then compares labels with ==.
    @epsilon = nil
    @folds = folds
    @labels = []
    @weights = {}
  end

  # Registers every label found in y and gives each new label a
  # zero-default weight table. Labels and weights that already exist
  # are kept, so a warm start (fit with cs = false) really continues
  # from the previous weights instead of silently resetting them.
  def setup_weights(y)
    @labels |= mat_to_array(y)
    @labels.each { |lbl| @weights[lbl] ||= Hash.new(0) }
  end

  # Moves weight w for feature c away from the wrongly guessed label
  # and towards the real one.
  def update_weights(guess, real, c, w)
    @weights[guess][c] -= w
    @weights[real][c] += w
  end

  # Trains on the rows of x with the labels in column 0 of y.
  # cs - cold start: when true (default) all previous labels and
  #      weights are discarded first; pass false to continue training.
  def fit(x, y, cs = true)
    cold_start if cs
    setup_weights(y)
    @iterations.times do
      x.row_count.times do |r|
        guess = get_best_guess(x, r)
        actual = y[r, 0]
        next if actual == guess
        x.column_count.times { |c| update_weights(guess, actual, c, x[r, c]) }
      end
    end
  end

  # Returns a one-column matrix holding the predicted label per row of x.
  def predict(x)
    guesses = Array.new(x.row_count) { |r| get_best_guess(x, r) }
    Matrix.columns([guesses])
  end

  # Scores row r of x against every label and returns the best label.
  # On equal scores the label registered later wins (<= comparison).
  def get_best_guess(x, r)
    best_label = nil
    best_score = nil
    @labels.each do |lbl|
      score = x.column_count.times.inject(0.0) do |acc, c|
        acc + @weights[lbl][c] * x[r, c]
      end
      if best_score.nil? || best_score <= score
        best_score = score
        best_label = lbl
      end
    end
    best_label
  end

  # Forgets everything learned so far.
  def cold_start
    @labels = []
    @weights = {}
  end
end
@@ -0,0 +1,104 @@
1
+ require 'matrix'
2
+ require 'gruff'
3
+
4
# Helper mixins shared by the RubyML classes.
module Tools
  # Methods for loading and manipulating data.
  module DataMethods
    # Reads a comma-separated file into a Matrix, one row per line.
    #
    # file - path of the file to read.
    # text - when true every field is kept as a String (line ending
    #        removed); otherwise every field is parsed with Float().
    #
    # Blank lines (for example a trailing empty line) are skipped.
    # Previously they made Float() raise, or produced a ragged row in
    # text mode.
    def load_data(file, text = false)
      rows = []
      File.foreach(file) do |line|
        next if line.strip.empty?
        rows << line.split(',').map { |field| text ? String(field).chomp : Float(field) }
      end
      Matrix.rows(rows)
    end

    # Splits data into [x, y]: y is the last column as a one-column
    # matrix, x holds the remaining columns converted with Float().
    def separate_data(data)
      columns = data.column_vectors
      y = Matrix.columns([columns.pop])
      x = Matrix.columns(columns).collect { |e| Float(e) }
      [x, y]
    end

    # Returns every element yielded by data.each as a flat Array.
    def mat_to_array(data)
      data.each.to_a
    end

    # Prepends a column of ones to x so the first coefficient of a
    # linear model acts as the intercept.
    def bias_trick(x)
      Matrix.columns([[1] * x.row_count]).hstack(x)
    end

    # Samples the line theta[1] * x + theta[0] at 1000 evenly spaced
    # points starting at px[0] and approaching px[-1], then plots it
    # together with the data points (px, py).
    def plot_function(px, py, theta)
      origin = px[0]
      span = px[-1] - origin
      fx = Array.new(1000) { |i| origin + span * Float(i) / 1000.0 }
      fy = fx.map { |v| v * theta[1] + theta[0] }
      plot(fx, fy, px, py)
    end

    # Draws both series as an 800px Gruff scatter chart and writes it
    # to 'scatter.png' in the current working directory.
    def plot(fx, fy, px, py)
      g = Gruff::Scatter.new(800)
      g.data(:data, px, py)
      g.data(:fit, fx, fy)
      g.write('scatter.png')
    end
  end

  # Methods to test classifier accuracy via
  # K-fold cross validation.
  module ClassifierMethods
    # Returns [xtrain, ytrain, xtest, ytest] for fold number num
    # (0-based) out of folds.
    #
    # Each fold tests x.row_count / folds rows. The last fold also
    # takes the rows left over by that integer division; before, those
    # rows were never part of any test set.
    def generate_folds(x, y, num, folds)
      size = x.row_count / folds
      stop = num == folds - 1 ? x.row_count : [(num + 1) * size, x.row_count].min
      sin = String(num * size)
      ein = String(stop)
      train = generate_train_set(x, y, sin, ein)
      test = generate_test_set(x, y, sin, ein)
      train + test
    end

    # Training rows are everything before sin stacked on everything
    # from ein onwards. Relies on x and y accepting string slice
    # specs such as ':3' and '5:' in [].
    def generate_train_set(x, y, sin, ein)
      head = ":#{sin}"
      tail = "#{ein}:"
      xtrain = x[head, ':'].vstack(x[tail, ':'])
      ytrain = y[head, ':'].vstack(y[tail, ':'])
      [xtrain, ytrain]
    end

    # Test rows are the half-open row range sin...ein.
    def generate_test_set(x, y, sin, ein)
      window = "#{sin}:#{ein}"
      [x[window, ':'], y[window, ':']]
    end

    # Scores row r as 1.0 (hit) or 0.0 (miss). With @epsilon set a
    # prediction is a hit when it lies within that absolute tolerance;
    # without it the values must be equal.
    def handle_epsilon(ypred, ytest, r)
      hit = if @epsilon
              (ypred[r, 0] - ytest[r, 0]).abs < @epsilon
            else
              ypred[r, 0] == ytest[r, 0]
            end
      hit ? 1.0 : 0.0
    end

    # Counts the hits of fold n, prints the fold accuracy and returns
    # the running totals [c + hits, t + rows].
    def correct_count(ypred, ytest, c, t, n)
      count = 0.0
      ypred.row_count.times do |r|
        count += handle_epsilon(ypred, ytest, r)
      end
      p "Fold #{n} Accuracy: #{(count / ypred.row_count * 100.0).round(3)}%"
      [c + count, t + ypred.row_count]
    end

    # Runs K-fold cross validation with @folds folds using the host's
    # fit and predict, and returns the overall fraction of hits
    # rounded to 5 decimals.
    def training_accuracy(x, y)
      correct = 0.0
      total = 0.0
      @folds.times do |n|
        xtrain, ytrain, xtest, ytest = generate_folds(x, y, n, @folds)
        fit(xtrain, ytrain)
        ypred = predict(xtest)
        correct, total = correct_count(ypred, ytest, correct, total, n)
      end
      (correct / total).round(5)
    end
  end
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubyml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Palimar Rao
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: gruff
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.6.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: coveralls
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.8.13
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.13
41
+ description: |-
42
+ This is a simple machine learning library
43
+ written in Ruby. It provides implementations of linear regression
44
+ and multiclass perceptron and visualization and validation methods
45
+ to verify results. Also included are helper methods to work with
46
+ training and testing data.
47
+ email:
48
+ executables: []
49
+ extensions: []
50
+ extra_rdoc_files: []
51
+ files:
52
+ - lib/rubyml.rb
53
+ - lib/rubyml/linear_regression.rb
54
+ - lib/rubyml/perceptron.rb
55
+ - lib/rubyml/tools.rb
56
+ homepage:
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 2.4.8
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: A simple Ruby machine learning library.
80
+ test_files: []