rubyml 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/rubyml.rb +85 -0
- data/lib/rubyml/linear_regression.rb +34 -0
- data/lib/rubyml/perceptron.rb +64 -0
- data/lib/rubyml/tools.rb +104 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3be8f5b08589ad4973b15a9c83bc11d6522504c5
|
4
|
+
data.tar.gz: 9f1a58d71e2932417c07634b4dd4f74a2ce3dc1c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5b9e3d4b883391569c5006283d426fa5282c7f19b66095734186c6ddbd9ac050a17339579f213c15db9b49d461d8984fee55c1b41756662456b29a3b218c2237
|
7
|
+
data.tar.gz: 28e7a2562e961ad518e6e15b6eece272dc3d156b1dff4cf833bf535de3dc7b9ac78c8af2311632376f58a7d1c780b0aa02226151438eb04a2506b287da12299c
|
data/lib/rubyml.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'rubyml/tools'
|
2
|
+
require 'rubyml/linear_regression'
|
3
|
+
require 'rubyml/perceptron'
|
4
|
+
|
5
|
+
# Monkey patching the Matrix class to
|
6
|
+
# implement matrix splicing.
|
7
|
+
class Matrix
  alias old_element element

  # Extended element access with NumPy-style splicing.
  # An Integer paired with ':' returns a whole row/column as a Vector;
  # String indices such as '1:3' select a sub-matrix; a plain pair of
  # Integers falls through to the stock element lookup.
  # FIX: the original tested `i.class == Fixnum`; Fixnum was removed in
  # Ruby 3.2, so splicing raised NameError — Integer is the supported check.
  def [](i, j)
    if i.is_a?(Integer) && j == ':'
      row(i)
    elsif j.is_a?(Integer) && i == ':'
      column(j)
    else
      redirect(i, j)
    end
  end

  # Routes String/String index pairs into the splicing pipeline; any
  # other combination is a plain element lookup.
  def redirect(i, j)
    if i.is_a?(String) && j.is_a?(String)
      redirect2(i, j)
    else
      old_element(i, j)
    end
  end

  # Splices only when at least one index contains ':'; otherwise treats
  # the strings as ordinary element coordinates.
  def redirect2(i, j)
    if i.include?(':') || j.include?(':')
      redirect3(i, j)
    else
      old_element(i, j)
    end
  end

  # Parses 'start:end' strings into integer bounds. An empty component
  # (as in ':3') becomes 0; a missing end (as in '3:') yields nil and is
  # resolved later. Integer() raises on malformed components.
  def redirect3(i, j)
    rs, re = i.split(':').map { |e| e == '' ? 0 : Integer(e) }
    cs, ce = j.split(':').map { |e| e == '' ? 0 : Integer(e) }
    redirect4(rs, re, cs, ce, [i, j])
  end

  # A bare ':' index selects the entire row/column range.
  def redirect4(rs, re, cs, ce, orig)
    if orig[0] == ':'
      rs = 0
      re = row_count
    end
    if orig[1] == ':'
      cs = 0
      ce = column_count
    end
    redirect5(rs, re, cs, ce, orig)
  end

  # An index without ':' (e.g. '2') means a single-row/column slice.
  def redirect5(rs, re, cs, ce, orig)
    re = rs + 1 unless orig[0].include?(':')
    ce = cs + 1 unless orig[1].include?(':')
    redirect6(rs, re, cs, ce)
  end

  # Fills any still-unresolved bounds with the matrix extents.
  # (Bounds are Integers or nil, never false, so ||= is equivalent to
  # the original nil? ternaries.)
  def redirect6(rs, re, cs, ce)
    rs ||= 0
    cs ||= 0
    re ||= row_count
    ce ||= column_count
    redirect7(rs, re, cs, ce)
  end

  # Degenerate (empty) ranges produce appropriately shaped empty matrices.
  def redirect7(rs, re, cs, ce)
    return Matrix.rows([]) if rs >= re && cs >= ce
    return Matrix.rows([[]] * (re - rs)) if cs == ce
    return Matrix.columns([[]] * (ce - cs)) if re == rs
    redirect8(rs, re, cs, ce)
  end

  # Materializes the sub-matrix rows[rs...re] x columns[cs...ce].
  def redirect8(rs, re, cs, ce)
    rv = row_vectors[rs..re - 1].map(&:to_a)
    nrv = rv.map { |e| e[cs..ce - 1] }
    Matrix.rows(nrv)
  end
end
|
80
|
+
|
81
|
+
# A general class to allow access to
|
82
|
+
# data manipulation tools.
|
83
|
+
class RubyML
  # Exposes the Tools::DataMethods helpers (load_data, separate_data,
  # mat_to_array, bias_trick and the plotting helpers) as instance
  # methods, so data manipulation is available without instantiating a
  # specific classifier.
  include Tools::DataMethods
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rubyml/tools'
|
2
|
+
|
3
|
+
# The linear regression class with
|
4
|
+
# customizable number of folds for
|
5
|
+
# K-fold cross validation.
|
6
|
+
class LinearRegression
  include Tools::DataMethods
  include Tools::ClassifierMethods
  attr_reader :theta, :accuracy, :precision, :folds

  # @param precision [Integer] decimal places kept on coefficients and
  #   predictions
  # @param folds [Integer] number of folds for K-fold cross validation
  def initialize(precision = 3, folds = 5)
    @precision = precision
    @epsilon = 2.0 # tolerance used by ClassifierMethods#handle_epsilon
    @folds = folds
  end

  # Fits ordinary least squares via the normal equation
  # (X^T X)^-1 X^T y and stores the rounded coefficients in @theta.
  # NOTE(review): Matrix#inv raises when X^T X is singular — verify
  # inputs are full-rank.
  def fit(x, y)
    design = bias_trick(x)
    solution = (design.t * design).inv * design.t * y
    @theta = solution.collect { |coef| coef.round(@precision) }
  end

  # Returns predicted targets for x, rounded to the configured precision.
  def predict(x)
    (bias_trick(x) * @theta).collect { |pred| pred.round(@precision) }
  end

  # Plots the data points together with the fitted line.
  def visualize(x, y)
    plot_function(mat_to_array(x), mat_to_array(y), mat_to_array(@theta))
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'rubyml/tools'
|
2
|
+
|
3
|
+
# The multiclass perceptron class with
|
4
|
+
# customizable number of iterations and folds.
|
5
|
+
class Perceptron
  include Tools::DataMethods
  include Tools::ClassifierMethods

  attr_reader :iterations, :folds, :labels, :weights

  # @param iterations [Integer] training passes over the data
  # @param folds [Integer] number of folds for K-fold cross validation
  def initialize(iterations = 100, folds = 5)
    @iterations = iterations
    @epsilon = nil # nil => exact label match in ClassifierMethods#handle_epsilon
    @folds = folds
    @labels = []
    @weights = {}
  end

  # Records the distinct labels found in y and gives each one a weight
  # hash defaulting to 0 (keys are feature-column indices).
  def setup_weights(y)
    @labels = mat_to_array(y).uniq
    @labels.each { |lbl| @weights[lbl] = Hash.new(0) }
  end

  # Moves weight mass w on feature column c from the wrongly guessed
  # label to the true one.
  def update_weights(guess, real, c, w)
    @weights[guess][c] -= w
    @weights[real][c] += w
  end

  # Trains the multiclass perceptron on feature matrix x and label
  # column y. cs: true resets labels/weights before training so repeated
  # fits start cold.
  def fit(x, y, cs = true)
    cold_start if cs
    setup_weights(y)
    @iterations.times do
      x.row_count.times do |r|
        guess = get_best_guess(x, r)
        next if y[r, 0] == guess
        x.column_count.times { |c| update_weights(guess, y[r, 0], c, x[r, c]) }
      end
    end
  end

  # Returns a one-column Matrix of predicted labels, one per row of x.
  def predict(x)
    preds = Array.new(x.row_count) { |r| get_best_guess(x, r) }
    Matrix.columns([preds])
  end

  # Scores every label against row r of x and returns the label with
  # the highest dot product (ties keep the last label examined, matching
  # the original <= comparison).
  def get_best_guess(x, r)
    clbl = nil
    cmax = nil
    @labels.each do |lbl|
      csum = 0.0
      x.column_count.times { |c| csum += @weights[lbl][c] * x[r, c] }
      if cmax.nil? || cmax <= csum
        cmax = csum
        clbl = lbl
      end
    end
    clbl
  end

  # Discards all learned state.
  def cold_start
    @labels = []
    @weights = {}
  end
end
|
data/lib/rubyml/tools.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
require 'gruff'
|
3
|
+
|
4
|
+
module Tools
  # Methods for loading and manipulating data.
  module DataMethods
    # Loads a comma-separated file into a Matrix, one row per line.
    # text: true keeps cells as chomped Strings; otherwise cells are
    # coerced with Float(), which raises on malformed numbers.
    def load_data(file, text = false)
      mat = []
      File.foreach(file) do |f|
        mat << f.split(',').map { |i| text ? String(i).chomp : Float(i) }
      end
      Matrix.rows(mat)
    end

    # Splits a data matrix into features x (all columns but the last,
    # coerced to Float) and targets y (the last column).
    def separate_data(data)
      col_vec = data.column_vectors
      y = Matrix.columns([col_vec.pop])
      x = Matrix.columns(col_vec).collect { |e| Float(e) }
      [x, y]
    end

    # Flattens a Matrix into a row-major Array of its elements.
    def mat_to_array(data)
      data.each_with_object([]) { |e, arr| arr << e }
    end

    # Prepends a column of ones (the bias/intercept term) to x.
    def bias_trick(x)
      Matrix.columns([[1] * x.row_count]).hstack(x)
    end

    # Samples the fitted line theta[0] + theta[1] * x at `steps` evenly
    # spaced points across [px[0], px[-1]] and plots it with the data.
    # steps defaults to the original hard-coded 1000 samples.
    def plot_function(px, py, theta, steps = 1000)
      fx = []
      fy = []
      steps.times do |i|
        fx << (px[0] + (px[-1] - px[0]) * Float(i) / Float(steps))
        fy << (fx[-1] * theta[1] + theta[0])
      end
      plot(fx, fy, px, py)
    end

    # Writes a scatter plot of the data (:data) and fit (:fit) series to
    # scatter.png using Gruff.
    def plot(fx, fy, px, py)
      g = Gruff::Scatter.new(800)
      g.data(:data, px, py)
      g.data(:fit, fx, fy)
      g.write('scatter.png')
    end
  end

  # Methods to test classifier accuracy via K-fold cross validation.
  # Fold bounds are passed around as Strings because the slicing below
  # relies on the String-index Matrix splicing monkey patch in rubyml.rb.
  module ClassifierMethods
    # Returns [xtrain, ytrain, xtest, ytest] for fold `num` of `folds`.
    def generate_folds(x, y, num, folds)
      sin = String(num * (x.row_count / folds))
      ein = String([(num + 1) * (x.row_count / folds), x.row_count].min)
      train = generate_train_set(x, y, sin, ein)
      test = generate_test_set(x, y, sin, ein)
      train + test
    end

    # Training split: rows before sin stacked on rows from ein onward.
    def generate_train_set(x, y, sin, ein)
      xtrain = x[':' + sin, ':'].vstack(x[ein + ':', ':'])
      ytrain = y[':' + sin, ':'].vstack(y[ein + ':', ':'])
      [xtrain, ytrain]
    end

    # Held-out split: rows in [sin, ein).
    def generate_test_set(x, y, sin, ein)
      xtest = x[sin + ':' + ein, ':']
      ytest = y[sin + ':' + ein, ':']
      [xtest, ytest]
    end

    # Scores one prediction: 1.0 when correct, 0.0 otherwise. When
    # @epsilon is set (regression), "correct" means within @epsilon of
    # the target; otherwise labels must match exactly.
    def handle_epsilon(ypred, ytest, r)
      if @epsilon
        (ypred[r, 0] - ytest[r, 0]).abs < @epsilon ? 1.0 : 0.0
      else
        ypred[r, 0] == ytest[r, 0] ? 1.0 : 0.0
      end
    end

    # Accumulates correct/total counts for one fold and reports its
    # accuracy. FIX: the original printed with Kernel#p, which wraps the
    # progress message in inspect quotes; puts emits it verbatim.
    def correct_count(ypred, ytest, c, t, n)
      count = 0.0
      ypred.row_count.times do |r|
        count += handle_epsilon(ypred, ytest, r)
      end
      puts "Fold #{n} Accuracy: #{(count / ypred.row_count * 100.0).round(3)}%"
      [c + count, t + ypred.row_count]
    end

    # K-fold cross validation. The including class must provide
    # fit(x, y) and predict(x) (see LinearRegression / Perceptron).
    # Returns the overall accuracy in [0, 1], rounded to 5 places.
    def training_accuracy(x, y)
      correct = 0.0
      total = 0.0
      @folds.times do |n|
        xtrain, ytrain, xtest, ytest = generate_folds(x, y, n, @folds)
        fit(xtrain, ytrain)
        ypred = predict(xtest)
        correct, total = correct_count(ypred, ytest, correct, total, n)
      end
      (correct / total).round(5)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rubyml
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Palimar Rao
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-04-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: gruff
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.6.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: coveralls
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.8.13
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.8.13
|
41
|
+
description: |-
|
42
|
+
This is a simple machine learning library
|
43
|
+
written in Ruby. It provides implementations of linear regression
|
44
|
+
and multiclass perceptron and visualization and validation methods
|
45
|
+
to verify results. Also included are helper methods to work with
|
46
|
+
training and testing data.
|
47
|
+
email:
|
48
|
+
executables: []
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- lib/rubyml.rb
|
53
|
+
- lib/rubyml/linear_regression.rb
|
54
|
+
- lib/rubyml/perceptron.rb
|
55
|
+
- lib/rubyml/tools.rb
|
56
|
+
homepage:
|
57
|
+
licenses:
|
58
|
+
- MIT
|
59
|
+
metadata: {}
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
requirements: []
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 2.4.8
|
77
|
+
signing_key:
|
78
|
+
specification_version: 4
|
79
|
+
summary: A simple Ruby machine learning library.
|
80
|
+
test_files: []
|