rubyml 0.0.1
- checksums.yaml +7 -0
- data/lib/rubyml.rb +85 -0
- data/lib/rubyml/linear_regression.rb +34 -0
- data/lib/rubyml/perceptron.rb +64 -0
- data/lib/rubyml/tools.rb +104 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 3be8f5b08589ad4973b15a9c83bc11d6522504c5
+  data.tar.gz: 9f1a58d71e2932417c07634b4dd4f74a2ce3dc1c
+SHA512:
+  metadata.gz: 5b9e3d4b883391569c5006283d426fa5282c7f19b66095734186c6ddbd9ac050a17339579f213c15db9b49d461d8984fee55c1b41756662456b29a3b218c2237
+  data.tar.gz: 28e7a2562e961ad518e6e15b6eece272dc3d156b1dff4cf833bf535de3dc7b9ac78c8af2311632376f58a7d1c780b0aa02226151438eb04a2506b287da12299c
data/lib/rubyml.rb
ADDED
@@ -0,0 +1,85 @@
+require 'rubyml/tools'
+require 'rubyml/linear_regression'
+require 'rubyml/perceptron'
+
+# Monkey patching the Matrix class to
+# implement matrix splicing.
+class Matrix
+  alias old_element element
+  def [](i, j)
+    if i.class == Fixnum && j == ':'
+      row(i)
+    elsif j.class == Fixnum && i == ':'
+      column(j)
+    else
+      redirect(i, j)
+    end
+  end
+
+  def redirect(i, j)
+    if i.class == String && j.class == String
+      redirect2(i, j)
+    else
+      old_element(i, j)
+    end
+  end
+
+  def redirect2(i, j)
+    if i.include?(':') || j.include?(':')
+      redirect3(i, j)
+    else
+      old_element(i, j)
+    end
+  end
+
+  def redirect3(i, j)
+    rs, re = i.split(':').map { |e| e == '' ? 0 : Integer(e) }
+    cs, ce = j.split(':').map { |e| e == '' ? 0 : Integer(e) }
+    redirect4(rs, re, cs, ce, [i, j])
+  end
+
+  def redirect4(rs, re, cs, ce, orig)
+    if orig[0] == ':'
+      rs = 0
+      re = row_count
+    end
+    if orig[1] == ':'
+      cs = 0
+      ce = column_count
+    end
+    redirect5(rs, re, cs, ce, orig)
+  end
+
+  def redirect5(rs, re, cs, ce, orig)
+    re = rs + 1 unless orig[0].include?(':')
+    ce = cs + 1 unless orig[1].include?(':')
+    redirect6(rs, re, cs, ce)
+  end
+
+  def redirect6(rs, re, cs, ce)
+    rs = rs.nil? ? 0 : rs
+    cs = cs.nil? ? 0 : cs
+    re = re.nil? ? row_count : re
+    ce = ce.nil? ? column_count : ce
+    redirect7(rs, re, cs, ce)
+  end
+
+  def redirect7(rs, re, cs, ce)
+    return Matrix.rows([]) if rs >= re && cs >= ce
+    return Matrix.rows([[]] * (re - rs)) if cs == ce
+    return Matrix.columns([[]] * (ce - cs)) if re == rs
+    redirect8(rs, re, cs, ce)
+  end
+
+  def redirect8(rs, re, cs, ce)
+    rv = row_vectors[rs..re - 1].map(&:to_a)
+    nrv = rv.map { |e| e[cs..ce - 1] }
+    Matrix.rows(nrv)
+  end
+end
+
+# A general class to allow access to
+# data manipulation tools.
+class RubyML
+  include Tools::DataMethods
+end
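The monkey patch above lets Matrix objects be sliced with ':' range strings in addition to plain integer indexing. A minimal usage sketch (not part of the gem; the sample matrix is hypothetical, and the Fixnum checks assume a pre-3.0 Ruby where that class still exists):

require 'rubyml'

m = Matrix[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

m[0, ':']       # whole row 0 as a Vector
m[':', 2]       # whole column 2 as a Vector
m['0:2', ':']   # rows 0..1, all columns, as a Matrix
m['1:', '0:2']  # rows 1..end, columns 0..1, as a Matrix
m[1, 1]         # plain element access still falls through to old_element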
data/lib/rubyml/linear_regression.rb
ADDED
@@ -0,0 +1,34 @@
+require 'rubyml/tools'
+
+# The linear regression class with
+# customizable number of folds for
+# K-fold cross validation.
+class LinearRegression
+  include Tools::DataMethods
+  include Tools::ClassifierMethods
+  attr_reader :theta, :accuracy, :precision, :folds
+
+  def initialize(precision = 3, folds = 5)
+    @precision = precision
+    @epsilon = 2.0
+    @folds = folds
+  end
+
+  def fit(x, y)
+    x_mat = bias_trick(x)
+    @theta = ((x_mat.t * x_mat).inv * x_mat.t) * y
+    @theta = @theta.collect { |e| e.round(@precision) }
+  end
+
+  def predict(x)
+    x_mat = bias_trick(x)
+    (x_mat * @theta).collect { |e| e.round(@precision) }
+  end
+
+  def visualize(x, y)
+    x = mat_to_array(x)
+    y = mat_to_array(y)
+    theta = mat_to_array(@theta)
+    plot_function(x, y, theta)
+  end
+end
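A usage sketch for LinearRegression (the data is hypothetical and only illustrates the closed-form normal-equation fit, (X^T X)^-1 X^T y, computed after bias_trick prepends a column of ones):

require 'rubyml'

# y roughly follows 2x + 1
x = Matrix.columns([[0.0, 1.0, 2.0, 3.0]])
y = Matrix.columns([[1.1, 2.9, 5.2, 6.8]])

lr = LinearRegression.new              # precision = 3, folds = 5
lr.fit(x, y)
lr.theta                               # column matrix [intercept, slope]
lr.predict(Matrix.columns([[4.0]]))    # predicted y for x = 4
# lr.visualize(x, y)                   # writes scatter.png via Gruff

ClassifierMethods also exposes training_accuracy(x, y), which runs @folds rounds of cross validation, so the data set needs at least @folds rows for it to be meaningful.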
data/lib/rubyml/perceptron.rb
ADDED
@@ -0,0 +1,64 @@
+require 'rubyml/tools'
+
+# The multiclass perceptron class with
+# customizable number of iterations and folds.
+class Perceptron
+  include Tools::DataMethods
+  include Tools::ClassifierMethods
+
+  attr_reader :iterations, :folds, :labels, :weights
+
+  def initialize(iterations = 100, folds = 5)
+    @iterations = iterations
+    @epsilon = nil
+    @folds = folds
+    @labels = []
+    @weights = {}
+  end
+
+  def setup_weights(y)
+    @labels = mat_to_array(y).uniq { |e| e }
+    @labels.each { |lbl| @weights[lbl] = Hash.new(0) }
+  end
+
+  def update_weights(guess, real, c, w)
+    @weights[guess][c] -= w
+    @weights[real][c] += w
+  end
+
+  def fit(x, y, cs = true)
+    cold_start if cs
+    setup_weights(y)
+    @iterations.times do
+      x.row_count.times do |r|
+        clbl = get_best_guess(x, r)
+        next unless y[r, 0] != clbl
+        x.column_count.times { |c| update_weights(clbl, y[r, 0], c, x[r, c]) }
+      end
+    end
+  end
+
+  def predict(x)
+    preds = []
+    x.row_count.times { |r| preds << get_best_guess(x, r) }
+    Matrix.columns([preds])
+  end
+
+  def get_best_guess(x, r)
+    clbl, cmax = nil
+    @labels.each do |lbl|
+      csum = 0.0
+      x.column_count.times { |c| csum += @weights[lbl][c] * x[r, c] }
+      if cmax.nil? || cmax <= csum
+        cmax = csum
+        clbl = lbl
+      end
+    end
+    clbl
+  end
+
+  def cold_start
+    @labels = []
+    @weights = {}
+  end
+end
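A sketch of how Perceptron might be driven (the two-class data is hypothetical; the classifier keeps one weight Hash per label and nudges weights whenever the current argmax guess disagrees with the true label):

require 'rubyml'

x = Matrix[[1.0, 0.0], [0.9, 0.2], [0.1, 1.0], [0.0, 0.8]]
y = Matrix.columns([[0.0, 0.0, 1.0, 1.0]])

pc = Perceptron.new(50)   # 50 iterations, 5 folds
pc.fit(x, y)
pc.predict(x)             # column Matrix of predicted labels
pc.weights                # { label => { feature index => weight } }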
data/lib/rubyml/tools.rb
ADDED
@@ -0,0 +1,104 @@
+require 'matrix'
+require 'gruff'
+
+module Tools
+  # Methods for loading and manipulating data.
+  module DataMethods
+    def load_data(file, text = false)
+      mat = []
+      File.foreach(file) do |f|
+        mat << f.split(',').map { |i| text ? String(i).chomp : Float(i) }
+      end
+      Matrix.rows(mat)
+    end
+
+    def separate_data(data)
+      col_vec = data.column_vectors
+      y = Matrix.columns([col_vec.pop])
+      x = Matrix.columns(col_vec).collect { |e| Float(e) }
+      [x, y]
+    end
+
+    def mat_to_array(data)
+      arr = []
+      data.each { |e| arr << e }
+      arr
+    end
+
+    def bias_trick(x)
+      ones = Matrix.columns([[1] * x.row_count])
+      x_bias = ones.hstack(x)
+      x_bias
+    end
+
+    def plot_function(px, py, theta)
+      fx = []
+      fy = []
+      1000.times do |i|
+        fx << (px[0] + (px[-1] - px[0]) * Float(i) / 1000.0)
+        fy << (fx[-1] * theta[1] + theta[0])
+      end
+      plot(fx, fy, px, py)
+    end
+
+    def plot(fx, fy, px, py)
+      g = Gruff::Scatter.new(800)
+      g.data(:data, px, py)
+      g.data(:fit, fx, fy)
+      g.write('scatter.png')
+    end
+  end
+
+  # Methods to test classifier accuracy via
+  # K-fold cross validation.
+  module ClassifierMethods
+    def generate_folds(x, y, num, folds)
+      sin = String(num * (x.row_count / folds))
+      ein = String([(num + 1) * (x.row_count / folds), x.row_count].min)
+      train = generate_train_set(x, y, sin, ein)
+      test = generate_test_set(x, y, sin, ein)
+      train + test
+    end
+
+    def generate_train_set(x, y, sin, ein)
+      xtrain = x[':' + sin, ':'].vstack(x[ein + ':', ':'])
+      ytrain = y[':' + sin, ':'].vstack(y[ein + ':', ':'])
+      [xtrain, ytrain]
+    end
+
+    def generate_test_set(x, y, sin, ein)
+      xtest = x[sin + ':' + ein, ':']
+      ytest = y[sin + ':' + ein, ':']
+      [xtest, ytest]
+    end
+
+    def handle_epsilon(ypred, ytest, r)
+      if @epsilon
+        ((ypred[r, 0] - ytest[r, 0]).abs < @epsilon ? 1.0 : 0.0)
+      else
+        (ypred[r, 0] == ytest[r, 0] ? 1.0 : 0.0)
+      end
+    end
+
+    def correct_count(ypred, ytest, c, t, n)
+      count = 0.0
+      ypred.row_count.times do |r|
+        count += handle_epsilon(ypred, ytest, r)
+      end
+      p "Fold #{n} Accuracy: #{(count / ypred.row_count * 100.0).round(3)}%"
+      [c + count, t + ypred.row_count]
+    end
+
+    def training_accuracy(x, y)
+      correct = 0.0
+      total = 0.0
+      @folds.times do |n|
+        xtrain, ytrain, xtest, ytest = generate_folds(x, y, n, @folds)
+        fit(xtrain, ytrain)
+        ypred = predict(xtest)
+        correct, total = correct_count(ypred, ytest, correct, total, n)
+      end
+      (correct / total).round(5)
+    end
+  end
+end
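The DataMethods helpers are what the RubyML class itself exposes. A short sketch (the CSV file name is hypothetical; the last column is assumed to hold the target value, which is what separate_data expects):

require 'rubyml'

ml = RubyML.new                  # mixes in Tools::DataMethods
data = ml.load_data('data.csv')  # each line: comma-separated floats
x, y = ml.separate_data(data)    # feature matrix / last-column target
xb   = ml.bias_trick(x)          # prepend a column of ones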
metadata
ADDED
@@ -0,0 +1,80 @@
+--- !ruby/object:Gem::Specification
+name: rubyml
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+platform: ruby
+authors:
+- Palimar Rao
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2016-04-29 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: gruff
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 0.6.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 0.6.0
+- !ruby/object:Gem::Dependency
+  name: coveralls
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 0.8.13
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 0.8.13
+description: |-
+  This is a simple machine learning library
+  written in Ruby. It provides implementations of linear regression
+  and multiclass perceptron and visualization and validation methods
+  to verify results. Also included are helper methods to work with
+  training and testing data.
+email:
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/rubyml.rb
+- lib/rubyml/linear_regression.rb
+- lib/rubyml/perceptron.rb
+- lib/rubyml/tools.rb
+homepage:
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.8
+signing_key:
+specification_version: 4
+summary: A simple Ruby machine learning library.
+test_files: []