convolver 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/benchmarks/convolver_vs_fftw3.rb +70 -0
- data/benchmarks/nn_layer_benchmark.rb +18 -0
- data/ext/convolver/convolver.c +114 -8
- data/lib/convolver.rb +1 -5
- data/lib/convolver/version.rb +1 -1
- data/spec/convolver_spec.rb +22 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e20ffb64db726dac28c0b1ab5643355a67642fad
|
4
|
+
data.tar.gz: 278c29c4e851edf295c6f0c82043ecab98c42d6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d8aefb6c27bf1777bea0d07f83c096a106278caf66b05519764705cfc783ebe614b71fd7f31adb43ffae7365daf18d8f4cb16cb18d5b83fea231467d571e75f
|
7
|
+
data.tar.gz: 0cf6c1c0fc18781580d512f56b6199f2918d476284f703fc4d2d9409e13787a7d9da22a9440714d04d114b472d5dfdcb55824b263ea3717071f61c4e4aa3714c
|
data/README.md
CHANGED
@@ -2,10 +2,18 @@
|
|
2
2
|
|
3
3
|
[](http://travis-ci.org/neilslater/convolver)
|
4
4
|
|
5
|
-
Adds
|
5
|
+
Adds a convolve operation to NArray floats. It is around 250 times faster than equivalents
|
6
6
|
in pure Ruby.
|
7
7
|
|
8
8
|
Note that convolves based on FFTW3 could well be faster still for large arrays with large kernels.
|
9
|
+
Benchmark tests suggest that the kernel needs to be a few hundred items, and be significantly smaller
|
10
|
+
than the signal before FFTW3 offers a significant advantage.
|
11
|
+
|
12
|
+
## Planned features
|
13
|
+
|
14
|
+
The *convolver* gem will eventually contain a basic kit for creating, training and running convolutional
|
15
|
+
neural networks. As a side effect of this plan, it will also contain efficient code for
|
16
|
+
calculating signal convolutions for other types of analysis.
|
9
17
|
|
10
18
|
## Installation
|
11
19
|
|
@@ -32,8 +40,10 @@ Basic convolution:
|
|
32
40
|
|
33
41
|
* Convolver only works on single-precision floats internally. It will cast NArray types to this, if
|
34
42
|
possible, prior to calculating.
|
43
|
+
|
35
44
|
* The convolution is an "inner" one. The output is smaller than the input, each dimension is reduced
|
36
45
|
by 1 less than the width of the kernel in the same dimension.
|
46
|
+
|
37
47
|
* Convolver expects input a and kernel b to have the same rank, and for the kernel to be same size
|
38
48
|
or smaller in all dimensions as the input.
|
39
49
|
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'convolver'
|
2
|
+
require 'narray'
|
3
|
+
require 'fftw3'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
# In Ruby for now, which is slower, but at least gets us ballpark figures (99% of the work is in the C)
|
7
|
+
module FFTW3Convolver
|
8
|
+
def self.convolve orig_a, orig_b
|
9
|
+
combined_size = orig_a.size + orig_b.size - 1
|
10
|
+
left_pad_a = ( combined_size - orig_a.size + 1)/2
|
11
|
+
mod_a = NArray.float(combined_size)
|
12
|
+
mod_a[left_pad_a] = orig_a
|
13
|
+
|
14
|
+
mod_b = NArray.float(combined_size)
|
15
|
+
left_select_b = ( orig_b.size + 1 )/2
|
16
|
+
right_select_b = orig_b.size - left_select_b
|
17
|
+
mod_b[0] = orig_b[(0...left_select_b)].reverse
|
18
|
+
mod_b[-right_select_b] = orig_b[-right_select_b..-1].reverse
|
19
|
+
|
20
|
+
afft = FFTW3.fft(mod_a)
|
21
|
+
bfft = FFTW3.fft(mod_b)
|
22
|
+
cfft = afft * bfft
|
23
|
+
|
24
|
+
(FFTW3.ifft( cfft )/combined_size).real[left_pad_a...(left_pad_a+ orig_a.size - orig_b.size + 1)]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class Convolver2DBenchmark
|
29
|
+
attr_reader :image, :kernel
|
30
|
+
|
31
|
+
def initialize
|
32
|
+
# These show Convolver.convolve as 3x faster than FFTW3
|
33
|
+
# @image = NArray.float(256 * 256).random
|
34
|
+
# @kernel = NArray.float(16 * 16).random
|
35
|
+
|
36
|
+
# These are roughly even (10% advantage to FFTW3)
|
37
|
+
# @image = NArray.float(256 * 256).random
|
38
|
+
# @kernel = NArray.float(32 * 32).random
|
39
|
+
|
40
|
+
# These show FFTW3 as 4x faster than Convolver.convolve
|
41
|
+
# @image = NArray.float(256 * 256).random
|
42
|
+
# @kernel = NArray.float(64 * 64).random
|
43
|
+
|
44
|
+
# These show Convolver.convolve as 200x faster than FFTW3
|
45
|
+
# @image = NArray.float(50 * 64 * 64).random
|
46
|
+
# @kernel = NArray.float(50 * 64 * 64).random
|
47
|
+
|
48
|
+
# These show FFTW3 as 2x faster than Convolver.convolve
|
49
|
+
# @image = NArray.float(128 * 128).random
|
50
|
+
# @kernel = NArray.float(64 * 64).random
|
51
|
+
|
52
|
+
# These show FFTW3 and Convolver.convolve roughly equal
|
53
|
+
# @image = NArray.float(80 * 80).random
|
54
|
+
# @kernel = NArray.float(64 * 64).random
|
55
|
+
|
56
|
+
# These show FFTW3 as 2x faster than Convolver.convolve
|
57
|
+
# @image = NArray.float(2 * 80 * 80).random
|
58
|
+
# @kernel = NArray.float(2 * 64 * 64).random
|
59
|
+
|
60
|
+
# These are roughly even - increasing size of image favours FFTW3
|
61
|
+
@image = NArray.float(2000 + 80 * 80).random
|
62
|
+
@kernel = NArray.float(80 * 80).random
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
Benchmark.bm do |x|
|
67
|
+
source = Convolver2DBenchmark.new
|
68
|
+
x.report('convolver') { 100.times { Convolver.convolve( source.image, source.kernel ) } }
|
69
|
+
x.report('fftw3') { 100.times { FFTW3Convolver.convolve( source.image, source.kernel ) } }
|
70
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'convolver'
|
2
|
+
require 'narray'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
class ConvolverNNLayerBenchmark
|
6
|
+
attr_reader :input, :weights, :thresholds
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@input = NArray.float(1024).random
|
10
|
+
@weights = NArray.float(1024,256).random
|
11
|
+
@thresholds = NArray.float(256).random
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
Benchmark.bm do |x|
|
16
|
+
source = ConvolverNNLayerBenchmark.new
|
17
|
+
x.report('kilo') { 1000.times { Convolver.nn_run_layer( source.input, source.weights, source.thresholds ) } }
|
18
|
+
end
|
data/ext/convolver/convolver.c
CHANGED
@@ -63,9 +63,9 @@ inline void calc_co_increment( int rank, int *outer_shape, int *inner_shape, int
|
|
63
63
|
|
64
64
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
65
65
|
//
|
66
|
-
// Convolve
|
66
|
+
// Convolve
|
67
67
|
//
|
68
|
-
// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations.
|
68
|
+
// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
|
69
69
|
//
|
70
70
|
|
71
71
|
void convolve_raw(
|
@@ -128,12 +128,67 @@ void convolve_raw(
|
|
128
128
|
return;
|
129
129
|
}
|
130
130
|
|
131
|
+
////////////////////////////////////////////////////////////////////////////////////////////////////
|
132
|
+
//
|
133
|
+
// Neural net
|
134
|
+
//
|
135
|
+
// Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
|
136
|
+
//
|
137
|
+
//
|
138
|
+
|
139
|
+
void nn_run_layer_raw( int in_size, int out_size,
|
140
|
+
float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
|
141
|
+
int i, j, in_aligned_size, out_aligned_size, offset;
|
142
|
+
__m128 simd_x, simd_y, simd_t;
|
143
|
+
|
144
|
+
in_aligned_size = 4 * ( in_size/4 );
|
145
|
+
out_aligned_size = 4 * ( out_size/4 );
|
146
|
+
|
147
|
+
// Calculate activation
|
148
|
+
for ( i = 0; i < out_size; i++ ) {
|
149
|
+
|
150
|
+
float t = 0.0;
|
151
|
+
simd_t = _mm_setzero_ps();
|
152
|
+
offset = i * in_size;
|
153
|
+
|
154
|
+
// Use SIMD for all the aligned values in groups of 4
|
155
|
+
for ( j = 0; j < in_aligned_size; j +=4 ) {
|
156
|
+
simd_x = _mm_load_ps( in_ptr + j );
|
157
|
+
// Weights might not align to 16 bytes due to size of layers
|
158
|
+
simd_y = _mm_loadu_ps( weights + (offset + j) );
|
159
|
+
simd_x = _mm_mul_ps( simd_x, simd_y );
|
160
|
+
simd_t = _mm_add_ps( simd_x, simd_t );
|
161
|
+
}
|
162
|
+
|
163
|
+
// Complete any remaining 1,2 or 3 items one at a time
|
164
|
+
for ( j = in_aligned_size; j < in_size; j++ ) {
|
165
|
+
t += in_ptr[ j ] * weights[ offset + j ];
|
166
|
+
}
|
167
|
+
|
168
|
+
out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
|
169
|
+
}
|
170
|
+
|
171
|
+
for ( i = 0; i < out_size; i++ ) {
|
172
|
+
out_ptr[i] -= thresholds[i];
|
173
|
+
if ( out_ptr[i] < 0.0 ) { out_ptr[i] = 0.0; }
|
174
|
+
}
|
175
|
+
|
176
|
+
return;
|
177
|
+
}
|
131
178
|
|
132
179
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
133
180
|
|
134
181
|
// To hold the module object
|
135
182
|
VALUE Convolver = Qnil;
|
136
183
|
|
184
|
+
/* @overload convolve( signal, kernel )
|
185
|
+
* Calculates convolution of an array of floats representing a signal, with a second array representing
|
186
|
+
* a kernel. The two parameters must have the same rank. The output has same rank, its size in each dimension d is given by
|
187
|
+
* signal.shape[d] - kernel.shape[d] + 1
|
188
|
+
* @param [NArray] signal must be same size or larger than kernel in each dimension
|
189
|
+
* @param [NArray] kernel must be same size or smaller than signal in each dimension
|
190
|
+
* @return [NArray] result of convolving signal with kernel
|
191
|
+
*/
|
137
192
|
static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
138
193
|
struct NARRAY *na_a, *na_b, *na_c;
|
139
194
|
volatile VALUE val_a, val_b, val_c;
|
@@ -146,12 +201,8 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
|
146
201
|
val_b = na_cast_object(b, NA_SFLOAT);
|
147
202
|
GetNArray( val_b, na_b );
|
148
203
|
|
149
|
-
if ( na_a->rank > na_b->rank ) {
|
150
|
-
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
|
151
|
-
}
|
152
|
-
|
153
|
-
if ( na_a->rank < na_b->rank ) {
|
154
|
-
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
|
204
|
+
if ( na_a->rank != na_b->rank ) {
|
205
|
+
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (a rack %d, b rank %d)", na_a->rank, na_b->rank );
|
155
206
|
}
|
156
207
|
|
157
208
|
if ( na_a->rank > LARGEST_RANK ) {
|
@@ -178,7 +229,62 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
|
178
229
|
return val_c;
|
179
230
|
}
|
180
231
|
|
232
|
+
/* @overload nn_run_layer( inputs, weights, thresholds )
|
233
|
+
* Calculates activations of a fully-connected neural network layer. The transfer function after
|
234
|
+
* summing weights and applying threshold is a "ReLU", equivalent to
|
235
|
+
* y = x < 0.0 ? 0.0 : x
|
236
|
+
* this is less sophisticated than many neural net architectures, but is fast to calculate and to
|
237
|
+
* train.
|
238
|
+
* @param [NArray] inputs must be rank 1 array of floats
|
239
|
+
* @param [NArray] weights must be rank 2 array of floats, with first rank size of inputs, and second rank equal to number of outputs desired
|
240
|
+
* @param [NArray] thresholds must be rank 1 array of floats, size equal to number of outputs desired
|
241
|
+
* @return [NArray] neuron activations
|
242
|
+
*/
|
243
|
+
static VALUE narray_nn_run_single_layer( VALUE self, VALUE inputs, VALUE weights, VALUE thresholds ) {
|
244
|
+
struct NARRAY *na_inputs, *na_weights, *na_thresholds, *na_outputs;
|
245
|
+
volatile VALUE val_inputs, val_weights, val_thresholds, val_outputs;
|
246
|
+
int input_size, output_size;
|
247
|
+
int output_shape[1];
|
248
|
+
|
249
|
+
val_inputs = na_cast_object(inputs, NA_SFLOAT);
|
250
|
+
GetNArray( val_inputs, na_inputs );
|
251
|
+
if ( na_inputs->rank != 1 ) {
|
252
|
+
rb_raise( rb_eArgError, "input must be array of rank 1" );
|
253
|
+
}
|
254
|
+
input_size = na_inputs->total;
|
255
|
+
|
256
|
+
val_weights = na_cast_object(weights, NA_SFLOAT);
|
257
|
+
GetNArray( val_weights, na_weights );
|
258
|
+
if ( na_weights->rank != 2 ) {
|
259
|
+
rb_raise( rb_eArgError, "weights must be array of rank 2" );
|
260
|
+
}
|
261
|
+
if ( na_weights->shape[0] != input_size ) {
|
262
|
+
rb_raise( rb_eArgError, "weights shape mismatch, expected %d across, got %d", input_size, na_weights->shape[0] );
|
263
|
+
}
|
264
|
+
output_size = na_weights->shape[1];
|
265
|
+
|
266
|
+
val_thresholds = na_cast_object(thresholds, NA_SFLOAT);
|
267
|
+
GetNArray( val_thresholds, na_thresholds );
|
268
|
+
if ( na_thresholds->rank != 1 ) {
|
269
|
+
rb_raise( rb_eArgError, "thresholds must be array of rank 1" );
|
270
|
+
}
|
271
|
+
if ( na_thresholds->shape[0] != output_size ) {
|
272
|
+
rb_raise( rb_eArgError, "thresholds expected size %d, but got %d", output_size, na_thresholds->shape[0] );
|
273
|
+
}
|
274
|
+
|
275
|
+
output_shape[0] = output_size;
|
276
|
+
val_outputs = na_make_object( NA_SFLOAT, 1, output_shape, CLASS_OF( val_inputs ) );
|
277
|
+
GetNArray( val_outputs, na_outputs );
|
278
|
+
|
279
|
+
nn_run_layer_raw( input_size, output_size, (float*) na_inputs->ptr, (float*) na_weights->ptr,
|
280
|
+
(float*) na_thresholds->ptr, (float*) na_outputs->ptr );
|
281
|
+
|
282
|
+
return val_outputs;
|
283
|
+
}
|
284
|
+
|
285
|
+
|
181
286
|
void Init_convolver() {
|
182
287
|
Convolver = rb_define_module( "Convolver" );
|
183
288
|
rb_define_singleton_method( Convolver, "convolve", narray_convolve, 2 );
|
289
|
+
rb_define_singleton_method( Convolver, "nn_run_layer", narray_nn_run_single_layer, 3 );
|
184
290
|
}
|
data/lib/convolver.rb
CHANGED
@@ -3,9 +3,5 @@ require "convolver/convolver"
|
|
3
3
|
require "convolver/version"
|
4
4
|
|
5
5
|
module Convolver
|
6
|
-
|
7
|
-
# @param [NArray] a outer array
|
8
|
-
# @param [NArray] b kernel
|
9
|
-
# @return [NArray] result of convolving a with b
|
10
|
-
# @!parse def self.convolve(a,b); end
|
6
|
+
|
11
7
|
end
|
data/lib/convolver/version.rb
CHANGED
data/spec/convolver_spec.rb
CHANGED
@@ -81,4 +81,26 @@ describe Convolver do
|
|
81
81
|
]
|
82
82
|
end
|
83
83
|
end
|
84
|
+
|
85
|
+
describe "#nn_run_layer" do
|
86
|
+
it "should calculate basic layer rules" do
|
87
|
+
inputs = NArray[ 1.0 ]
|
88
|
+
weights = NArray[ [ 1.0 ] ]
|
89
|
+
thresholds = NArray[ 0.0 ]
|
90
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
91
|
+
outputs.should be_narray_like NArray[ 1.0 ]
|
92
|
+
|
93
|
+
inputs = NArray[ 0.5, -0.5 ]
|
94
|
+
weights = NArray[ [ 1.0, 2.0 ], [ 2.0, 1.0 ] ]
|
95
|
+
thresholds = NArray[ 0.0, 0.0 ]
|
96
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
97
|
+
outputs.should be_narray_like NArray[ 0.0, 0.5 ]
|
98
|
+
|
99
|
+
inputs = NArray[ 0.3, -0.4, 0.8, -0.7 ]
|
100
|
+
weights = NArray[ [ 1.0, 0.25, 0.5, -0.5 ], [ -1.0, -0.25, -0.5, 0.5 ] ]
|
101
|
+
thresholds = NArray[ 0.0, 0.0 ]
|
102
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
103
|
+
outputs.should be_narray_like NArray[ 0.95, 0.0 ]
|
104
|
+
end
|
105
|
+
end
|
84
106
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: convolver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Neil Slater
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-10-
|
11
|
+
date: 2013-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|
@@ -109,6 +109,8 @@ files:
|
|
109
109
|
- README.md
|
110
110
|
- Rakefile
|
111
111
|
- benchmarks/convolve_benchmark.rb
|
112
|
+
- benchmarks/convolver_vs_fftw3.rb
|
113
|
+
- benchmarks/nn_layer_benchmark.rb
|
112
114
|
- convolver.gemspec
|
113
115
|
- ext/convolver/convolver.c
|
114
116
|
- ext/convolver/extconf.rb
|