convolver 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 254fbf54caf7407108357aee612ab3c08af4ea4c
4
- data.tar.gz: f4212255722174976e92c734b76ce956365ad936
3
+ metadata.gz: e20ffb64db726dac28c0b1ab5643355a67642fad
4
+ data.tar.gz: 278c29c4e851edf295c6f0c82043ecab98c42d6f
5
5
  SHA512:
6
- metadata.gz: 55d4082e492a48d09316cf4f6c0f44ad1dd5bae756fd9ef3d8890c592ed0c863029678eded0a3783559e7acfdb8ca9198ce3d4938c74174220f5452f35524d0d
7
- data.tar.gz: 3a9ee78cb91ca935c83f0b56e93d58799871e97d5f8174ff9ba4e86ec798469dad6f3167482458c24f37bdf47aed4b6b5f5aaf6e658f9329e63aab16daa5743d
6
+ metadata.gz: 3d8aefb6c27bf1777bea0d07f83c096a106278caf66b05519764705cfc783ebe614b71fd7f31adb43ffae7365daf18d8f4cb16cb18d5b83fea231467d571e75f
7
+ data.tar.gz: 0cf6c1c0fc18781580d512f56b6199f2918d476284f703fc4d2d9409e13787a7d9da22a9440714d04d114b472d5dfdcb55824b263ea3717071f61c4e4aa3714c
data/README.md CHANGED
@@ -2,10 +2,18 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/neilslater/convolver.png?branch=master)](http://travis-ci.org/neilslater/convolver)
4
4
 
5
- Adds an "inner" convolve operation to NArray floats. It is around 250 times faster than equivalents
5
+ Adds a convolve operation to NArray floats. It is around 250 times faster than equivalents
6
6
  in pure Ruby.
7
7
 
8
8
  Note that convolves based on FFTW3 could well be faster still for large arrays with large kernels.
9
+ Benchmark tests suggest that the kernel needs to be a few hundred items, and be significantly smaller
10
+ than the signal before FFTW3 offers a significant advantage.
11
+
12
+ ## Planned features
13
+
14
+ The *convolver* gem will eventually contain a basic kit for creating, training and running convolutional
15
+ neural networks. As a side effect of this plan, it will also contain efficient code for
16
+ calculating signal convolutions for other types of analysis.
9
17
 
10
18
  ## Installation
11
19
 
@@ -32,8 +40,10 @@ Basic convolution:
32
40
 
33
41
  * Convolver only works on single-precision floats internally. It will cast NArray types to this, if
34
42
  possible, prior to calculating.
43
+
35
44
  * The convolution is an "inner" one. The output is smaller than the input, each dimension is reduced
36
45
  by 1 less than the width of the kernel in the same dimension.
46
+
37
47
  * Convolver expects input a and kernel b to have the same rank, and for the kernel to be same size
38
48
  or smaller in all dimensions as the input.
39
49
 
@@ -0,0 +1,70 @@
1
+ require 'convolver'
2
+ require 'narray'
3
+ require 'fftw3'
4
+ require 'benchmark'
5
+
6
+ # In Ruby for now, which is slower, but at least gets us ballpark figures (99% of the work is in the C)
7
+ module FFTW3Convolver
8
+ def self.convolve orig_a, orig_b
9
+ combined_size = orig_a.size + orig_b.size - 1
10
+ left_pad_a = ( combined_size - orig_a.size + 1)/2
11
+ mod_a = NArray.float(combined_size)
12
+ mod_a[left_pad_a] = orig_a
13
+
14
+ mod_b = NArray.float(combined_size)
15
+ left_select_b = ( orig_b.size + 1 )/2
16
+ right_select_b = orig_b.size - left_select_b
17
+ mod_b[0] = orig_b[(0...left_select_b)].reverse
18
+ mod_b[-right_select_b] = orig_b[-right_select_b..-1].reverse
19
+
20
+ afft = FFTW3.fft(mod_a)
21
+ bfft = FFTW3.fft(mod_b)
22
+ cfft = afft * bfft
23
+
24
+ (FFTW3.ifft( cfft )/combined_size).real[left_pad_a...(left_pad_a+ orig_a.size - orig_b.size + 1)]
25
+ end
26
+ end
27
+
28
+ class Convolver2DBenchmark
29
+ attr_reader :image, :kernel
30
+
31
+ def initialize
32
+ # These show Convolver.convolve as 3x faster than FFTW3
33
+ # @image = NArray.float(256 * 256).random
34
+ # @kernel = NArray.float(16 * 16).random
35
+
36
+ # These are roughly even (10% advantage to FFTW3)
37
+ # @image = NArray.float(256 * 256).random
38
+ # @kernel = NArray.float(32 * 32).random
39
+
40
+ # These show FFTW3 as 4x faster than Convolver.convolve
41
+ # @image = NArray.float(256 * 256).random
42
+ # @kernel = NArray.float(64 * 64).random
43
+
44
+ # These show Convolver.convolve as 200x faster than FFTW3
45
+ # @image = NArray.float(50 * 64 * 64).random
46
+ # @kernel = NArray.float(50 * 64 * 64).random
47
+
48
+ # These show FFTW3 as 2x faster than Convolver.convolve
49
+ # @image = NArray.float(128 * 128).random
50
+ # @kernel = NArray.float(64 * 64).random
51
+
52
+ # These show FFTW3 and Convolver.convolve roughly equal
53
+ # @image = NArray.float(80 * 80).random
54
+ # @kernel = NArray.float(64 * 64).random
55
+
56
+ # These show FFTW3 as 2x faster than Convolver.convolve
57
+ # @image = NArray.float(2 * 80 * 80).random
58
+ # @kernel = NArray.float(2 * 64 * 64).random
59
+
60
+ # These are roughly even - increasing size of image favours FFTW3
61
+ @image = NArray.float(2000 + 80 * 80).random
62
+ @kernel = NArray.float(80 * 80).random
63
+ end
64
+ end
65
+
66
+ Benchmark.bm do |x|
67
+ source = Convolver2DBenchmark.new
68
+ x.report('convolver') { 100.times { Convolver.convolve( source.image, source.kernel ) } }
69
+ x.report('fftw3') { 100.times { FFTW3Convolver.convolve( source.image, source.kernel ) } }
70
+ end
@@ -0,0 +1,18 @@
1
+ require 'convolver'
2
+ require 'narray'
3
+ require 'benchmark'
4
+
5
+ class ConvolverNNLayerBenchmark
6
+ attr_reader :input, :weights, :thresholds
7
+
8
+ def initialize
9
+ @input = NArray.float(1024).random
10
+ @weights = NArray.float(1024,256).random
11
+ @thresholds = NArray.float(256).random
12
+ end
13
+ end
14
+
15
+ Benchmark.bm do |x|
16
+ source = ConvolverNNLayerBenchmark.new
17
+ x.report('kilo') { 1000.times { Convolver.nn_run_layer( source.input, source.weights, source.thresholds ) } }
18
+ end
@@ -63,9 +63,9 @@ inline void calc_co_increment( int rank, int *outer_shape, int *inner_shape, int
63
63
 
64
64
  ////////////////////////////////////////////////////////////////////////////////////////////////////
65
65
  //
66
- // Convolve method 4.
66
+ // Convolve
67
67
  //
68
- // Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 11.54 seconds. Score: 63
68
+ // Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
69
69
  //
70
70
 
71
71
  void convolve_raw(
@@ -128,12 +128,67 @@ void convolve_raw(
128
128
  return;
129
129
  }
130
130
 
131
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
132
+ //
133
+ // Neural net
134
+ //
135
+ // Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
136
+ //
137
+ //
138
+
139
+ void nn_run_layer_raw( int in_size, int out_size,
140
+ float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
141
+ int i, j, in_aligned_size, out_aligned_size, offset;
142
+ __m128 simd_x, simd_y, simd_t;
143
+
144
+ in_aligned_size = 4 * ( in_size/4 );
145
+ out_aligned_size = 4 * ( out_size/4 );
146
+
147
+ // Calculate activation
148
+ for ( i = 0; i < out_size; i++ ) {
149
+
150
+ float t = 0.0;
151
+ simd_t = _mm_setzero_ps();
152
+ offset = i * in_size;
153
+
154
+ // Use SIMD for all the aligned values in groups of 4
155
+ for ( j = 0; j < in_aligned_size; j +=4 ) {
156
+ simd_x = _mm_load_ps( in_ptr + j );
157
+ // Weights might not align to 16 bytes due to size of layers
158
+ simd_y = _mm_loadu_ps( weights + (offset + j) );
159
+ simd_x = _mm_mul_ps( simd_x, simd_y );
160
+ simd_t = _mm_add_ps( simd_x, simd_t );
161
+ }
162
+
163
+ // Complete any remaining 1,2 or 3 items one at a time
164
+ for ( j = in_aligned_size; j < in_size; j++ ) {
165
+ t += in_ptr[ j ] * weights[ offset + j ];
166
+ }
167
+
168
+ out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
169
+ }
170
+
171
+ for ( i = 0; i < out_size; i++ ) {
172
+ out_ptr[i] -= thresholds[i];
173
+ if ( out_ptr[i] < 0.0 ) { out_ptr[i] = 0.0; }
174
+ }
175
+
176
+ return;
177
+ }
131
178
 
132
179
  ////////////////////////////////////////////////////////////////////////////////////////////////////
133
180
 
134
181
  // To hold the module object
135
182
  VALUE Convolver = Qnil;
136
183
 
184
+ /* @overload convolve( signal, kernel )
185
+ * Calculates convolution of an array of floats representing a signal, with a second array representing
186
+ * a kernel. The two parameters must have the same rank. The output has same rank, its size in each dimension d is given by
187
+ * signal.shape[d] - kernel.shape[d] + 1
188
+ * @param [NArray] signal must be same size or larger than kernel in each dimension
189
+ * @param [NArray] kernel must be same size or smaller than signal in each dimension
190
+ * @return [NArray] result of convolving signal with kernel
191
+ */
137
192
  static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
138
193
  struct NARRAY *na_a, *na_b, *na_c;
139
194
  volatile VALUE val_a, val_b, val_c;
@@ -146,12 +201,8 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
146
201
  val_b = na_cast_object(b, NA_SFLOAT);
147
202
  GetNArray( val_b, na_b );
148
203
 
149
- if ( na_a->rank < na_b->rank ) {
150
- rb_raise( rb_eArgError, "narray b must have equal or lower rank than narray a" );
151
- }
152
-
153
- if ( na_a->rank < na_b->rank ) {
154
- rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
204
+ if ( na_a->rank != na_b->rank ) {
205
+ rb_raise( rb_eArgError, "narray a must have equal rank to narray b (a rank %d, b rank %d)", na_a->rank, na_b->rank );
155
206
  }
156
207
 
157
208
  if ( na_a->rank > LARGEST_RANK ) {
@@ -178,7 +229,62 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
178
229
  return val_c;
179
230
  }
180
231
 
232
+ /* @overload nn_run_layer( inputs, weights, thresholds )
233
+ * Calculates activations of a fully-connected neural network layer. The transfer function after
234
+ * summing weights and applying threshold is a "ReLU", equivalent to
235
+ * y = x < 0.0 ? 0.0 : x
236
+ * this is less sophisticated than many neural net architectures, but is fast to calculate and to
237
+ * train.
238
+ * @param [NArray] inputs must be rank 1 array of floats
239
+ * @param [NArray] weights must be rank 2 array of floats, with first rank size of inputs, and second rank equal to number of outputs desired
240
+ * @param [NArray] thresholds must be rank 1 array of floats, size equal to number of outputs desired
241
+ * @return [NArray] neuron activations
242
+ */
243
+ static VALUE narray_nn_run_single_layer( VALUE self, VALUE inputs, VALUE weights, VALUE thresholds ) {
244
+ struct NARRAY *na_inputs, *na_weights, *na_thresholds, *na_outputs;
245
+ volatile VALUE val_inputs, val_weights, val_thresholds, val_outputs;
246
+ int input_size, output_size;
247
+ int output_shape[1];
248
+
249
+ val_inputs = na_cast_object(inputs, NA_SFLOAT);
250
+ GetNArray( val_inputs, na_inputs );
251
+ if ( na_inputs->rank != 1 ) {
252
+ rb_raise( rb_eArgError, "input must be array of rank 1" );
253
+ }
254
+ input_size = na_inputs->total;
255
+
256
+ val_weights = na_cast_object(weights, NA_SFLOAT);
257
+ GetNArray( val_weights, na_weights );
258
+ if ( na_weights->rank != 2 ) {
259
+ rb_raise( rb_eArgError, "weights must be array of rank 2" );
260
+ }
261
+ if ( na_weights->shape[0] != input_size ) {
262
+ rb_raise( rb_eArgError, "weights shape mismatch, expected %d across, got %d", input_size, na_weights->shape[0] );
263
+ }
264
+ output_size = na_weights->shape[1];
265
+
266
+ val_thresholds = na_cast_object(thresholds, NA_SFLOAT);
267
+ GetNArray( val_thresholds, na_thresholds );
268
+ if ( na_thresholds->rank != 1 ) {
269
+ rb_raise( rb_eArgError, "thresholds must be array of rank 1" );
270
+ }
271
+ if ( na_thresholds->shape[0] != output_size ) {
272
+ rb_raise( rb_eArgError, "thresholds expected size %d, but got %d", output_size, na_thresholds->shape[0] );
273
+ }
274
+
275
+ output_shape[0] = output_size;
276
+ val_outputs = na_make_object( NA_SFLOAT, 1, output_shape, CLASS_OF( val_inputs ) );
277
+ GetNArray( val_outputs, na_outputs );
278
+
279
+ nn_run_layer_raw( input_size, output_size, (float*) na_inputs->ptr, (float*) na_weights->ptr,
280
+ (float*) na_thresholds->ptr, (float*) na_outputs->ptr );
281
+
282
+ return val_outputs;
283
+ }
284
+
285
+
181
286
  void Init_convolver() {
182
287
  Convolver = rb_define_module( "Convolver" );
183
288
  rb_define_singleton_method( Convolver, "convolve", narray_convolve, 2 );
289
+ rb_define_singleton_method( Convolver, "nn_run_layer", narray_nn_run_single_layer, 3 );
184
290
  }
data/lib/convolver.rb CHANGED
@@ -3,9 +3,5 @@ require "convolver/convolver"
3
3
  require "convolver/version"
4
4
 
5
5
  module Convolver
6
- # Calculates float convolution of an array with a kernel
7
- # @param [NArray] a outer array
8
- # @param [NArray] b kernel
9
- # @return [NArray] result of convolving a with b
10
- # @!parse def self.convolve(a,b); end
6
+
11
7
  end
@@ -1,3 +1,3 @@
1
1
  module Convolver
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -81,4 +81,26 @@ describe Convolver do
81
81
  ]
82
82
  end
83
83
  end
84
+
85
+ describe "#nn_run_layer" do
86
+ it "should calculate basic layer rules" do
87
+ inputs = NArray[ 1.0 ]
88
+ weights = NArray[ [ 1.0 ] ]
89
+ thresholds = NArray[ 0.0 ]
90
+ outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
91
+ outputs.should be_narray_like NArray[ 1.0 ]
92
+
93
+ inputs = NArray[ 0.5, -0.5 ]
94
+ weights = NArray[ [ 1.0, 2.0 ], [ 2.0, 1.0 ] ]
95
+ thresholds = NArray[ 0.0, 0.0 ]
96
+ outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
97
+ outputs.should be_narray_like NArray[ 0.0, 0.5 ]
98
+
99
+ inputs = NArray[ 0.3, -0.4, 0.8, -0.7 ]
100
+ weights = NArray[ [ 1.0, 0.25, 0.5, -0.5 ], [ -1.0, -0.25, -0.5, 0.5 ] ]
101
+ thresholds = NArray[ 0.0, 0.0 ]
102
+ outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
103
+ outputs.should be_narray_like NArray[ 0.95, 0.0 ]
104
+ end
105
+ end
84
106
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: convolver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Neil Slater
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-10-20 00:00:00.000000000 Z
11
+ date: 2013-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray
@@ -109,6 +109,8 @@ files:
109
109
  - README.md
110
110
  - Rakefile
111
111
  - benchmarks/convolve_benchmark.rb
112
+ - benchmarks/convolver_vs_fftw3.rb
113
+ - benchmarks/nn_layer_benchmark.rb
112
114
  - convolver.gemspec
113
115
  - ext/convolver/convolver.c
114
116
  - ext/convolver/extconf.rb