convolver 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 254fbf54caf7407108357aee612ab3c08af4ea4c
4
- data.tar.gz: f4212255722174976e92c734b76ce956365ad936
3
+ metadata.gz: e20ffb64db726dac28c0b1ab5643355a67642fad
4
+ data.tar.gz: 278c29c4e851edf295c6f0c82043ecab98c42d6f
5
5
  SHA512:
6
- metadata.gz: 55d4082e492a48d09316cf4f6c0f44ad1dd5bae756fd9ef3d8890c592ed0c863029678eded0a3783559e7acfdb8ca9198ce3d4938c74174220f5452f35524d0d
7
- data.tar.gz: 3a9ee78cb91ca935c83f0b56e93d58799871e97d5f8174ff9ba4e86ec798469dad6f3167482458c24f37bdf47aed4b6b5f5aaf6e658f9329e63aab16daa5743d
6
+ metadata.gz: 3d8aefb6c27bf1777bea0d07f83c096a106278caf66b05519764705cfc783ebe614b71fd7f31adb43ffae7365daf18d8f4cb16cb18d5b83fea231467d571e75f
7
+ data.tar.gz: 0cf6c1c0fc18781580d512f56b6199f2918d476284f703fc4d2d9409e13787a7d9da22a9440714d04d114b472d5dfdcb55824b263ea3717071f61c4e4aa3714c
data/README.md CHANGED
@@ -2,10 +2,18 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/neilslater/convolver.png?branch=master)](http://travis-ci.org/neilslater/convolver)
4
4
 
5
- Adds an "inner" convolve operation to NArray floats. It is around 250 times faster than equivalents
5
+ Adds a convolve operation to NArray floats. It is around 250 times faster than equivalents
6
6
  in pure Ruby.
7
7
 
8
8
  Note that convolves based on FFTW3 could well be faster still for large arrays with large kernels.
9
+ Benchmark tests suggest that the kernel needs to be a few hundred items, and be significantly smaller
10
+ than the signal before FFTW3 offers a significant advantage.
11
+
12
+ ## Planned features
13
+
14
+ The *convolver* gem will eventually contain a basic kit for creating, training and running convolutional
15
+ neural networks. As a side effect of this plan, it will also contain efficient code for
16
+ calculating signal convolutions for other types of analysis.
9
17
 
10
18
  ## Installation
11
19
 
@@ -32,8 +40,10 @@ Basic convolution:
32
40
 
33
41
  * Convolver only works on single-precision floats internally. It will cast NArray types to this, if
34
42
  possible, prior to calculating.
43
+
35
44
  * The convolution is an "inner" one. The output is smaller than the input, each dimension is reduced
36
45
  by 1 less than the width of the kernel in the same dimension.
46
+
37
47
  * Convolver expects input a and kernel b to have the same rank, and for the kernel to be same size
38
48
  or smaller in all dimensions as the input.
39
49
 
@@ -0,0 +1,70 @@
1
+ require 'convolver'
2
+ require 'narray'
3
+ require 'fftw3'
4
+ require 'benchmark'
5
+
6
+ # In Ruby for now, which is slower, but at least gets us ballpark figures (99% of the work is in the C)
7
+ module FFTW3Convolver
8
+ def self.convolve orig_a, orig_b
9
+ combined_size = orig_a.size + orig_b.size - 1
10
+ left_pad_a = ( combined_size - orig_a.size + 1)/2
11
+ mod_a = NArray.float(combined_size)
12
+ mod_a[left_pad_a] = orig_a
13
+
14
+ mod_b = NArray.float(combined_size)
15
+ left_select_b = ( orig_b.size + 1 )/2
16
+ right_select_b = orig_b.size - left_select_b
17
+ mod_b[0] = orig_b[(0...left_select_b)].reverse
18
+ mod_b[-right_select_b] = orig_b[-right_select_b..-1].reverse
19
+
20
+ afft = FFTW3.fft(mod_a)
21
+ bfft = FFTW3.fft(mod_b)
22
+ cfft = afft * bfft
23
+
24
+ (FFTW3.ifft( cfft )/combined_size).real[left_pad_a...(left_pad_a+ orig_a.size - orig_b.size + 1)]
25
+ end
26
+ end
27
+
28
+ class Convolver2DBenchmark
29
+ attr_reader :image, :kernel
30
+
31
+ def initialize
32
+ # These show Convolver.convolve as 3x faster than FFTW3
33
+ # @image = NArray.float(256 * 256).random
34
+ # @kernel = NArray.float(16 * 16).random
35
+
36
+ # These are roughly even (10% advantage to FFTW3)
37
+ # @image = NArray.float(256 * 256).random
38
+ # @kernel = NArray.float(32 * 32).random
39
+
40
+ # These show FFTW3 as 4x faster than Convolver.convolve
41
+ # @image = NArray.float(256 * 256).random
42
+ # @kernel = NArray.float(64 * 64).random
43
+
44
+ # These show Convolver.convolve as 200x faster than FFTW3
45
+ # @image = NArray.float(50 * 64 * 64).random
46
+ # @kernel = NArray.float(50 * 64 * 64).random
47
+
48
+ # These show FFTW3 as 2x faster than Convolver.convolve
49
+ # @image = NArray.float(128 * 128).random
50
+ # @kernel = NArray.float(64 * 64).random
51
+
52
+ # These show FFTW3 and Convolver.convolve roughly equal
53
+ # @image = NArray.float(80 * 80).random
54
+ # @kernel = NArray.float(64 * 64).random
55
+
56
+ # These show FFTW3 as 2x faster than Convolver.convolve
57
+ # @image = NArray.float(2 * 80 * 80).random
58
+ # @kernel = NArray.float(2 * 64 * 64).random
59
+
60
+ # These are roughly even - increasing size of image favours FFTW3
61
+ @image = NArray.float(2000 + 80 * 80).random
62
+ @kernel = NArray.float(80 * 80).random
63
+ end
64
+ end
65
+
66
+ Benchmark.bm do |x|
67
+ source = Convolver2DBenchmark.new
68
+ x.report('convolver') { 100.times { Convolver.convolve( source.image, source.kernel ) } }
69
+ x.report('fftw3') { 100.times { FFTW3Convolver.convolve( source.image, source.kernel ) } }
70
+ end
@@ -0,0 +1,18 @@
1
+ require 'convolver'
2
+ require 'narray'
3
+ require 'benchmark'
4
+
5
+ class ConvolverNNLayerBenchmark
6
+ attr_reader :input, :weights, :thresholds
7
+
8
+ def initialize
9
+ @input = NArray.float(1024).random
10
+ @weights = NArray.float(1024,256).random
11
+ @thresholds = NArray.float(256).random
12
+ end
13
+ end
14
+
15
+ Benchmark.bm do |x|
16
+ source = ConvolverNNLayerBenchmark.new
17
+ x.report('kilo') { 1000.times { Convolver.nn_run_layer( source.input, source.weights, source.thresholds ) } }
18
+ end
@@ -63,9 +63,9 @@ inline void calc_co_increment( int rank, int *outer_shape, int *inner_shape, int
63
63
 
64
64
  ////////////////////////////////////////////////////////////////////////////////////////////////////
65
65
  //
66
- // Convolve method 4.
66
+ // Convolve
67
67
  //
68
- // Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 11.54 seconds. Score: 63
68
+ // Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
69
69
  //
70
70
 
71
71
  void convolve_raw(
@@ -128,12 +128,67 @@ void convolve_raw(
128
128
  return;
129
129
  }
130
130
 
131
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
132
+ //
133
+ // Neural net
134
+ //
135
+ // Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
136
+ //
137
+ //
138
+
139
+ void nn_run_layer_raw( int in_size, int out_size,
140
+ float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
141
+ int i, j, in_aligned_size, out_aligned_size, offset;
142
+ __m128 simd_x, simd_y, simd_t;
143
+
144
+ in_aligned_size = 4 * ( in_size/4 );
145
+ out_aligned_size = 4 * ( out_size/4 );
146
+
147
+ // Calculate activation
148
+ for ( i = 0; i < out_size; i++ ) {
149
+
150
+ float t = 0.0;
151
+ simd_t = _mm_setzero_ps();
152
+ offset = i * in_size;
153
+
154
+ // Use SIMD for all the aligned values in groups of 4
155
+ for ( j = 0; j < in_aligned_size; j +=4 ) {
156
+ simd_x = _mm_load_ps( in_ptr + j );
157
+ // Weights might not align to 16 bytes due to size of layers
158
+ simd_y = _mm_loadu_ps( weights + (offset + j) );
159
+ simd_x = _mm_mul_ps( simd_x, simd_y );
160
+ simd_t = _mm_add_ps( simd_x, simd_t );
161
+ }
162
+
163
+ // Complete any remaining 1,2 or 3 items one at a time
164
+ for ( j = in_aligned_size; j < in_size; j++ ) {
165
+ t += in_ptr[ j ] * weights[ offset + j ];
166
+ }
167
+
168
+ out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
169
+ }
170
+
171
+ for ( i = 0; i < out_size; i++ ) {
172
+ out_ptr[i] -= thresholds[i];
173
+ if ( out_ptr[i] < 0.0 ) { out_ptr[i] = 0.0; }
174
+ }
175
+
176
+ return;
177
+ }
131
178
 
132
179
  ////////////////////////////////////////////////////////////////////////////////////////////////////
133
180
 
134
181
  // To hold the module object
135
182
  VALUE Convolver = Qnil;
136
183
 
184
+ /* @overload convolve( signal, kernel )
185
+ * Calculates convolution of an array of floats representing a signal, with a second array representing
186
+ * a kernel. The two parameters must have the same rank. The output has same rank, its size in each dimension d is given by
187
+ * signal.shape[d] - kernel.shape[d] + 1
188
+ * @param [NArray] signal must be same size or larger than kernel in each dimension
189
+ * @param [NArray] kernel must be same size or smaller than signal in each dimension
190
+ * @return [NArray] result of convolving signal with kernel
191
+ */
137
192
  static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
138
193
  struct NARRAY *na_a, *na_b, *na_c;
139
194
  volatile VALUE val_a, val_b, val_c;
@@ -146,12 +201,8 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
146
201
  val_b = na_cast_object(b, NA_SFLOAT);
147
202
  GetNArray( val_b, na_b );
148
203
 
149
- if ( na_a->rank < na_b->rank ) {
150
- rb_raise( rb_eArgError, "narray b must have equal or lower rank than narray a" );
151
- }
152
-
153
- if ( na_a->rank < na_b->rank ) {
154
- rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
204
+ if ( na_a->rank != na_b->rank ) {
205
+ rb_raise( rb_eArgError, "narray a must have equal rank to narray b (a rank %d, b rank %d)", na_a->rank, na_b->rank );
155
206
  }
156
207
 
157
208
  if ( na_a->rank > LARGEST_RANK ) {
@@ -178,7 +229,62 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
178
229
  return val_c;
179
230
  }
180
231
 
232
+ /* @overload nn_run_layer( inputs, weights, thresholds )
233
+ * Calculates activations of a fully-connected neural network layer. The transfer function after
234
+ * summing weights and applying threshold is a "ReLU", equivalent to
235
+ * y = x < 0.0 ? 0.0 : x
236
+ * this is less sophisticated than many neural net architectures, but is fast to calculate and to
237
+ * train.
238
+ * @param [NArray] inputs must be rank 1 array of floats
239
+ * @param [NArray] weights must be rank 2 array of floats, with first rank size of inputs, and second rank equal to number of outputs desired
240
+ * @param [NArray] thresholds must be rank 1 array of floats, size equal to number of outputs desired
241
+ * @return [NArray] neuron activations
242
+ */
243
+ static VALUE narray_nn_run_single_layer( VALUE self, VALUE inputs, VALUE weights, VALUE thresholds ) {
244
+ struct NARRAY *na_inputs, *na_weights, *na_thresholds, *na_outputs;
245
+ volatile VALUE val_inputs, val_weights, val_thresholds, val_outputs;
246
+ int input_size, output_size;
247
+ int output_shape[1];
248
+
249
+ val_inputs = na_cast_object(inputs, NA_SFLOAT);
250
+ GetNArray( val_inputs, na_inputs );
251
+ if ( na_inputs->rank != 1 ) {
252
+ rb_raise( rb_eArgError, "input must be array of rank 1" );
253
+ }
254
+ input_size = na_inputs->total;
255
+
256
+ val_weights = na_cast_object(weights, NA_SFLOAT);
257
+ GetNArray( val_weights, na_weights );
258
+ if ( na_weights->rank != 2 ) {
259
+ rb_raise( rb_eArgError, "weights must be array of rank 2" );
260
+ }
261
+ if ( na_weights->shape[0] != input_size ) {
262
+ rb_raise( rb_eArgError, "weights shape mismatch, expected %d across, got %d", input_size, na_weights->shape[0] );
263
+ }
264
+ output_size = na_weights->shape[1];
265
+
266
+ val_thresholds = na_cast_object(thresholds, NA_SFLOAT);
267
+ GetNArray( val_thresholds, na_thresholds );
268
+ if ( na_thresholds->rank != 1 ) {
269
+ rb_raise( rb_eArgError, "thresholds must be array of rank 1" );
270
+ }
271
+ if ( na_thresholds->shape[0] != output_size ) {
272
+ rb_raise( rb_eArgError, "thresholds expected size %d, but got %d", output_size, na_thresholds->shape[0] );
273
+ }
274
+
275
+ output_shape[0] = output_size;
276
+ val_outputs = na_make_object( NA_SFLOAT, 1, output_shape, CLASS_OF( val_inputs ) );
277
+ GetNArray( val_outputs, na_outputs );
278
+
279
+ nn_run_layer_raw( input_size, output_size, (float*) na_inputs->ptr, (float*) na_weights->ptr,
280
+ (float*) na_thresholds->ptr, (float*) na_outputs->ptr );
281
+
282
+ return val_outputs;
283
+ }
284
+
285
+
181
286
  void Init_convolver() {
182
287
  Convolver = rb_define_module( "Convolver" );
183
288
  rb_define_singleton_method( Convolver, "convolve", narray_convolve, 2 );
289
+ rb_define_singleton_method( Convolver, "nn_run_layer", narray_nn_run_single_layer, 3 );
184
290
  }
data/lib/convolver.rb CHANGED
@@ -3,9 +3,5 @@ require "convolver/convolver"
3
3
  require "convolver/version"
4
4
 
5
5
  module Convolver
6
- # Calculates float convolution of an array with a kernel
7
- # @param [NArray] a outer array
8
- # @param [NArray] b kernel
9
- # @return [NArray] result of convolving a with b
10
- # @!parse def self.convolve(a,b); end
6
+
11
7
  end
@@ -1,3 +1,3 @@
1
1
  module Convolver
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -81,4 +81,26 @@ describe Convolver do
81
81
  ]
82
82
  end
83
83
  end
84
+
85
+ describe "#nn_run_layer" do
86
+ it "should calculate basic layer rules" do
87
+ inputs = NArray[ 1.0 ]
88
+ weights = NArray[ [ 1.0 ] ]
89
+ thresholds = NArray[ 0.0 ]
90
+ outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
91
+ outputs.should be_narray_like NArray[ 1.0 ]
92
+
93
+ inputs = NArray[ 0.5, -0.5 ]
94
+ weights = NArray[ [ 1.0, 2.0 ], [ 2.0, 1.0 ] ]
95
+ thresholds = NArray[ 0.0, 0.0 ]
96
+ outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
97
+ outputs.should be_narray_like NArray[ 0.0, 0.5 ]
98
+
99
+ inputs = NArray[ 0.3, -0.4, 0.8, -0.7 ]
100
+ weights = NArray[ [ 1.0, 0.25, 0.5, -0.5 ], [ -1.0, -0.25, -0.5, 0.5 ] ]
101
+ thresholds = NArray[ 0.0, 0.0 ]
102
+ outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
103
+ outputs.should be_narray_like NArray[ 0.95, 0.0 ]
104
+ end
105
+ end
84
106
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: convolver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Neil Slater
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-10-20 00:00:00.000000000 Z
11
+ date: 2013-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray
@@ -109,6 +109,8 @@ files:
109
109
  - README.md
110
110
  - Rakefile
111
111
  - benchmarks/convolve_benchmark.rb
112
+ - benchmarks/convolver_vs_fftw3.rb
113
+ - benchmarks/nn_layer_benchmark.rb
112
114
  - convolver.gemspec
113
115
  - ext/convolver/convolver.c
114
116
  - ext/convolver/extconf.rb