convolver 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/benchmarks/convolver_vs_fftw3.rb +70 -0
- data/benchmarks/nn_layer_benchmark.rb +18 -0
- data/ext/convolver/convolver.c +114 -8
- data/lib/convolver.rb +1 -5
- data/lib/convolver/version.rb +1 -1
- data/spec/convolver_spec.rb +22 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e20ffb64db726dac28c0b1ab5643355a67642fad
|
4
|
+
data.tar.gz: 278c29c4e851edf295c6f0c82043ecab98c42d6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d8aefb6c27bf1777bea0d07f83c096a106278caf66b05519764705cfc783ebe614b71fd7f31adb43ffae7365daf18d8f4cb16cb18d5b83fea231467d571e75f
|
7
|
+
data.tar.gz: 0cf6c1c0fc18781580d512f56b6199f2918d476284f703fc4d2d9409e13787a7d9da22a9440714d04d114b472d5dfdcb55824b263ea3717071f61c4e4aa3714c
|
data/README.md
CHANGED
@@ -2,10 +2,18 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/neilslater/convolver.png?branch=master)](http://travis-ci.org/neilslater/convolver)
|
4
4
|
|
5
|
-
Adds
|
5
|
+
Adds a convolve operation to NArray floats. It is around 250 times faster than equivalents
|
6
6
|
in pure Ruby.
|
7
7
|
|
8
8
|
Note that convolves based on FFTW3 could well be faster still for large arrays with large kernels.
|
9
|
+
Benchmark tests suggest that the kernel needs to be a few hundred items, and be significantly smaller
|
10
|
+
than the signal before FFTW3 offers a significant advantage.
|
11
|
+
|
12
|
+
## Planned features
|
13
|
+
|
14
|
+
The *convolver* gem will eventually contain a basic kit for creating, training and running convolutional
|
15
|
+
neural networks. As a side effect of this plan, it will also contain efficient code for
|
16
|
+
calculating signal convolutions for other types of analysis.
|
9
17
|
|
10
18
|
## Installation
|
11
19
|
|
@@ -32,8 +40,10 @@ Basic convolution:
|
|
32
40
|
|
33
41
|
* Convolver only works on single-precision floats internally. It will cast NArray types to this, if
|
34
42
|
possible, prior to calculating.
|
43
|
+
|
35
44
|
* The convolution is an "inner" one. The output is smaller than the input, each dimension is reduced
|
36
45
|
by 1 less than the width of the kernel in the same dimension.
|
46
|
+
|
37
47
|
* Convolver expects input a and kernel b to have the same rank, and for the kernel to be same size
|
38
48
|
or smaller in all dimensions as the input.
|
39
49
|
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'convolver'
|
2
|
+
require 'narray'
|
3
|
+
require 'fftw3'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
# In Ruby for now, which is slower, but at least gets us ballpark figures (99% of the work is in the C)
|
7
|
+
module FFTW3Convolver
|
8
|
+
def self.convolve orig_a, orig_b
|
9
|
+
combined_size = orig_a.size + orig_b.size - 1
|
10
|
+
left_pad_a = ( combined_size - orig_a.size + 1)/2
|
11
|
+
mod_a = NArray.float(combined_size)
|
12
|
+
mod_a[left_pad_a] = orig_a
|
13
|
+
|
14
|
+
mod_b = NArray.float(combined_size)
|
15
|
+
left_select_b = ( orig_b.size + 1 )/2
|
16
|
+
right_select_b = orig_b.size - left_select_b
|
17
|
+
mod_b[0] = orig_b[(0...left_select_b)].reverse
|
18
|
+
mod_b[-right_select_b] = orig_b[-right_select_b..-1].reverse
|
19
|
+
|
20
|
+
afft = FFTW3.fft(mod_a)
|
21
|
+
bfft = FFTW3.fft(mod_b)
|
22
|
+
cfft = afft * bfft
|
23
|
+
|
24
|
+
(FFTW3.ifft( cfft )/combined_size).real[left_pad_a...(left_pad_a+ orig_a.size - orig_b.size + 1)]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class Convolver2DBenchmark
|
29
|
+
attr_reader :image, :kernel
|
30
|
+
|
31
|
+
def initialize
|
32
|
+
# These show Convolver.convolve as 3x faster than FFTW3
|
33
|
+
# @image = NArray.float(256 * 256).random
|
34
|
+
# @kernel = NArray.float(16 * 16).random
|
35
|
+
|
36
|
+
# These are roughly even (10% advantage to FFTW3)
|
37
|
+
# @image = NArray.float(256 * 256).random
|
38
|
+
# @kernel = NArray.float(32 * 32).random
|
39
|
+
|
40
|
+
# These show FFTW3 as 4x faster than Convolver.convolve
|
41
|
+
# @image = NArray.float(256 * 256).random
|
42
|
+
# @kernel = NArray.float(64 * 64).random
|
43
|
+
|
44
|
+
# These show Convolver.convolve as 200x faster than FFTW3
|
45
|
+
# @image = NArray.float(50 * 64 * 64).random
|
46
|
+
# @kernel = NArray.float(50 * 64 * 64).random
|
47
|
+
|
48
|
+
# These show FFTW3 as 2x faster than Convolver.convolve
|
49
|
+
# @image = NArray.float(128 * 128).random
|
50
|
+
# @kernel = NArray.float(64 * 64).random
|
51
|
+
|
52
|
+
# These show FFTW3 and Convolver.convolve roughly equal
|
53
|
+
# @image = NArray.float(80 * 80).random
|
54
|
+
# @kernel = NArray.float(64 * 64).random
|
55
|
+
|
56
|
+
# These show FFTW3 as 2x faster than Convolver.convolve
|
57
|
+
# @image = NArray.float(2 * 80 * 80).random
|
58
|
+
# @kernel = NArray.float(2 * 64 * 64).random
|
59
|
+
|
60
|
+
# These are roughly even - increasing size of image favours FFTW3
|
61
|
+
@image = NArray.float(2000 + 80 * 80).random
|
62
|
+
@kernel = NArray.float(80 * 80).random
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
Benchmark.bm do |x|
|
67
|
+
source = Convolver2DBenchmark.new
|
68
|
+
x.report('convolver') { 100.times { Convolver.convolve( source.image, source.kernel ) } }
|
69
|
+
x.report('fftw3') { 100.times { FFTW3Convolver.convolve( source.image, source.kernel ) } }
|
70
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'convolver'
|
2
|
+
require 'narray'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
class ConvolverNNLayerBenchmark
|
6
|
+
attr_reader :input, :weights, :thresholds
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@input = NArray.float(1024).random
|
10
|
+
@weights = NArray.float(1024,256).random
|
11
|
+
@thresholds = NArray.float(256).random
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
Benchmark.bm do |x|
|
16
|
+
source = ConvolverNNLayerBenchmark.new
|
17
|
+
x.report('kilo') { 1000.times { Convolver.nn_run_layer( source.input, source.weights, source.thresholds ) } }
|
18
|
+
end
|
data/ext/convolver/convolver.c
CHANGED
@@ -63,9 +63,9 @@ inline void calc_co_increment( int rank, int *outer_shape, int *inner_shape, int
|
|
63
63
|
|
64
64
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
65
65
|
//
|
66
|
-
// Convolve
|
66
|
+
// Convolve
|
67
67
|
//
|
68
|
-
// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations.
|
68
|
+
// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
|
69
69
|
//
|
70
70
|
|
71
71
|
void convolve_raw(
|
@@ -128,12 +128,67 @@ void convolve_raw(
|
|
128
128
|
return;
|
129
129
|
}
|
130
130
|
|
131
|
+
////////////////////////////////////////////////////////////////////////////////////////////////////
|
132
|
+
//
|
133
|
+
// Neural net
|
134
|
+
//
|
135
|
+
// Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
|
136
|
+
//
|
137
|
+
//
|
138
|
+
|
139
|
+
void nn_run_layer_raw( int in_size, int out_size,
|
140
|
+
float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
|
141
|
+
int i, j, in_aligned_size, out_aligned_size, offset;
|
142
|
+
__m128 simd_x, simd_y, simd_t;
|
143
|
+
|
144
|
+
in_aligned_size = 4 * ( in_size/4 );
|
145
|
+
out_aligned_size = 4 * ( out_size/4 );
|
146
|
+
|
147
|
+
// Calculate activation
|
148
|
+
for ( i = 0; i < out_size; i++ ) {
|
149
|
+
|
150
|
+
float t = 0.0;
|
151
|
+
simd_t = _mm_setzero_ps();
|
152
|
+
offset = i * in_size;
|
153
|
+
|
154
|
+
// Use SIMD for all the aligned values in groups of 4
|
155
|
+
for ( j = 0; j < in_aligned_size; j +=4 ) {
|
156
|
+
simd_x = _mm_load_ps( in_ptr + j );
|
157
|
+
// Weights might not align to 16 bytes due to size of layers
|
158
|
+
simd_y = _mm_loadu_ps( weights + (offset + j) );
|
159
|
+
simd_x = _mm_mul_ps( simd_x, simd_y );
|
160
|
+
simd_t = _mm_add_ps( simd_x, simd_t );
|
161
|
+
}
|
162
|
+
|
163
|
+
// Complete any remaining 1,2 or 3 items one at a time
|
164
|
+
for ( j = in_aligned_size; j < in_size; j++ ) {
|
165
|
+
t += in_ptr[ j ] * weights[ offset + j ];
|
166
|
+
}
|
167
|
+
|
168
|
+
out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
|
169
|
+
}
|
170
|
+
|
171
|
+
for ( i = 0; i < out_size; i++ ) {
|
172
|
+
out_ptr[i] -= thresholds[i];
|
173
|
+
if ( out_ptr[i] < 0.0 ) { out_ptr[i] = 0.0; }
|
174
|
+
}
|
175
|
+
|
176
|
+
return;
|
177
|
+
}
|
131
178
|
|
132
179
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
133
180
|
|
134
181
|
// To hold the module object
|
135
182
|
VALUE Convolver = Qnil;
|
136
183
|
|
184
|
+
/* @overload convolve( signal, kernel )
|
185
|
+
* Calculates convolution of an array of floats representing a signal, with a second array representing
|
186
|
+
* a kernel. The two parameters must have the same rank. The output has same rank, its size in each dimension d is given by
|
187
|
+
* signal.shape[d] - kernel.shape[d] + 1
|
188
|
+
* @param [NArray] signal must be same size or larger than kernel in each dimension
|
189
|
+
* @param [NArray] kernel must be same size or smaller than signal in each dimension
|
190
|
+
* @return [NArray] result of convolving signal with kernel
|
191
|
+
*/
|
137
192
|
static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
138
193
|
struct NARRAY *na_a, *na_b, *na_c;
|
139
194
|
volatile VALUE val_a, val_b, val_c;
|
@@ -146,12 +201,8 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
|
146
201
|
val_b = na_cast_object(b, NA_SFLOAT);
|
147
202
|
GetNArray( val_b, na_b );
|
148
203
|
|
149
|
-
if ( na_a->rank > na_b->rank ) {
|
150
|
-
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
|
151
|
-
}
|
152
|
-
|
153
|
-
if ( na_a->rank < na_b->rank ) {
|
154
|
-
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
|
204
|
+
if ( na_a->rank != na_b->rank ) {
|
205
|
+
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (a rack %d, b rank %d)", na_a->rank, na_b->rank );
|
155
206
|
}
|
156
207
|
|
157
208
|
if ( na_a->rank > LARGEST_RANK ) {
|
@@ -178,7 +229,62 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
|
178
229
|
return val_c;
|
179
230
|
}
|
180
231
|
|
232
|
+
/* @overload nn_run_layer( inputs, weights, thresholds )
|
233
|
+
* Calculates activations of a fully-connected neural network layer. The transfer function after
|
234
|
+
* summing weights and applying threshold is a "ReLU", equivalent to
|
235
|
+
* y = x < 0.0 ? 0.0 : x
|
236
|
+
* this is less sophisticated than many neural net architectures, but is fast to calculate and to
|
237
|
+
* train.
|
238
|
+
* @param [NArray] inputs must be rank 1 array of floats
|
239
|
+
* @param [NArray] weights must be rank 2 array of floats, with first rank size of inputs, and second rank equal to number of outputs desired
|
240
|
+
* @param [NArray] thresholds must be rank 1 array of floats, size equal to number of outputs desired
|
241
|
+
* @return [NArray] neuron activations
|
242
|
+
*/
|
243
|
+
static VALUE narray_nn_run_single_layer( VALUE self, VALUE inputs, VALUE weights, VALUE thresholds ) {
|
244
|
+
struct NARRAY *na_inputs, *na_weights, *na_thresholds, *na_outputs;
|
245
|
+
volatile VALUE val_inputs, val_weights, val_thresholds, val_outputs;
|
246
|
+
int input_size, output_size;
|
247
|
+
int output_shape[1];
|
248
|
+
|
249
|
+
val_inputs = na_cast_object(inputs, NA_SFLOAT);
|
250
|
+
GetNArray( val_inputs, na_inputs );
|
251
|
+
if ( na_inputs->rank != 1 ) {
|
252
|
+
rb_raise( rb_eArgError, "input must be array of rank 1" );
|
253
|
+
}
|
254
|
+
input_size = na_inputs->total;
|
255
|
+
|
256
|
+
val_weights = na_cast_object(weights, NA_SFLOAT);
|
257
|
+
GetNArray( val_weights, na_weights );
|
258
|
+
if ( na_weights->rank != 2 ) {
|
259
|
+
rb_raise( rb_eArgError, "weights must be array of rank 2" );
|
260
|
+
}
|
261
|
+
if ( na_weights->shape[0] != input_size ) {
|
262
|
+
rb_raise( rb_eArgError, "weights shape mismatch, expected %d across, got %d", input_size, na_weights->shape[0] );
|
263
|
+
}
|
264
|
+
output_size = na_weights->shape[1];
|
265
|
+
|
266
|
+
val_thresholds = na_cast_object(thresholds, NA_SFLOAT);
|
267
|
+
GetNArray( val_thresholds, na_thresholds );
|
268
|
+
if ( na_thresholds->rank != 1 ) {
|
269
|
+
rb_raise( rb_eArgError, "thresholds must be array of rank 1" );
|
270
|
+
}
|
271
|
+
if ( na_thresholds->shape[0] != output_size ) {
|
272
|
+
rb_raise( rb_eArgError, "thresholds expected size %d, but got %d", output_size, na_thresholds->shape[0] );
|
273
|
+
}
|
274
|
+
|
275
|
+
output_shape[0] = output_size;
|
276
|
+
val_outputs = na_make_object( NA_SFLOAT, 1, output_shape, CLASS_OF( val_inputs ) );
|
277
|
+
GetNArray( val_outputs, na_outputs );
|
278
|
+
|
279
|
+
nn_run_layer_raw( input_size, output_size, (float*) na_inputs->ptr, (float*) na_weights->ptr,
|
280
|
+
(float*) na_thresholds->ptr, (float*) na_outputs->ptr );
|
281
|
+
|
282
|
+
return val_outputs;
|
283
|
+
}
|
284
|
+
|
285
|
+
|
181
286
|
void Init_convolver() {
|
182
287
|
Convolver = rb_define_module( "Convolver" );
|
183
288
|
rb_define_singleton_method( Convolver, "convolve", narray_convolve, 2 );
|
289
|
+
rb_define_singleton_method( Convolver, "nn_run_layer", narray_nn_run_single_layer, 3 );
|
184
290
|
}
|
data/lib/convolver.rb
CHANGED
@@ -3,9 +3,5 @@ require "convolver/convolver"
|
|
3
3
|
require "convolver/version"
|
4
4
|
|
5
5
|
module Convolver
|
6
|
-
|
7
|
-
# @param [NArray] a outer array
|
8
|
-
# @param [NArray] b kernel
|
9
|
-
# @return [NArray] result of convolving a with b
|
10
|
-
# @!parse def self.convolve(a,b); end
|
6
|
+
|
11
7
|
end
|
data/lib/convolver/version.rb
CHANGED
data/spec/convolver_spec.rb
CHANGED
@@ -81,4 +81,26 @@ describe Convolver do
|
|
81
81
|
]
|
82
82
|
end
|
83
83
|
end
|
84
|
+
|
85
|
+
describe "#nn_run_layer" do
|
86
|
+
it "should calculate basic layer rules" do
|
87
|
+
inputs = NArray[ 1.0 ]
|
88
|
+
weights = NArray[ [ 1.0 ] ]
|
89
|
+
thresholds = NArray[ 0.0 ]
|
90
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
91
|
+
outputs.should be_narray_like NArray[ 1.0 ]
|
92
|
+
|
93
|
+
inputs = NArray[ 0.5, -0.5 ]
|
94
|
+
weights = NArray[ [ 1.0, 2.0 ], [ 2.0, 1.0 ] ]
|
95
|
+
thresholds = NArray[ 0.0, 0.0 ]
|
96
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
97
|
+
outputs.should be_narray_like NArray[ 0.0, 0.5 ]
|
98
|
+
|
99
|
+
inputs = NArray[ 0.3, -0.4, 0.8, -0.7 ]
|
100
|
+
weights = NArray[ [ 1.0, 0.25, 0.5, -0.5 ], [ -1.0, -0.25, -0.5, 0.5 ] ]
|
101
|
+
thresholds = NArray[ 0.0, 0.0 ]
|
102
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
103
|
+
outputs.should be_narray_like NArray[ 0.95, 0.0 ]
|
104
|
+
end
|
105
|
+
end
|
84
106
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: convolver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Neil Slater
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-10-
|
11
|
+
date: 2013-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|
@@ -109,6 +109,8 @@ files:
|
|
109
109
|
- README.md
|
110
110
|
- Rakefile
|
111
111
|
- benchmarks/convolve_benchmark.rb
|
112
|
+
- benchmarks/convolver_vs_fftw3.rb
|
113
|
+
- benchmarks/nn_layer_benchmark.rb
|
112
114
|
- convolver.gemspec
|
113
115
|
- ext/convolver/convolver.c
|
114
116
|
- ext/convolver/extconf.rb
|