convolver 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/benchmarks/convolver_vs_fftw3.rb +70 -0
- data/benchmarks/nn_layer_benchmark.rb +18 -0
- data/ext/convolver/convolver.c +114 -8
- data/lib/convolver.rb +1 -5
- data/lib/convolver/version.rb +1 -1
- data/spec/convolver_spec.rb +22 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e20ffb64db726dac28c0b1ab5643355a67642fad
|
4
|
+
data.tar.gz: 278c29c4e851edf295c6f0c82043ecab98c42d6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d8aefb6c27bf1777bea0d07f83c096a106278caf66b05519764705cfc783ebe614b71fd7f31adb43ffae7365daf18d8f4cb16cb18d5b83fea231467d571e75f
|
7
|
+
data.tar.gz: 0cf6c1c0fc18781580d512f56b6199f2918d476284f703fc4d2d9409e13787a7d9da22a9440714d04d114b472d5dfdcb55824b263ea3717071f61c4e4aa3714c
|
data/README.md
CHANGED
@@ -2,10 +2,18 @@
|
|
2
2
|
|
3
3
|
[](http://travis-ci.org/neilslater/convolver)
|
4
4
|
|
5
|
-
Adds
|
5
|
+
Adds a convolve operation to NArray floats. It is around 250 times faster than equivalents
|
6
6
|
in pure Ruby.
|
7
7
|
|
8
8
|
Note that convolves based on FFTW3 could well be faster still for large arrays with large kernels.
|
9
|
+
Benchmark tests suggest that the kernel needs to be a few hundred items, and be significantly smaller
|
10
|
+
than the signal before FFTW3 offers a significant advantage.
|
11
|
+
|
12
|
+
## Planned features
|
13
|
+
|
14
|
+
The *convolver* gem will eventually contain a basic kit for creating, training and running convolutional
|
15
|
+
neural networks. As a side effect of this plan, it will also contain efficient code for
|
16
|
+
calculating signal convolutions for other types of analysis.
|
9
17
|
|
10
18
|
## Installation
|
11
19
|
|
@@ -32,8 +40,10 @@ Basic convolution:
|
|
32
40
|
|
33
41
|
* Convolver only works on single-precision floats internally. It will cast NArray types to this, if
|
34
42
|
possible, prior to calculating.
|
43
|
+
|
35
44
|
* The convolution is an "inner" one. The output is smaller than the input, each dimension is reduced
|
36
45
|
by 1 less than the width of the kernel in the same dimension.
|
46
|
+
|
37
47
|
* Convolver expects input a and kernel b to have the same rank, and for the kernel to be same size
|
38
48
|
or smaller in all dimensions as the input.
|
39
49
|
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'convolver'
|
2
|
+
require 'narray'
|
3
|
+
require 'fftw3'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
# In Ruby for now, which is slower, but at least gets us ballpark figures (99% of the work is in the C)
|
7
|
+
module FFTW3Convolver
|
8
|
+
def self.convolve orig_a, orig_b
|
9
|
+
combined_size = orig_a.size + orig_b.size - 1
|
10
|
+
left_pad_a = ( combined_size - orig_a.size + 1)/2
|
11
|
+
mod_a = NArray.float(combined_size)
|
12
|
+
mod_a[left_pad_a] = orig_a
|
13
|
+
|
14
|
+
mod_b = NArray.float(combined_size)
|
15
|
+
left_select_b = ( orig_b.size + 1 )/2
|
16
|
+
right_select_b = orig_b.size - left_select_b
|
17
|
+
mod_b[0] = orig_b[(0...left_select_b)].reverse
|
18
|
+
mod_b[-right_select_b] = orig_b[-right_select_b..-1].reverse
|
19
|
+
|
20
|
+
afft = FFTW3.fft(mod_a)
|
21
|
+
bfft = FFTW3.fft(mod_b)
|
22
|
+
cfft = afft * bfft
|
23
|
+
|
24
|
+
(FFTW3.ifft( cfft )/combined_size).real[left_pad_a...(left_pad_a+ orig_a.size - orig_b.size + 1)]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class Convolver2DBenchmark
|
29
|
+
attr_reader :image, :kernel
|
30
|
+
|
31
|
+
def initialize
|
32
|
+
# These show Convolver.convolve as 3x faster than FFTW3
|
33
|
+
# @image = NArray.float(256 * 256).random
|
34
|
+
# @kernel = NArray.float(16 * 16).random
|
35
|
+
|
36
|
+
# These are roughly even (10% advantage to FFTW3)
|
37
|
+
# @image = NArray.float(256 * 256).random
|
38
|
+
# @kernel = NArray.float(32 * 32).random
|
39
|
+
|
40
|
+
# These show FFTW3 as 4x faster than Convolver.convolve
|
41
|
+
# @image = NArray.float(256 * 256).random
|
42
|
+
# @kernel = NArray.float(64 * 64).random
|
43
|
+
|
44
|
+
# These show Convolver.convolve as 200x faster than FFTW3
|
45
|
+
# @image = NArray.float(50 * 64 * 64).random
|
46
|
+
# @kernel = NArray.float(50 * 64 * 64).random
|
47
|
+
|
48
|
+
# These show FFTW3 as 2x faster than Convolver.convolve
|
49
|
+
# @image = NArray.float(128 * 128).random
|
50
|
+
# @kernel = NArray.float(64 * 64).random
|
51
|
+
|
52
|
+
# These show FFTW3 and Convolver.convolve roughly equal
|
53
|
+
# @image = NArray.float(80 * 80).random
|
54
|
+
# @kernel = NArray.float(64 * 64).random
|
55
|
+
|
56
|
+
# These show FFTW3 as 2x faster than Convolver.convolve
|
57
|
+
# @image = NArray.float(2 * 80 * 80).random
|
58
|
+
# @kernel = NArray.float(2 * 64 * 64).random
|
59
|
+
|
60
|
+
# These are roughly even - increasing size of image favours FFTW3
|
61
|
+
@image = NArray.float(2000 + 80 * 80).random
|
62
|
+
@kernel = NArray.float(80 * 80).random
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
Benchmark.bm do |x|
|
67
|
+
source = Convolver2DBenchmark.new
|
68
|
+
x.report('convolver') { 100.times { Convolver.convolve( source.image, source.kernel ) } }
|
69
|
+
x.report('fftw3') { 100.times { FFTW3Convolver.convolve( source.image, source.kernel ) } }
|
70
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'convolver'
|
2
|
+
require 'narray'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
class ConvolverNNLayerBenchmark
|
6
|
+
attr_reader :input, :weights, :thresholds
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@input = NArray.float(1024).random
|
10
|
+
@weights = NArray.float(1024,256).random
|
11
|
+
@thresholds = NArray.float(256).random
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
Benchmark.bm do |x|
|
16
|
+
source = ConvolverNNLayerBenchmark.new
|
17
|
+
x.report('kilo') { 1000.times { Convolver.nn_run_layer( source.input, source.weights, source.thresholds ) } }
|
18
|
+
end
|
data/ext/convolver/convolver.c
CHANGED
@@ -63,9 +63,9 @@ inline void calc_co_increment( int rank, int *outer_shape, int *inner_shape, int
|
|
63
63
|
|
64
64
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
65
65
|
//
|
66
|
-
// Convolve
|
66
|
+
// Convolve
|
67
67
|
//
|
68
|
-
// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations.
|
68
|
+
// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
|
69
69
|
//
|
70
70
|
|
71
71
|
void convolve_raw(
|
@@ -128,12 +128,67 @@ void convolve_raw(
|
|
128
128
|
return;
|
129
129
|
}
|
130
130
|
|
131
|
+
////////////////////////////////////////////////////////////////////////////////////////////////////
|
132
|
+
//
|
133
|
+
// Neural net
|
134
|
+
//
|
135
|
+
// Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
|
136
|
+
//
|
137
|
+
//
|
138
|
+
|
139
|
+
void nn_run_layer_raw( int in_size, int out_size,
|
140
|
+
float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
|
141
|
+
int i, j, in_aligned_size, out_aligned_size, offset;
|
142
|
+
__m128 simd_x, simd_y, simd_t;
|
143
|
+
|
144
|
+
in_aligned_size = 4 * ( in_size/4 );
|
145
|
+
out_aligned_size = 4 * ( out_size/4 );
|
146
|
+
|
147
|
+
// Calculate activation
|
148
|
+
for ( i = 0; i < out_size; i++ ) {
|
149
|
+
|
150
|
+
float t = 0.0;
|
151
|
+
simd_t = _mm_setzero_ps();
|
152
|
+
offset = i * in_size;
|
153
|
+
|
154
|
+
// Use SIMD for all the aligned values in groups of 4
|
155
|
+
for ( j = 0; j < in_aligned_size; j +=4 ) {
|
156
|
+
simd_x = _mm_load_ps( in_ptr + j );
|
157
|
+
// Weights might not align to 16 bytes due to size of layers
|
158
|
+
simd_y = _mm_loadu_ps( weights + (offset + j) );
|
159
|
+
simd_x = _mm_mul_ps( simd_x, simd_y );
|
160
|
+
simd_t = _mm_add_ps( simd_x, simd_t );
|
161
|
+
}
|
162
|
+
|
163
|
+
// Complete any remaining 1,2 or 3 items one at a time
|
164
|
+
for ( j = in_aligned_size; j < in_size; j++ ) {
|
165
|
+
t += in_ptr[ j ] * weights[ offset + j ];
|
166
|
+
}
|
167
|
+
|
168
|
+
out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
|
169
|
+
}
|
170
|
+
|
171
|
+
for ( i = 0; i < out_size; i++ ) {
|
172
|
+
out_ptr[i] -= thresholds[i];
|
173
|
+
if ( out_ptr[i] < 0.0 ) { out_ptr[i] = 0.0; }
|
174
|
+
}
|
175
|
+
|
176
|
+
return;
|
177
|
+
}
|
131
178
|
|
132
179
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
133
180
|
|
134
181
|
// To hold the module object
|
135
182
|
VALUE Convolver = Qnil;
|
136
183
|
|
184
|
+
/* @overload convolve( signal, kernel )
|
185
|
+
* Calculates convolution of an array of floats representing a signal, with a second array representing
|
186
|
+
* a kernel. The two parameters must have the same rank. The output has same rank, its size in each dimension d is given by
|
187
|
+
* signal.shape[d] - kernel.shape[d] + 1
|
188
|
+
* @param [NArray] signal must be same size or larger than kernel in each dimension
|
189
|
+
* @param [NArray] kernel must be same size or smaller than signal in each dimension
|
190
|
+
* @return [NArray] result of convolving signal with kernel
|
191
|
+
*/
|
137
192
|
static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
138
193
|
struct NARRAY *na_a, *na_b, *na_c;
|
139
194
|
volatile VALUE val_a, val_b, val_c;
|
@@ -146,12 +201,8 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
|
146
201
|
val_b = na_cast_object(b, NA_SFLOAT);
|
147
202
|
GetNArray( val_b, na_b );
|
148
203
|
|
149
|
-
if ( na_a->rank > na_b->rank ) {
|
150
|
-
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
|
151
|
-
}
|
152
|
-
|
153
|
-
if ( na_a->rank < na_b->rank ) {
|
154
|
-
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (temporary restriction)" );
|
204
|
+
if ( na_a->rank != na_b->rank ) {
|
205
|
+
rb_raise( rb_eArgError, "narray a must have equal rank to narray b (a rack %d, b rank %d)", na_a->rank, na_b->rank );
|
155
206
|
}
|
156
207
|
|
157
208
|
if ( na_a->rank > LARGEST_RANK ) {
|
@@ -178,7 +229,62 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
|
|
178
229
|
return val_c;
|
179
230
|
}
|
180
231
|
|
232
|
+
/* @overload nn_run_layer( inputs, weights, thresholds )
|
233
|
+
* Calculates activations of a fully-connected neural network layer. The transfer function after
|
234
|
+
* summing weights and applying threshold is a "ReLU", equivalent to
|
235
|
+
* y = x < 0.0 ? 0.0 : x
|
236
|
+
* this is less sophisticated than many neural net architectures, but is fast to calculate and to
|
237
|
+
* train.
|
238
|
+
* @param [NArray] inputs must be rank 1 array of floats
|
239
|
+
* @param [NArray] weights must be rank 2 array of floats, with first rank size of inputs, and second rank equal to number of outputs desired
|
240
|
+
* @param [NArray] thresholds must be rank 1 array of floats, size equal to number of outputs desired
|
241
|
+
* @return [NArray] neuron activations
|
242
|
+
*/
|
243
|
+
static VALUE narray_nn_run_single_layer( VALUE self, VALUE inputs, VALUE weights, VALUE thresholds ) {
|
244
|
+
struct NARRAY *na_inputs, *na_weights, *na_thresholds, *na_outputs;
|
245
|
+
volatile VALUE val_inputs, val_weights, val_thresholds, val_outputs;
|
246
|
+
int input_size, output_size;
|
247
|
+
int output_shape[1];
|
248
|
+
|
249
|
+
val_inputs = na_cast_object(inputs, NA_SFLOAT);
|
250
|
+
GetNArray( val_inputs, na_inputs );
|
251
|
+
if ( na_inputs->rank != 1 ) {
|
252
|
+
rb_raise( rb_eArgError, "input must be array of rank 1" );
|
253
|
+
}
|
254
|
+
input_size = na_inputs->total;
|
255
|
+
|
256
|
+
val_weights = na_cast_object(weights, NA_SFLOAT);
|
257
|
+
GetNArray( val_weights, na_weights );
|
258
|
+
if ( na_weights->rank != 2 ) {
|
259
|
+
rb_raise( rb_eArgError, "weights must be array of rank 2" );
|
260
|
+
}
|
261
|
+
if ( na_weights->shape[0] != input_size ) {
|
262
|
+
rb_raise( rb_eArgError, "weights shape mismatch, expected %d across, got %d", input_size, na_weights->shape[0] );
|
263
|
+
}
|
264
|
+
output_size = na_weights->shape[1];
|
265
|
+
|
266
|
+
val_thresholds = na_cast_object(thresholds, NA_SFLOAT);
|
267
|
+
GetNArray( val_thresholds, na_thresholds );
|
268
|
+
if ( na_thresholds->rank != 1 ) {
|
269
|
+
rb_raise( rb_eArgError, "thresholds must be array of rank 1" );
|
270
|
+
}
|
271
|
+
if ( na_thresholds->shape[0] != output_size ) {
|
272
|
+
rb_raise( rb_eArgError, "thresholds expected size %d, but got %d", output_size, na_thresholds->shape[0] );
|
273
|
+
}
|
274
|
+
|
275
|
+
output_shape[0] = output_size;
|
276
|
+
val_outputs = na_make_object( NA_SFLOAT, 1, output_shape, CLASS_OF( val_inputs ) );
|
277
|
+
GetNArray( val_outputs, na_outputs );
|
278
|
+
|
279
|
+
nn_run_layer_raw( input_size, output_size, (float*) na_inputs->ptr, (float*) na_weights->ptr,
|
280
|
+
(float*) na_thresholds->ptr, (float*) na_outputs->ptr );
|
281
|
+
|
282
|
+
return val_outputs;
|
283
|
+
}
|
284
|
+
|
285
|
+
|
181
286
|
void Init_convolver() {
|
182
287
|
Convolver = rb_define_module( "Convolver" );
|
183
288
|
rb_define_singleton_method( Convolver, "convolve", narray_convolve, 2 );
|
289
|
+
rb_define_singleton_method( Convolver, "nn_run_layer", narray_nn_run_single_layer, 3 );
|
184
290
|
}
|
data/lib/convolver.rb
CHANGED
@@ -3,9 +3,5 @@ require "convolver/convolver"
|
|
3
3
|
require "convolver/version"
|
4
4
|
|
5
5
|
module Convolver
|
6
|
-
|
7
|
-
# @param [NArray] a outer array
|
8
|
-
# @param [NArray] b kernel
|
9
|
-
# @return [NArray] result of convolving a with b
|
10
|
-
# @!parse def self.convolve(a,b); end
|
6
|
+
|
11
7
|
end
|
data/lib/convolver/version.rb
CHANGED
data/spec/convolver_spec.rb
CHANGED
@@ -81,4 +81,26 @@ describe Convolver do
|
|
81
81
|
]
|
82
82
|
end
|
83
83
|
end
|
84
|
+
|
85
|
+
describe "#nn_run_layer" do
|
86
|
+
it "should calculate basic layer rules" do
|
87
|
+
inputs = NArray[ 1.0 ]
|
88
|
+
weights = NArray[ [ 1.0 ] ]
|
89
|
+
thresholds = NArray[ 0.0 ]
|
90
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
91
|
+
outputs.should be_narray_like NArray[ 1.0 ]
|
92
|
+
|
93
|
+
inputs = NArray[ 0.5, -0.5 ]
|
94
|
+
weights = NArray[ [ 1.0, 2.0 ], [ 2.0, 1.0 ] ]
|
95
|
+
thresholds = NArray[ 0.0, 0.0 ]
|
96
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
97
|
+
outputs.should be_narray_like NArray[ 0.0, 0.5 ]
|
98
|
+
|
99
|
+
inputs = NArray[ 0.3, -0.4, 0.8, -0.7 ]
|
100
|
+
weights = NArray[ [ 1.0, 0.25, 0.5, -0.5 ], [ -1.0, -0.25, -0.5, 0.5 ] ]
|
101
|
+
thresholds = NArray[ 0.0, 0.0 ]
|
102
|
+
outputs = Convolver.nn_run_layer( inputs, weights, thresholds );
|
103
|
+
outputs.should be_narray_like NArray[ 0.95, 0.0 ]
|
104
|
+
end
|
105
|
+
end
|
84
106
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: convolver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Neil Slater
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-10-
|
11
|
+
date: 2013-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|
@@ -109,6 +109,8 @@ files:
|
|
109
109
|
- README.md
|
110
110
|
- Rakefile
|
111
111
|
- benchmarks/convolve_benchmark.rb
|
112
|
+
- benchmarks/convolver_vs_fftw3.rb
|
113
|
+
- benchmarks/nn_layer_benchmark.rb
|
112
114
|
- convolver.gemspec
|
113
115
|
- ext/convolver/convolver.c
|
114
116
|
- ext/convolver/extconf.rb
|