convolver 0.0.2 → 0.1.0
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/README.md +18 -5
- data/benchmarks/convolve_benchmark.rb +2 -2
- data/benchmarks/convolver_vs_fftw3.rb +17 -41
- data/convolver.gemspec +1 -0
- data/ext/convolver/cnn_components.c +52 -0
- data/ext/convolver/cnn_components.h +14 -0
- data/ext/convolver/convolve_raw.c +105 -0
- data/ext/convolver/convolve_raw.h +22 -0
- data/ext/convolver/convolver.c +35 -162
- data/ext/convolver/narray_shared.c +42 -0
- data/ext/convolver/narray_shared.h +22 -0
- data/lib/convolver.rb +41 -0
- data/lib/convolver/version.rb +1 -1
- data/spec/convolve_fftw3_spec.rb +161 -0
- data/spec/helpers.rb +1 -1
- metadata +24 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4d393cf7a6f7cd94485db0aaed706239c92ab0d0
+  data.tar.gz: 0944542524997227558ceb4f5c9b6968d3413e50
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1f263ae8f88d5318f84f9479c2540d25648396d09f5bfce65dcfc13122eaff26207280afa0a459d634408b5a526d6f302ebe750aebdd3106df3c91e7ac8af681
+  data.tar.gz: 35473c019dfb69abf0a0048df1b53f0c12e0512d1ebddc4b4dab4a7ececf7d8d0c35078ad92192d350b7c34bc5c4548a9446a2e5b69b0888c1b0c877445aea8b
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -5,9 +5,9 @@
 Adds a convolve operation to NArray floats. It is around 250 times faster than equivalents
 in pure Ruby.
 
-
-
-
+The gem makes convolution via FFTW3 library available. This is faster for convolutions with
+larger kernels and signals. The relationship is complex, but as a rule of thumb, the kernel
+needs to be around 1000 entries or larger before it is worth switching to FFTW3-based convolves.
 
 ## Planned features
 
@@ -17,6 +17,12 @@ calculating signal convolutions for other types of analysis.
 
 ## Installation
 
+### Dependency: FFTW3
+
+Before you install *convolver*, you should install FFTW3. See http://www.fftw.org/ for details.
+
+### Installing the gem
+
 Add this line to your application's Gemfile:
 
     gem 'convolver'
@@ -41,12 +47,19 @@ Basic convolution:
 * Convolver only works on single-precision floats internally. It will cast NArray types to this, if
 possible, prior to calculating.
 
-* The
-
+* The output is smaller than the input, each dimension is reduced by 1 less than the width of the
+kernel in the same dimension.
 
 * Convolver expects input a and kernel b to have the same rank, and for the kernel to be same size
 or smaller in all dimensions as the input.
 
+FFTW3 convolution:
+
+    a = NArray[0.3,0.4,0.5]
+    b = NArray[1.3, -0.5]
+    c = Convolver.convolve_fftw3( a, b )
+    => NArray.float(2): [ 0.19, 0.27 ]
+
 ## Contributing
 
 1. Fork it
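The output-size rule in the notes above can be checked directly in irb. A minimal sketch (the shapes here are chosen arbitrarily, not taken from the README):

    require 'convolver'

    a = NArray.sfloat( 6, 5 ).random    # signal, shape [6, 5]
    b = NArray.sfloat( 3, 2 ).random    # kernel, shape [3, 2]
    c = Convolver.convolve( a, b )
    c.shape                             # => [4, 4], i.e. [6 - 3 + 1, 5 - 2 + 1]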
data/benchmarks/convolver_vs_fftw3.rb
CHANGED
@@ -1,70 +1,46 @@
 require 'convolver'
-require 'narray'
-require 'fftw3'
 require 'benchmark'
 
-# In Ruby for now, which is slower, but at least gets us ballpark figures (99% of the work is in the C)
-module FFTW3Convolver
-  def self.convolve orig_a, orig_b
-    combined_size = orig_a.size + orig_b.size - 1
-    left_pad_a = ( combined_size - orig_a.size + 1)/2
-    mod_a = NArray.float(combined_size)
-    mod_a[left_pad_a] = orig_a
-
-    mod_b = NArray.float(combined_size)
-    left_select_b = ( orig_b.size + 1 )/2
-    right_select_b = orig_b.size - left_select_b
-    mod_b[0] = orig_b[(0...left_select_b)].reverse
-    mod_b[-right_select_b] = orig_b[-right_select_b..-1].reverse
-
-    afft = FFTW3.fft(mod_a)
-    bfft = FFTW3.fft(mod_b)
-    cfft = afft * bfft
-
-    (FFTW3.ifft( cfft )/combined_size).real[left_pad_a...(left_pad_a+ orig_a.size - orig_b.size + 1)]
-  end
-end
-
 class Convolver2DBenchmark
   attr_reader :image, :kernel
 
   def initialize
     # These show Convolver.convolve as 3x faster than FFTW3
-
-
+    @image = NArray.sfloat(256 * 256).random
+    @kernel = NArray.sfloat(16 * 16).random
 
     # These are roughly even (10% advantage to FFTW3)
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(256 * 256).random
+    # @kernel = NArray.sfloat(32 * 32).random
 
     # These show FFTW3 as 4x faster than Convolver.convolve
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(256 * 256).random
+    # @kernel = NArray.sfloat(64 * 64).random
 
     # These show Convolver.convolve as 200x faster than FFTW3
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(50 * 64 * 64).random
+    # @kernel = NArray.sfloat(50 * 64 * 64).random
 
     # These show FFTW3 as 2x faster than Convolver.convolve
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(128 * 128).random
+    # @kernel = NArray.sfloat(64 * 64).random
 
     # These show FFTW3 and Convolver.convolve roughly equal
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(80 * 80).random
+    # @kernel = NArray.sfloat(64 * 64).random
 
     # These show FFTW3 as 2x faster than Convolver.convolve
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(2 * 80 * 80).random
+    # @kernel = NArray.sfloat(2 * 64 * 64).random
 
     # These are roughly even - increasing size of image favours FFTW3
-
-
+    #@image = NArray.sfloat(2000 + 80 * 80).random
+    #@kernel = NArray.sfloat(80 * 80).random
   end
 end
 
 Benchmark.bm do |x|
   source = Convolver2DBenchmark.new
   x.report('convolver') { 100.times { Convolver.convolve( source.image, source.kernel ) } }
-  x.report('fftw3') { 100.times {
+  x.report('fftw3') { 100.times { Convolver.convolve_fftw3( source.image, source.kernel ) } }
 end
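The commented-out cases above sketch where the crossover between the two methods sits. If a caller wanted to pick a method automatically, a simple dispatch along the lines of the README's rule of thumb might look like this; best_convolve and the 1000-element threshold are illustrative assumptions, not part of the gem:

    # Hypothetical helper: route small kernels to the direct SIMD convolve,
    # large kernels to the FFTW3 path (the threshold is only a rule of thumb).
    def best_convolve( signal, kernel )
      if kernel.size >= 1000
        Convolver.convolve_fftw3( signal, kernel )
      else
        Convolver.convolve( signal, kernel )
      end
    end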
data/convolver.gemspec
CHANGED
data/ext/convolver/cnn_components.c
ADDED
@@ -0,0 +1,52 @@
+// ext/convolver/cnn_components.c
+
+#include <xmmintrin.h>
+#include "cnn_components.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Run a single fully-connected layer, calculating output from input
+//
+// Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
+//
+//
+
+void nn_run_layer_raw( int in_size, int out_size,
+    float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
+  int i, j, in_aligned_size, out_aligned_size, offset;
+  __m128 simd_x, simd_y, simd_t;
+
+  in_aligned_size = 4 * ( in_size/4 );
+  out_aligned_size = 4 * ( out_size/4 );
+
+  // Calculate activation
+  for ( i = 0; i < out_size; i++ ) {
+
+    float t = 0.0;
+    simd_t = _mm_setzero_ps();
+    offset = i * in_size;
+
+    // Use SIMD for all the aligned values in groups of 4
+    for ( j = 0; j < in_aligned_size; j +=4 ) {
+      simd_x = _mm_load_ps( in_ptr + j );
+      // Weights might not align to 16 bytes due to size of layers
+      simd_y = _mm_loadu_ps( weights + (offset + j) );
+      simd_x = _mm_mul_ps( simd_x, simd_y );
+      simd_t = _mm_add_ps( simd_x, simd_t );
+    }
+
+    // Complete any remaining 1,2 or 3 items one at a time
+    for ( j = in_aligned_size; j < in_size; j++ ) {
+      t += in_ptr[ j ] * weights[ offset + j ];
+    }
+
+    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+  }
+
+  for ( i = 0; i < out_size; i++ ) {
+    out_ptr[i] -= thresholds[i];
+    if ( out_ptr[i] < 0.0 ) { out_ptr[i] = 0.0; }
+  }
+
+  return;
+}
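This raw routine backs the existing Convolver.nn_run_layer binding (registered in convolver.c further down). A minimal sketch of a call from Ruby, assuming a 4-input, 3-output layer with zero thresholds; the values are arbitrary:

    require 'convolver'

    inputs     = NArray.sfloat( 4 ).random     # rank 1, size = number of inputs
    weights    = NArray.sfloat( 4, 3 ).random  # rank 2: first dim inputs, second dim outputs
    thresholds = NArray.sfloat( 3 )            # rank 1, zero-filled

    outputs = Convolver.nn_run_layer( inputs, weights, thresholds )
    outputs.shape   # => [3]; each entry is max( 0.0, weighted_sum - threshold )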
data/ext/convolver/cnn_components.h
ADDED
@@ -0,0 +1,14 @@
+// ext/convolver/cnn_components.h
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Declarations of narray helper functions
+//
+
+#ifndef CNN_COMPONENTS_H
+#define CNN_COMPONENTS_H
+
+void nn_run_layer_raw( int in_size, int out_size,
+    float *in_ptr, float *weights, float *thresholds, float *out_ptr );
+
+#endif
data/ext/convolver/convolve_raw.c
ADDED
@@ -0,0 +1,105 @@
+// ext/convolver/convolve_raw.c
+
+#include "convolve_raw.h"
+
+inline int size_from_shape( int rank, int *shape ) {
+  int size = 1;
+  int i;
+  for ( i = 0; i < rank; i++ ) { size *= shape[i]; }
+  return size;
+}
+
+// Sets reverse indices
+inline void corner_reset( int rank, int *shape, int *rev_indices ) {
+  int i;
+  for ( i = 0; i < rank; i++ ) { rev_indices[i] = shape[i] - 1; }
+  return;
+}
+
+// Counts indices down, returns number of ranks that reset
+inline int corner_dec( int rank, int *shape, int *rev_indices ) {
+  int i = 0;
+  while ( ! rev_indices[i]-- ) {
+    rev_indices[i] = shape[i] - 1;
+    i++;
+  }
+  return i;
+}
+
+// Generates co-increment steps by rank boundaries crossed, for the outer position as inner position is incremented by 1
+inline void calc_co_increment( int rank, int *outer_shape, int *inner_shape, int *co_increment ) {
+  int i, factor;
+  co_increment[0] = 1; // co-increment is always 1 in lowest rank
+  factor = 1;
+  for ( i = 0; i < rank; i++ ) {
+    co_increment[i+1] = co_increment[i] + factor * ( outer_shape[i] - inner_shape[i] );
+    factor *= outer_shape[i];
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Convolve
+//
+// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
+//
+
+void convolve_raw(
+    int in_rank, int *in_shape, float *in_ptr,
+    int kernel_rank, int *kernel_shape, float *kernel_ptr,
+    int out_rank, int *out_shape, float *out_ptr ) {
+  int i, j, in_size, kernel_size, kernel_aligned, out_size, offset;
+  int out_co_incr[LARGEST_RANK], kernel_co_incr[LARGEST_RANK];
+  int ker_q[LARGEST_RANK], out_q[LARGEST_RANK];
+  int *kernel_co_incr_cache;
+
+  in_size = size_from_shape( in_rank, in_shape );
+  kernel_size = size_from_shape( kernel_rank, kernel_shape );
+  kernel_aligned = 4 * (kernel_size/4);
+  out_size = size_from_shape( out_rank, out_shape );
+
+  calc_co_increment( in_rank, in_shape, out_shape, out_co_incr );
+  calc_co_increment( in_rank, in_shape, kernel_shape, kernel_co_incr );
+
+  kernel_co_incr_cache = ALLOC_N( int, kernel_size );
+  kernel_co_incr_cache[0] = 0;
+
+  corner_reset( kernel_rank, kernel_shape, ker_q );
+  for ( i = 1; i < kernel_size; i++ ) {
+    kernel_co_incr_cache[i] = kernel_co_incr_cache[i-1] + kernel_co_incr[ corner_dec( kernel_rank, kernel_shape, ker_q ) ];
+  }
+
+  // For convenience of flow, we set offset to -1 and adjust countdown 1 higher to compensate
+  offset = -1;
+  corner_reset( out_rank, out_shape, out_q );
+  out_q[0]++;
+
+  // Main convolve loop
+  for ( i = 0; i < out_size; i++ ) {
+    __m128 simd_x, simd_y, simd_t;
+    float t = 0.0;
+    simd_t = _mm_setzero_ps();
+
+    offset += out_co_incr[ corner_dec( out_rank, out_shape, out_q ) ];
+
+    // Use SIMD for all the aligned values in groups of 4
+    for ( j = 0; j < kernel_aligned; j +=4 ) {
+      simd_x = _mm_load_ps( kernel_ptr + j );
+      // Yes the backwards alignment is correct
+      simd_y = _mm_set_ps( in_ptr[ offset + kernel_co_incr_cache[j+3] ], in_ptr[ offset + kernel_co_incr_cache[j+2] ],
+          in_ptr[ offset + kernel_co_incr_cache[j+1] ], in_ptr[ offset + kernel_co_incr_cache[j] ] );
+      simd_x = _mm_mul_ps( simd_x, simd_y );
+      simd_t = _mm_add_ps( simd_x, simd_t );
+    }
+
+    // Complete any remaining 1,2 or 3 items one at a time
+    for ( j = kernel_aligned; j < kernel_size; j++ ) {
+      t += in_ptr[ offset + kernel_co_incr_cache[j] ] * kernel_ptr[ j ];
+    }
+
+    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+  }
+
+  xfree( kernel_co_incr_cache );
+  return;
+}
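The kernel_co_incr_cache above stores, for each flat kernel position, how far to step through the (larger) input array. The same co-increment idea rendered in Ruby, purely as an illustration (co_increments is not part of the gem):

    # Crossing the end of the inner (kernel) row inside the outer (input) row costs
    # an extra (outer width - inner width) elements; higher ranks accumulate similarly.
    def co_increments( outer_shape, inner_shape )
      incr = [ 1 ]      # moving one place in the lowest rank always costs 1
      factor = 1
      outer_shape.each_with_index do |outer, i|
        incr << incr[i] + factor * ( outer - inner_shape[i] )
        factor *= outer
      end
      incr
    end

    co_increments( [ 640, 480 ], [ 8, 8 ] )   # => [1, 633, 302713]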
data/ext/convolver/convolve_raw.h
ADDED
@@ -0,0 +1,22 @@
+// ext/convolver/convolve_raw.h
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Declarations of narray helper functions
+//
+
+#ifndef CONVOLVE_RAW_H
+#define CONVOLVE_RAW_H
+
+#include <ruby.h>
+#include <xmmintrin.h>
+#include "narray_shared.h"
+
+#define LARGEST_RANK 16
+
+void convolve_raw(
+    int in_rank, int *in_shape, float *in_ptr,
+    int kernel_rank, int *kernel_shape, float *kernel_ptr,
+    int out_rank, int *out_shape, float *out_ptr );
+
+#endif
data/ext/convolver/convolver.c
CHANGED
@@ -5,181 +5,53 @@
 #include <stdio.h>
 #include <xmmintrin.h>
 
-#
+#include "narray_shared.h"
+#include "convolve_raw.h"
+#include "cnn_components.h"
 
-
-inline int na_quick_idxs_to_pos( int rank, int *shape, int *idxs ) {
-  int i, pos = 0;
-  for ( i = rank - 1; i >= 0; i-- ) {
-    pos = pos * shape[i] + idxs[i];
-  }
-  return pos;
-}
-
-// This is inverse of above
-inline void na_quick_pos_to_idxs( int rank, int *shape, int pos, int *idxs ) {
-  int i;
-  for ( i = 0; i < rank; i++ ) {
-    idxs[ i ] = pos % shape[i];
-    pos /= shape[i];
-  }
-  return;
-}
+////////////////////////////////////////////////////////////////////////////////////////////////////
 
-
-
-  int i;
-  for ( i = 0; i < rank; i++ ) { size *= shape[i]; }
-  return size;
-}
+// To hold the module object
+VALUE Convolver = Qnil;
 
-
-
-
-
-
-}
+static VALUE narray_fit_backwards( VALUE self, VALUE a, VALUE b ) {
+  struct NARRAY *na_a, *na_b;
+  volatile VALUE val_a, val_b;
+  int target_rank, i;
+  int shift_by[LARGEST_RANK];
 
-
-
-  int i = 0;
-  while ( ! rev_indices[i]-- ) {
-    rev_indices[i] = shape[i] - 1;
-    i++;
-  }
-  return i;
-}
+  val_a = na_cast_object(a, NA_SFLOAT);
+  GetNArray( val_a, na_a );
 
-
-
-  int i, factor;
-  co_increment[0] = 1; // co-increment is always 1 in lowest rank
-  factor = 1;
-  for ( i = 0; i < rank; i++ ) {
-    co_increment[i+1] = co_increment[i] + factor * ( outer_shape[i] - inner_shape[i] );
-    factor *= outer_shape[i];
-  }
-}
+  val_b = na_cast_object(b, NA_SFLOAT);
+  GetNArray( val_b, na_b );
 
-
-
-// Convolve
-//
-// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
-//
-
-void convolve_raw(
-    int in_rank, int *in_shape, float *in_ptr,
-    int kernel_rank, int *kernel_shape, float *kernel_ptr,
-    int out_rank, int *out_shape, float *out_ptr ) {
-  int i, j, in_size, kernel_size, kernel_aligned, out_size, offset;
-  int out_co_incr[LARGEST_RANK], kernel_co_incr[LARGEST_RANK];
-  int ker_q[LARGEST_RANK], out_q[LARGEST_RANK];
-  int *kernel_co_incr_cache;
-
-  in_size = size_from_shape( in_rank, in_shape );
-  kernel_size = size_from_shape( kernel_rank, kernel_shape );
-  kernel_aligned = 4 * (kernel_size/4);
-  out_size = size_from_shape( out_rank, out_shape );
-
-  calc_co_increment( in_rank, in_shape, out_shape, out_co_incr );
-  calc_co_increment( in_rank, in_shape, kernel_shape, kernel_co_incr );
-
-  kernel_co_incr_cache = ALLOC_N( int, kernel_size );
-  kernel_co_incr_cache[0] = 0;
-
-  corner_reset( kernel_rank, kernel_shape, ker_q );
-  for ( i = 1; i < kernel_size; i++ ) {
-    kernel_co_incr_cache[i] = kernel_co_incr_cache[i-1] + kernel_co_incr[ corner_dec( kernel_rank, kernel_shape, ker_q ) ];
+  if ( na_a->rank != na_b->rank ) {
+    rb_raise( rb_eArgError, "narray a must have equal rank to narray b (a rank %d, b rank %d)", na_a->rank, na_b->rank );
   }
 
-
-
-  corner_reset( out_rank, out_shape, out_q );
-  out_q[0]++;
-
-  // Main convolve loop
-  for ( i = 0; i < out_size; i++ ) {
-    __m128 simd_x, simd_y, simd_t;
-    float t = 0.0;
-    simd_t = _mm_setzero_ps();
-
-    offset += out_co_incr[ corner_dec( out_rank, out_shape, out_q ) ];
-
-    // Use SIMD for all the aligned values in groups of 4
-    for ( j = 0; j < kernel_aligned; j +=4 ) {
-      simd_x = _mm_load_ps( kernel_ptr + j );
-      // Yes the backwards alignment is correct
-      simd_y = _mm_set_ps( in_ptr[ offset + kernel_co_incr_cache[j+3] ], in_ptr[ offset + kernel_co_incr_cache[j+2] ],
-          in_ptr[ offset + kernel_co_incr_cache[j+1] ], in_ptr[ offset + kernel_co_incr_cache[j] ] );
-      simd_x = _mm_mul_ps( simd_x, simd_y );
-      simd_t = _mm_add_ps( simd_x, simd_t );
-    }
-
-    // Complete any remaining 1,2 or 3 items one at a time
-    for ( j = kernel_aligned; j < kernel_size; j++ ) {
-      t += in_ptr[ offset + kernel_co_incr_cache[j] ] * kernel_ptr[ j ];
-    }
-
-    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+  if ( na_a->rank > LARGEST_RANK ) {
+    rb_raise( rb_eArgError, "exceeded maximum narray rank for convolve of %d", LARGEST_RANK );
   }
 
-
-  return;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-// Neural net
-//
-// Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
-//
-//
-
-void nn_run_layer_raw( int in_size, int out_size,
-    float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
-  int i, j, in_aligned_size, out_aligned_size, offset;
-  __m128 simd_x, simd_y, simd_t;
-
-  in_aligned_size = 4 * ( in_size/4 );
-  out_aligned_size = 4 * ( out_size/4 );
-
-  // Calculate activation
-  for ( i = 0; i < out_size; i++ ) {
-
-    float t = 0.0;
-    simd_t = _mm_setzero_ps();
-    offset = i * in_size;
-
-    // Use SIMD for all the aligned values in groups of 4
-    for ( j = 0; j < in_aligned_size; j +=4 ) {
-      simd_x = _mm_load_ps( in_ptr + j );
-      // Weights might not align to 16 bytes due to size of layers
-      simd_y = _mm_loadu_ps( weights + (offset + j) );
-      simd_x = _mm_mul_ps( simd_x, simd_y );
-      simd_t = _mm_add_ps( simd_x, simd_t );
-    }
+  target_rank = na_a->rank;
 
-
-
-
+  for ( i = 0; i < target_rank; i++ ) {
+    if ( ( na_a->shape[i] - na_b->shape[i] ) < 0 ) {
+      rb_raise( rb_eArgError, "no space for backward fit" );
     }
-
-    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+    shift_by[i] = na_b->shape[i] >> 1;
   }
 
-
-
-
-
+  fit_backwards_raw(
+    target_rank,
+    na_a->shape, (float*) na_a->ptr,
+    na_b->shape, (float*) na_b->ptr,
+    shift_by );
 
-  return;
+  return Qnil;
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// To hold the module object
-VALUE Convolver = Qnil;
 
 /* @overload convolve( signal, kernel )
  * Calculates convolution of an array of floats representing a signal, with a second array representing
@@ -233,10 +105,10 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
  * Calculates activations of a fully-connected neural network layer. The transfer function after
  * summing weights and applying threshold is a "ReLU", equivalent to
  *     y = x < 0.0 ? 0.0 : x
- * this is less sophisticated than many neural net
- * train.
+ * this is less sophisticated than many other neural net functions (such as sigma), but is fast to
+ * calculate and to train.
 * @param [NArray] inputs must be rank 1 array of floats
- * @param [NArray] weights must be rank 2 array of floats, with first
+ * @param [NArray] weights must be rank 2 array of floats, with first dimension size of inputs, and second dimension size equal to number of outputs
 * @param [NArray] thresholds must be rank 1 array of floats, size equal to number of outputs desired
 * @return [NArray] neuron activations
 */
@@ -266,7 +138,7 @@ static VALUE narray_nn_run_single_layer( VALUE self, VALUE inputs, VALUE weights
   val_thresholds = na_cast_object(thresholds, NA_SFLOAT);
   GetNArray( val_thresholds, na_thresholds );
   if ( na_thresholds->rank != 1 ) {
-    rb_raise( rb_eArgError, "thresholds must be
+    rb_raise( rb_eArgError, "thresholds must be narray of rank 1" );
   }
   if ( na_thresholds->shape[0] != output_size ) {
     rb_raise( rb_eArgError, "thresholds expected size %d, but got %d", output_size, na_thresholds->shape[0] );
@@ -287,4 +159,5 @@ void Init_convolver() {
   Convolver = rb_define_module( "Convolver" );
   rb_define_singleton_method( Convolver, "convolve", narray_convolve, 2 );
   rb_define_singleton_method( Convolver, "nn_run_layer", narray_nn_run_single_layer, 3 );
+  rb_define_singleton_method( Convolver, "fit_kernel_backwards", narray_fit_backwards, 2 );
 }
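The new fit_kernel_backwards binding is used internally by Convolver.convolve_fftw3 (see lib/convolver.rb below), but it can also be called on its own. A small sketch with a 1-D kernel; the result shown is hand-traced from fit_backwards_raw in narray_shared.c, not measured output:

    require 'convolver'

    dest   = NArray.sfloat( 4 )          # zero-filled, same rank as the kernel
    kernel = NArray[ 1.0, 2.0, 3.0 ]
    Convolver.fit_kernel_backwards( dest, kernel )
    # dest should now hold [ 2.0, 1.0, 0.0, 3.0 ]: the kernel reversed and wrapped
    # around index 0, ready to be transformed by FFTW3.fft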
data/ext/convolver/narray_shared.c
ADDED
@@ -0,0 +1,42 @@
+// ext/convolver/narray_shared.c
+
+#include "narray_shared.h"
+
+// This is copied from na_array.c, with safety checks and temp vars removed
+int na_quick_idxs_to_pos( int rank, int *shape, int *idxs ) {
+  int i, pos = 0;
+  for ( i = rank - 1; i >= 0; i-- ) {
+    pos = pos * shape[i] + idxs[i];
+  }
+  return pos;
+}
+
+// This is inverse of above
+void na_quick_pos_to_idxs( int rank, int *shape, int pos, int *idxs ) {
+  int i;
+  for ( i = 0; i < rank; i++ ) {
+    idxs[ i ] = pos % shape[i];
+    pos /= shape[i];
+  }
+  return;
+}
+
+// used to place kernel data into array for FFTW3 processing
+void fit_backwards_raw( int rank, int *dst_shape, float *dst, int *src_shape, float *src, int *shift_shape ) {
+  int i, j, size, x;
+  int k_idx[16], dst_idx[16];
+
+  size = 1;
+  for ( j = 0; j < rank; j++ ) { size *= src_shape[j]; }
+
+  for ( i = 0; i < size; i++ ) {
+    na_quick_pos_to_idxs( rank, src_shape, i, k_idx );
+    for ( j = 0; j < rank; j++ ) {
+      x = src_shape[j] - shift_shape[j] - k_idx[j] - 1;
+      if ( x < 0 ) x = x + dst_shape[j];
+      dst_idx[j] = x;
+    }
+    dst[ na_quick_idxs_to_pos( rank, dst_shape, dst_idx ) ] = src[i];
+  }
+  return;
+}
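The two quick index helpers convert between a flat element position and per-dimension indices, with the first dimension varying fastest. An equivalent Ruby sketch (these methods are illustrative only, not part of the gem):

    def idxs_to_pos( shape, idxs )
      pos = 0
      ( shape.size - 1 ).downto( 0 ) { |i| pos = pos * shape[i] + idxs[i] }
      pos
    end

    def pos_to_idxs( shape, pos )
      shape.map { |dim| idx = pos % dim; pos /= dim; idx }
    end

    idxs_to_pos( [ 5, 4, 3 ], [ 2, 1, 0 ] )   # => 7
    pos_to_idxs( [ 5, 4, 3 ], 7 )             # => [2, 1, 0]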
data/ext/convolver/narray_shared.h
ADDED
@@ -0,0 +1,22 @@
+// ext/convolver/narray_shared.h
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Declarations of narray helper functions
+//
+
+#ifndef CONVOLVER_NARRAY_SHARED_H
+#define CONVOLVER_NARRAY_SHARED_H
+
+#include <ruby.h>
+#include "narray.h"
+
+// This is copied from na_array.c, with safety checks and temp vars removed
+int na_quick_idxs_to_pos( int rank, int *shape, int *idxs );
+
+// This is inverse of above
+void na_quick_pos_to_idxs( int rank, int *shape, int pos, int *idxs );
+
+void fit_backwards_raw( int rank, int *dst_shape, float *dst, int *src_shape, float *src, int *shift_shape );
+
+#endif
data/lib/convolver.rb
CHANGED
@@ -1,7 +1,48 @@
 require 'narray'
 require "convolver/convolver"
 require "convolver/version"
+require 'fftw3'
 
 module Convolver
+  # Uses FFTW3 library to calculate convolution of an array of floats representing a signal,
+  # with a second array representing a kernel. The two parameters must have the same rank.
+  # The output has same rank, its size in each dimension d is given by
+  #  signal.shape[d] - kernel.shape[d] + 1
+  # @param [NArray] signal must be same size or larger than kernel in each dimension
+  # @param [NArray] kernel must be same size or smaller than signal in each dimension
+  # @return [NArray] result of convolving signal with kernel
+  def self.convolve_fftw3 signal, kernel
+    combined_shape, shift_by, ranges = fft_offsets( signal.shape, kernel.shape )
 
+    mod_a = NArray.sfloat(*combined_shape)
+    mod_a[*shift_by] = signal
+
+    mod_b = NArray.sfloat(*combined_shape)
+
+    Convolver.fit_kernel_backwards( mod_b, kernel )
+
+    afreqs = FFTW3.fft(mod_a)
+    bfreqs = FFTW3.fft(mod_b)
+    cfreqs = afreqs * bfreqs
+
+    (FFTW3.ifft( cfreqs ).real * (1.0/mod_a.size))[*ranges]
+  end
+
+  private
+
+  def self.fft_offsets signal_shape, kernel_shape
+    combined_shape = []
+    shift_by = []
+    ranges = []
+    signal_shape.each_with_index do |signal_size, i|
+      kernel_size = kernel_shape[i]
+
+      combined_shape[i] = signal_size + kernel_size - 1
+      output_size = signal_size - kernel_size + 1
+      output_offset = kernel_size - 1
+      shift_by[i] = kernel_size / 2
+      ranges[i] = (output_offset...(output_offset + output_size))
+    end
+    [ combined_shape, shift_by, ranges ]
+  end
 end
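Since convolve_fftw3 is meant to agree with the direct convolve, a quick check in the style of the specs added below (the sizes are arbitrary; the 1e-9 figure is the tolerance used in spec/helpers.rb):

    require 'convolver'

    signal = NArray.sfloat( 20, 20 ).random
    kernel = NArray.sfloat( 5, 5 ).random

    direct = Convolver.convolve( signal, kernel )
    fftw3  = Convolver.convolve_fftw3( signal, kernel )

    d = direct - fftw3
    ( d * d ).sum / d.size   # mean square error, expected to stay below 1e-9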
data/lib/convolver/version.rb
CHANGED
data/spec/convolve_fftw3_spec.rb
ADDED
@@ -0,0 +1,161 @@
+require 'helpers'
+
+describe Convolver do
+  describe "#convolve_fftw3" do
+
+    it "should work like the example in the README" do
+      a = NArray[ 0.3, 0.4, 0.5 ]
+      b = NArray[ 1.3, -0.5 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 0.19, 0.27 ]
+    end
+
+    it "should convolve 1D arrays with a variety of signal and kernel lengths" do
+      a = NArray[ 0.3 ]
+      b = NArray[ -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.21 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2 ]
+      b = NArray[ -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.21, -0.28, -0.35, -0.14 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2 ]
+      b = NArray[ 1.1, -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 0.05, 0.09, 0.41 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2 ]
+      b = NArray[ 1.1, -0.7, -0.2 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.05, 0.05 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2, 0.6 ]
+      b = NArray[ 1.1, -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 0.05, 0.09, 0.41, -0.2 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2, 0.6 ]
+      b = NArray[ 1.1, -0.7, 2.1 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 1.1, 0.51, 1.67 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2, 0.6 ]
+      b = NArray[ 0.6, -0.5, -0.4, 0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.08, 0.33 ]
+    end
+
+    it "should calculate a 2D convolution" do
+      a = NArray[ [ 0.3, 0.4, 0.5 ], [ 0.6, 0.8, 0.2 ], [ 0.9, 1.0, 0.1 ] ]
+      b = NArray[ [ 1.2, -0.5 ], [ 0.5, -1.3 ] ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ [ -0.58, 0.37 ], [ -0.53, 1.23 ] ]
+    end
+
+    it "should calculate a 3D convolution" do
+      # 5x4x3
+      a = NArray[
+        [ [ 1.0, 0.6, 1.1, 0.2, 0.9 ], [ 1.0, 0.7, 0.8, 1.0, 1.0 ], [ 0.2, 0.6, 0.1, 0.2, 0.5 ], [ 0.5, 0.9, 0.2, 0.1, 0.6 ] ],
+        [ [ 0.4, 0.9, 0.4, 0.0, 0.6 ], [ 0.2, 1.1, 0.2, 0.4, 0.1 ], [ 0.4, 0.2, 0.5, 0.8, 0.7 ], [ 0.1, 0.9, 0.7, 0.1, 0.3 ] ],
+        [ [ 0.8, 0.6, 1.0, 0.1, 0.4 ], [ 0.3, 0.8, 0.6, 0.7, 1.1 ], [ 0.9, 1.0, 0.3, 0.4, 0.6 ], [ 0.2, 0.5, 0.4, 0.7, 0.2 ] ]
+      ]
+
+      # 3x3x3
+      b = NArray[
+        [ [ -0.9, 1.2, 0.8 ], [ 0.9, 0.1, -0.5 ], [ 1.1, 0.1, -1.1 ] ],
+        [ [ -0.2, -1.0, 1.4 ], [ -1.4, 0.0, 1.3 ], [ 0.3, 1.0, -0.5 ] ],
+        [ [ 0.6, 0.0, 0.7 ], [ -0.7, 1.1, 1.2 ], [ 1.3, 0.7, 0.0 ] ]
+      ]
+
+      # Should be 3x2x1
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ [ [ 5.51, 3.04, 4.3 ], [ 3.04, 6.31, 3.87 ] ] ]
+    end
+
+    it "should calculate a 4D convolution" do
+      # 3x4x5x3
+      a = NArray[
+        [ [ [ 0.5, 0.4, 0.9 ], [ 0.1, 0.9, 0.8 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ] ],
+        [ [ [ 0.5, 0.4, 0.9 ], [ 0.1, 0.9, 0.8 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ] ],
+        [ [ [ 0.5, 0.4, 0.9 ], [ 0.1, 0.9, 0.8 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ] ] ]
+
+      # 2x3x3x2
+      b = NArray[ [
+          [ [ 1.1, 0.6 ], [ 1.2, 0.6 ], [ 0.8, 0.1 ] ], [ [ -0.4, 0.8 ], [ 0.5, 0.4 ], [ 1.2, 0.2 ] ],
+          [ [ 0.8, 0.2 ], [ 0.5, 0.0 ], [ 1.4, 1.3 ] ] ],
+        [ [ [ 1.1, 0.6 ], [ 1.2, 0.6 ], [ 0.8, 0.1 ] ], [ [ -0.4, 0.8 ], [ 0.5, 0.4 ], [ 1.2, 0.2 ] ],
+          [ [ 0.8, 0.2 ], [ 0.5, 0.0 ], [ 1.4, 1.3 ] ] ] ]
+
+      # Should be 2x2x3x2
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[
+        [ [ [ 8.5, 8.2 ], [ 11.34, 9.68 ] ], [ [ 7.68, 6.56 ], [ 11.24, 7.16 ] ], [ [ 9.14, 6.54 ], [ 12.44, 9.2 ] ] ],
+        [ [ [ 8.5, 8.2 ], [ 11.34, 9.68 ] ], [ [ 7.68, 6.56 ], [ 11.24, 7.16 ] ], [ [ 9.14, 6.54 ], [ 12.44, 9.2 ] ] ]
+      ]
+    end
+
+    describe "compared with #convolve" do
+      it "should produce same results for 1D arrays " do
+        (1..30).each do |signal_length|
+          (1..signal_length).each do |kernel_length|
+            signal = NArray.sfloat(signal_length).random()
+            kernel = NArray.sfloat(kernel_length).random()
+            expect_result = Convolver.convolve( signal, kernel )
+            got_result = Convolver.convolve_fftw3( signal, kernel )
+            got_result.should be_narray_like expect_result
+          end
+        end
+      end
+
+      it "should produce same results for 2D arrays " do
+        (3..10).each do |signal_x|
+          (signal_x-2..signal_x+2).each do |signal_y|
+            (1..signal_x).each do |kernel_x|
+              (1..signal_y).each do |kernel_y|
+                signal = NArray.sfloat(signal_x,signal_y).random()
+                kernel = NArray.sfloat(kernel_x,kernel_y).random()
+                expect_result = Convolver.convolve( signal, kernel )
+                got_result = Convolver.convolve_fftw3( signal, kernel )
+                got_result.should be_narray_like expect_result
+              end
+            end
+          end
+        end
+      end
+
+      it "should produce same results for 3D arrays " do
+        (3..5).each do |signal_x|
+          (signal_x-2..signal_x+2).each do |signal_y|
+            (signal_x-2..signal_x+2).each do |signal_z|
+              (1..signal_x).each do |kernel_x|
+                (1..signal_y).each do |kernel_y|
+                  (1..signal_z).each do |kernel_z|
+                    signal = NArray.sfloat(signal_x,signal_y,signal_z).random()
+                    kernel = NArray.sfloat(kernel_x,kernel_y,kernel_z).random()
+                    expect_result = Convolver.convolve( signal, kernel )
+                    got_result = Convolver.convolve_fftw3( signal, kernel )
+                    got_result.should be_narray_like expect_result
+                  end
+                end
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
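The first entry of the 2-D expectation above can be verified by hand, which also shows that the kernel is applied un-flipped (correlation-style) rather than mirrored:

    # c[0, 0] pairs the kernel with the top-left 2x2 window of the signal:
    0.3 * 1.2 + 0.4 * -0.5 + 0.6 * 0.5 + 0.8 * -1.3
    # => -0.58 (to within float rounding)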
data/spec/helpers.rb
CHANGED
@@ -12,7 +12,7 @@ RSpec::Matchers.define :be_narray_like do |expected_narray|
     else
       d = given - expected_narray
       difference = ( d * d ).sum / d.size
-      if difference > 1e-
+      if difference > 1e-9
         @error = "Numerical difference with mean square error #{difference}"
       end
     end
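Restated outside RSpec, the matcher treats two NArrays as alike when their mean square error is tiny. A sketch using the 1e-9 threshold from the helper (narray_like? itself is hypothetical, not part of the gem):

    def narray_like?( given, expected )
      d = given - expected
      ( ( d * d ).sum / d.size ) <= 1e-9
    end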
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: convolver
 version: !ruby/object:Gem::Version
-  version: 0.0
+  version: 0.1.0
 platform: ruby
 authors:
 - Neil Slater
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-10-
+date: 2013-10-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: narray
@@ -24,6 +24,20 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: 0.6.0.8
+- !ruby/object:Gem::Dependency
+  name: fftw3
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0.3'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0.3'
 - !ruby/object:Gem::Dependency
   name: yard
   requirement: !ruby/object:Gem::Requirement
@@ -112,10 +126,17 @@ files:
 - benchmarks/convolver_vs_fftw3.rb
 - benchmarks/nn_layer_benchmark.rb
 - convolver.gemspec
+- ext/convolver/cnn_components.c
+- ext/convolver/cnn_components.h
+- ext/convolver/convolve_raw.c
+- ext/convolver/convolve_raw.h
 - ext/convolver/convolver.c
 - ext/convolver/extconf.rb
+- ext/convolver/narray_shared.c
+- ext/convolver/narray_shared.h
 - lib/convolver.rb
 - lib/convolver/version.rb
+- spec/convolve_fftw3_spec.rb
 - spec/convolver_spec.rb
 - spec/helpers.rb
 homepage: http://github.com/neilslater/convolver
@@ -143,6 +164,7 @@ signing_key:
 specification_version: 4
 summary: Convolution for NArray
 test_files:
+- spec/convolve_fftw3_spec.rb
 - spec/convolver_spec.rb
 - spec/helpers.rb
 has_rdoc: