convolver 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/README.md +18 -5
- data/benchmarks/convolve_benchmark.rb +2 -2
- data/benchmarks/convolver_vs_fftw3.rb +17 -41
- data/convolver.gemspec +1 -0
- data/ext/convolver/cnn_components.c +52 -0
- data/ext/convolver/cnn_components.h +14 -0
- data/ext/convolver/convolve_raw.c +105 -0
- data/ext/convolver/convolve_raw.h +22 -0
- data/ext/convolver/convolver.c +35 -162
- data/ext/convolver/narray_shared.c +42 -0
- data/ext/convolver/narray_shared.h +22 -0
- data/lib/convolver.rb +41 -0
- data/lib/convolver/version.rb +1 -1
- data/spec/convolve_fftw3_spec.rb +161 -0
- data/spec/helpers.rb +1 -1
- metadata +24 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4d393cf7a6f7cd94485db0aaed706239c92ab0d0
+  data.tar.gz: 0944542524997227558ceb4f5c9b6968d3413e50
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1f263ae8f88d5318f84f9479c2540d25648396d09f5bfce65dcfc13122eaff26207280afa0a459d634408b5a526d6f302ebe750aebdd3106df3c91e7ac8af681
+  data.tar.gz: 35473c019dfb69abf0a0048df1b53f0c12e0512d1ebddc4b4dab4a7ececf7d8d0c35078ad92192d350b7c34bc5c4548a9446a2e5b69b0888c1b0c877445aea8b
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -5,9 +5,9 @@
 Adds a convolve operation to NArray floats. It is around 250 times faster than equivalents
 in pure Ruby.
 
-
-
-
+The gem makes convolution via FFTW3 library available. This is faster for convolutions with
+larger kernels and signals. The relationship is complex, but as a rule of thumb, the kernel
+needs to be around 1000 entries or larger before it is worth switching to FFTW3-based convolves.
 
 ## Planned features
 
@@ -17,6 +17,12 @@ calculating signal convolutions for other types of analysis.
 
 ## Installation
 
+### Dependency: FFTW3
+
+Before you install *convolver*, you should install FFTW3. See http://www.fftw.org/ for details.
+
+### Installing the gem
+
 Add this line to your application's Gemfile:
 
     gem 'convolver'
@@ -41,12 +47,19 @@ Basic convolution:
 * Convolver only works on single-precision floats internally. It will cast NArray types to this, if
 possible, prior to calculating.
 
-* The
-
+* The output is smaller than the input, each dimension is reduced by 1 less than the width of the
+kernel in the same dimension.
 
 * Convolver expects input a and kernel b to have the same rank, and for the kernel to be same size
 or smaller in all dimensions as the input.
 
+FFTW3 convolution:
+
+    a = NArray[0.3,0.4,0.5]
+    b = NArray[1.3, -0.5]
+    c = Convolver.convolve_fftw3( a, b )
+    => NArray.float(2): [ 0.19, 0.27 ]
+
 ## Contributing
 
 1. Fork it
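The README changes above say the output shrinks by one less than the kernel width in each dimension, and give 0.19 and 0.27 for the FFTW3 example. A quick pure-Ruby check of that arithmetic (illustrative only, using plain Arrays rather than NArray, and not part of the gem):

    # Valid-style convolution with no kernel flip, as in the README example.
    a = [0.3, 0.4, 0.5]
    b = [1.3, -0.5]
    # Output length = a.length - b.length + 1 = 2
    c = (0..(a.length - b.length)).map do |i|
      (0...b.length).map { |j| a[i + j] * b[j] }.reduce(:+)
    end
    # c => [0.19, 0.27] up to float rounding, matching both convolve and convolve_fftw3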
data/benchmarks/convolver_vs_fftw3.rb
CHANGED
@@ -1,70 +1,46 @@
 require 'convolver'
-require 'narray'
-require 'fftw3'
 require 'benchmark'
 
-# In Ruby for now, which is slower, but at least gets us ballpark figures (99% of the work is in the C)
-module FFTW3Convolver
-  def self.convolve orig_a, orig_b
-    combined_size = orig_a.size + orig_b.size - 1
-    left_pad_a = ( combined_size - orig_a.size + 1)/2
-    mod_a = NArray.float(combined_size)
-    mod_a[left_pad_a] = orig_a
-
-    mod_b = NArray.float(combined_size)
-    left_select_b = ( orig_b.size + 1 )/2
-    right_select_b = orig_b.size - left_select_b
-    mod_b[0] = orig_b[(0...left_select_b)].reverse
-    mod_b[-right_select_b] = orig_b[-right_select_b..-1].reverse
-
-    afft = FFTW3.fft(mod_a)
-    bfft = FFTW3.fft(mod_b)
-    cfft = afft * bfft
-
-    (FFTW3.ifft( cfft )/combined_size).real[left_pad_a...(left_pad_a+ orig_a.size - orig_b.size + 1)]
-  end
-end
-
 class Convolver2DBenchmark
   attr_reader :image, :kernel
 
   def initialize
     # These show Convolver.convolve as 3x faster than FFTW3
-
-
+    @image = NArray.sfloat(256 * 256).random
+    @kernel = NArray.sfloat(16 * 16).random
 
     # These are roughly even (10% advantage to FFTW3)
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(256 * 256).random
+    # @kernel = NArray.sfloat(32 * 32).random
 
     # These show FFTW3 as 4x faster than Convolver.convolve
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(256 * 256).random
+    # @kernel = NArray.sfloat(64 * 64).random
 
     # These show Convolver.convolve as 200x faster than FFTW3
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(50 * 64 * 64).random
+    # @kernel = NArray.sfloat(50 * 64 * 64).random
 
     # These show FFTW3 as 2x faster than Convolver.convolve
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(128 * 128).random
+    # @kernel = NArray.sfloat(64 * 64).random
 
     # These show FFTW3 and Convolver.convolve roughly equal
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(80 * 80).random
+    # @kernel = NArray.sfloat(64 * 64).random
 
     # These show FFTW3 as 2x faster than Convolver.convolve
-    # @image = NArray.
-    # @kernel = NArray.
+    # @image = NArray.sfloat(2 * 80 * 80).random
+    # @kernel = NArray.sfloat(2 * 64 * 64).random
 
     # These are roughly even - increasing size of image favours FFTW3
-
-
+    #@image = NArray.sfloat(2000 + 80 * 80).random
+    #@kernel = NArray.sfloat(80 * 80).random
  end
 end
 
 Benchmark.bm do |x|
   source = Convolver2DBenchmark.new
   x.report('convolver') { 100.times { Convolver.convolve( source.image, source.kernel ) } }
-  x.report('fftw3') { 100.times {
+  x.report('fftw3') { 100.times { Convolver.convolve_fftw3( source.image, source.kernel ) } }
 end
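The benchmark pairs above, together with the README rule of thumb of roughly 1000 kernel entries, suggest picking the routine from the kernel size. A hypothetical wrapper sketching that choice (not part of the gem; the threshold constant is an assumption to tune per machine):

    # Hypothetical helper: use the FFTW3 path only for large kernels.
    module ConvolverAuto
      KERNEL_SIZE_THRESHOLD = 1000

      def self.convolve( signal, kernel )
        if kernel.size >= KERNEL_SIZE_THRESHOLD
          Convolver.convolve_fftw3( signal, kernel )
        else
          Convolver.convolve( signal, kernel )
        end
      end
    end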
data/convolver.gemspec
CHANGED
data/ext/convolver/cnn_components.c
ADDED
@@ -0,0 +1,52 @@
+// ext/convolver/cnn_components.c
+
+#include <xmmintrin.h>
+#include "cnn_components.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Run a single fully-connected layer, calculating output from input
+//
+// Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
+//
+//
+
+void nn_run_layer_raw( int in_size, int out_size,
+    float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
+  int i, j, in_aligned_size, out_aligned_size, offset;
+  __m128 simd_x, simd_y, simd_t;
+
+  in_aligned_size = 4 * ( in_size/4 );
+  out_aligned_size = 4 * ( out_size/4 );
+
+  // Calculate activation
+  for ( i = 0; i < out_size; i++ ) {
+
+    float t = 0.0;
+    simd_t = _mm_setzero_ps();
+    offset = i * in_size;
+
+    // Use SIMD for all the aligned values in groups of 4
+    for ( j = 0; j < in_aligned_size; j +=4 ) {
+      simd_x = _mm_load_ps( in_ptr + j );
+      // Weights might not align to 16 bytes due to size of layers
+      simd_y = _mm_loadu_ps( weights + (offset + j) );
+      simd_x = _mm_mul_ps( simd_x, simd_y );
+      simd_t = _mm_add_ps( simd_x, simd_t );
+    }
+
+    // Complete any remaining 1,2 or 3 items one at a time
+    for ( j = in_aligned_size; j < in_size; j++ ) {
+      t += in_ptr[ j ] * weights[ offset + j ];
+    }
+
+    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+  }
+
+  for ( i = 0; i < out_size; i++ ) {
+    out_ptr[i] -= thresholds[i];
+    if ( out_ptr[i] < 0.0 ) { out_ptr[i] = 0.0; }
+  }
+
+  return;
+}
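For reference, what nn_run_layer_raw computes, restated as pure Ruby with plain Arrays (an illustrative sketch only; the C version above gets its speed from the SSE intrinsics):

    # Weighted sum per output neuron, threshold subtracted, then ReLU.
    # weights is laid out row-major: weights[i * in_size + j] connects input j to output i.
    def nn_run_layer_ref( inputs, weights, thresholds )
      in_size = inputs.size
      thresholds.each_index.map do |i|
        sum = 0.0
        in_size.times { |j| sum += inputs[j] * weights[i * in_size + j] }
        x = sum - thresholds[i]
        x < 0.0 ? 0.0 : x
      end
    end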
data/ext/convolver/cnn_components.h
ADDED
@@ -0,0 +1,14 @@
+// ext/convolver/cnn_components.h
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Declarations of narray helper functions
+//
+
+#ifndef CNN_COMPONENTS_H
+#define CNN_COMPONENTS_H
+
+void nn_run_layer_raw( int in_size, int out_size,
+    float *in_ptr, float *weights, float *thresholds, float *out_ptr );
+
+#endif
data/ext/convolver/convolve_raw.c
ADDED
@@ -0,0 +1,105 @@
+// ext/convolver/convolve_raw.c
+
+#include "convolve_raw.h"
+
+inline int size_from_shape( int rank, int *shape ) {
+  int size = 1;
+  int i;
+  for ( i = 0; i < rank; i++ ) { size *= shape[i]; }
+  return size;
+}
+
+// Sets reverse indices
+inline void corner_reset( int rank, int *shape, int *rev_indices ) {
+  int i;
+  for ( i = 0; i < rank; i++ ) { rev_indices[i] = shape[i] - 1; }
+  return;
+}
+
+// Counts indices down, returns number of ranks that reset
+inline int corner_dec( int rank, int *shape, int *rev_indices ) {
+  int i = 0;
+  while ( ! rev_indices[i]-- ) {
+    rev_indices[i] = shape[i] - 1;
+    i++;
+  }
+  return i;
+}
+
+// Generates co-increment steps by rank boundaries crossed, for the outer position as inner position is incremented by 1
+inline void calc_co_increment( int rank, int *outer_shape, int *inner_shape, int *co_increment ) {
+  int i, factor;
+  co_increment[0] = 1; // co-increment is always 1 in lowest rank
+  factor = 1;
+  for ( i = 0; i < rank; i++ ) {
+    co_increment[i+1] = co_increment[i] + factor * ( outer_shape[i] - inner_shape[i] );
+    factor *= outer_shape[i];
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Convolve
+//
+// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
+//
+
+void convolve_raw(
+    int in_rank, int *in_shape, float *in_ptr,
+    int kernel_rank, int *kernel_shape, float *kernel_ptr,
+    int out_rank, int *out_shape, float *out_ptr ) {
+  int i, j, in_size, kernel_size, kernel_aligned, out_size, offset;
+  int out_co_incr[LARGEST_RANK], kernel_co_incr[LARGEST_RANK];
+  int ker_q[LARGEST_RANK], out_q[LARGEST_RANK];
+  int *kernel_co_incr_cache;
+
+  in_size = size_from_shape( in_rank, in_shape );
+  kernel_size = size_from_shape( kernel_rank, kernel_shape );
+  kernel_aligned = 4 * (kernel_size/4);
+  out_size = size_from_shape( out_rank, out_shape );
+
+  calc_co_increment( in_rank, in_shape, out_shape, out_co_incr );
+  calc_co_increment( in_rank, in_shape, kernel_shape, kernel_co_incr );
+
+  kernel_co_incr_cache = ALLOC_N( int, kernel_size );
+  kernel_co_incr_cache[0] = 0;
+
+  corner_reset( kernel_rank, kernel_shape, ker_q );
+  for ( i = 1; i < kernel_size; i++ ) {
+    kernel_co_incr_cache[i] = kernel_co_incr_cache[i-1] + kernel_co_incr[ corner_dec( kernel_rank, kernel_shape, ker_q ) ];
+  }
+
+  // For convenience of flow, we set offset to -1 and adjust countdown 1 higher to compensate
+  offset = -1;
+  corner_reset( out_rank, out_shape, out_q );
+  out_q[0]++;
+
+  // Main convolve loop
+  for ( i = 0; i < out_size; i++ ) {
+    __m128 simd_x, simd_y, simd_t;
+    float t = 0.0;
+    simd_t = _mm_setzero_ps();
+
+    offset += out_co_incr[ corner_dec( out_rank, out_shape, out_q ) ];
+
+    // Use SIMD for all the aligned values in groups of 4
+    for ( j = 0; j < kernel_aligned; j +=4 ) {
+      simd_x = _mm_load_ps( kernel_ptr + j );
+      // Yes the backwards alignment is correct
+      simd_y = _mm_set_ps( in_ptr[ offset + kernel_co_incr_cache[j+3] ], in_ptr[ offset + kernel_co_incr_cache[j+2] ],
+          in_ptr[ offset + kernel_co_incr_cache[j+1] ], in_ptr[ offset + kernel_co_incr_cache[j] ] );
+      simd_x = _mm_mul_ps( simd_x, simd_y );
+      simd_t = _mm_add_ps( simd_x, simd_t );
+    }
+
+    // Complete any remaining 1,2 or 3 items one at a time
+    for ( j = kernel_aligned; j < kernel_size; j++ ) {
+      t += in_ptr[ offset + kernel_co_incr_cache[j] ] * kernel_ptr[ j ];
+    }
+
+    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+  }
+
+  xfree( kernel_co_incr_cache );
+  return;
+}
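The co-increment bookkeeping above is a flat-index way of walking every output position and every kernel position of an N-dimensional "valid" convolution with no kernel flip. A pure-Ruby 2-D equivalent, as an illustrative sketch with nested Arrays (not part of the gem):

    # signal and kernel are arrays of rows; output shrinks by (kernel size - 1) per dimension.
    def convolve2d_ref( signal, kernel )
      kh, kw = kernel.size, kernel[0].size
      (0..(signal.size - kh)).map do |y|
        (0..(signal[0].size - kw)).map do |x|
          sum = 0.0
          kh.times { |ky| kw.times { |kx| sum += signal[y + ky][x + kx] * kernel[ky][kx] } }
          sum
        end
      end
    end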
data/ext/convolver/convolve_raw.h
ADDED
@@ -0,0 +1,22 @@
+// ext/convolver/convolve_raw.h
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Declarations of narray helper functions
+//
+
+#ifndef CONVOLVE_RAW_H
+#define CONVOLVE_RAW_H
+
+#include <ruby.h>
+#include <xmmintrin.h>
+#include "narray_shared.h"
+
+#define LARGEST_RANK 16
+
+void convolve_raw(
+    int in_rank, int *in_shape, float *in_ptr,
+    int kernel_rank, int *kernel_shape, float *kernel_ptr,
+    int out_rank, int *out_shape, float *out_ptr );
+
+#endif
data/ext/convolver/convolver.c
CHANGED
@@ -5,181 +5,53 @@
 #include <stdio.h>
 #include <xmmintrin.h>
 
-#
+#include "narray_shared.h"
+#include "convolve_raw.h"
+#include "cnn_components.h"
 
-
-inline int na_quick_idxs_to_pos( int rank, int *shape, int *idxs ) {
-  int i, pos = 0;
-  for ( i = rank - 1; i >= 0; i-- ) {
-    pos = pos * shape[i] + idxs[i];
-  }
-  return pos;
-}
-
-// This is inverse of above
-inline void na_quick_pos_to_idxs( int rank, int *shape, int pos, int *idxs ) {
-  int i;
-  for ( i = 0; i < rank; i++ ) {
-    idxs[ i ] = pos % shape[i];
-    pos /= shape[i];
-  }
-  return;
-}
+////////////////////////////////////////////////////////////////////////////////////////////////////
 
-
-
-  int i;
-  for ( i = 0; i < rank; i++ ) { size *= shape[i]; }
-  return size;
-}
+// To hold the module object
+VALUE Convolver = Qnil;
 
-
-
-
-
-
-}
+static VALUE narray_fit_backwards( VALUE self, VALUE a, VALUE b ) {
+  struct NARRAY *na_a, *na_b;
+  volatile VALUE val_a, val_b;
+  int target_rank, i;
+  int shift_by[LARGEST_RANK];
 
-
-
-  int i = 0;
-  while ( ! rev_indices[i]-- ) {
-    rev_indices[i] = shape[i] - 1;
-    i++;
-  }
-  return i;
-}
+  val_a = na_cast_object(a, NA_SFLOAT);
+  GetNArray( val_a, na_a );
 
-
-
-  int i, factor;
-  co_increment[0] = 1; // co-increment is always 1 in lowest rank
-  factor = 1;
-  for ( i = 0; i < rank; i++ ) {
-    co_increment[i+1] = co_increment[i] + factor * ( outer_shape[i] - inner_shape[i] );
-    factor *= outer_shape[i];
-  }
-}
+  val_b = na_cast_object(b, NA_SFLOAT);
+  GetNArray( val_b, na_b );
 
-
-
-// Convolve
-//
-// Benchmark: 640x480 image, 8x8 kernel, 1000 iterations. 12.3 seconds.
-//
-
-void convolve_raw(
-    int in_rank, int *in_shape, float *in_ptr,
-    int kernel_rank, int *kernel_shape, float *kernel_ptr,
-    int out_rank, int *out_shape, float *out_ptr ) {
-  int i, j, in_size, kernel_size, kernel_aligned, out_size, offset;
-  int out_co_incr[LARGEST_RANK], kernel_co_incr[LARGEST_RANK];
-  int ker_q[LARGEST_RANK], out_q[LARGEST_RANK];
-  int *kernel_co_incr_cache;
-
-  in_size = size_from_shape( in_rank, in_shape );
-  kernel_size = size_from_shape( kernel_rank, kernel_shape );
-  kernel_aligned = 4 * (kernel_size/4);
-  out_size = size_from_shape( out_rank, out_shape );
-
-  calc_co_increment( in_rank, in_shape, out_shape, out_co_incr );
-  calc_co_increment( in_rank, in_shape, kernel_shape, kernel_co_incr );
-
-  kernel_co_incr_cache = ALLOC_N( int, kernel_size );
-  kernel_co_incr_cache[0] = 0;
-
-  corner_reset( kernel_rank, kernel_shape, ker_q );
-  for ( i = 1; i < kernel_size; i++ ) {
-    kernel_co_incr_cache[i] = kernel_co_incr_cache[i-1] + kernel_co_incr[ corner_dec( kernel_rank, kernel_shape, ker_q ) ];
+  if ( na_a->rank != na_b->rank ) {
+    rb_raise( rb_eArgError, "narray a must have equal rank to narray b (a rank %d, b rank %d)", na_a->rank, na_b->rank );
   }
 
-
-
-  corner_reset( out_rank, out_shape, out_q );
-  out_q[0]++;
-
-  // Main convolve loop
-  for ( i = 0; i < out_size; i++ ) {
-    __m128 simd_x, simd_y, simd_t;
-    float t = 0.0;
-    simd_t = _mm_setzero_ps();
-
-    offset += out_co_incr[ corner_dec( out_rank, out_shape, out_q ) ];
-
-    // Use SIMD for all the aligned values in groups of 4
-    for ( j = 0; j < kernel_aligned; j +=4 ) {
-      simd_x = _mm_load_ps( kernel_ptr + j );
-      // Yes the backwards alignment is correct
-      simd_y = _mm_set_ps( in_ptr[ offset + kernel_co_incr_cache[j+3] ], in_ptr[ offset + kernel_co_incr_cache[j+2] ],
-          in_ptr[ offset + kernel_co_incr_cache[j+1] ], in_ptr[ offset + kernel_co_incr_cache[j] ] );
-      simd_x = _mm_mul_ps( simd_x, simd_y );
-      simd_t = _mm_add_ps( simd_x, simd_t );
-    }
-
-    // Complete any remaining 1,2 or 3 items one at a time
-    for ( j = kernel_aligned; j < kernel_size; j++ ) {
-      t += in_ptr[ offset + kernel_co_incr_cache[j] ] * kernel_ptr[ j ];
-    }
-
-    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+  if ( na_a->rank > LARGEST_RANK ) {
+    rb_raise( rb_eArgError, "exceeded maximum narray rank for convolve of %d", LARGEST_RANK );
   }
 
-
-  return;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-// Neural net
-//
-// Benchmark: 1024 inputs, 256 outputs. 1000 iterations. 0.56 seconds
-//
-//
-
-void nn_run_layer_raw( int in_size, int out_size,
-    float *in_ptr, float *weights, float *thresholds, float *out_ptr ) {
-  int i, j, in_aligned_size, out_aligned_size, offset;
-  __m128 simd_x, simd_y, simd_t;
-
-  in_aligned_size = 4 * ( in_size/4 );
-  out_aligned_size = 4 * ( out_size/4 );
-
-  // Calculate activation
-  for ( i = 0; i < out_size; i++ ) {
-
-    float t = 0.0;
-    simd_t = _mm_setzero_ps();
-    offset = i * in_size;
-
-    // Use SIMD for all the aligned values in groups of 4
-    for ( j = 0; j < in_aligned_size; j +=4 ) {
-      simd_x = _mm_load_ps( in_ptr + j );
-      // Weights might not align to 16 bytes due to size of layers
-      simd_y = _mm_loadu_ps( weights + (offset + j) );
-      simd_x = _mm_mul_ps( simd_x, simd_y );
-      simd_t = _mm_add_ps( simd_x, simd_t );
-    }
+  target_rank = na_a->rank;
 
-
-
-
+  for ( i = 0; i < target_rank; i++ ) {
+    if ( ( na_a->shape[i] - na_b->shape[i] ) < 0 ) {
+      rb_raise( rb_eArgError, "no space for backward fit" );
     }
-
-    out_ptr[i] = simd_t[0] + simd_t[1] + simd_t[2] + simd_t[3] + t;
+    shift_by[i] = na_b->shape[i] >> 1;
   }
 
-
-
-
-
+  fit_backwards_raw(
+    target_rank,
+    na_a->shape, (float*) na_a->ptr,
+    na_b->shape, (float*) na_b->ptr,
+    shift_by );
 
-  return;
+  return Qnil;
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// To hold the module object
-VALUE Convolver = Qnil;
 
 /* @overload convolve( signal, kernel )
  * Calculates convolution of an array of floats representing a signal, with a second array representing
@@ -233,10 +105,10 @@ static VALUE narray_convolve( VALUE self, VALUE a, VALUE b ) {
 * Calculates activations of a fully-connected neural network layer. The transfer function after
 * summing weights and applying threshold is a "ReLU", equivalent to
 *     y = x < 0.0 ? 0.0 : x
-* this is less sophisticated than many neural net
-* train.
+* this is less sophisticated than many other neural net functions (such as sigma), but is fast to
+* calculate and to train.
 * @param [NArray] inputs must be rank 1 array of floats
-* @param [NArray] weights must be rank 2 array of floats, with first
+* @param [NArray] weights must be rank 2 array of floats, with first dimension size of inputs, and second dimension size equal to number of outputs
 * @param [NArray] thresholds must be rank 1 array of floats, size equal to number of outputs desired
 * @return [NArray] neuron activations
 */
@@ -266,7 +138,7 @@ static VALUE narray_nn_run_single_layer( VALUE self, VALUE inputs, VALUE weights
   val_thresholds = na_cast_object(thresholds, NA_SFLOAT);
   GetNArray( val_thresholds, na_thresholds );
   if ( na_thresholds->rank != 1 ) {
-    rb_raise( rb_eArgError, "thresholds must be
+    rb_raise( rb_eArgError, "thresholds must be narray of rank 1" );
   }
   if ( na_thresholds->shape[0] != output_size ) {
     rb_raise( rb_eArgError, "thresholds expected size %d, but got %d", output_size, na_thresholds->shape[0] );
@@ -287,4 +159,5 @@ void Init_convolver() {
   Convolver = rb_define_module( "Convolver" );
   rb_define_singleton_method( Convolver, "convolve", narray_convolve, 2 );
   rb_define_singleton_method( Convolver, "nn_run_layer", narray_nn_run_single_layer, 3 );
+  rb_define_singleton_method( Convolver, "fit_kernel_backwards", narray_fit_backwards, 2 );
 }
data/ext/convolver/narray_shared.c
ADDED
@@ -0,0 +1,42 @@
+// ext/convolver/narray_shared.c
+
+#include "narray_shared.h"
+
+// This is copied from na_array.c, with safety checks and temp vars removed
+int na_quick_idxs_to_pos( int rank, int *shape, int *idxs ) {
+  int i, pos = 0;
+  for ( i = rank - 1; i >= 0; i-- ) {
+    pos = pos * shape[i] + idxs[i];
+  }
+  return pos;
+}
+
+// This is inverse of above
+void na_quick_pos_to_idxs( int rank, int *shape, int pos, int *idxs ) {
+  int i;
+  for ( i = 0; i < rank; i++ ) {
+    idxs[ i ] = pos % shape[i];
+    pos /= shape[i];
+  }
+  return;
+}
+
+// used to place kernel data into array for FFTW3 processing
+void fit_backwards_raw( int rank, int *dst_shape, float *dst, int *src_shape, float *src, int *shift_shape ) {
+  int i, j, size, x;
+  int k_idx[16], dst_idx[16];
+
+  size = 1;
+  for ( j = 0; j < rank; j++ ) { size *= src_shape[j]; }
+
+  for ( i = 0; i < size; i++ ) {
+    na_quick_pos_to_idxs( rank, src_shape, i, k_idx );
+    for ( j = 0; j < rank; j++ ) {
+      x = src_shape[j] - shift_shape[j] - k_idx[j] - 1;
+      if ( x < 0 ) x = x + dst_shape[j];
+      dst_idx[j] = x;
+    }
+    dst[ na_quick_idxs_to_pos( rank, dst_shape, dst_idx ) ] = src[i];
+  }
+  return;
+}
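The index arithmetic in fit_backwards_raw is easiest to see in one dimension. An illustrative pure-Ruby trace (plain Arrays, dst pre-zeroed, shift taken as src.size / 2 just as narray_fit_backwards does; not part of the gem):

    def fit_backwards_1d( dst, src, shift )
      src.each_index do |i|
        x = src.size - shift - i - 1
        x += dst.size if x < 0   # wrap negative positions around the end of dst
        dst[x] = src[i]
      end
      dst
    end

    fit_backwards_1d( [0.0] * 4, [1.3, -0.5], 1 )
    # => [1.3, 0.0, 0.0, -0.5] - the kernel reversed and wrapped, which is the
    # layout convolve_fftw3 multiplies against in frequency space.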
data/ext/convolver/narray_shared.h
ADDED
@@ -0,0 +1,22 @@
+// ext/convolver/narray_shared.h
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Declarations of narray helper functions
+//
+
+#ifndef CONVOLVER_NARRAY_SHARED_H
+#define CONVOLVER_NARRAY_SHARED_H
+
+#include <ruby.h>
+#include "narray.h"
+
+// This is copied from na_array.c, with safety checks and temp vars removed
+int na_quick_idxs_to_pos( int rank, int *shape, int *idxs );
+
+// This is inverse of above
+void na_quick_pos_to_idxs( int rank, int *shape, int pos, int *idxs );
+
+void fit_backwards_raw( int rank, int *dst_shape, float *dst, int *src_shape, float *src, int *shift_shape );
+
+#endif
data/lib/convolver.rb
CHANGED
@@ -1,7 +1,48 @@
 require 'narray'
 require "convolver/convolver"
 require "convolver/version"
+require 'fftw3'
 
 module Convolver
+  # Uses FFTW3 library to calculate convolution of an array of floats representing a signal,
+  # with a second array representing a kernel. The two parameters must have the same rank.
+  # The output has same rank, its size in each dimension d is given by
+  #   signal.shape[d] - kernel.shape[d] + 1
+  # @param [NArray] signal must be same size or larger than kernel in each dimension
+  # @param [NArray] kernel must be same size or smaller than signal in each dimension
+  # @return [NArray] result of convolving signal with kernel
+  def self.convolve_fftw3 signal, kernel
+    combined_shape, shift_by, ranges = fft_offsets( signal.shape, kernel.shape )
 
+    mod_a = NArray.sfloat(*combined_shape)
+    mod_a[*shift_by] = signal
+
+    mod_b = NArray.sfloat(*combined_shape)
+
+    Convolver.fit_kernel_backwards( mod_b, kernel )
+
+    afreqs = FFTW3.fft(mod_a)
+    bfreqs = FFTW3.fft(mod_b)
+    cfreqs = afreqs * bfreqs
+
+    (FFTW3.ifft( cfreqs ).real * (1.0/mod_a.size))[*ranges]
+  end
+
+  private
+
+  def self.fft_offsets signal_shape, kernel_shape
+    combined_shape = []
+    shift_by = []
+    ranges = []
+    signal_shape.each_with_index do |signal_size, i|
+      kernel_size = kernel_shape[i]
+
+      combined_shape[i] = signal_size + kernel_size - 1
+      output_size = signal_size - kernel_size + 1
+      output_offset = kernel_size - 1
+      shift_by[i] = kernel_size / 2
+      ranges[i] = (output_offset...(output_offset + output_size))
+    end
+    [ combined_shape, shift_by, ranges ]
+  end
 end
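Tracing fft_offsets for the 1-D README example (signal length 3, kernel length 2) may help: combined_shape is [4], shift_by is [1] and ranges is [1...3]. The signal is zero-padded to [0.0, 0.3, 0.4, 0.5], fit_kernel_backwards lays the kernel out as [1.3, 0.0, 0.0, -0.5], and the circular convolution computed through the FFTs produces 0.19 and 0.27 at positions 1 and 2, which the final range slice returns. A worked check of one of those positions (illustrative arithmetic only, plain Arrays):

    # Circular convolution at index 1 of the two padded length-4 buffers:
    mod_a = [0.0, 0.3, 0.4, 0.5]
    mod_b = [1.3, 0.0, 0.0, -0.5]
    c1 = (0..3).map { |k| mod_a[k] * mod_b[(1 - k) % 4] }.reduce(:+)
    # => 0.19 (0.3 * 1.3 + 0.4 * -0.5), the first entry of the README result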
data/lib/convolver/version.rb
CHANGED
data/spec/convolve_fftw3_spec.rb
ADDED
@@ -0,0 +1,161 @@
+require 'helpers'
+
+describe Convolver do
+  describe "#convolve_fftw3" do
+
+    it "should work like the example in the README" do
+      a = NArray[ 0.3, 0.4, 0.5 ]
+      b = NArray[ 1.3, -0.5 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 0.19, 0.27 ]
+    end
+
+    it "should convolve 1D arrays with a variety of signal and kernel lengths" do
+      a = NArray[ 0.3 ]
+      b = NArray[ -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.21 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2 ]
+      b = NArray[ -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.21, -0.28, -0.35, -0.14 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2 ]
+      b = NArray[ 1.1, -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 0.05, 0.09, 0.41 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2 ]
+      b = NArray[ 1.1, -0.7, -0.2 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.05, 0.05 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2, 0.6 ]
+      b = NArray[ 1.1, -0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 0.05, 0.09, 0.41, -0.2 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2, 0.6 ]
+      b = NArray[ 1.1, -0.7, 2.1 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ 1.1, 0.51, 1.67 ]
+
+      a = NArray[ 0.3, 0.4, 0.5, 0.2, 0.6 ]
+      b = NArray[ 0.6, -0.5, -0.4, 0.7 ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ -0.08, 0.33 ]
+    end
+
+    it "should calculate a 2D convolution" do
+      a = NArray[ [ 0.3, 0.4, 0.5 ], [ 0.6, 0.8, 0.2 ], [ 0.9, 1.0, 0.1 ] ]
+      b = NArray[ [ 1.2, -0.5 ], [ 0.5, -1.3 ] ]
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ [ -0.58, 0.37 ], [ -0.53, 1.23 ] ]
+    end
+
+    it "should calculate a 3D convolution" do
+      # 5x4x3
+      a = NArray[
+        [ [ 1.0, 0.6, 1.1, 0.2, 0.9 ], [ 1.0, 0.7, 0.8, 1.0, 1.0 ], [ 0.2, 0.6, 0.1, 0.2, 0.5 ], [ 0.5, 0.9, 0.2, 0.1, 0.6 ] ],
+        [ [ 0.4, 0.9, 0.4, 0.0, 0.6 ], [ 0.2, 1.1, 0.2, 0.4, 0.1 ], [ 0.4, 0.2, 0.5, 0.8, 0.7 ], [ 0.1, 0.9, 0.7, 0.1, 0.3 ] ],
+        [ [ 0.8, 0.6, 1.0, 0.1, 0.4 ], [ 0.3, 0.8, 0.6, 0.7, 1.1 ], [ 0.9, 1.0, 0.3, 0.4, 0.6 ], [ 0.2, 0.5, 0.4, 0.7, 0.2 ] ]
+      ]
+
+      # 3x3x3
+      b = NArray[
+        [ [ -0.9, 1.2, 0.8 ], [ 0.9, 0.1, -0.5 ], [ 1.1, 0.1, -1.1 ] ],
+        [ [ -0.2, -1.0, 1.4 ], [ -1.4, 0.0, 1.3 ], [ 0.3, 1.0, -0.5 ] ],
+        [ [ 0.6, 0.0, 0.7 ], [ -0.7, 1.1, 1.2 ], [ 1.3, 0.7, 0.0 ] ]
+      ]
+
+      # Should be 3x2x1
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[ [ [ 5.51, 3.04, 4.3 ], [ 3.04, 6.31, 3.87 ] ] ]
+    end
+
+    it "should calculate a 4D convolution" do
+      # 3x4x5x3
+      a = NArray[
+        [ [ [ 0.5, 0.4, 0.9 ], [ 0.1, 0.9, 0.8 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ] ],
+        [ [ [ 0.5, 0.4, 0.9 ], [ 0.1, 0.9, 0.8 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ] ],
+        [ [ [ 0.5, 0.4, 0.9 ], [ 0.1, 0.9, 0.8 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ],
+          [ [ 0.0, 0.4, 0.0 ], [ 0.2, 0.3, 0.8 ], [ 0.6, 0.3, 0.2 ], [ 0.7, 0.4, 0.3 ] ],
+          [ [ 0.3, 0.3, 0.1 ], [ 0.6, 0.9, 0.4 ], [ 0.4, 0.0, 0.1 ], [ 0.8, 0.3, 0.4 ] ] ] ]
+
+      # 2x3x3x2
+      b = NArray[ [
+        [ [ 1.1, 0.6 ], [ 1.2, 0.6 ], [ 0.8, 0.1 ] ], [ [ -0.4, 0.8 ], [ 0.5, 0.4 ], [ 1.2, 0.2 ] ],
+        [ [ 0.8, 0.2 ], [ 0.5, 0.0 ], [ 1.4, 1.3 ] ] ],
+        [ [ [ 1.1, 0.6 ], [ 1.2, 0.6 ], [ 0.8, 0.1 ] ], [ [ -0.4, 0.8 ], [ 0.5, 0.4 ], [ 1.2, 0.2 ] ],
+        [ [ 0.8, 0.2 ], [ 0.5, 0.0 ], [ 1.4, 1.3 ] ] ] ]
+
+      # Should be 2x2x3x2
+      c = Convolver.convolve_fftw3( a, b )
+      c.should be_narray_like NArray[
+        [ [ [ 8.5, 8.2 ], [ 11.34, 9.68 ] ], [ [ 7.68, 6.56 ], [ 11.24, 7.16 ] ], [ [ 9.14, 6.54 ], [ 12.44, 9.2 ] ] ],
+        [ [ [ 8.5, 8.2 ], [ 11.34, 9.68 ] ], [ [ 7.68, 6.56 ], [ 11.24, 7.16 ] ], [ [ 9.14, 6.54 ], [ 12.44, 9.2 ] ] ]
+      ]
+    end
+
+    describe "compared with #convolve" do
+      it "should produce same results for 1D arrays " do
+        (1..30).each do |signal_length|
+          (1..signal_length).each do |kernel_length|
+            signal = NArray.sfloat(signal_length).random()
+            kernel = NArray.sfloat(kernel_length).random()
+            expect_result = Convolver.convolve( signal, kernel )
+            got_result = Convolver.convolve_fftw3( signal, kernel )
+            got_result.should be_narray_like expect_result
+          end
+        end
+      end
+
+      it "should produce same results for 2D arrays " do
+        (3..10).each do |signal_x|
+          (signal_x-2..signal_x+2).each do |signal_y|
+            (1..signal_x).each do |kernel_x|
+              (1..signal_y).each do |kernel_y|
+                signal = NArray.sfloat(signal_x,signal_y).random()
+                kernel = NArray.sfloat(kernel_x,kernel_y).random()
+                expect_result = Convolver.convolve( signal, kernel )
+                got_result = Convolver.convolve_fftw3( signal, kernel )
+                got_result.should be_narray_like expect_result
+              end
+            end
+          end
+        end
+      end
+
+      it "should produce same results for 3D arrays " do
+        (3..5).each do |signal_x|
+          (signal_x-2..signal_x+2).each do |signal_y|
+            (signal_x-2..signal_x+2).each do |signal_z|
+              (1..signal_x).each do |kernel_x|
+                (1..signal_y).each do |kernel_y|
+                  (1..signal_z).each do |kernel_z|
+                    signal = NArray.sfloat(signal_x,signal_y,signal_z).random()
+                    kernel = NArray.sfloat(kernel_x,kernel_y,kernel_z).random()
+                    expect_result = Convolver.convolve( signal, kernel )
+                    got_result = Convolver.convolve_fftw3( signal, kernel )
+                    got_result.should be_narray_like expect_result
+                  end
+                end
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/spec/helpers.rb
CHANGED
@@ -12,7 +12,7 @@ RSpec::Matchers.define :be_narray_like do |expected_narray|
     else
       d = given - expected_narray
       difference = ( d * d ).sum / d.size
-      if difference > 1e-
+      if difference > 1e-9
         @error = "Numerical difference with mean square error #{difference}"
       end
     end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: convolver
 version: !ruby/object:Gem::Version
-  version: 0.0
+  version: 0.1.0
 platform: ruby
 authors:
 - Neil Slater
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-10-
+date: 2013-10-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: narray
@@ -24,6 +24,20 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: 0.6.0.8
+- !ruby/object:Gem::Dependency
+  name: fftw3
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0.3'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0.3'
 - !ruby/object:Gem::Dependency
   name: yard
   requirement: !ruby/object:Gem::Requirement
@@ -112,10 +126,17 @@ files:
 - benchmarks/convolver_vs_fftw3.rb
 - benchmarks/nn_layer_benchmark.rb
 - convolver.gemspec
+- ext/convolver/cnn_components.c
+- ext/convolver/cnn_components.h
+- ext/convolver/convolve_raw.c
+- ext/convolver/convolve_raw.h
 - ext/convolver/convolver.c
 - ext/convolver/extconf.rb
+- ext/convolver/narray_shared.c
+- ext/convolver/narray_shared.h
 - lib/convolver.rb
 - lib/convolver/version.rb
+- spec/convolve_fftw3_spec.rb
 - spec/convolver_spec.rb
 - spec/helpers.rb
 homepage: http://github.com/neilslater/convolver
@@ -143,6 +164,7 @@ signing_key:
 specification_version: 4
 summary: Convolution for NArray
 test_files:
+- spec/convolve_fftw3_spec.rb
 - spec/convolver_spec.rb
 - spec/helpers.rb
 has_rdoc: