vector_sse 0.0.1.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 98eb4a76855923685b4f2857b834900304bb85c8
4
+ data.tar.gz: 5ca280566cecbd8cf474e6950ebf21dfe3104199
5
+ SHA512:
6
+ metadata.gz: 39cbc928cf7f4fc555d8edca11caa0eefb28d34b23356c837f2ef453afd06d6f1b6798c802ad4cad76f65fe3ae25fd6dee459f32fc731bfaad386e4ecf9dbf49
7
+ data.tar.gz: df243d6d5260ac8f414ccfb5b2c68dfc0a090304cd7c0d49947dc0a6d880dbe966a9cba265d6bedb7caadd41fd6d7bc55e9776929ab453870d85d3c17112b04f
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ test_vectorops.rb
2
+ vector_sse-*.gem
3
+ tmp/
4
+ *.sublime-*
data/LICENSE.txt ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2015, Robert Glissmann
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+ ## Welcome to VectorSSE ##
2
+
3
+ VectorSSE is a Ruby gem that uses x86 Streaming SIMD Extensions (SSE) to accelerate
4
+ array and matrix computations. SIMD instructions perform a single operation
5
+ on a vector of data rather than on a single value. Vector operations can
6
+ improve the performance of algorithms that exhibit data-level parallelism.
7
+
8
+ A simple example:
9
+ Let us say you need to compute the sum of an array containing 1000 floating
10
+ point numbers. One approach is to loop through the array and consecutively add
11
+ each element to a running sum. Another approach is to break the problem
12
+ into a set of smaller, independent computations that can be performed in parallel
13
+ with SIMD instructions. For example, we can break the 1000-element array into
14
+ four 250-element arrays and use SIMD extensions to find four sums in parallel.
15
+ Of course, this leaves us with four separate sums, so we must add these using
16
+ normal, non-SSE instructions to yield the overall sum of the 1000-element array.
17
+
18
+
19
+ ## Install the gem ##
20
+
21
+ Install it with [RubyGems](https://rubygems.org/) (currently pre-release)
22
+
23
+ gem install vector_sse --pre
24
+
25
+ or add this to your Gemfile if you use [Bundler](https://bundler.io/):
26
+
27
+ gem "vector_sse"
28
+
29
+
30
+ ## Getting Started ##
31
+
32
+ The VectorSSE gem defines two data types: the Array class, which inherits from
33
+ the core Array class, and the Matrix class. Unlike typical Ruby containers, the
34
+ Array and Matrix classes are intended to store a homogeneous data type. At this
35
+ time, the supported data types include signed 32-bit and 64-bit integers,
36
+ 32-bit floating point, and double-precision floating point. The type is
37
+ identified when the Array or Matrix is constructed so that all operations can
38
+ use the appropriate implementation.
39
+
40
+
41
+ ### Example: Multiply two matrices ###
42
+
43
+ require 'vector_sse'
44
+
45
+ left = VectorSSE::Matrix.new( VectorSSE::Type::F32, 4, 4, [
46
+ 1.2, 2.3, 3.4, 4.5,
47
+ 5.6, 6.7, 7.8, 8.9,
48
+ 9.05, 10.9, 11.85, 12.2,
49
+ 13.43, 14.85, 15.67, 16.5
50
+ ])
51
+ right = VectorSSE::Matrix.new( VectorSSE::Type::F32, 4, 2, [
52
+ 1, 2,
53
+ 5, 6,
54
+ 9, 10,
55
+ 13, 14
56
+ ])
57
+
58
+ product = left * right
59
+
60
+
61
+ ### Example: Scale a matrix by a scalar value ###
62
+
63
+ require 'vector_sse'
64
+
65
+ left = VectorSSE::Matrix.new( VectorSSE::Type::F32, 4, 4, [
66
+ 1.2, 2.3, 3.4, 4.5,
67
+ 5.6, 6.7, 7.8, 8.9,
68
+ 9.05, 10.9, 11.85, 12.2,
69
+ 13.43, 14.85, 15.67, 16.5
70
+ ])
71
+
72
+ product = left * 3.14
73
+
74
+
75
+ ### Example: Subtract Arrays and find the sum of the elements of an Array ###
76
+
77
+ require 'vector_sse'
78
+
79
+ # Initialize a ten-element integer array
80
+ left = VectorSSE::Array.new( VectorSSE::Type::S32, 10 )
81
+ left.fill([
82
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
83
+ ])
84
+ right = VectorSSE::Array.new( VectorSSE::Type::S32, 10 )
85
+ right.fill([
86
+ 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
87
+ ])
88
+
89
+ # Subtract the arrays
90
+ result = left - right
91
+
92
+ # Get the sum of the elements of an array
93
+ sum = left.sum
94
+
95
+
96
+ ## License and copyright ##
97
+
98
+ VectorSSE is released under the BSD License.
99
+
100
+ Copyright: (C) 2015 by Robert Glissmann. All Rights Reserved.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rake/extensiontask'
2
+ require 'rake/testtask'
3
+ require 'rspec/core/rake_task'
4
+
5
+ Rake::ExtensionTask.new "vector_sse" do |ext|
6
+ ext.lib_dir = "lib/vector_sse"
7
+ end
8
+
9
+ RSpec::Core::RakeTask.new( :spec )
@@ -0,0 +1 @@
1
+ Makefile
@@ -0,0 +1,13 @@
1
+ require 'mkmf'
2
+
3
+ # Give it a name
4
+ extension_name = 'vector_sse'
5
+
6
+ $CFLAGS << ' -O3'
7
+
8
+ # Check for dependencies
9
+ have_header( 'emmintrin.h' )
10
+
11
+ # Do the work
12
+ create_makefile "vector_sse/vector_sse"
13
+
@@ -0,0 +1,80 @@
1
+ //
2
+ // Copyright (c) 2015, Robert Glissmann
3
+ // All rights reserved.
4
+ //
5
+ // Redistribution and use in source and binary forms, with or without
6
+ // modification, are permitted provided that the following conditions are met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright notice, this
9
+ // list of conditions and the following disclaimer.
10
+ //
11
+ // * Redistributions in binary form must reproduce the above copyright notice,
12
+ // this list of conditions and the following disclaimer in the documentation
13
+ // and/or other materials provided with the distribution.
14
+ //
15
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ //
26
+
27
+
28
+ // Include the Ruby headers and goodies
29
+ #include <emmintrin.h>
30
+ #include <stdio.h>
31
+ #include "ruby.h"
32
+
33
+ #include "vector_sse_add.h"
34
+ #include "vector_sse_sum.h"
35
+ #include "vector_sse_mul.h"
36
+ #include "vector_sse_vec_mul.h"
37
+
38
+ // TODO: pairs a result VALUE with an overflow VALUE; nothing visible in this file references it yet (NOTE(review): wire it into the methods or remove it)
39
+ struct vector_sse_result {
40
+ VALUE result;
41
+ VALUE overflow;
42
+ };
43
+
44
+ // Defining a space for information and references about the module to be stored internally
45
+ VALUE VectorSSE = Qnil;
46
+
47
+ // Prototype for the initialization method - Ruby calls this, not you
48
+ void Init_vector_sse();
49
+
50
+
51
+ // Extension entry point: defines the VectorSSE module and registers its singleton methods (name, C function, argument count)
52
+ void Init_vector_sse() {
53
+
54
+ VectorSSE = rb_define_module("VectorSSE");
55
+ // Element-wise addition of two arrays, one method per element type
56
+ rb_define_singleton_method( VectorSSE, "add_s32", method_vec_add_s32, 2 );
57
+ rb_define_singleton_method( VectorSSE, "add_s64", method_vec_add_s64, 2 );
58
+ rb_define_singleton_method( VectorSSE, "add_f32", method_vec_add_f32, 2 );
59
+ rb_define_singleton_method( VectorSSE, "add_f64", method_vec_add_f64, 2 );
60
+ // Element-wise subtraction of two arrays
61
+ rb_define_singleton_method( VectorSSE, "sub_s32", method_vec_sub_s32, 2 );
62
+ rb_define_singleton_method( VectorSSE, "sub_s64", method_vec_sub_s64, 2 );
63
+ rb_define_singleton_method( VectorSSE, "sub_f32", method_vec_sub_f32, 2 );
64
+ rb_define_singleton_method( VectorSSE, "sub_f64", method_vec_sub_f64, 2 );
65
+ // Sum methods take a single argument
66
+ rb_define_singleton_method( VectorSSE, "sum_s32", method_vec_sum_s32, 1 );
67
+ rb_define_singleton_method( VectorSSE, "sum_s64", method_vec_sum_s64, 1 );
68
+ rb_define_singleton_method( VectorSSE, "sum_f32", method_vec_sum_f32, 1 );
69
+ rb_define_singleton_method( VectorSSE, "sum_f64", method_vec_sum_f64, 1 );
70
+ // Matrix multiply methods take six arguments. NOTE(review): no mul_f64 is registered, unlike the other groups - confirm this is intentional
71
+ rb_define_singleton_method( VectorSSE, "mul_s32", method_mat_mul_s32, 6 );
72
+ rb_define_singleton_method( VectorSSE, "mul_s64", method_mat_mul_s64, 6 );
73
+ rb_define_singleton_method( VectorSSE, "mul_f32", method_mat_mul_f32, 6 );
74
+ // vec_mul methods take two arguments
75
+ rb_define_singleton_method( VectorSSE, "vec_mul_s32", method_vec_mul_s32, 2 );
76
+ rb_define_singleton_method( VectorSSE, "vec_mul_s64", method_vec_mul_s64, 2 );
77
+ rb_define_singleton_method( VectorSSE, "vec_mul_f32", method_vec_mul_f32, 2 );
78
+ rb_define_singleton_method( VectorSSE, "vec_mul_f64", method_vec_mul_f64, 2 );
79
+ }
80
+
@@ -0,0 +1,127 @@
1
+ //
2
+ // Copyright (c) 2015, Robert Glissmann
3
+ // All rights reserved.
4
+ //
5
+ // Redistribution and use in source and binary forms, with or without
6
+ // modification, are permitted provided that the following conditions are met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright notice, this
9
+ // list of conditions and the following disclaimer.
10
+ //
11
+ // * Redistributions in binary form must reproduce the above copyright notice,
12
+ // this list of conditions and the following disclaimer in the documentation
13
+ // and/or other materials provided with the distribution.
14
+ //
15
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ //
26
+
27
+ // %% license-end-token %%
28
+ //
29
+ // Author: Robert.Glissmann@gmail.com (Robert Glissmann)
30
+ //
31
+ //
32
+
33
+ #include <emmintrin.h>
34
+ #include "vector_sse_add.h"
35
+
36
+ // TEMPLATE_ADD_S generates a Ruby-callable C function FUNC_NAME( self, left, right ) that
+ // combines two equal-length Ruby Arrays element-wise with the intrinsic ADD and returns
+ // a new Ruby Array of the same length.
+ //   TYPE       - C element type of the scratch segments
+ //   OFTYPE     - integer type used for the overflow bit test
+ //   TYPE_SIZE  - element width in bits (used to build the sign-bit mask)
+ //   CONV_IN    - converts a Ruby VALUE to TYPE; CONV_OUT converts TYPE back to a VALUE
+ //   EL_PER_VEC - number of elements processed per intrinsic call
+ //   ADD        - two-operand intrinsic applied to the loaded vectors
+ // Raises RuntimeError when the lengths differ; both arguments are checked with Check_Type( ..., T_ARRAY ).
+ // NOTE(review): length/offset are uint32_t while RARRAY_LEN is assigned into them - confirm very large arrays are not a concern.
+ // NOTE(review): left_vec/right_vec are __m128i even when ADD is a _ps/_pd intrinsic (see the f32/f64 instantiations) - confirm this compiles cleanly on the target compilers.
+ #define TEMPLATE_ADD_S( FUNC_NAME, TYPE, OFTYPE, TYPE_SIZE, CONV_IN, CONV_OUT, EL_PER_VEC, ADD ) \
37
+ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
38
+ { \
39
+ uint32_t length = 0; \
40
+ uint32_t offset = 0; \
41
+ uint32_t vector_pos = 0; \
42
+ uint32_t input_index = 0; \
43
+ /* Scratch buffers, each holding EL_PER_VEC elements */ \
44
+ TYPE left_segment[ EL_PER_VEC ]; \
45
+ TYPE right_segment[ EL_PER_VEC ]; \
46
+ TYPE result_segment[ EL_PER_VEC ]; \
47
+ \
48
+ __m128i left_vec; \
49
+ __m128i right_vec; \
50
+ __m128i* result_vec = (__m128i*)result_segment; \
51
+ /* Overflow-detection state; the check that uses it is currently disabled (see below) */ \
52
+ __m128i sign_left; \
53
+ __m128i sign_right; \
54
+ const OFTYPE OVERFLOW_MASK = ( (OFTYPE)0x1 << (TYPE_SIZE-1) ); \
55
+ TYPE overflow[ EL_PER_VEC ]; \
56
+ __m128i* overflow_vec = (__m128i*)overflow; \
57
+ \
58
+ VALUE result = Qnil; \
59
+ /* Both arguments must be Ruby Arrays of equal length */ \
60
+ Check_Type( left, T_ARRAY ); \
61
+ Check_Type( right, T_ARRAY ); \
62
+ \
63
+ if ( RARRAY_LEN( left ) != RARRAY_LEN( right ) ) \
64
+ { \
65
+ rb_raise( rb_eRuntimeError, "Vector lengths must be the same" ); \
66
+ } \
67
+ \
68
+ length = RARRAY_LEN( left ); \
69
+ result = rb_ary_new2( length ); \
70
+ /* Walk the inputs EL_PER_VEC elements at a time */ \
71
+ if ( length > 0 ) \
72
+ { \
73
+ for ( offset = 0; offset < length; offset += EL_PER_VEC ) \
74
+ { \
75
+ for ( vector_pos = 0; vector_pos < EL_PER_VEC; ++vector_pos ) \
76
+ { \
77
+ input_index = offset + vector_pos; \
78
+ if ( input_index < length ) \
79
+ { \
80
+ left_segment[ vector_pos ] = CONV_IN( rb_ary_entry( left, input_index ) ); \
81
+ right_segment[ vector_pos ] = CONV_IN( rb_ary_entry( right, input_index ) ); \
82
+ } \
83
+ else \
84
+ { \
85
+ left_segment[ vector_pos ] = 0; \
86
+ right_segment[ vector_pos ] = 0; \
87
+ } \
88
+ } \
89
+ /* Load both segments and apply ADD; the result lands in result_segment through result_vec */ \
90
+ left_vec = _mm_loadu_si128( (const __m128i *)left_segment ); \
91
+ right_vec = _mm_loadu_si128( (const __m128i *)right_segment ); \
92
+ *result_vec = ADD( left_vec, right_vec ); \
93
+ /* overflow = (result ^ left) & (result ^ right), stored into overflow[] through overflow_vec */ \
94
+ sign_left = _mm_xor_si128(*result_vec, left_vec); \
95
+ sign_right = _mm_xor_si128(*result_vec, right_vec); \
96
+ *overflow_vec = _mm_and_si128(sign_left, sign_right); \
97
+ \
98
+ for ( vector_pos = 0; vector_pos < EL_PER_VEC; ++vector_pos ) \
99
+ { \
+ /* NOTE(review): the leading `0 &&` makes this condition always false, so the overflow raise never fires; the same test is also expanded for the float and subtraction variants - confirm it is meaningful there before enabling */ \
100
+ if ( 0 && ( (OFTYPE)overflow[ vector_pos ] & OVERFLOW_MASK ) ) \
101
+ { \
102
+ rb_raise( rb_eRuntimeError, "Vector addition overflow" ); \
103
+ } \
104
+ \
105
+ input_index = offset + vector_pos; \
106
+ /* Only lanes that map to a real input element are appended; zero-padded lanes are skipped */ \
107
+ if ( input_index < length ) \
108
+ { \
109
+ rb_ary_push( result, CONV_OUT( result_segment[ vector_pos ] ) ); \
110
+ } \
111
+ } \
112
+ } \
113
+ } \
114
+ \
115
+ return result; \
116
+ }
117
+
118
+
119
+ // Element-wise addition functions for the s32, s64, f32 and f64 element types
+ TEMPLATE_ADD_S( method_vec_add_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
120
+ TEMPLATE_ADD_S( method_vec_add_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
121
+ TEMPLATE_ADD_S( method_vec_add_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_add_ps );
122
+ TEMPLATE_ADD_S( method_vec_add_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_add_pd );
123
+ // Element-wise subtraction reuses the same template with the _mm_sub_* intrinsics
124
+ TEMPLATE_ADD_S( method_vec_sub_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_sub_epi32 );
125
+ TEMPLATE_ADD_S( method_vec_sub_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_sub_epi64 );
126
+ TEMPLATE_ADD_S( method_vec_sub_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_sub_ps );
127
+ TEMPLATE_ADD_S( method_vec_sub_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_sub_pd );
@@ -0,0 +1,48 @@
1
+ //
2
+ // Copyright (c) 2015, Robert Glissmann
3
+ // All rights reserved.
4
+ //
5
+ // Redistribution and use in source and binary forms, with or without
6
+ // modification, are permitted provided that the following conditions are met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright notice, this
9
+ // list of conditions and the following disclaimer.
10
+ //
11
+ // * Redistributions in binary form must reproduce the above copyright notice,
12
+ // this list of conditions and the following disclaimer in the documentation
13
+ // and/or other materials provided with the distribution.
14
+ //
15
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ //
26
+
27
+ // %% license-end-token %%
28
+ //
29
+ // Author: Robert.Glissmann@gmail.com (Robert Glissmann)
30
+ //
31
+ //
32
+
33
+ #ifndef VECTOR_SSE_ADD_H
34
+ #define VECTOR_SSE_ADD_H
35
+
36
+ #include "ruby.h"
37
+
38
+ // Element-wise addition of two Ruby Arrays (left + right), one function per element type; each returns a VALUE
+ VALUE method_vec_add_s32( VALUE self, VALUE left, VALUE right );
39
+ VALUE method_vec_add_s64( VALUE self, VALUE left, VALUE right );
40
+ VALUE method_vec_add_f32( VALUE self, VALUE left, VALUE right );
41
+ VALUE method_vec_add_f64( VALUE self, VALUE left, VALUE right );
42
+ // Element-wise subtraction of two Ruby Arrays (left - right), same signature as the add functions
43
+ VALUE method_vec_sub_s32( VALUE self, VALUE left, VALUE right );
44
+ VALUE method_vec_sub_s64( VALUE self, VALUE left, VALUE right );
45
+ VALUE method_vec_sub_f32( VALUE self, VALUE left, VALUE right );
46
+ VALUE method_vec_sub_f64( VALUE self, VALUE left, VALUE right );
47
+
48
+ #endif // VECTOR_SSE_ADD_H