vector_sse 0.0.1.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 98eb4a76855923685b4f2857b834900304bb85c8
4
+ data.tar.gz: 5ca280566cecbd8cf474e6950ebf21dfe3104199
5
+ SHA512:
6
+ metadata.gz: 39cbc928cf7f4fc555d8edca11caa0eefb28d34b23356c837f2ef453afd06d6f1b6798c802ad4cad76f65fe3ae25fd6dee459f32fc731bfaad386e4ecf9dbf49
7
+ data.tar.gz: df243d6d5260ac8f414ccfb5b2c68dfc0a090304cd7c0d49947dc0a6d880dbe966a9cba265d6bedb7caadd41fd6d7bc55e9776929ab453870d85d3c17112b04f
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ test_vectorops.rb
2
+ vector_sse-*.gem
3
+ tmp/
4
+ *.sublime-*
data/LICENSE.txt ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2015, Robert Glissmann
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+ ## Welcome to VectorSSE ##
2
+
3
+ VectorSSE is a Ruby gem that uses x86 Streaming SIMD Extensions (SSE) to accelerate
4
+ array and matrix computations. SIMD instructions perform a single operation
5
+ on a vector of data rather than on a single value. Vector operations can
6
+ improve the performance of algorithms that exhibit data-level parallelism.
7
+
8
+ A simple example:
9
+ Let us say you need to compute the sum of an array containing 1000 floating
10
+ point numbers. One approach is to loop through the array and consecutively add
11
+ each element to a running sum. Another approach is to break the problem
12
+ into a set of smaller, independent computations that can be performed in parallel
13
+ with SIMD instructions. For example, we can break the 1000-element array into
14
+ four 250-element arrays and use SIMD extensions to find four sums in parallel.
15
+ Of course, this leaves us with four separate sums, so we must add these using
16
+ normal, non-SSE instructions to yield the overall sum of the 1000-element array.
17
+
18
+
19
+ ## Install the gem ##
20
+
21
+ Install it with [RubyGems](https://rubygems.org/) (currently pre-release)
22
+
23
+ gem install vector_sse --pre
24
+
25
+ or add this to your Gemfile if you use [Bundler](https://bundler.io/):
26
+
27
+ gem "vector_sse"
28
+
29
+
30
+ ## Getting Started ##
31
+
32
+ The VectorSSE gem defines two data types: the Array class, which inherits from
33
+ the core Array class, and the Matrix class. Unlike typical Ruby containers, the
34
+ Array and Matrix classes are intended to store a homogeneous data type. At this
35
+ time, the supported data types include signed 32-bit and 64-bit integers,
36
+ 32-bit floating point, and double-precision floating point. The type is
37
+ identified when the Array or Matrix is constructed so that all operations can
38
+ use the appropriate implementation.
39
+
40
+
41
+ ### Example: Multiply two matrices ###
42
+
43
+ require 'vector_sse'
44
+
45
+ left = VectorSSE::Matrix.new( VectorSSE::Type::F32, 4, 4, [
46
+ 1.2, 2.3, 3.4, 4.5,
47
+ 5.6, 6.7, 7.8, 8.9,
48
+ 9.05, 10.9, 11.85, 12.2,
49
+ 13.43, 14.85, 15.67, 16.5
50
+ ])
51
+ right = VectorSSE::Matrix.new( VectorSSE::Type::F32, 4, 2, [
52
+ 1, 2,
53
+ 5, 6,
54
+ 9, 10,
55
+ 13, 14
56
+ ])
57
+
58
+ product = left * right
59
+
60
+
61
+ ### Example: Scale a matrix by a scalar value ###
62
+
63
+ require 'vector_sse'
64
+
65
+ left = VectorSSE::Matrix.new( VectorSSE::Type::F32, 4, 4, [
66
+ 1.2, 2.3, 3.4, 4.5,
67
+ 5.6, 6.7, 7.8, 8.9,
68
+ 9.05, 10.9, 11.85, 12.2,
69
+ 13.43, 14.85, 15.67, 16.5
70
+ ])
71
+
72
+ product = left * 3.14
73
+
74
+
75
+ ### Example: Subtract Arrays and find the sum of the elements of an Array ###
76
+
77
+ require 'vector_sse'
78
+
79
+ # Initialize a ten-element integer array
80
+ left = VectorSSE::Array.new( VectorSSE::Type::S32, 10 )
81
+ left.fill([
82
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
83
+ ])
84
+ right = VectorSSE::Array.new( VectorSSE::Type::S32, 10 )
85
+ right.fill([
86
+ 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
87
+ ])
88
+
89
+ # Subtract the arrays
90
+ result = left - right
91
+
92
+ # Get the sum of the elements of an array
93
+ sum = left.sum
94
+
95
+
96
+ ## License and copyright ##
97
+
98
+ VectorSSE is released under the BSD License.
99
+
100
+ Copyright: (C) 2015 by Robert Glissmann. All Rights Reserved.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rake/extensiontask'
2
+ require 'rake/testtask' # NOTE(review): no Rake::TestTask is defined in this Rakefile - this require looks unused
3
+ require 'rspec/core/rake_task'
4
+ # Define the build tasks for the "vector_sse" native extension; the compiled library is placed under lib/vector_sse.
5
+ Rake::ExtensionTask.new "vector_sse" do |ext|
6
+ ext.lib_dir = "lib/vector_sse"
7
+ end
8
+ # Define the `spec` task, which runs the RSpec suite.
9
+ RSpec::Core::RakeTask.new( :spec )
@@ -0,0 +1 @@
1
+ Makefile
@@ -0,0 +1,13 @@
1
+ require 'mkmf'
2
+
3
+ # Give it a name
4
+ extension_name = 'vector_sse'
5
+
6
+ $CFLAGS << ' -O3'
7
+
8
+ # Check for dependencies
9
+ have_header( 'emmintrin.h' )
10
+
11
+ # Do the work
12
+ create_makefile "vector_sse/vector_sse"
13
+
@@ -0,0 +1,80 @@
1
+ //
2
+ // Copyright (c) 2015, Robert Glissmann
3
+ // All rights reserved.
4
+ //
5
+ // Redistribution and use in source and binary forms, with or without
6
+ // modification, are permitted provided that the following conditions are met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright notice, this
9
+ // list of conditions and the following disclaimer.
10
+ //
11
+ // * Redistributions in binary form must reproduce the above copyright notice,
12
+ // this list of conditions and the following disclaimer in the documentation
13
+ // and/or other materials provided with the distribution.
14
+ //
15
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ //
26
+
27
+
28
+ // Include the Ruby headers and goodies
29
+ #include <emmintrin.h>
30
+ #include <stdio.h>
31
+ #include "ruby.h"
32
+
33
+ #include "vector_sse_add.h"
34
+ #include "vector_sse_sum.h"
35
+ #include "vector_sse_mul.h"
36
+ #include "vector_sse_vec_mul.h"
37
+
38
+ // TODO: this struct is not referenced anywhere else in this file; presumably it was meant to pair a result with an overflow indicator - confirm or remove.
39
+ struct vector_sse_result {
40
+ VALUE result;
41
+ VALUE overflow;
42
+ };
43
+
44
+ // Handle to the VectorSSE Ruby module; stays Qnil until Init_vector_sse() assigns it.
45
+ VALUE VectorSSE = Qnil;
46
+
47
+ // Prototype for the extension's initialization function, which Ruby itself invokes when the extension is loaded.
48
+ void Init_vector_sse();
49
+
50
+
51
+ // Extension entry point: defines the VectorSSE module and registers every native routine on it as a singleton (module-level) method.
52
+ void Init_vector_sse() {
53
+ // The last argument of each rb_define_singleton_method call is the number of Ruby arguments the method accepts.
54
+ VectorSSE = rb_define_module("VectorSSE");
55
+ // add_*: two arguments (left, right); implemented in vector_sse_add.
56
+ rb_define_singleton_method( VectorSSE, "add_s32", method_vec_add_s32, 2 );
57
+ rb_define_singleton_method( VectorSSE, "add_s64", method_vec_add_s64, 2 );
58
+ rb_define_singleton_method( VectorSSE, "add_f32", method_vec_add_f32, 2 );
59
+ rb_define_singleton_method( VectorSSE, "add_f64", method_vec_add_f64, 2 );
60
+ // sub_*: two arguments (left, right); implemented in vector_sse_add.
61
+ rb_define_singleton_method( VectorSSE, "sub_s32", method_vec_sub_s32, 2 );
62
+ rb_define_singleton_method( VectorSSE, "sub_s64", method_vec_sub_s64, 2 );
63
+ rb_define_singleton_method( VectorSSE, "sub_f32", method_vec_sub_f32, 2 );
64
+ rb_define_singleton_method( VectorSSE, "sub_f64", method_vec_sub_f64, 2 );
65
+ // sum_*: one argument; declared in vector_sse_sum.h.
66
+ rb_define_singleton_method( VectorSSE, "sum_s32", method_vec_sum_s32, 1 );
67
+ rb_define_singleton_method( VectorSSE, "sum_s64", method_vec_sum_s64, 1 );
68
+ rb_define_singleton_method( VectorSSE, "sum_f32", method_vec_sum_f32, 1 );
69
+ rb_define_singleton_method( VectorSSE, "sum_f64", method_vec_sum_f64, 1 );
70
+ // mul_*: six arguments (presumably matrix multiplication - see vector_sse_mul.h). NOTE(review): no mul_f64 is registered here - confirm that is intended.
71
+ rb_define_singleton_method( VectorSSE, "mul_s32", method_mat_mul_s32, 6 );
72
+ rb_define_singleton_method( VectorSSE, "mul_s64", method_mat_mul_s64, 6 );
73
+ rb_define_singleton_method( VectorSSE, "mul_f32", method_mat_mul_f32, 6 );
74
+ // vec_mul_*: two arguments; declared in vector_sse_vec_mul.h.
75
+ rb_define_singleton_method( VectorSSE, "vec_mul_s32", method_vec_mul_s32, 2 );
76
+ rb_define_singleton_method( VectorSSE, "vec_mul_s64", method_vec_mul_s64, 2 );
77
+ rb_define_singleton_method( VectorSSE, "vec_mul_f32", method_vec_mul_f32, 2 );
78
+ rb_define_singleton_method( VectorSSE, "vec_mul_f64", method_vec_mul_f64, 2 );
79
+ }
80
+
@@ -0,0 +1,127 @@
1
+ //
2
+ // Copyright (c) 2015, Robert Glissmann
3
+ // All rights reserved.
4
+ //
5
+ // Redistribution and use in source and binary forms, with or without
6
+ // modification, are permitted provided that the following conditions are met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright notice, this
9
+ // list of conditions and the following disclaimer.
10
+ //
11
+ // * Redistributions in binary form must reproduce the above copyright notice,
12
+ // this list of conditions and the following disclaimer in the documentation
13
+ // and/or other materials provided with the distribution.
14
+ //
15
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ //
26
+
27
+ // %% license-end-token %%
28
+ //
29
+ // Author: Robert.Glissmann@gmail.com (Robert Glissmann)
30
+ //
31
+ //
32
+
33
+ #include <emmintrin.h>
34
+ #include "vector_sse_add.h"
35
+ // TEMPLATE_ADD_S stamps out one Ruby-callable function FUNC_NAME( self, left, right ) that combines two equal-length Arrays element-wise with the SSE2 intrinsic ADD and returns a new Array.
36
+ #define TEMPLATE_ADD_S( FUNC_NAME, TYPE, OFTYPE, TYPE_SIZE, CONV_IN, CONV_OUT, EL_PER_VEC, ADD ) \
37
+ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
38
+ { \
39
+ uint32_t length = 0; \
40
+ uint32_t offset = 0; \
41
+ uint32_t vector_pos = 0; \
42
+ uint32_t input_index = 0; \
43
+ /* TYPE: C element type; CONV_IN / CONV_OUT: Ruby-to-C / C-to-Ruby converters; EL_PER_VEC: elements per 128-bit vector. */ \
44
+ TYPE left_segment[ EL_PER_VEC ]; \
45
+ TYPE right_segment[ EL_PER_VEC ]; \
46
+ TYPE result_segment[ EL_PER_VEC ]; \
47
+ /* The result stays in a register and is copied out with an unaligned store (see below). */ \
48
+ __m128i left_vec; \
49
+ __m128i right_vec; \
50
+ __m128i result_vec; \
51
+ /* OFTYPE / TYPE_SIZE give the integer view of one element; they only feed the (disabled) overflow test. */ \
52
+ __m128i sign_left; \
53
+ __m128i sign_right; \
54
+ const OFTYPE OVERFLOW_MASK = ( (OFTYPE)0x1 << (TYPE_SIZE-1) ); \
55
+ TYPE overflow[ EL_PER_VEC ]; \
56
+ /* No __m128i* aliases of the scratch arrays: plain stack arrays are not guaranteed to be 16-byte aligned. */ \
57
+ \
58
+ VALUE result = Qnil; \
59
+ /* Both operands must be Arrays (Check_Type raises TypeError otherwise) of identical length (RuntimeError otherwise). */ \
60
+ Check_Type( left, T_ARRAY ); \
61
+ Check_Type( right, T_ARRAY ); \
62
+ \
63
+ if ( RARRAY_LEN( left ) != RARRAY_LEN( right ) ) \
64
+ { \
65
+ rb_raise( rb_eRuntimeError, "Vector lengths must be the same" ); \
66
+ } \
67
+ \
68
+ length = RARRAY_LEN( left ); \
69
+ result = rb_ary_new2( length ); \
70
+ /* Walk the inputs one vector at a time; unused lanes of the final vector are zero-filled and never emitted. */ \
71
+ if ( length > 0 ) \
72
+ { \
73
+ for ( offset = 0; offset < length; offset += EL_PER_VEC ) \
74
+ { \
75
+ for ( vector_pos = 0; vector_pos < EL_PER_VEC; ++vector_pos ) \
76
+ { \
77
+ input_index = offset + vector_pos; \
78
+ if ( input_index < length ) \
79
+ { \
80
+ left_segment[ vector_pos ] = CONV_IN( rb_ary_entry( left, input_index ) ); \
81
+ right_segment[ vector_pos ] = CONV_IN( rb_ary_entry( right, input_index ) ); \
82
+ } \
83
+ else \
84
+ { \
85
+ left_segment[ vector_pos ] = 0; \
86
+ right_segment[ vector_pos ] = 0; \
87
+ } \
88
+ } \
89
+ /* Unaligned loads and stores throughout, because the segments are ordinary stack arrays. */ \
90
+ left_vec = _mm_loadu_si128( (const __m128i *)left_segment ); \
91
+ right_vec = _mm_loadu_si128( (const __m128i *)right_segment ); \
92
+ result_vec = ADD( left_vec, right_vec ); \
93
+ _mm_storeu_si128( (__m128i *)result_segment, result_vec ); \
94
+ sign_left = _mm_xor_si128(result_vec, left_vec); \
95
+ sign_right = _mm_xor_si128(result_vec, right_vec); \
96
+ _mm_storeu_si128( (__m128i *)overflow, _mm_and_si128(sign_left, sign_right) ); \
97
+ /* NOTE(review): the overflow test below is switched off by its leading "0 &&", so no overflow error is ever raised. */ \
98
+ for ( vector_pos = 0; vector_pos < EL_PER_VEC; ++vector_pos ) \
99
+ { \
100
+ if ( 0 && ( (OFTYPE)overflow[ vector_pos ] & OVERFLOW_MASK ) ) \
101
+ { \
102
+ rb_raise( rb_eRuntimeError, "Vector addition overflow" ); \
103
+ } \
104
+ \
105
+ input_index = offset + vector_pos; \
106
+ \
107
+ if ( input_index < length ) \
108
+ { \
109
+ rb_ary_push( result, CONV_OUT( result_segment[ vector_pos ] ) ); \
110
+ } \
111
+ } \
112
+ } \
113
+ } \
114
+ \
115
+ return result; \
116
+ }
117
+
118
+ // Addition entry points, one per element type. NOTE(review): the f32/f64 variants hand the macro's __m128i operands to _mm_add_ps / _mm_add_pd - confirm this builds without lax vector conversions.
119
+ TEMPLATE_ADD_S( method_vec_add_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
120
+ TEMPLATE_ADD_S( method_vec_add_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
121
+ TEMPLATE_ADD_S( method_vec_add_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_add_ps );
122
+ TEMPLATE_ADD_S( method_vec_add_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_add_pd );
123
+ // Subtraction entry points: the same template with the _mm_sub_* intrinsics (the same review note applies to the f32/f64 variants).
124
+ TEMPLATE_ADD_S( method_vec_sub_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_sub_epi32 );
125
+ TEMPLATE_ADD_S( method_vec_sub_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_sub_epi64 );
126
+ TEMPLATE_ADD_S( method_vec_sub_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_sub_ps );
127
+ TEMPLATE_ADD_S( method_vec_sub_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_sub_pd );
@@ -0,0 +1,48 @@
1
+ //
2
+ // Copyright (c) 2015, Robert Glissmann
3
+ // All rights reserved.
4
+ //
5
+ // Redistribution and use in source and binary forms, with or without
6
+ // modification, are permitted provided that the following conditions are met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright notice, this
9
+ // list of conditions and the following disclaimer.
10
+ //
11
+ // * Redistributions in binary form must reproduce the above copyright notice,
12
+ // this list of conditions and the following disclaimer in the documentation
13
+ // and/or other materials provided with the distribution.
14
+ //
15
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ //
26
+
27
+ // %% license-end-token %%
28
+ //
29
+ // Author: Robert.Glissmann@gmail.com (Robert Glissmann)
30
+ //
31
+ //
32
+
33
+ #ifndef VECTOR_SSE_ADD_H
34
+ #define VECTOR_SSE_ADD_H
35
+
36
+ #include "ruby.h"
37
+ // Element-wise addition of two equal-length Ruby Arrays, returning a new Array; the suffix names the element type (s32/s64 integers, f32/f64 floating point).
38
+ VALUE method_vec_add_s32( VALUE self, VALUE left, VALUE right );
39
+ VALUE method_vec_add_s64( VALUE self, VALUE left, VALUE right );
40
+ VALUE method_vec_add_f32( VALUE self, VALUE left, VALUE right );
41
+ VALUE method_vec_add_f64( VALUE self, VALUE left, VALUE right );
42
+ // Element-wise subtraction (left - right) with the same argument and return conventions as the add functions.
43
+ VALUE method_vec_sub_s32( VALUE self, VALUE left, VALUE right );
44
+ VALUE method_vec_sub_s64( VALUE self, VALUE left, VALUE right );
45
+ VALUE method_vec_sub_f32( VALUE self, VALUE left, VALUE right );
46
+ VALUE method_vec_sub_f64( VALUE self, VALUE left, VALUE right );
47
+
48
+ #endif // VECTOR_SSE_ADD_H