vector_sse 0.0.1.pre → 0.0.2.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 98eb4a76855923685b4f2857b834900304bb85c8
4
- data.tar.gz: 5ca280566cecbd8cf474e6950ebf21dfe3104199
3
+ metadata.gz: fdabd1511ca1ab68168a0723f06d26afe3312627
4
+ data.tar.gz: 9bb853808e5c0a8ec8e8b1592e84a0537954f6dc
5
5
  SHA512:
6
- metadata.gz: 39cbc928cf7f4fc555d8edca11caa0eefb28d34b23356c837f2ef453afd06d6f1b6798c802ad4cad76f65fe3ae25fd6dee459f32fc731bfaad386e4ecf9dbf49
7
- data.tar.gz: df243d6d5260ac8f414ccfb5b2c68dfc0a090304cd7c0d49947dc0a6d880dbe966a9cba265d6bedb7caadd41fd6d7bc55e9776929ab453870d85d3c17112b04f
6
+ metadata.gz: 213dffc68fc752e4520f0d5f7e910467b3fb51faba6ed5f6052b38d6a677928ead0b899b7658427fa78e08c7400f9ac874112b8c325174a328a9c137cdf34d6d
7
+ data.tar.gz: 10b7386669a7a1acbbcad0a9f24323e703487997d18969f9b02a6f0d5ba02ac2bd9ff1c50ba5df71f33d18413e38545454eb48700570046bc2c2c5fb1f54f712
data/.gitignore CHANGED
@@ -1,4 +1,8 @@
1
1
  test_vectorops.rb
2
2
  vector_sse-*.gem
3
3
  tmp/
4
- *.sublime-*
4
+ *.sublime-*
5
+ Gemfile.lock
6
+ vendor/
7
+ *DS_Store
8
+ .bundle/
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+ gem 'rake', '10.4.2'
3
+ gem 'rake-compiler', '0.9.5'
4
+ gem 'rspec'
5
+
data/ext/.gitignore ADDED
@@ -0,0 +1 @@
1
+ *DS_Store
@@ -3,7 +3,7 @@ require 'mkmf'
3
3
  # Give it a name
4
4
  extension_name = 'vector_sse'
5
5
 
6
- $CFLAGS << ' -O3'
6
+ $CFLAGS << ' -O3 -msse -msse2 -msse3 -msse4.1 -msse4.2'
7
7
 
8
8
  # Check for dependencies
9
9
  have_header( 'emmintrin.h' )
@@ -32,6 +32,7 @@
32
32
 
33
33
  #include <emmintrin.h>
34
34
  #include "vector_sse_add.h"
35
+ #include "vector_sse_common.h"
35
36
 
36
37
  #define TEMPLATE_ADD_S( FUNC_NAME, TYPE, OFTYPE, TYPE_SIZE, CONV_IN, CONV_OUT, EL_PER_VEC, ADD ) \
37
38
  VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
@@ -118,10 +119,10 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
118
119
 
119
120
  TEMPLATE_ADD_S( method_vec_add_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
120
121
  TEMPLATE_ADD_S( method_vec_add_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
121
- TEMPLATE_ADD_S( method_vec_add_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_add_ps );
122
- TEMPLATE_ADD_S( method_vec_add_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_add_pd );
122
+ TEMPLATE_ADD_S( method_vec_add_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, add_f32 );
123
+ TEMPLATE_ADD_S( method_vec_add_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, add_f64 );
123
124
 
124
125
  TEMPLATE_ADD_S( method_vec_sub_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_sub_epi32 );
125
126
  TEMPLATE_ADD_S( method_vec_sub_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_sub_epi64 );
126
- TEMPLATE_ADD_S( method_vec_sub_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_sub_ps );
127
- TEMPLATE_ADD_S( method_vec_sub_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_sub_pd );
127
+ TEMPLATE_ADD_S( method_vec_sub_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, sub_f32 );
128
+ TEMPLATE_ADD_S( method_vec_sub_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, sub_f64 );
@@ -0,0 +1,31 @@
1
+ #include "vector_sse_common.h"
2
+
3
+ __m128i add_f32( const __m128i left, const __m128i right ) /* Adds the packed floats held in two __m128i registers. */
4
+ {
5
+ return _mm_castps_si128( _mm_add_ps( _mm_castsi128_ps( left ), _mm_castsi128_ps( right ) ) ); /* view operands as floats, add, view the sum as __m128i again (casts only, no value conversion) */
6
+ }
7
+
8
+ __m128i add_f64( const __m128i left, const __m128i right ) /* Adds the packed doubles held in two __m128i registers; returns the sums' bit pattern as __m128i. */
9
+ {
10
+ return _mm_castpd_si128( _mm_add_pd( _mm_castsi128_pd( left ), _mm_castsi128_pd( right ) ) ); /* bit-cast the result like add_f32 does; _mm_cvtpd_epi32 would convert the sums to 32-bit integers and lose the double lanes */
11
+ }
12
+
13
+ __m128i sub_f32( const __m128i left, const __m128i right ) /* Subtracts the packed floats in right from those in left. */
14
+ {
15
+ return _mm_castps_si128( _mm_sub_ps( _mm_castsi128_ps( left ), _mm_castsi128_ps( right ) ) ); /* view operands as floats, subtract, view the difference as __m128i again (casts only, no value conversion) */
16
+ }
17
+
18
+ __m128i sub_f64( const __m128i left, const __m128i right ) /* Subtracts the packed doubles in right from those in left; returns the differences' bit pattern as __m128i. */
19
+ {
20
+ return _mm_castpd_si128( _mm_sub_pd( _mm_castsi128_pd( left ), _mm_castsi128_pd( right ) ) ); /* bit-cast the result like sub_f32 does; _mm_cvtpd_epi32 would convert the differences to 32-bit integers and lose the double lanes */
21
+ }
22
+
23
+ __m128i mul_f32( const __m128i left, const __m128i right ) /* Multiplies the packed floats held in two __m128i registers. */
24
+ {
25
+ return _mm_castps_si128( _mm_mul_ps( _mm_castsi128_ps( left ), _mm_castsi128_ps( right ) ) ); /* view operands as floats, multiply, view the product as __m128i again (casts only, no value conversion) */
26
+ }
27
+
28
+ __m128i mul_f64( const __m128i left, const __m128i right ) /* Multiplies the packed doubles held in two __m128i registers; returns the products' bit pattern as __m128i. */
29
+ {
30
+ return _mm_castpd_si128( _mm_mul_pd( _mm_castsi128_pd( left ), _mm_castsi128_pd( right ) ) ); /* bit-cast the result like mul_f32 does; _mm_cvtpd_epi32 would convert the products to 32-bit integers and lose the double lanes */
31
+ }
@@ -0,0 +1,13 @@
1
+ #ifndef VECTOR_SSE_COMMON_H
2
+ #define VECTOR_SSE_COMMON_H
3
+
4
+ #include <emmintrin.h>
5
+ /* Float (f32) and double (f64) add/sub/mul helpers whose parameters and return value are all __m128i. */
6
+ __m128i add_f32( const __m128i left, const __m128i right );
7
+ __m128i add_f64( const __m128i left, const __m128i right );
8
+ __m128i sub_f32( const __m128i left, const __m128i right );
9
+ __m128i sub_f64( const __m128i left, const __m128i right );
10
+ __m128i mul_f32( const __m128i left, const __m128i right );
11
+ __m128i mul_f64( const __m128i left, const __m128i right );
12
+
13
+ #endif // VECTOR_SSE_COMMON_H
@@ -32,6 +32,7 @@
32
32
 
33
33
  #include <emmintrin.h>
34
34
  #include "vector_sse_mul.h"
35
+ #include "vector_sse_common.h"
35
36
 
36
37
  #define SSE_VECTOR_WIDTH (4)
37
38
 
@@ -40,7 +41,6 @@ VALUE method_mat_mul_s32( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
40
41
  uint32_t left_row = 0;
41
42
  uint32_t right_col = 0;
42
43
  uint32_t common = 0;
43
- uint32_t vector_pos = 0;
44
44
  uint32_t input_index = 0;
45
45
  uint32_t pos = 0;
46
46
 
@@ -148,17 +148,8 @@ VALUE method_mat_mul_s64( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
148
148
  uint32_t left_row = 0;
149
149
  uint32_t right_col = 0;
150
150
  uint32_t common = 0;
151
- uint32_t vector_pos = 0;
152
- uint32_t input_index = 0;
153
151
  uint32_t pos = 0;
154
152
 
155
- int64_t left_segment[ SSE_VECTOR_WIDTH ];
156
- int64_t right_segment[ SSE_VECTOR_WIDTH ];
157
-
158
- __m128i* left_vec = NULL;
159
- __m128i* right_vec = NULL;
160
- __m128i result_vec;
161
-
162
153
  VALUE result = Qnil;
163
154
 
164
155
  int64_t* result_native = NULL;
@@ -233,7 +224,6 @@ VALUE method_mat_mul_f32( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
233
224
  uint32_t left_row = 0;
234
225
  uint32_t right_col = 0;
235
226
  uint32_t common = 0;
236
- uint32_t vector_pos = 0;
237
227
  uint32_t input_index = 0;
238
228
  uint32_t pos = 0;
239
229
 
@@ -303,7 +293,7 @@ VALUE method_mat_mul_f32( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
303
293
 
304
294
  left_vec = ( __m128i *)left_segment;
305
295
  right_vec = ( __m128i *)right_segment;
306
- result_vec = _mm_mul_ps( *left_vec, *right_vec );
296
+ result_vec = mul_f32( *left_vec, *right_vec );
307
297
 
308
298
  _mm_store_si128( (__m128i*)result_segment, result_vec );
309
299
  for ( pos = 0; pos < SSE_VECTOR_WIDTH; ++pos )
@@ -32,9 +32,31 @@
32
32
 
33
33
  #include <string.h>
34
34
  #include <emmintrin.h>
35
+ #include <ruby.h>
35
36
  #include "vector_sse_sum.h"
37
+ #include "vector_sse_common.h"
36
38
 
37
- #define TEMPLATE_SUM_S( FUNC_NAME, TYPE, OFTYPE, TYPE_SIZE, CONV_IN, CONV_OUT, EL_PER_VEC, ADD ) \
39
+
40
+ // Check for overflow
41
+ // __m128i sign_left;
42
+ // __m128i sign_right;
43
+ // const int32_t OVERFLOW_MASK = ( (int32_t)0x1 << (32-1) );
44
+ // int32_t overflow[ 4 ];
45
+ // __m128i* overflow_vec = (__m128i*)overflow;
46
+ // sign_left = _mm_xor_si128(result_vec, left_vec);
47
+ // sign_right = _mm_xor_si128(result_vec, right_vec);
48
+ // *overflow_vec = _mm_and_si128(sign_left, sign_right);
49
+
50
+ // for ( vector_pos = 0; vector_pos < 4; ++vector_pos )
51
+ // {
52
+ // if ( ( (int32_t)overflow[ vector_pos ] & OVERFLOW_MASK ) )
53
+ // {
54
+ // rb_raise( rb_eRuntimeError, "Vector addition overflow" );
55
+ // }
56
+ // }
57
+
58
+
59
+ #define TEMPLATE_SUM_S( FUNC_NAME, TYPE, CONV_IN, CONV_OUT, EL_PER_VEC, ADDER ) \
38
60
  VALUE FUNC_NAME( VALUE self, VALUE vector ) \
39
61
  { \
40
62
  uint32_t length = 0; \
@@ -44,20 +66,12 @@ VALUE FUNC_NAME( VALUE self, VALUE vector ) \
44
66
  \
45
67
  TYPE result = 0; \
46
68
  \
47
- TYPE left_segment[ EL_PER_VEC ]; \
48
- TYPE right_segment[ EL_PER_VEC ]; \
49
69
  TYPE result_segment[ EL_PER_VEC ]; \
50
70
  TYPE vector_segment[ EL_PER_VEC ]; \
51
71
  \
52
- __m128i left_vec; \
53
- __m128i right_vec; \
72
+ __m128i left_vec; \
73
+ __m128i right_vec; \
54
74
  __m128i result_vec; \
55
- \
56
- __m128i sign_left; \
57
- __m128i sign_right; \
58
- const OFTYPE OVERFLOW_MASK = ( (OFTYPE)0x1 << (TYPE_SIZE-1) ); \
59
- OFTYPE overflow[ EL_PER_VEC ]; \
60
- __m128i* overflow_vec = (__m128i*)overflow; \
61
75
  \
62
76
  Check_Type( vector, T_ARRAY ); \
63
77
  \
@@ -85,19 +99,7 @@ VALUE FUNC_NAME( VALUE self, VALUE vector ) \
85
99
  right_vec = _mm_loadu_si128( (const __m128i *)vector_segment ); \
86
100
  left_vec = _mm_loadu_si128( &result_vec ); \
87
101
  \
88
- result_vec = ADD( left_vec, right_vec ); \
89
- \
90
- sign_left = _mm_xor_si128(result_vec, left_vec); \
91
- sign_right = _mm_xor_si128(result_vec, right_vec); \
92
- *overflow_vec = _mm_and_si128(sign_left, sign_right); \
93
- \
94
- for ( vector_pos = 0; vector_pos < EL_PER_VEC; ++vector_pos ) \
95
- { \
96
- if ( ( (OFTYPE)overflow[ vector_pos ] & OVERFLOW_MASK ) ) \
97
- { \
98
- rb_raise( rb_eRuntimeError, "Vector addition overflow" ); \
99
- } \
100
- } \
102
+ result_vec = ADDER( left_vec, right_vec ); \
101
103
  } \
102
104
  \
103
105
  _mm_store_si128( (__m128i*)result_segment, result_vec ); \
@@ -111,8 +113,8 @@ VALUE FUNC_NAME( VALUE self, VALUE vector ) \
111
113
  return CONV_OUT( result ); \
112
114
  }
113
115
 
114
- TEMPLATE_SUM_S( method_vec_sum_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
115
- TEMPLATE_SUM_S( method_vec_sum_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
116
- TEMPLATE_SUM_S( method_vec_sum_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_add_ps );
117
- TEMPLATE_SUM_S( method_vec_sum_f64, double, int64_t, 32, NUM2DBL, DBL2NUM, 2, _mm_add_pd );
116
+ TEMPLATE_SUM_S( method_vec_sum_s32, int32_t, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
117
+ TEMPLATE_SUM_S( method_vec_sum_s64, int64_t, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
118
+ TEMPLATE_SUM_S( method_vec_sum_f32, float, NUM2DBL, DBL2NUM, 4, add_f32 );
119
+ TEMPLATE_SUM_S( method_vec_sum_f64, double, NUM2DBL, DBL2NUM, 2, add_f64 );
118
120
 
@@ -36,6 +36,7 @@
36
36
  #include <smmintrin.h>
37
37
  #endif
38
38
  #include "vector_sse_vec_mul.h"
39
+ #include "vector_sse_common.h"
39
40
 
40
41
  #define SSE_VECTOR_WIDTH (4)
41
42
  // #define EL_PER_VEC SSE_VECTOR_WIDTH
@@ -66,14 +67,14 @@ static inline __m128i mul_s64( const __m128i* left_vec, const __m128i* right_vec
66
67
  return _mm_loadu_si128( (const __m128i *)result );
67
68
  }
68
69
 
69
- static inline __m128i mul_f32(const __m128i* a, const __m128i* b )
70
+ static inline __m128i mul_f32_ptr(const __m128i* a, const __m128i* b )
70
71
  {
71
- return _mm_mul_ps( *a, *b );
72
+ return mul_f32( *a, *b );
72
73
  }
73
74
 
74
- static inline __m128i mul_f64(const __m128i* a, const __m128i* b )
75
+ static inline __m128i mul_f64_ptr(const __m128i* a, const __m128i* b )
75
76
  {
76
- return _mm_mul_pd( *a, *b );
77
+ return mul_f64( *a, *b );
77
78
  }
78
79
 
79
80
 
@@ -95,9 +96,6 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
95
96
  \
96
97
  TYPE result_segment[ EL_PER_VEC ]; \
97
98
  __m128i result_vec; \
98
- \
99
- __m128i sign_left; \
100
- __m128i sign_right; \
101
99
  \
102
100
  Check_Type( left, T_ARRAY ); \
103
101
  Check_Type( right, T_ARRAY ); \
@@ -152,6 +150,6 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
152
150
 
153
151
  TEMPLATE_VEC_MUL_S( method_vec_mul_s32, int32_t, 32, NUM2INT, INT2NUM, 4, mul_s32 );
154
152
  TEMPLATE_VEC_MUL_S( method_vec_mul_s64, int64_t, 64, NUM2LL, LL2NUM, 2, mul_s64 );
155
- TEMPLATE_VEC_MUL_S( method_vec_mul_f32, float, 32, NUM2DBL, DBL2NUM, 4, mul_f32 );
156
- TEMPLATE_VEC_MUL_S( method_vec_mul_f64, double, 64, NUM2DBL, DBL2NUM, 2, mul_f64 );
153
+ TEMPLATE_VEC_MUL_S( method_vec_mul_f32, float, 32, NUM2DBL, DBL2NUM, 4, mul_f32_ptr );
154
+ TEMPLATE_VEC_MUL_S( method_vec_mul_f64, double, 64, NUM2DBL, DBL2NUM, 2, mul_f64_ptr );
157
155
 
data/lib/vector_sse.rb CHANGED
@@ -36,7 +36,7 @@ require File.join( bin_root, 'vector_sse.so' )
36
36
 
37
37
  module VectorSSE
38
38
 
39
- VERSION = "0.0.1.pre"
39
+ VERSION = "0.0.2.pre"
40
40
 
41
41
  module Type
42
42
  S32 = 0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vector_sse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.pre
4
+ version: 0.0.2.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Glissmann
@@ -14,28 +14,28 @@ dependencies:
14
14
  name: rake-compiler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 0.9.5
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.9.5
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
33
  version: 3.1.0
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.1.0
41
41
  description: VectorSse employs x86 Streaming SIMD Extensions (SSE), v3 or greater,
@@ -46,15 +46,19 @@ extensions:
46
46
  - ext/vector_sse/extconf.rb
47
47
  extra_rdoc_files: []
48
48
  files:
49
- - .gitignore
49
+ - ".gitignore"
50
+ - Gemfile
50
51
  - LICENSE.txt
51
52
  - README.md
52
53
  - Rakefile
54
+ - ext/.gitignore
53
55
  - ext/vector_sse/.gitignore
54
56
  - ext/vector_sse/extconf.rb
55
57
  - ext/vector_sse/vector_sse.c
56
58
  - ext/vector_sse/vector_sse_add.c
57
59
  - ext/vector_sse/vector_sse_add.h
60
+ - ext/vector_sse/vector_sse_common.c
61
+ - ext/vector_sse/vector_sse_common.h
58
62
  - ext/vector_sse/vector_sse_mul.c
59
63
  - ext/vector_sse/vector_sse_mul.h
60
64
  - ext/vector_sse/vector_sse_sum.c
@@ -76,17 +80,17 @@ require_paths:
76
80
  - lib
77
81
  required_ruby_version: !ruby/object:Gem::Requirement
78
82
  requirements:
79
- - - '>='
83
+ - - ">="
80
84
  - !ruby/object:Gem::Version
81
85
  version: '0'
82
86
  required_rubygems_version: !ruby/object:Gem::Requirement
83
87
  requirements:
84
- - - '>'
88
+ - - ">"
85
89
  - !ruby/object:Gem::Version
86
90
  version: 1.3.1
87
91
  requirements: []
88
92
  rubyforge_project:
89
- rubygems_version: 2.0.14
93
+ rubygems_version: 2.4.8
90
94
  signing_key:
91
95
  specification_version: 4
92
96
  summary: SIMD accelerated vector and matrix operations