vector_sse 0.0.1.pre → 0.0.2.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 98eb4a76855923685b4f2857b834900304bb85c8
4
- data.tar.gz: 5ca280566cecbd8cf474e6950ebf21dfe3104199
3
+ metadata.gz: fdabd1511ca1ab68168a0723f06d26afe3312627
4
+ data.tar.gz: 9bb853808e5c0a8ec8e8b1592e84a0537954f6dc
5
5
  SHA512:
6
- metadata.gz: 39cbc928cf7f4fc555d8edca11caa0eefb28d34b23356c837f2ef453afd06d6f1b6798c802ad4cad76f65fe3ae25fd6dee459f32fc731bfaad386e4ecf9dbf49
7
- data.tar.gz: df243d6d5260ac8f414ccfb5b2c68dfc0a090304cd7c0d49947dc0a6d880dbe966a9cba265d6bedb7caadd41fd6d7bc55e9776929ab453870d85d3c17112b04f
6
+ metadata.gz: 213dffc68fc752e4520f0d5f7e910467b3fb51faba6ed5f6052b38d6a677928ead0b899b7658427fa78e08c7400f9ac874112b8c325174a328a9c137cdf34d6d
7
+ data.tar.gz: 10b7386669a7a1acbbcad0a9f24323e703487997d18969f9b02a6f0d5ba02ac2bd9ff1c50ba5df71f33d18413e38545454eb48700570046bc2c2c5fb1f54f712
data/.gitignore CHANGED
@@ -1,4 +1,8 @@
1
1
  test_vectorops.rb
2
2
  vector_sse-*.gem
3
3
  tmp/
4
- *.sublime-*
4
+ *.sublime-*
5
+ Gemfile.lock
6
+ vendor/
7
+ *DS_Store
8
+ .bundle/
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+ gem 'rake', '10.4.2'
3
+ gem 'rake-compiler', '0.9.5'
4
+ gem 'rspec'
5
+
data/ext/.gitignore ADDED
@@ -0,0 +1 @@
1
+ *DS_Store
@@ -3,7 +3,7 @@ require 'mkmf'
3
3
  # Give it a name
4
4
  extension_name = 'vector_sse'
5
5
 
6
- $CFLAGS << ' -O3'
6
+ $CFLAGS << ' -O3 -msse -msse2 -msse3 -msse4.1 -msse4.2'
7
7
 
8
8
  # Check for dependencies
9
9
  have_header( 'emmintrin.h' )
@@ -32,6 +32,7 @@
32
32
 
33
33
  #include <emmintrin.h>
34
34
  #include "vector_sse_add.h"
35
+ #include "vector_sse_common.h"
35
36
 
36
37
  #define TEMPLATE_ADD_S( FUNC_NAME, TYPE, OFTYPE, TYPE_SIZE, CONV_IN, CONV_OUT, EL_PER_VEC, ADD ) \
37
38
  VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
@@ -118,10 +119,10 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
118
119
 
119
120
  TEMPLATE_ADD_S( method_vec_add_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
120
121
  TEMPLATE_ADD_S( method_vec_add_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
121
- TEMPLATE_ADD_S( method_vec_add_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_add_ps );
122
- TEMPLATE_ADD_S( method_vec_add_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_add_pd );
122
+ TEMPLATE_ADD_S( method_vec_add_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, add_f32 );
123
+ TEMPLATE_ADD_S( method_vec_add_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, add_f64 );
123
124
 
124
125
  TEMPLATE_ADD_S( method_vec_sub_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_sub_epi32 );
125
126
  TEMPLATE_ADD_S( method_vec_sub_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_sub_epi64 );
126
- TEMPLATE_ADD_S( method_vec_sub_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_sub_ps );
127
- TEMPLATE_ADD_S( method_vec_sub_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, _mm_sub_pd );
127
+ TEMPLATE_ADD_S( method_vec_sub_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, sub_f32 );
128
+ TEMPLATE_ADD_S( method_vec_sub_f64, double, int64_t, 64, NUM2DBL, DBL2NUM, 2, sub_f64 );
@@ -0,0 +1,31 @@
1
+ #include "vector_sse_common.h"
2
+ // Adds packed floats: both __m128i operands are reinterpreted bit-for-bit as __m128, summed with _mm_add_ps, and the sum is bit-cast back to __m128i.
3
+ __m128i add_f32( const __m128i left, const __m128i right )
4
+ {
5
+ return _mm_castps_si128( _mm_add_ps( _mm_castsi128_ps( left ), _mm_castsi128_ps( right ) ) );
6
+ }
7
+ // Adds packed doubles: operands are reinterpreted as __m128d, summed with _mm_add_pd, and the sum is bit-cast back with _mm_castpd_si128 (a value conversion such as _mm_cvtpd_epi32 would turn the doubles into 32-bit integers).
8
+ __m128i add_f64( const __m128i left, const __m128i right )
9
+ {
10
+ return _mm_castpd_si128( _mm_add_pd( _mm_castsi128_pd( left ), _mm_castsi128_pd( right ) ) );
11
+ }
12
+ // Subtracts packed floats (left - right): both __m128i operands are reinterpreted bit-for-bit as __m128, passed to _mm_sub_ps, and the result is bit-cast back to __m128i.
13
+ __m128i sub_f32( const __m128i left, const __m128i right )
14
+ {
15
+ return _mm_castps_si128( _mm_sub_ps( _mm_castsi128_ps( left ), _mm_castsi128_ps( right ) ) );
16
+ }
17
+ // Subtracts packed doubles (left - right): operands are reinterpreted as __m128d, passed to _mm_sub_pd, and the result is bit-cast back with _mm_castpd_si128 (a value conversion such as _mm_cvtpd_epi32 would turn the doubles into 32-bit integers).
18
+ __m128i sub_f64( const __m128i left, const __m128i right )
19
+ {
20
+ return _mm_castpd_si128( _mm_sub_pd( _mm_castsi128_pd( left ), _mm_castsi128_pd( right ) ) );
21
+ }
22
+ // Multiplies packed floats element-wise: both __m128i operands are reinterpreted bit-for-bit as __m128, passed to _mm_mul_ps, and the product is bit-cast back to __m128i.
23
+ __m128i mul_f32( const __m128i left, const __m128i right )
24
+ {
25
+ return _mm_castps_si128( _mm_mul_ps( _mm_castsi128_ps( left ), _mm_castsi128_ps( right ) ) );
26
+ }
27
+ // Multiplies packed doubles element-wise: operands are reinterpreted as __m128d, passed to _mm_mul_pd, and the product is bit-cast back with _mm_castpd_si128 (a value conversion such as _mm_cvtpd_epi32 would turn the doubles into 32-bit integers).
28
+ __m128i mul_f64( const __m128i left, const __m128i right )
29
+ {
30
+ return _mm_castpd_si128( _mm_mul_pd( _mm_castsi128_pd( left ), _mm_castsi128_pd( right ) ) );
31
+ }
@@ -0,0 +1,13 @@
1
+ #ifndef VECTOR_SSE_COMMON_H
2
+ #define VECTOR_SSE_COMMON_H
3
+ // Floating-point add/sub/mul helpers that take and return __m128i rather than __m128 / __m128d.
4
+ #include <emmintrin.h>
5
+ // The _f32 / _f64 suffix names the element type the operation is performed on; operands and result travel as raw 128-bit integer vectors.
6
+ __m128i add_f32( const __m128i left, const __m128i right );
7
+ __m128i add_f64( const __m128i left, const __m128i right );
8
+ __m128i sub_f32( const __m128i left, const __m128i right );
9
+ __m128i sub_f64( const __m128i left, const __m128i right );
10
+ __m128i mul_f32( const __m128i left, const __m128i right );
11
+ __m128i mul_f64( const __m128i left, const __m128i right );
12
+
13
+ #endif // VECTOR_SSE_COMMON_H
@@ -32,6 +32,7 @@
32
32
 
33
33
  #include <emmintrin.h>
34
34
  #include "vector_sse_mul.h"
35
+ #include "vector_sse_common.h"
35
36
 
36
37
  #define SSE_VECTOR_WIDTH (4)
37
38
 
@@ -40,7 +41,6 @@ VALUE method_mat_mul_s32( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
40
41
  uint32_t left_row = 0;
41
42
  uint32_t right_col = 0;
42
43
  uint32_t common = 0;
43
- uint32_t vector_pos = 0;
44
44
  uint32_t input_index = 0;
45
45
  uint32_t pos = 0;
46
46
 
@@ -148,17 +148,8 @@ VALUE method_mat_mul_s64( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
148
148
  uint32_t left_row = 0;
149
149
  uint32_t right_col = 0;
150
150
  uint32_t common = 0;
151
- uint32_t vector_pos = 0;
152
- uint32_t input_index = 0;
153
151
  uint32_t pos = 0;
154
152
 
155
- int64_t left_segment[ SSE_VECTOR_WIDTH ];
156
- int64_t right_segment[ SSE_VECTOR_WIDTH ];
157
-
158
- __m128i* left_vec = NULL;
159
- __m128i* right_vec = NULL;
160
- __m128i result_vec;
161
-
162
153
  VALUE result = Qnil;
163
154
 
164
155
  int64_t* result_native = NULL;
@@ -233,7 +224,6 @@ VALUE method_mat_mul_f32( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
233
224
  uint32_t left_row = 0;
234
225
  uint32_t right_col = 0;
235
226
  uint32_t common = 0;
236
- uint32_t vector_pos = 0;
237
227
  uint32_t input_index = 0;
238
228
  uint32_t pos = 0;
239
229
 
@@ -303,7 +293,7 @@ VALUE method_mat_mul_f32( VALUE self, VALUE left, VALUE left_rows_rb, VALUE left
303
293
 
304
294
  left_vec = ( __m128i *)left_segment;
305
295
  right_vec = ( __m128i *)right_segment;
306
- result_vec = _mm_mul_ps( *left_vec, *right_vec );
296
+ result_vec = mul_f32( *left_vec, *right_vec );
307
297
 
308
298
  _mm_store_si128( (__m128i*)result_segment, result_vec );
309
299
  for ( pos = 0; pos < SSE_VECTOR_WIDTH; ++pos )
@@ -32,9 +32,31 @@
32
32
 
33
33
  #include <string.h>
34
34
  #include <emmintrin.h>
35
+ #include <ruby.h>
35
36
  #include "vector_sse_sum.h"
37
+ #include "vector_sse_common.h"
36
38
 
37
- #define TEMPLATE_SUM_S( FUNC_NAME, TYPE, OFTYPE, TYPE_SIZE, CONV_IN, CONV_OUT, EL_PER_VEC, ADD ) \
39
+
40
+ // Check for overflow
41
+ // __m128i sign_left;
42
+ // __m128i sign_right;
43
+ // const int32_t OVERFLOW_MASK = ( (int32_t)0x1 << (32-1) );
44
+ // int32_t overflow[ 4 ];
45
+ // __m128i* overflow_vec = (__m128i*)overflow;
46
+ // sign_left = _mm_xor_si128(result_vec, left_vec);
47
+ // sign_right = _mm_xor_si128(result_vec, right_vec);
48
+ // *overflow_vec = _mm_and_si128(sign_left, sign_right);
49
+
50
+ // for ( vector_pos = 0; vector_pos < 4; ++vector_pos )
51
+ // {
52
+ // if ( ( (int32_t)overflow[ vector_pos ] & OVERFLOW_MASK ) )
53
+ // {
54
+ // rb_raise( rb_eRuntimeError, "Vector addition overflow" );
55
+ // }
56
+ // }
57
+
58
+
59
+ #define TEMPLATE_SUM_S( FUNC_NAME, TYPE, CONV_IN, CONV_OUT, EL_PER_VEC, ADDER ) \
38
60
  VALUE FUNC_NAME( VALUE self, VALUE vector ) \
39
61
  { \
40
62
  uint32_t length = 0; \
@@ -44,20 +66,12 @@ VALUE FUNC_NAME( VALUE self, VALUE vector ) \
44
66
  \
45
67
  TYPE result = 0; \
46
68
  \
47
- TYPE left_segment[ EL_PER_VEC ]; \
48
- TYPE right_segment[ EL_PER_VEC ]; \
49
69
  TYPE result_segment[ EL_PER_VEC ]; \
50
70
  TYPE vector_segment[ EL_PER_VEC ]; \
51
71
  \
52
- __m128i left_vec; \
53
- __m128i right_vec; \
72
+ __m128i left_vec; \
73
+ __m128i right_vec; \
54
74
  __m128i result_vec; \
55
- \
56
- __m128i sign_left; \
57
- __m128i sign_right; \
58
- const OFTYPE OVERFLOW_MASK = ( (OFTYPE)0x1 << (TYPE_SIZE-1) ); \
59
- OFTYPE overflow[ EL_PER_VEC ]; \
60
- __m128i* overflow_vec = (__m128i*)overflow; \
61
75
  \
62
76
  Check_Type( vector, T_ARRAY ); \
63
77
  \
@@ -85,19 +99,7 @@ VALUE FUNC_NAME( VALUE self, VALUE vector ) \
85
99
  right_vec = _mm_loadu_si128( (const __m128i *)vector_segment ); \
86
100
  left_vec = _mm_loadu_si128( &result_vec ); \
87
101
  \
88
- result_vec = ADD( left_vec, right_vec ); \
89
- \
90
- sign_left = _mm_xor_si128(result_vec, left_vec); \
91
- sign_right = _mm_xor_si128(result_vec, right_vec); \
92
- *overflow_vec = _mm_and_si128(sign_left, sign_right); \
93
- \
94
- for ( vector_pos = 0; vector_pos < EL_PER_VEC; ++vector_pos ) \
95
- { \
96
- if ( ( (OFTYPE)overflow[ vector_pos ] & OVERFLOW_MASK ) ) \
97
- { \
98
- rb_raise( rb_eRuntimeError, "Vector addition overflow" ); \
99
- } \
100
- } \
102
+ result_vec = ADDER( left_vec, right_vec ); \
101
103
  } \
102
104
  \
103
105
  _mm_store_si128( (__m128i*)result_segment, result_vec ); \
@@ -111,8 +113,8 @@ VALUE FUNC_NAME( VALUE self, VALUE vector ) \
111
113
  return CONV_OUT( result ); \
112
114
  }
113
115
 
114
- TEMPLATE_SUM_S( method_vec_sum_s32, int32_t, int32_t, 32, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
115
- TEMPLATE_SUM_S( method_vec_sum_s64, int64_t, int64_t, 64, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
116
- TEMPLATE_SUM_S( method_vec_sum_f32, float, int32_t, 32, NUM2DBL, DBL2NUM, 4, _mm_add_ps );
117
- TEMPLATE_SUM_S( method_vec_sum_f64, double, int64_t, 32, NUM2DBL, DBL2NUM, 2, _mm_add_pd );
116
+ TEMPLATE_SUM_S( method_vec_sum_s32, int32_t, NUM2INT, INT2NUM, 4, _mm_add_epi32 );
117
+ TEMPLATE_SUM_S( method_vec_sum_s64, int64_t, NUM2LL, LL2NUM, 2, _mm_add_epi64 );
118
+ TEMPLATE_SUM_S( method_vec_sum_f32, float, NUM2DBL, DBL2NUM, 4, add_f32 );
119
+ TEMPLATE_SUM_S( method_vec_sum_f64, double, NUM2DBL, DBL2NUM, 2, add_f64 );
118
120
 
@@ -36,6 +36,7 @@
36
36
  #include <smmintrin.h>
37
37
  #endif
38
38
  #include "vector_sse_vec_mul.h"
39
+ #include "vector_sse_common.h"
39
40
 
40
41
  #define SSE_VECTOR_WIDTH (4)
41
42
  // #define EL_PER_VEC SSE_VECTOR_WIDTH
@@ -66,14 +67,14 @@ static inline __m128i mul_s64( const __m128i* left_vec, const __m128i* right_vec
66
67
  return _mm_loadu_si128( (const __m128i *)result );
67
68
  }
68
69
 
69
- static inline __m128i mul_f32(const __m128i* a, const __m128i* b )
70
+ static inline __m128i mul_f32_ptr(const __m128i* a, const __m128i* b )
70
71
  {
71
- return _mm_mul_ps( *a, *b );
72
+ return mul_f32( *a, *b );
72
73
  }
73
74
 
74
- static inline __m128i mul_f64(const __m128i* a, const __m128i* b )
75
+ static inline __m128i mul_f64_ptr(const __m128i* a, const __m128i* b )
75
76
  {
76
- return _mm_mul_pd( *a, *b );
77
+ return mul_f64( *a, *b );
77
78
  }
78
79
 
79
80
 
@@ -95,9 +96,6 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
95
96
  \
96
97
  TYPE result_segment[ EL_PER_VEC ]; \
97
98
  __m128i result_vec; \
98
- \
99
- __m128i sign_left; \
100
- __m128i sign_right; \
101
99
  \
102
100
  Check_Type( left, T_ARRAY ); \
103
101
  Check_Type( right, T_ARRAY ); \
@@ -152,6 +150,6 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
152
150
 
153
151
  TEMPLATE_VEC_MUL_S( method_vec_mul_s32, int32_t, 32, NUM2INT, INT2NUM, 4, mul_s32 );
154
152
  TEMPLATE_VEC_MUL_S( method_vec_mul_s64, int64_t, 64, NUM2LL, LL2NUM, 2, mul_s64 );
155
- TEMPLATE_VEC_MUL_S( method_vec_mul_f32, float, 32, NUM2DBL, DBL2NUM, 4, mul_f32 );
156
- TEMPLATE_VEC_MUL_S( method_vec_mul_f64, double, 64, NUM2DBL, DBL2NUM, 2, mul_f64 );
153
+ TEMPLATE_VEC_MUL_S( method_vec_mul_f32, float, 32, NUM2DBL, DBL2NUM, 4, mul_f32_ptr );
154
+ TEMPLATE_VEC_MUL_S( method_vec_mul_f64, double, 64, NUM2DBL, DBL2NUM, 2, mul_f64_ptr );
157
155
 
data/lib/vector_sse.rb CHANGED
@@ -36,7 +36,7 @@ require File.join( bin_root, 'vector_sse.so' )
36
36
 
37
37
  module VectorSSE
38
38
 
39
- VERSION = "0.0.1.pre"
39
+ VERSION = "0.0.2.pre"
40
40
 
41
41
  module Type
42
42
  S32 = 0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vector_sse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.pre
4
+ version: 0.0.2.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Glissmann
@@ -14,28 +14,28 @@ dependencies:
14
14
  name: rake-compiler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 0.9.5
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.9.5
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
33
  version: 3.1.0
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.1.0
41
41
  description: VectorSse employs x86 Streaming SIMD Extensions (SSE), v3 or greater,
@@ -46,15 +46,19 @@ extensions:
46
46
  - ext/vector_sse/extconf.rb
47
47
  extra_rdoc_files: []
48
48
  files:
49
- - .gitignore
49
+ - ".gitignore"
50
+ - Gemfile
50
51
  - LICENSE.txt
51
52
  - README.md
52
53
  - Rakefile
54
+ - ext/.gitignore
53
55
  - ext/vector_sse/.gitignore
54
56
  - ext/vector_sse/extconf.rb
55
57
  - ext/vector_sse/vector_sse.c
56
58
  - ext/vector_sse/vector_sse_add.c
57
59
  - ext/vector_sse/vector_sse_add.h
60
+ - ext/vector_sse/vector_sse_common.c
61
+ - ext/vector_sse/vector_sse_common.h
58
62
  - ext/vector_sse/vector_sse_mul.c
59
63
  - ext/vector_sse/vector_sse_mul.h
60
64
  - ext/vector_sse/vector_sse_sum.c
@@ -76,17 +80,17 @@ require_paths:
76
80
  - lib
77
81
  required_ruby_version: !ruby/object:Gem::Requirement
78
82
  requirements:
79
- - - '>='
83
+ - - ">="
80
84
  - !ruby/object:Gem::Version
81
85
  version: '0'
82
86
  required_rubygems_version: !ruby/object:Gem::Requirement
83
87
  requirements:
84
- - - '>'
88
+ - - ">"
85
89
  - !ruby/object:Gem::Version
86
90
  version: 1.3.1
87
91
  requirements: []
88
92
  rubyforge_project:
89
- rubygems_version: 2.0.14
93
+ rubygems_version: 2.4.8
90
94
  signing_key:
91
95
  specification_version: 4
92
96
  summary: SIMD accelerated vector and matrix operations