vector_sse 0.0.2.pre → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fdabd1511ca1ab68168a0723f06d26afe3312627
4
- data.tar.gz: 9bb853808e5c0a8ec8e8b1592e84a0537954f6dc
3
+ metadata.gz: 76432f1deaa727915d70bb6d2f6c1f664c37266d
4
+ data.tar.gz: 38032d25b177403ae26e9aaf4b96f5779d891bb4
5
5
  SHA512:
6
- metadata.gz: 213dffc68fc752e4520f0d5f7e910467b3fb51faba6ed5f6052b38d6a677928ead0b899b7658427fa78e08c7400f9ac874112b8c325174a328a9c137cdf34d6d
7
- data.tar.gz: 10b7386669a7a1acbbcad0a9f24323e703487997d18969f9b02a6f0d5ba02ac2bd9ff1c50ba5df71f33d18413e38545454eb48700570046bc2c2c5fb1f54f712
6
+ metadata.gz: c7371159b7f988e00bd1118370c7cebba152b8c06d1f7158fb3312033ab11b57c12f0051ef2c757bdeadf3b5011720335dd997f5df47adb350c4b7a2febf5167
7
+ data.tar.gz: d96ab615ef09749a318f301a1e7109209f0eae80318dc0bd9162263e6ea46e7c36376890df5a3d796cecc588e5edae90ce0bb533c36459f86064dac87f9b41d4
@@ -67,18 +67,19 @@ static inline __m128i mul_s64( const __m128i* left_vec, const __m128i* right_vec
67
67
  return _mm_loadu_si128( (const __m128i *)result );
68
68
  }
69
69
 
70
- static inline __m128i mul_f32_ptr(const __m128i* a, const __m128i* b )
70
+ static inline __m128 mul_f32_ptr(const __m128* left, const __m128* right )
71
71
  {
72
- return mul_f32( *a, *b );
72
+ return _mm_mul_ps( *left, *right );
73
73
  }
74
74
 
75
- static inline __m128i mul_f64_ptr(const __m128i* a, const __m128i* b )
75
+ static inline __m128d mul_f64_ptr(const __m128d* left, const __m128d* right )
76
76
  {
77
- return mul_f64( *a, *b );
77
+ return _mm_mul_pd( *left, *right );
78
78
  }
79
79
 
80
80
 
81
- #define TEMPLATE_VEC_MUL_S( FUNC_NAME, TYPE, TYPE_SIZE, CONV_IN, CONV_OUT, EL_PER_VEC, MULOP ) \
81
+ #define TEMPLATE_VEC_MUL_S( \
82
+ FUNC_NAME, TYPE, CONV_IN, CONV_OUT, EL_PER_VEC, ITYPE, LOAD, LCAST, STORE, SCAST, MULOP ) \
82
83
  VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
83
84
  { \
84
85
  uint32_t length = 0; \
@@ -91,11 +92,11 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
91
92
  TYPE left_segment[ EL_PER_VEC ]; \
92
93
  TYPE right_segment[ EL_PER_VEC ]; \
93
94
  \
94
- __m128i left_vec; \
95
- __m128i right_vec; \
95
+ ITYPE left_vec; \
96
+ ITYPE right_vec; \
96
97
  \
97
98
  TYPE result_segment[ EL_PER_VEC ]; \
98
- __m128i result_vec; \
99
+ ITYPE result_vec; \
99
100
  \
100
101
  Check_Type( left, T_ARRAY ); \
101
102
  Check_Type( right, T_ARRAY ); \
@@ -125,12 +126,12 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
125
126
  } \
126
127
  } \
127
128
  \
128
- left_vec = _mm_loadu_si128( (const __m128i *)left_segment ); \
129
- right_vec = _mm_loadu_si128( (const __m128i *)right_segment ); \
129
+ left_vec = LOAD( (const LCAST *)left_segment ); \
130
+ right_vec = LOAD( (const LCAST *)right_segment ); \
130
131
  \
131
132
  result_vec = MULOP( &left_vec, &right_vec ); \
132
133
  \
133
- _mm_store_si128( (__m128i*)result_segment, result_vec ); \
134
+ STORE( ( SCAST * )result_segment, result_vec ); \
134
135
  \
135
136
  for ( vector_pos = 0; vector_pos < EL_PER_VEC; ++vector_pos ) \
136
137
  { \
@@ -148,8 +149,40 @@ VALUE FUNC_NAME( VALUE self, VALUE left, VALUE right ) \
148
149
  }
149
150
 
150
151
 
151
- TEMPLATE_VEC_MUL_S( method_vec_mul_s32, int32_t, 32, NUM2INT, INT2NUM, 4, mul_s32 );
152
- TEMPLATE_VEC_MUL_S( method_vec_mul_s64, int64_t, 64, NUM2LL, LL2NUM, 2, mul_s64 );
153
- TEMPLATE_VEC_MUL_S( method_vec_mul_f32, float, 32, NUM2DBL, DBL2NUM, 4, mul_f32_ptr );
154
- TEMPLATE_VEC_MUL_S( method_vec_mul_f64, double, 64, NUM2DBL, DBL2NUM, 2, mul_f64_ptr );
152
+ TEMPLATE_VEC_MUL_S(
153
+ method_vec_mul_s32,
154
+ int32_t,
155
+ NUM2INT, INT2NUM,
156
+ 4,
157
+ __m128i,
158
+ _mm_loadu_si128, __m128i,
159
+ _mm_store_si128, __m128i,
160
+ mul_s32 );
161
+ TEMPLATE_VEC_MUL_S(
162
+ method_vec_mul_s64,
163
+ int64_t,
164
+ NUM2LL, LL2NUM,
165
+ 2,
166
+ __m128i,
167
+ _mm_loadu_si128, __m128i,
168
+ _mm_store_si128, __m128i,
169
+ mul_s64 );
170
+ TEMPLATE_VEC_MUL_S(
171
+ method_vec_mul_f32,
172
+ float,
173
+ NUM2DBL, DBL2NUM,
174
+ 4,
175
+ __m128,
176
+ _mm_load_ps, float,
177
+ _mm_store_ps, float,
178
+ mul_f32_ptr );
179
+ TEMPLATE_VEC_MUL_S(
180
+ method_vec_mul_f64,
181
+ double,
182
+ NUM2DBL, DBL2NUM,
183
+ 2,
184
+ __m128d,
185
+ _mm_load_pd, double,
186
+ _mm_store_pd, double,
187
+ mul_f64_ptr );
155
188
 
data/lib/vector_sse.rb CHANGED
@@ -36,7 +36,7 @@ require File.join( bin_root, 'vector_sse.so' )
36
36
 
37
37
  module VectorSSE
38
38
 
39
- VERSION = "0.0.2.pre"
39
+ VERSION = "0.0.3"
40
40
 
41
41
  module Type
42
42
  S32 = 0
data/vector_sse.gemspec CHANGED
@@ -4,7 +4,7 @@ require_relative 'lib/vector_sse'
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'vector_sse'
6
6
  s.version = VectorSSE::VERSION
7
- s.date = '2015-12-28'
7
+ s.date = Time.now.to_date.strftime('%Y-%m-%d')
8
8
  s.summary = "SIMD accelerated vector and matrix operations"
9
9
  s.description = "VectorSse employs x86 Streaming SIMD Extensions (SSE), v3 or greater, to accelerate basic vector and matrix computations in Ruby."
10
10
  s.authors = [ "Robert Glissmann" ]
@@ -15,5 +15,5 @@ Gem::Specification.new do |s|
15
15
  s.homepage = 'https://github.com/rgmann/vector_sse'
16
16
 
17
17
  s.add_development_dependency 'rake-compiler', '~> 0.9.5'
18
- s.add_development_dependency 'rspec', '~> 3.1.0'
18
+ s.add_development_dependency 'rspec', '~> 3.1'
19
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vector_sse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2.pre
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Glissmann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-28 00:00:00.000000000 Z
11
+ date: 2016-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 3.1.0
33
+ version: '3.1'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 3.1.0
40
+ version: '3.1'
41
41
  description: VectorSse employs x86 Streaming SIMD Extensions (SSE), v3 or greater,
42
42
  to accelerate basic vector and matrix computations in Ruby.
43
43
  email: Robert.Glissmann@gmail.com
@@ -85,9 +85,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
85
85
  version: '0'
86
86
  required_rubygems_version: !ruby/object:Gem::Requirement
87
87
  requirements:
88
- - - ">"
88
+ - - ">="
89
89
  - !ruby/object:Gem::Version
90
- version: 1.3.1
90
+ version: '0'
91
91
  requirements: []
92
92
  rubyforge_project:
93
93
  rubygems_version: 2.4.8