simd 0.4.0 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/ext/simd/extconf.rb +6 -0
- data/ext/simd/simd_common.c +82 -4
- data/ext/simd/simd_common.h +4 -2
- data/ext/simd/simd_floatarray.c +83 -68
- data/ext/simd/simd_floatarray.h +10 -2
- data/ext/simd/simd_intarray.c +43 -74
- data/ext/simd/simd_intarray.h +7 -3
- data/ext/simd/simd_longarray.c +43 -71
- data/ext/simd/simd_longarray.h +7 -3
- data/ext/simd/simd_smallfloatarray.c +83 -71
- data/ext/simd/simd_smallfloatarray.h +10 -2
- data/ext/simd/simd_types.h +3 -3
- metadata +10 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d43585672169c7a727e7502dea99cb56e8103c2e6a8effe4f32521ee4cae6e5d
|
4
|
+
data.tar.gz: c404674a578002bdf5933baee75e54680717c562b1e8c839a0661e04dfee27a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86c6b9e43f219190f4bd2a34d0ad1c309eacb56068dbdb0d44f46894b1ceee19bc68c77d4f682d42b335039250730c9637a1d19c5075861c9731fd13a48d71d0
|
7
|
+
data.tar.gz: a71afed09b9ead560531c9ec2a9ffb0260843398255963b549abe44db0b45dc036fb604a945c9caff7f5d54dd51626f78ed4f2877803a9a5d2bc455e68c5c332
|
data/ext/simd/extconf.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
# Makes Makefiles for Ruby extensions.
|
2
2
|
require 'mkmf'
|
3
3
|
|
4
|
+
cpu = RbConfig::CONFIG['arch'].downcase
|
5
|
+
if cpu.include?('arm')
|
6
|
+
ver = cpu.gsub(/[^\d]*(\d+).*/, '\\1').to_i
|
7
|
+
$CFLAGS << ' -mfpu=neon' if ver >= 6
|
8
|
+
end
|
9
|
+
|
4
10
|
extension_name = 'simd'
|
5
11
|
dir_config(extension_name)
|
6
12
|
create_makefile(extension_name)
|
data/ext/simd/simd_common.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include "simd_common.h"
|
2
2
|
|
3
|
+
#define XMM_BYTES 16 /* Width of the xmm1,2... registers */
|
4
|
+
|
3
5
|
/* Internal: Allocate memory for the vector container. */
|
4
6
|
VALUE allocate(VALUE klass)
|
5
7
|
{
|
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
|
|
36
38
|
}
|
37
39
|
|
38
40
|
/* Internal: Allocate memory for the data array. */
|
39
|
-
void *internal_allocate_vector_array(unsigned long
|
41
|
+
void *internal_allocate_vector_array(unsigned long long int count)
|
40
42
|
{
|
41
|
-
|
42
|
-
void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
|
43
|
+
void *vector = malloc((count + 1) * XMM_BYTES);
|
43
44
|
if(vector == NULL)
|
44
45
|
{
|
45
46
|
rb_raise(rb_eNoMemError, "Unable to allocate memory");
|
@@ -50,7 +51,7 @@ void *internal_allocate_vector_array(unsigned long count, size_t size)
|
|
50
51
|
|
51
52
|
/* Internal: Determine if two arrays can be acted upon, by being of equal
|
52
53
|
* lengths or with the operand's length being a multiple of the data array's. */
|
53
|
-
int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo)
|
54
|
+
int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo)
|
54
55
|
{
|
55
56
|
if((v1 % modulo) != (v2 % modulo))
|
56
57
|
{
|
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
|
|
74
75
|
/* Never reached */
|
75
76
|
return(-1);
|
76
77
|
}
|
78
|
+
|
79
|
+
/* Internal: Given another object, perform an action specified via a function
|
80
|
+
* pointer against both.
|
81
|
+
*
|
82
|
+
* Since arithmetic is purposefully performed against void pointers, disable
|
83
|
+
* warnings regarding this for the current function. */
|
84
|
+
#pragma GCC diagnostic push
|
85
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
86
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
|
87
|
+
{
|
88
|
+
unsigned long long int length, i, j;
|
89
|
+
int align;
|
90
|
+
vector_t *v1, *v2, *rv;
|
91
|
+
void *data;
|
92
|
+
VALUE result_obj = allocate(klass);
|
93
|
+
|
94
|
+
Data_Get_Struct(self, vector_t, v1);
|
95
|
+
Data_Get_Struct(obj, vector_t, v2);
|
96
|
+
Data_Get_Struct(result_obj, vector_t, rv);
|
97
|
+
rv->data = internal_allocate_vector_array(v1->len);
|
98
|
+
|
99
|
+
align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
|
100
|
+
|
101
|
+
length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
|
102
|
+
rv->len = v1->len;
|
103
|
+
|
104
|
+
switch(align)
|
105
|
+
{
|
106
|
+
case 0: /* Same size arrays */
|
107
|
+
for(i = 0; i < length; i++)
|
108
|
+
{
|
109
|
+
func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
|
110
|
+
}
|
111
|
+
break;
|
112
|
+
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
113
|
+
for(i = 0; i < length; i++)
|
114
|
+
{
|
115
|
+
func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
|
116
|
+
}
|
117
|
+
break;
|
118
|
+
default: /* Self is a multiple of operand's length long */
|
119
|
+
for(j = 0; j < v2->len; j++)
|
120
|
+
{
|
121
|
+
data = v2->data + XMM_BYTES * j;
|
122
|
+
for(i = j; i < length; i+=v2->len)
|
123
|
+
{
|
124
|
+
func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
|
125
|
+
}
|
126
|
+
}
|
127
|
+
}
|
128
|
+
internal_sanitize_unaligned_final_vector(rv, size);
|
129
|
+
|
130
|
+
return(result_obj);
|
131
|
+
}
|
132
|
+
#pragma GCC diagnostic pop
|
133
|
+
|
134
|
+
/* Internal: Make sure that no null bytes exist beyond the boundary of
|
135
|
+
* unaligned vectors. This function should be called after any operation that
|
136
|
+
* results in the mutation or creation of a vector array.
|
137
|
+
*
|
138
|
+
* Since arithmetic is purposefully performed against void pointers, disable
|
139
|
+
* warnings regarding this for the current function. */
|
140
|
+
#pragma GCC diagnostic push
|
141
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
142
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
|
143
|
+
{
|
144
|
+
unsigned long long int i;
|
145
|
+
|
146
|
+
if((rv->len * size) % XMM_BYTES)
|
147
|
+
{
|
148
|
+
for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
|
149
|
+
{
|
150
|
+
*(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
#pragma GCC diagnostic pop
|
data/ext/simd/simd_common.h
CHANGED
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
|
|
8
8
|
|
9
9
|
VALUE method_length(VALUE self);
|
10
10
|
|
11
|
-
void *internal_allocate_vector_array(unsigned long
|
12
|
-
int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
|
11
|
+
void *internal_allocate_vector_array(unsigned long long int count);
|
12
|
+
int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo);
|
13
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
|
14
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
|
data/ext/simd/simd_floatarray.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "simd_floatarray.h"
|
2
|
+
#include "simd_longarray.h"
|
2
3
|
|
3
4
|
VALUE SIMD_FloatArray = Qnil;
|
4
5
|
|
@@ -12,6 +13,13 @@ void Init_SIMD_FloatArray(VALUE parent)
|
|
12
13
|
rb_define_method(SIMD_FloatArray, "/", method_divide, 1);
|
13
14
|
rb_define_method(SIMD_FloatArray, "+", method_add, 1);
|
14
15
|
rb_define_method(SIMD_FloatArray, "-", method_subtract, 1);
|
16
|
+
rb_define_method(SIMD_FloatArray, "&", method_and, 1);
|
17
|
+
rb_define_method(SIMD_FloatArray, "|", method_or, 1);
|
18
|
+
rb_define_method(SIMD_FloatArray, "^", method_xor, 1);
|
19
|
+
rb_define_method(SIMD_FloatArray, "gt", method_gt, 1);
|
20
|
+
rb_define_method(SIMD_FloatArray, "lt", method_lt, 1);
|
21
|
+
rb_define_method(SIMD_FloatArray, ">", method_gt, 1);
|
22
|
+
rb_define_method(SIMD_FloatArray, "<", method_lt, 1);
|
15
23
|
rb_define_method(SIMD_FloatArray, "length", method_length, 0);
|
16
24
|
rb_define_method(SIMD_FloatArray, "to_a", method_to_a, 0);
|
17
25
|
}
|
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
30
|
{
|
23
31
|
vector_t *vector;
|
24
32
|
d2v_t *data;
|
25
|
-
unsigned long n,
|
33
|
+
unsigned long long int n,i;
|
26
34
|
|
27
35
|
Check_Type(rb_array, T_ARRAY);
|
28
36
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
42
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
43
|
}
|
36
44
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
45
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
46
|
|
39
47
|
data = (d2v_t *)vector->data;
|
40
48
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
50
|
data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
|
43
51
|
}
|
44
52
|
|
45
|
-
|
46
|
-
m = n + (n % 2);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/2].f[1] = 1.0;
|
50
|
-
}
|
53
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(double));
|
51
54
|
|
52
55
|
return(self);
|
53
56
|
}
|
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
59
|
* another FloatArray object, returning a new FloatArray. */
|
57
60
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
61
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
62
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
|
60
63
|
}
|
61
64
|
|
62
65
|
/* Public: Divide values contained in the data array by those contained in
|
63
66
|
* another FloatArray object, returning a new FloatArray. */
|
64
67
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
68
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
69
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
|
67
70
|
}
|
68
71
|
|
69
72
|
/* Public: add values contained in the data array with those contained in
|
70
73
|
* another FloatArray object, returning a new FloatArray. */
|
71
74
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
75
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
76
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
|
74
77
|
}
|
75
78
|
|
76
79
|
/* Public: Subtract values contained in another FloatArray object from those
|
77
80
|
* contained in the current data array object, returning a new FloatArray. */
|
78
81
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
82
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
83
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
|
84
|
+
}
|
85
|
+
|
86
|
+
/* Public: and values contained in the data array with those contained in
|
87
|
+
* another FloatArray object, returning a new FloatArray. */
|
88
|
+
static VALUE method_and(VALUE self, VALUE obj)
|
89
|
+
{
|
90
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_and));
|
91
|
+
}
|
92
|
+
|
93
|
+
/* Public: or values contained in the data array with those contained in
|
94
|
+
* another FloatArray object, returning a new FloatArray. */
|
95
|
+
static VALUE method_or(VALUE self, VALUE obj)
|
96
|
+
{
|
97
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_or));
|
98
|
+
}
|
99
|
+
|
100
|
+
/* Public: xor values contained in the data array with those contained in
|
101
|
+
* another FloatArray object, returning a new FloatArray. */
|
102
|
+
static VALUE method_xor(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_xor));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another FloatArray object, return a new LongArray with each element being
|
109
|
+
* -1 if the data array's value is greater, and 0 otherwise. */
|
110
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_gt));
|
113
|
+
}
|
114
|
+
|
115
|
+
/* Public: Compare values contained in the data array with those contained in
|
116
|
+
* another FloatArray object, return a new LongArray with each element being
|
117
|
+
* -1 if the data array's value is less, and 0 otherwise. */
|
118
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
119
|
+
{
|
120
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_lt));
|
81
121
|
}
|
82
122
|
|
83
123
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
84
124
|
static VALUE method_to_a(VALUE self)
|
85
125
|
{
|
86
|
-
unsigned long i;
|
126
|
+
unsigned long long int i;
|
87
127
|
vector_t *vector;
|
88
128
|
d2v_t *data;
|
89
129
|
VALUE rb_array = rb_ary_new();
|
@@ -98,61 +138,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
138
|
return(rb_array);
|
99
139
|
}
|
100
140
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
d2v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_FloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 1) / 2;
|
120
|
-
|
121
|
-
d1 = (d2v_t *)v1->data;
|
122
|
-
d2 = (d2v_t *)v2->data;
|
123
|
-
r = (d2v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 2 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 2))
|
149
|
-
{
|
150
|
-
r[size].f[1] = 1;
|
151
|
-
}
|
152
|
-
|
153
|
-
return(result_obj);
|
154
|
-
}
|
155
|
-
|
156
141
|
/* Function: Multiply two vectors. */
|
157
142
|
static void func_multiply(void *v1, void *v2, void *r)
|
158
143
|
{
|
@@ -176,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
|
|
176
161
|
{
|
177
162
|
*(d2v *)r = *(d2v *)v1 - *(d2v *)v2;
|
178
163
|
}
|
164
|
+
|
165
|
+
/* Function: Perform a binary AND on two vectors. */
|
166
|
+
static void func_and(void *v1, void *v2, void *r)
|
167
|
+
{
|
168
|
+
*(l2v *)r = *(l2v *)v1 & *(l2v *)v2;
|
169
|
+
}
|
170
|
+
|
171
|
+
/* Function: Perform a binary OR on two vectors. */
|
172
|
+
static void func_or(void *v1, void *v2, void *r)
|
173
|
+
{
|
174
|
+
*(l2v *)r = *(l2v *)v1 | *(l2v *)v2;
|
175
|
+
}
|
176
|
+
|
177
|
+
/* Function: Perform a binary XOR on two vectors. */
|
178
|
+
static void func_xor(void *v1, void *v2, void *r)
|
179
|
+
{
|
180
|
+
*(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
|
181
|
+
}
|
182
|
+
|
183
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
184
|
+
static void func_gt(void *v1, void *v2, void *r)
|
185
|
+
{
|
186
|
+
*(l2v *)r = (*(d2v *)v1 > *(d2v *)v2);
|
187
|
+
}
|
188
|
+
|
189
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
190
|
+
static void func_lt(void *v1, void *v2, void *r)
|
191
|
+
{
|
192
|
+
*(l2v *)r = (*(d2v *)v1 < *(d2v *)v2);
|
193
|
+
}
|
data/ext/simd/simd_floatarray.h
CHANGED
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
|
|
6
6
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
7
|
static VALUE method_add(VALUE self, VALUE obj);
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
|
+
static VALUE method_and(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_or(VALUE self, VALUE obj);
|
11
|
+
static VALUE method_xor(VALUE self, VALUE obj);
|
12
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
13
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
9
14
|
static VALUE method_to_a(VALUE self);
|
10
15
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
16
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
17
|
static void func_divide(void *v1, void *v2, void *r);
|
15
18
|
static void func_add(void *v1, void *v2, void *r);
|
16
19
|
static void func_subtract(void *v1, void *v2, void *r);
|
20
|
+
static void func_and(void *v1, void *v2, void *r);
|
21
|
+
static void func_or(void *v1, void *v2, void *r);
|
22
|
+
static void func_xor(void *v1, void *v2, void *r);
|
23
|
+
static void func_gt(void *v1, void *v2, void *r);
|
24
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_intarray.c
CHANGED
@@ -15,6 +15,10 @@ void Init_SIMD_IntArray(VALUE parent)
|
|
15
15
|
rb_define_method(SIMD_IntArray, "&", method_and, 1);
|
16
16
|
rb_define_method(SIMD_IntArray, "|", method_or, 1);
|
17
17
|
rb_define_method(SIMD_IntArray, "^", method_xor, 1);
|
18
|
+
rb_define_method(SIMD_IntArray, "gt", method_gt, 1);
|
19
|
+
rb_define_method(SIMD_IntArray, "lt", method_lt, 1);
|
20
|
+
rb_define_method(SIMD_IntArray, ">", method_gt, 1);
|
21
|
+
rb_define_method(SIMD_IntArray, "<", method_lt, 1);
|
18
22
|
rb_define_method(SIMD_IntArray, "length", method_length, 0);
|
19
23
|
rb_define_method(SIMD_IntArray, "to_a", method_to_a, 0);
|
20
24
|
}
|
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
29
|
{
|
26
30
|
vector_t *vector;
|
27
31
|
i4v_t *data;
|
28
|
-
unsigned long n,
|
32
|
+
unsigned long long int n,i;
|
29
33
|
|
30
34
|
Check_Type(rb_array, T_ARRAY);
|
31
35
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
41
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
38
42
|
}
|
39
43
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
44
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
45
|
|
42
46
|
data = (i4v_t *)vector->data;
|
43
47
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
49
|
data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
|
46
50
|
}
|
47
51
|
|
48
|
-
|
49
|
-
m = n + (n % 4);
|
50
|
-
for(i = n % 4; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/4].f[i] = 1.0;
|
53
|
-
}
|
52
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(int));
|
54
53
|
|
55
54
|
return(self);
|
56
55
|
}
|
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
58
|
* another IntArray object, returning a new IntArray. */
|
60
59
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
60
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
|
63
62
|
}
|
64
63
|
|
65
64
|
/* Public: Divide values contained in the data array by those contained in
|
66
65
|
* another IntArray object, returning a new IntArray. */
|
67
66
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
67
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
|
70
69
|
}
|
71
70
|
|
72
71
|
/* Public: add values contained in the data array with those contained in
|
73
72
|
* another IntArray object, returning a new IntArray. */
|
74
73
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
74
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
|
77
76
|
}
|
78
77
|
|
79
78
|
/* Public: and values contained in the data array with those contained in
|
80
79
|
* another IntArray object, returning a new IntArray. */
|
81
80
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
81
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
82
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
|
84
83
|
}
|
85
84
|
|
86
85
|
/* Public: or values contained in the data array with those contained in
|
87
86
|
* another IntArray object, returning a new IntArray. */
|
88
87
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
88
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
89
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
|
91
90
|
}
|
92
91
|
|
93
92
|
/* Public: xor values contained in the data array with those contained in
|
94
93
|
* another IntArray object, returning a new IntArray. */
|
95
94
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
95
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
96
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
|
97
|
+
}
|
98
|
+
|
99
|
+
/* Public: Compare values contained in the data array with those contained in
|
100
|
+
* another IntArray object, return a new IntArray with each element being -1
|
101
|
+
* if the data array's value is greater, and 0 otherwise. */
|
102
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_gt));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another IntArray object, return a new IntArray with each element being -1 if
|
109
|
+
* the data array's value is less, and 0 otherwise. */
|
110
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_lt));
|
98
113
|
}
|
99
114
|
|
100
115
|
/* Public: Subtract values contained in another IntArray object from those
|
101
116
|
* contained in the current data array object, returning a new IntArray. */
|
102
117
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
118
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
119
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
|
105
120
|
}
|
106
121
|
|
107
122
|
/* Public: Return a Ruby Array containing the integers within the data array. */
|
108
123
|
static VALUE method_to_a(VALUE self)
|
109
124
|
{
|
110
|
-
unsigned long i;
|
125
|
+
unsigned long long int i;
|
111
126
|
vector_t *vector;
|
112
127
|
i4v_t *data;
|
113
128
|
VALUE rb_array = rb_ary_new();
|
@@ -122,64 +137,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
137
|
return(rb_array);
|
123
138
|
}
|
124
139
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
i4v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_IntArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 3) / 4;
|
144
|
-
|
145
|
-
d1 = (i4v_t *)v1->data;
|
146
|
-
d2 = (i4v_t *)v2->data;
|
147
|
-
r = (i4v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 4))
|
173
|
-
{
|
174
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
175
|
-
{
|
176
|
-
r[size].f[i] = 1;
|
177
|
-
}
|
178
|
-
}
|
179
|
-
|
180
|
-
return(result_obj);
|
181
|
-
}
|
182
|
-
|
183
140
|
/* Function: Multiply two vectors. */
|
184
141
|
static void func_multiply(void *v1, void *v2, void *r)
|
185
142
|
{
|
@@ -221,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
|
|
221
178
|
{
|
222
179
|
*(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
|
223
180
|
}
|
181
|
+
|
182
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
183
|
+
static void func_gt(void *v1, void *v2, void *r)
|
184
|
+
{
|
185
|
+
*(i4v *)r = (*(i4v *)v1 > *(i4v *)v2);
|
186
|
+
}
|
187
|
+
|
188
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
189
|
+
static void func_lt(void *v1, void *v2, void *r)
|
190
|
+
{
|
191
|
+
*(i4v *)r = (*(i4v *)v1 < *(i4v *)v2);
|
192
|
+
}
|
data/ext/simd/simd_intarray.h
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
#include "ruby.h"
|
2
2
|
#include "simd_common.h"
|
3
3
|
|
4
|
+
extern VALUE SIMD_IntArray;
|
5
|
+
|
4
6
|
static VALUE method_initialize(VALUE self, VALUE rb_array);
|
5
7
|
static VALUE method_multiply(VALUE self, VALUE obj);
|
6
8
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
9
|
static VALUE method_add(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_subtract(VALUE self, VALUE obj);
|
8
11
|
static VALUE method_and(VALUE self, VALUE obj);
|
9
12
|
static VALUE method_or(VALUE self, VALUE obj);
|
10
13
|
static VALUE method_xor(VALUE self, VALUE obj);
|
11
|
-
static VALUE
|
14
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
15
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
12
16
|
static VALUE method_to_a(VALUE self);
|
13
17
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
18
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
19
|
static void func_divide(void *v1, void *v2, void *r);
|
18
20
|
static void func_add(void *v1, void *v2, void *r);
|
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
|
|
20
22
|
static void func_and(void *v1, void *v2, void *r);
|
21
23
|
static void func_or(void *v1, void *v2, void *r);
|
22
24
|
static void func_xor(void *v1, void *v2, void *r);
|
25
|
+
static void func_gt(void *v1, void *v2, void *r);
|
26
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_longarray.c
CHANGED
@@ -15,6 +15,10 @@ void Init_SIMD_LongArray(VALUE parent)
|
|
15
15
|
rb_define_method(SIMD_LongArray, "&", method_and, 1);
|
16
16
|
rb_define_method(SIMD_LongArray, "|", method_or, 1);
|
17
17
|
rb_define_method(SIMD_LongArray, "^", method_xor, 1);
|
18
|
+
rb_define_method(SIMD_LongArray, "gt", method_gt, 1);
|
19
|
+
rb_define_method(SIMD_LongArray, "lt", method_lt, 1);
|
20
|
+
rb_define_method(SIMD_LongArray, ">", method_gt, 1);
|
21
|
+
rb_define_method(SIMD_LongArray, "<", method_lt, 1);
|
18
22
|
rb_define_method(SIMD_LongArray, "length", method_length, 0);
|
19
23
|
rb_define_method(SIMD_LongArray, "to_a", method_to_a, 0);
|
20
24
|
}
|
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
29
|
{
|
26
30
|
vector_t *vector;
|
27
31
|
l2v_t *data;
|
28
|
-
unsigned long n,
|
32
|
+
unsigned long long int n,i;
|
29
33
|
|
30
34
|
Check_Type(rb_array, T_ARRAY);
|
31
35
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
41
|
rb_raise(rb_eArgError, "Vectors must be at least 2 long");
|
38
42
|
}
|
39
43
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
44
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
45
|
|
42
46
|
data = (l2v_t *)vector->data;
|
43
47
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
49
|
data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
|
46
50
|
}
|
47
51
|
|
48
|
-
|
49
|
-
m = n + (n % 2);
|
50
|
-
for(i = n % 2; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/2].f[i] = 1;
|
53
|
-
}
|
52
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(long long int));
|
54
53
|
|
55
54
|
return(self);
|
56
55
|
}
|
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
58
|
* another LongArray object, returning a new LongArray. */
|
60
59
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
60
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_multiply));
|
63
62
|
}
|
64
63
|
|
65
64
|
/* Public: Divide values contained in the data array by those contained in
|
66
65
|
* another LongArray object, returning a new LongArray. */
|
67
66
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
67
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_divide));
|
70
69
|
}
|
71
70
|
|
72
71
|
/* Public: add values contained in the data array with those contained in
|
73
72
|
* another LongArray object, returning a new LongArray. */
|
74
73
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
74
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_add));
|
77
76
|
}
|
78
77
|
|
79
78
|
/* Public: and values contained in the data array with those contained in
|
80
79
|
* another LongArray object, returning a new LongArray. */
|
81
80
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
81
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
82
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_and));
|
84
83
|
}
|
85
84
|
|
86
85
|
/* Public: or values contained in the data array with those contained in
|
87
86
|
* another LongArray object, returning a new LongArray. */
|
88
87
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
88
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
89
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_or));
|
91
90
|
}
|
92
91
|
|
93
92
|
/* Public: xor values contained in the data array with those contained in
|
94
93
|
* another LongArray object, returning a new LongArray. */
|
95
94
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
95
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
96
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_xor));
|
97
|
+
}
|
98
|
+
|
99
|
+
/* Public: Compare values contained in the data array with those contained in
|
100
|
+
* another LongArray object, return a new LongArray with each element being -1
|
101
|
+
* if the data array's value is greater, and 0 otherwise. */
|
102
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_gt));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another LongArray object, return a new LongArray with each element being -1 if
|
109
|
+
* the data array's value is less, and 0 otherwise. */
|
110
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_lt));
|
98
113
|
}
|
99
114
|
|
100
115
|
/* Public: Subtract values contained in another LongArray object from those
|
101
116
|
* contained in the current data array object, returning a new LongArray. */
|
102
117
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
118
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
119
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_subtract));
|
105
120
|
}
|
106
121
|
|
107
122
|
/* Public: Return a Ruby Array containing the integers within the data array. */
|
108
123
|
static VALUE method_to_a(VALUE self)
|
109
124
|
{
|
110
|
-
unsigned long i;
|
125
|
+
unsigned long long int i;
|
111
126
|
vector_t *vector;
|
112
127
|
l2v_t *data;
|
113
128
|
VALUE rb_array = rb_ary_new();
|
@@ -122,61 +137,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
137
|
return(rb_array);
|
123
138
|
}
|
124
139
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
l2v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_LongArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 1) / 2;
|
144
|
-
|
145
|
-
d1 = (l2v_t *)v1->data;
|
146
|
-
d2 = (l2v_t *)v2->data;
|
147
|
-
r = (l2v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 2))
|
173
|
-
{
|
174
|
-
r[size].f[1] = 1;
|
175
|
-
}
|
176
|
-
|
177
|
-
return(result_obj);
|
178
|
-
}
|
179
|
-
|
180
140
|
/* Function: Multiply two vectors. */
|
181
141
|
static void func_multiply(void *v1, void *v2, void *r)
|
182
142
|
{
|
@@ -218,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
|
|
218
178
|
{
|
219
179
|
*(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
|
220
180
|
}
|
181
|
+
|
182
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
183
|
+
static void func_gt(void *v1, void *v2, void *r)
|
184
|
+
{
|
185
|
+
*(l2v *)r = (*(l2v *)v1 > *(l2v *)v2);
|
186
|
+
}
|
187
|
+
|
188
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
189
|
+
static void func_lt(void *v1, void *v2, void *r)
|
190
|
+
{
|
191
|
+
*(l2v *)r = (*(l2v *)v1 < *(l2v *)v2);
|
192
|
+
}
|
data/ext/simd/simd_longarray.h
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
#include "ruby.h"
|
2
2
|
#include "simd_common.h"
|
3
3
|
|
4
|
+
extern VALUE SIMD_LongArray;
|
5
|
+
|
4
6
|
static VALUE method_initialize(VALUE self, VALUE rb_array);
|
5
7
|
static VALUE method_multiply(VALUE self, VALUE obj);
|
6
8
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
9
|
static VALUE method_add(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_subtract(VALUE self, VALUE obj);
|
8
11
|
static VALUE method_and(VALUE self, VALUE obj);
|
9
12
|
static VALUE method_or(VALUE self, VALUE obj);
|
10
13
|
static VALUE method_xor(VALUE self, VALUE obj);
|
11
|
-
static VALUE
|
14
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
15
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
12
16
|
static VALUE method_to_a(VALUE self);
|
13
17
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
18
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
19
|
static void func_divide(void *v1, void *v2, void *r);
|
18
20
|
static void func_add(void *v1, void *v2, void *r);
|
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
|
|
20
22
|
static void func_and(void *v1, void *v2, void *r);
|
21
23
|
static void func_or(void *v1, void *v2, void *r);
|
22
24
|
static void func_xor(void *v1, void *v2, void *r);
|
25
|
+
static void func_gt(void *v1, void *v2, void *r);
|
26
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_smallfloatarray.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "simd_smallfloatarray.h"
|
2
|
+
#include "simd_intarray.h"
|
2
3
|
|
3
4
|
VALUE SIMD_SmallFloatArray = Qnil;
|
4
5
|
|
@@ -12,6 +13,13 @@ void Init_SIMD_SmallFloatArray(VALUE parent)
|
|
12
13
|
rb_define_method(SIMD_SmallFloatArray, "/", method_divide, 1);
|
13
14
|
rb_define_method(SIMD_SmallFloatArray, "+", method_add, 1);
|
14
15
|
rb_define_method(SIMD_SmallFloatArray, "-", method_subtract, 1);
|
16
|
+
rb_define_method(SIMD_SmallFloatArray, "&", method_and, 1);
|
17
|
+
rb_define_method(SIMD_SmallFloatArray, "|", method_or, 1);
|
18
|
+
rb_define_method(SIMD_SmallFloatArray, "^", method_xor, 1);
|
19
|
+
rb_define_method(SIMD_SmallFloatArray, "gt", method_gt, 1);
|
20
|
+
rb_define_method(SIMD_SmallFloatArray, "lt", method_lt, 1);
|
21
|
+
rb_define_method(SIMD_SmallFloatArray, ">", method_gt, 1);
|
22
|
+
rb_define_method(SIMD_SmallFloatArray, "<", method_lt, 1);
|
15
23
|
rb_define_method(SIMD_SmallFloatArray, "length", method_length, 0);
|
16
24
|
rb_define_method(SIMD_SmallFloatArray, "to_a", method_to_a, 0);
|
17
25
|
}
|
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
30
|
{
|
23
31
|
vector_t *vector;
|
24
32
|
f4v_t *data;
|
25
|
-
unsigned long n,
|
33
|
+
unsigned long long int n,i;
|
26
34
|
|
27
35
|
Check_Type(rb_array, T_ARRAY);
|
28
36
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
42
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
43
|
}
|
36
44
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
45
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
46
|
|
39
47
|
data = (f4v_t *)vector->data;
|
40
48
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
50
|
data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
|
43
51
|
}
|
44
52
|
|
45
|
-
|
46
|
-
m = n + (n % 4);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/4].f[i] = 1.0;
|
50
|
-
}
|
53
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(float));
|
51
54
|
|
52
55
|
return(self);
|
53
56
|
}
|
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
59
|
* another SmallFloatArray object, returning a new SmallFloatArray. */
|
57
60
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
61
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
62
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
|
60
63
|
}
|
61
64
|
|
62
65
|
/* Public: Divide values contained in the data array by those contained in
|
63
66
|
* another SmallFloatArray object, returning a new SmallFloatArray. */
|
64
67
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
68
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
69
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
|
67
70
|
}
|
68
71
|
|
69
72
|
/* Public: add values contained in the data array with those contained in
|
70
73
|
* another SmallFloatArray object, returning a new SmallFloatArray. */
|
71
74
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
75
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
76
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
|
74
77
|
}
|
75
78
|
|
76
79
|
/* Public: Subtract values contained in another SmallFloatArray object from those
|
77
80
|
* contained in the current data array object, returning a new SmallFloatArray. */
|
78
81
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
82
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
83
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
|
84
|
+
}
|
85
|
+
|
86
|
+
/* Public: and values contained in the data array with those contained in
|
87
|
+
* another SmallFloatArray object, returning a new SmallFloatArray. */
|
88
|
+
static VALUE method_and(VALUE self, VALUE obj)
|
89
|
+
{
|
90
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_and));
|
91
|
+
}
|
92
|
+
|
93
|
+
/* Public: or values contained in the data array with those contained in
|
94
|
+
* another SmallFloatArray object, returning a new SmallFloatArray. */
|
95
|
+
static VALUE method_or(VALUE self, VALUE obj)
|
96
|
+
{
|
97
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_or));
|
98
|
+
}
|
99
|
+
|
100
|
+
/* Public: xor values contained in the data array with those contained in
|
101
|
+
* another SmallFloatArray object, returning a new SmallFloatArray. */
|
102
|
+
static VALUE method_xor(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_xor));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another SmallFloatArray object, return a new IntArray with each element being
|
109
|
+
* -1 if the data array's value is greater, and 0 otherwise. */
|
110
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_gt));
|
113
|
+
}
|
114
|
+
|
115
|
+
/* Public: Compare values contained in the data array with those contained in
|
116
|
+
* another SmallFloatArray object, return a new IntArray with each element being
|
117
|
+
* -1 if the data array's value is less, and 0 otherwise. */
|
118
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
119
|
+
{
|
120
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_lt));
|
81
121
|
}
|
82
122
|
|
83
123
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
84
124
|
static VALUE method_to_a(VALUE self)
|
85
125
|
{
|
86
|
-
unsigned long i;
|
126
|
+
unsigned long long int i;
|
87
127
|
vector_t *vector;
|
88
128
|
f4v_t *data;
|
89
129
|
VALUE rb_array = rb_ary_new();
|
@@ -98,64 +138,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
138
|
return(rb_array);
|
99
139
|
}
|
100
140
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
f4v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_SmallFloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 3) / 4;
|
120
|
-
|
121
|
-
d1 = (f4v_t *)v1->data;
|
122
|
-
d2 = (f4v_t *)v2->data;
|
123
|
-
r = (f4v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 4))
|
149
|
-
{
|
150
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
151
|
-
{
|
152
|
-
r[size].f[i] = 1;
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
|
-
return(result_obj);
|
157
|
-
}
|
158
|
-
|
159
141
|
/* Function: Multiply two vectors. */
|
160
142
|
static void func_multiply(void *v1, void *v2, void *r)
|
161
143
|
{
|
@@ -179,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
|
|
179
161
|
{
|
180
162
|
*(f4v *)r = *(f4v *)v1 - *(f4v *)v2;
|
181
163
|
}
|
164
|
+
|
165
|
+
/* Function: Perform a binary AND on two vectors. */
|
166
|
+
static void func_and(void *v1, void *v2, void *r)
|
167
|
+
{
|
168
|
+
*(i4v *)r = *(i4v *)v1 & *(i4v *)v2;
|
169
|
+
}
|
170
|
+
|
171
|
+
/* Function: Perform a binary OR on two vectors. */
|
172
|
+
static void func_or(void *v1, void *v2, void *r)
|
173
|
+
{
|
174
|
+
*(i4v *)r = *(i4v *)v1 | *(i4v *)v2;
|
175
|
+
}
|
176
|
+
|
177
|
+
/* Function: Perform a binary XOR on two vectors. */
|
178
|
+
static void func_xor(void *v1, void *v2, void *r)
|
179
|
+
{
|
180
|
+
*(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
|
181
|
+
}
|
182
|
+
|
183
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
184
|
+
static void func_gt(void *v1, void *v2, void *r)
|
185
|
+
{
|
186
|
+
*(i4v *)r = (*(f4v *)v1 > *(f4v *)v2);
|
187
|
+
}
|
188
|
+
|
189
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
190
|
+
static void func_lt(void *v1, void *v2, void *r)
|
191
|
+
{
|
192
|
+
*(i4v *)r = (*(f4v *)v1 < *(f4v *)v2);
|
193
|
+
}
|
data/ext/simd/simd_smallfloatarray.h
CHANGED
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
|
|
6
6
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
7
|
static VALUE method_add(VALUE self, VALUE obj);
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
|
+
static VALUE method_and(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_or(VALUE self, VALUE obj);
|
11
|
+
static VALUE method_xor(VALUE self, VALUE obj);
|
12
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
13
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
9
14
|
static VALUE method_to_a(VALUE self);
|
10
15
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
16
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
17
|
static void func_divide(void *v1, void *v2, void *r);
|
15
18
|
static void func_add(void *v1, void *v2, void *r);
|
16
19
|
static void func_subtract(void *v1, void *v2, void *r);
|
20
|
+
static void func_and(void *v1, void *v2, void *r);
|
21
|
+
static void func_or(void *v1, void *v2, void *r);
|
22
|
+
static void func_xor(void *v1, void *v2, void *r);
|
23
|
+
static void func_gt(void *v1, void *v2, void *r);
|
24
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_types.h
CHANGED
@@ -40,17 +40,17 @@ typedef union i4v_t
|
|
40
40
|
/*
|
41
41
|
* Types for LongArray
|
42
42
|
*/
|
43
|
-
typedef long int __attribute__ ((vector_size (16))) l2v;
|
43
|
+
typedef long long int __attribute__ ((vector_size (16))) l2v;
|
44
44
|
typedef union l2v_t
|
45
45
|
{
|
46
46
|
l2v v;
|
47
|
-
long int f[2];
|
47
|
+
long long int f[2];
|
48
48
|
} l2v_t;
|
49
49
|
|
50
50
|
typedef struct vector_t
|
51
51
|
{
|
52
52
|
void *data;
|
53
|
-
unsigned long len;
|
53
|
+
unsigned long long int len;
|
54
54
|
} vector_t;
|
55
55
|
|
56
56
|
typedef void (b_operation)(void *v1, void *v2, void *r);
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tina Wuest
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.2'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1.2'
|
27
27
|
description: Access to SIMD (Single Instruction Multiple Data) instructions in Ruby
|
28
28
|
email: tina@wuest.me
|
29
29
|
executables: []
|
@@ -48,7 +48,7 @@ files:
|
|
48
48
|
homepage: https://gitlab.com/wuest/simd-ruby
|
49
49
|
licenses: []
|
50
50
|
metadata: {}
|
51
|
-
post_install_message:
|
51
|
+
post_install_message:
|
52
52
|
rdoc_options: []
|
53
53
|
require_paths:
|
54
54
|
- lib
|
@@ -63,9 +63,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
63
|
- !ruby/object:Gem::Version
|
64
64
|
version: '0'
|
65
65
|
requirements: []
|
66
|
-
|
67
|
-
|
68
|
-
signing_key:
|
66
|
+
rubygems_version: 3.3.7
|
67
|
+
signing_key:
|
69
68
|
specification_version: 4
|
70
69
|
summary: SIMD instructions in ruby
|
71
70
|
test_files: []
|