simd 0.4.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/ext/simd/extconf.rb +6 -0
- data/ext/simd/simd_common.c +82 -4
- data/ext/simd/simd_common.h +4 -2
- data/ext/simd/simd_floatarray.c +83 -68
- data/ext/simd/simd_floatarray.h +10 -2
- data/ext/simd/simd_intarray.c +43 -74
- data/ext/simd/simd_intarray.h +7 -3
- data/ext/simd/simd_longarray.c +43 -71
- data/ext/simd/simd_longarray.h +7 -3
- data/ext/simd/simd_smallfloatarray.c +83 -71
- data/ext/simd/simd_smallfloatarray.h +10 -2
- data/ext/simd/simd_types.h +3 -3
- metadata +10 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d43585672169c7a727e7502dea99cb56e8103c2e6a8effe4f32521ee4cae6e5d
|
4
|
+
data.tar.gz: c404674a578002bdf5933baee75e54680717c562b1e8c839a0661e04dfee27a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86c6b9e43f219190f4bd2a34d0ad1c309eacb56068dbdb0d44f46894b1ceee19bc68c77d4f682d42b335039250730c9637a1d19c5075861c9731fd13a48d71d0
|
7
|
+
data.tar.gz: a71afed09b9ead560531c9ec2a9ffb0260843398255963b549abe44db0b45dc036fb604a945c9caff7f5d54dd51626f78ed4f2877803a9a5d2bc455e68c5c332
|
data/ext/simd/extconf.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
# Makes Makefiles for Ruby extensions.
|
2
2
|
require 'mkmf'
|
3
3
|
|
4
|
+
cpu = RbConfig::CONFIG['arch'].downcase
|
5
|
+
if cpu.include?('arm')
|
6
|
+
ver = cpu.gsub(/[^\d]*(\d+).*/, '\\1').to_i
|
7
|
+
$CFLAGS << ' -mfpu=neon' if ver >= 6
|
8
|
+
end
|
9
|
+
|
4
10
|
extension_name = 'simd'
|
5
11
|
dir_config(extension_name)
|
6
12
|
create_makefile(extension_name)
|
data/ext/simd/simd_common.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include "simd_common.h"
|
2
2
|
|
3
|
+
#define XMM_BYTES 16 /* Width of the xmm1,2... registers */
|
4
|
+
|
3
5
|
/* Internal: Allocate memory for the vector container. */
|
4
6
|
VALUE allocate(VALUE klass)
|
5
7
|
{
|
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
|
|
36
38
|
}
|
37
39
|
|
38
40
|
/* Internal: Allocate memory for the data array. */
|
39
|
-
void *internal_allocate_vector_array(unsigned long
|
41
|
+
void *internal_allocate_vector_array(unsigned long long int count)
|
40
42
|
{
|
41
|
-
|
42
|
-
void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
|
43
|
+
void *vector = malloc((count + 1) * XMM_BYTES);
|
43
44
|
if(vector == NULL)
|
44
45
|
{
|
45
46
|
rb_raise(rb_eNoMemError, "Unable to allocate memory");
|
@@ -50,7 +51,7 @@ void *internal_allocate_vector_array(unsigned long count, size_t size)
|
|
50
51
|
|
51
52
|
/* Internal: Determine if two arrays can be acted upon, by being of equal
|
52
53
|
* lengths or with the operand's length being a multiple of the data array's. */
|
53
|
-
int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo)
|
54
|
+
int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo)
|
54
55
|
{
|
55
56
|
if((v1 % modulo) != (v2 % modulo))
|
56
57
|
{
|
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
|
|
74
75
|
/* Never reached */
|
75
76
|
return(-1);
|
76
77
|
}
|
78
|
+
|
79
|
+
/* Internal: Given another object, perform an action specified via a function
|
80
|
+
* pointer against both.
|
81
|
+
*
|
82
|
+
* Since arithmetic is purposefully performed against void pointers, disable
|
83
|
+
* warnings regarding this for the current function. */
|
84
|
+
#pragma GCC diagnostic push
|
85
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
86
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
|
87
|
+
{
|
88
|
+
unsigned long long int length, i, j;
|
89
|
+
int align;
|
90
|
+
vector_t *v1, *v2, *rv;
|
91
|
+
void *data;
|
92
|
+
VALUE result_obj = allocate(klass);
|
93
|
+
|
94
|
+
Data_Get_Struct(self, vector_t, v1);
|
95
|
+
Data_Get_Struct(obj, vector_t, v2);
|
96
|
+
Data_Get_Struct(result_obj, vector_t, rv);
|
97
|
+
rv->data = internal_allocate_vector_array(v1->len);
|
98
|
+
|
99
|
+
align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
|
100
|
+
|
101
|
+
length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
|
102
|
+
rv->len = v1->len;
|
103
|
+
|
104
|
+
switch(align)
|
105
|
+
{
|
106
|
+
case 0: /* Same size arrays */
|
107
|
+
for(i = 0; i < length; i++)
|
108
|
+
{
|
109
|
+
func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
|
110
|
+
}
|
111
|
+
break;
|
112
|
+
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
113
|
+
for(i = 0; i < length; i++)
|
114
|
+
{
|
115
|
+
func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
|
116
|
+
}
|
117
|
+
break;
|
118
|
+
default: /* Self is a multiple of operand's length long */
|
119
|
+
for(j = 0; j < v2->len; j++)
|
120
|
+
{
|
121
|
+
data = v2->data + XMM_BYTES * j;
|
122
|
+
for(i = j; i < length; i+=v2->len)
|
123
|
+
{
|
124
|
+
func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
|
125
|
+
}
|
126
|
+
}
|
127
|
+
}
|
128
|
+
internal_sanitize_unaligned_final_vector(rv, size);
|
129
|
+
|
130
|
+
return(result_obj);
|
131
|
+
}
|
132
|
+
#pragma GCC diagnostic pop
|
133
|
+
|
134
|
+
/* Internal: Make sure that no null bytes exist beyond the boundary of
|
135
|
+
* unaligned vectors. This function should be called after any operation that
|
136
|
+
* results in the mutation or creation of a vector array.
|
137
|
+
*
|
138
|
+
* Since arithmetic is purposefully performed against void pointers, disable
|
139
|
+
* warnings regarding this for the current function. */
|
140
|
+
#pragma GCC diagnostic push
|
141
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
142
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
|
143
|
+
{
|
144
|
+
unsigned long long int i;
|
145
|
+
|
146
|
+
if((rv->len * size) % XMM_BYTES)
|
147
|
+
{
|
148
|
+
for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
|
149
|
+
{
|
150
|
+
*(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
#pragma GCC diagnostic pop
|
data/ext/simd/simd_common.h
CHANGED
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
|
|
8
8
|
|
9
9
|
VALUE method_length(VALUE self);
|
10
10
|
|
11
|
-
void *internal_allocate_vector_array(unsigned long
|
12
|
-
int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
|
11
|
+
void *internal_allocate_vector_array(unsigned long long int count);
|
12
|
+
int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo);
|
13
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
|
14
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
|
data/ext/simd/simd_floatarray.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "simd_floatarray.h"
|
2
|
+
#include "simd_longarray.h"
|
2
3
|
|
3
4
|
VALUE SIMD_FloatArray = Qnil;
|
4
5
|
|
@@ -12,6 +13,13 @@ void Init_SIMD_FloatArray(VALUE parent)
|
|
12
13
|
rb_define_method(SIMD_FloatArray, "/", method_divide, 1);
|
13
14
|
rb_define_method(SIMD_FloatArray, "+", method_add, 1);
|
14
15
|
rb_define_method(SIMD_FloatArray, "-", method_subtract, 1);
|
16
|
+
rb_define_method(SIMD_FloatArray, "&", method_and, 1);
|
17
|
+
rb_define_method(SIMD_FloatArray, "|", method_or, 1);
|
18
|
+
rb_define_method(SIMD_FloatArray, "^", method_xor, 1);
|
19
|
+
rb_define_method(SIMD_FloatArray, "gt", method_gt, 1);
|
20
|
+
rb_define_method(SIMD_FloatArray, "lt", method_lt, 1);
|
21
|
+
rb_define_method(SIMD_FloatArray, ">", method_gt, 1);
|
22
|
+
rb_define_method(SIMD_FloatArray, "<", method_lt, 1);
|
15
23
|
rb_define_method(SIMD_FloatArray, "length", method_length, 0);
|
16
24
|
rb_define_method(SIMD_FloatArray, "to_a", method_to_a, 0);
|
17
25
|
}
|
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
30
|
{
|
23
31
|
vector_t *vector;
|
24
32
|
d2v_t *data;
|
25
|
-
unsigned long n,
|
33
|
+
unsigned long long int n,i;
|
26
34
|
|
27
35
|
Check_Type(rb_array, T_ARRAY);
|
28
36
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
42
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
43
|
}
|
36
44
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
45
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
46
|
|
39
47
|
data = (d2v_t *)vector->data;
|
40
48
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
50
|
data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
|
43
51
|
}
|
44
52
|
|
45
|
-
|
46
|
-
m = n + (n % 2);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/2].f[1] = 1.0;
|
50
|
-
}
|
53
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(double));
|
51
54
|
|
52
55
|
return(self);
|
53
56
|
}
|
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
59
|
* another FloatArray object, returning a new FloatArray. */
|
57
60
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
61
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
62
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
|
60
63
|
}
|
61
64
|
|
62
65
|
/* Public: Divide values contained in the data array by those contained in
|
63
66
|
* another FloatArray object, returning a new FloatArray. */
|
64
67
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
68
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
69
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
|
67
70
|
}
|
68
71
|
|
69
72
|
/* Public: add values contained in the data array with those contained in
|
70
73
|
* another FloatArray object, returning a new FloatArray. */
|
71
74
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
75
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
76
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
|
74
77
|
}
|
75
78
|
|
76
79
|
/* Public: Subtract values contained in another FloatArray object from those
|
77
80
|
* contained in the current data array object, returning a new FloatArray. */
|
78
81
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
82
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
83
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
|
84
|
+
}
|
85
|
+
|
86
|
+
/* Public: and values contained in the data array with those contained in
|
87
|
+
* another FloatArray object, returning a new FloatArray. */
|
88
|
+
static VALUE method_and(VALUE self, VALUE obj)
|
89
|
+
{
|
90
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_and));
|
91
|
+
}
|
92
|
+
|
93
|
+
/* Public: or values contained in the data array with those contained in
|
94
|
+
* another FloatArray object, returning a new FloatArray. */
|
95
|
+
static VALUE method_or(VALUE self, VALUE obj)
|
96
|
+
{
|
97
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_or));
|
98
|
+
}
|
99
|
+
|
100
|
+
/* Public: xor values contained in the data array with those contained in
|
101
|
+
* another FloatArray object, returning a new FloatArray. */
|
102
|
+
static VALUE method_xor(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_xor));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another FloatArray object, return a new LongArray with each element being
|
109
|
+
* -1 if the data array's value is greater, and 0 otherwise. */
|
110
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_gt));
|
113
|
+
}
|
114
|
+
|
115
|
+
/* Public: Compare values contained in the data array with those contained in
|
116
|
+
* another FloatArray object, return a new LongArray with each element being
|
117
|
+
* -1 if the data array's value is less, and 0 otherwise. */
|
118
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
119
|
+
{
|
120
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_lt));
|
81
121
|
}
|
82
122
|
|
83
123
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
84
124
|
static VALUE method_to_a(VALUE self)
|
85
125
|
{
|
86
|
-
unsigned long i;
|
126
|
+
unsigned long long int i;
|
87
127
|
vector_t *vector;
|
88
128
|
d2v_t *data;
|
89
129
|
VALUE rb_array = rb_ary_new();
|
@@ -98,61 +138,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
138
|
return(rb_array);
|
99
139
|
}
|
100
140
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
d2v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_FloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 1) / 2;
|
120
|
-
|
121
|
-
d1 = (d2v_t *)v1->data;
|
122
|
-
d2 = (d2v_t *)v2->data;
|
123
|
-
r = (d2v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 2 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 2))
|
149
|
-
{
|
150
|
-
r[size].f[1] = 1;
|
151
|
-
}
|
152
|
-
|
153
|
-
return(result_obj);
|
154
|
-
}
|
155
|
-
|
156
141
|
/* Function: Multiply two vectors. */
|
157
142
|
static void func_multiply(void *v1, void *v2, void *r)
|
158
143
|
{
|
@@ -176,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
|
|
176
161
|
{
|
177
162
|
*(d2v *)r = *(d2v *)v1 - *(d2v *)v2;
|
178
163
|
}
|
164
|
+
|
165
|
+
/* Function: Perform a binary AND on two vectors. */
|
166
|
+
static void func_and(void *v1, void *v2, void *r)
|
167
|
+
{
|
168
|
+
*(l2v *)r = *(l2v *)v1 & *(l2v *)v2;
|
169
|
+
}
|
170
|
+
|
171
|
+
/* Function: Perform a binary OR on two vectors. */
|
172
|
+
static void func_or(void *v1, void *v2, void *r)
|
173
|
+
{
|
174
|
+
*(l2v *)r = *(l2v *)v1 | *(l2v *)v2;
|
175
|
+
}
|
176
|
+
|
177
|
+
/* Function: Perform a binary XOR on two vectors. */
|
178
|
+
static void func_xor(void *v1, void *v2, void *r)
|
179
|
+
{
|
180
|
+
*(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
|
181
|
+
}
|
182
|
+
|
183
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
184
|
+
static void func_gt(void *v1, void *v2, void *r)
|
185
|
+
{
|
186
|
+
*(l2v *)r = (*(d2v *)v1 > *(d2v *)v2);
|
187
|
+
}
|
188
|
+
|
189
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
190
|
+
static void func_lt(void *v1, void *v2, void *r)
|
191
|
+
{
|
192
|
+
*(l2v *)r = (*(d2v *)v1 < *(d2v *)v2);
|
193
|
+
}
|
data/ext/simd/simd_floatarray.h
CHANGED
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
|
|
6
6
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
7
|
static VALUE method_add(VALUE self, VALUE obj);
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
|
+
static VALUE method_and(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_or(VALUE self, VALUE obj);
|
11
|
+
static VALUE method_xor(VALUE self, VALUE obj);
|
12
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
13
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
9
14
|
static VALUE method_to_a(VALUE self);
|
10
15
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
16
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
17
|
static void func_divide(void *v1, void *v2, void *r);
|
15
18
|
static void func_add(void *v1, void *v2, void *r);
|
16
19
|
static void func_subtract(void *v1, void *v2, void *r);
|
20
|
+
static void func_and(void *v1, void *v2, void *r);
|
21
|
+
static void func_or(void *v1, void *v2, void *r);
|
22
|
+
static void func_xor(void *v1, void *v2, void *r);
|
23
|
+
static void func_gt(void *v1, void *v2, void *r);
|
24
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_intarray.c
CHANGED
@@ -15,6 +15,10 @@ void Init_SIMD_IntArray(VALUE parent)
|
|
15
15
|
rb_define_method(SIMD_IntArray, "&", method_and, 1);
|
16
16
|
rb_define_method(SIMD_IntArray, "|", method_or, 1);
|
17
17
|
rb_define_method(SIMD_IntArray, "^", method_xor, 1);
|
18
|
+
rb_define_method(SIMD_IntArray, "gt", method_gt, 1);
|
19
|
+
rb_define_method(SIMD_IntArray, "lt", method_lt, 1);
|
20
|
+
rb_define_method(SIMD_IntArray, ">", method_gt, 1);
|
21
|
+
rb_define_method(SIMD_IntArray, "<", method_lt, 1);
|
18
22
|
rb_define_method(SIMD_IntArray, "length", method_length, 0);
|
19
23
|
rb_define_method(SIMD_IntArray, "to_a", method_to_a, 0);
|
20
24
|
}
|
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
29
|
{
|
26
30
|
vector_t *vector;
|
27
31
|
i4v_t *data;
|
28
|
-
unsigned long n,
|
32
|
+
unsigned long long int n,i;
|
29
33
|
|
30
34
|
Check_Type(rb_array, T_ARRAY);
|
31
35
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
41
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
38
42
|
}
|
39
43
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
44
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
45
|
|
42
46
|
data = (i4v_t *)vector->data;
|
43
47
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
49
|
data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
|
46
50
|
}
|
47
51
|
|
48
|
-
|
49
|
-
m = n + (n % 4);
|
50
|
-
for(i = n % 4; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/4].f[i] = 1.0;
|
53
|
-
}
|
52
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(int));
|
54
53
|
|
55
54
|
return(self);
|
56
55
|
}
|
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
58
|
* another IntArray object, returning a new IntArray. */
|
60
59
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
60
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
|
63
62
|
}
|
64
63
|
|
65
64
|
/* Public: Divide values contained in the data array by those contained in
|
66
65
|
* another IntArray object, returning a new IntArray. */
|
67
66
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
67
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
|
70
69
|
}
|
71
70
|
|
72
71
|
/* Public: add values contained in the data array with those contained in
|
73
72
|
* another IntArray object, returning a new IntArray. */
|
74
73
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
74
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
|
77
76
|
}
|
78
77
|
|
79
78
|
/* Public: and values contained in the data array with those contained in
|
80
79
|
* another IntArray object, returning a new IntArray. */
|
81
80
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
81
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
82
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
|
84
83
|
}
|
85
84
|
|
86
85
|
/* Public: or values contained in the data array with those contained in
|
87
86
|
* another IntArray object, returning a new IntArray. */
|
88
87
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
88
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
89
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
|
91
90
|
}
|
92
91
|
|
93
92
|
/* Public: xor values contained in the data array with those contained in
|
94
93
|
* another IntArray object, returning a new IntArray. */
|
95
94
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
95
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
96
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
|
97
|
+
}
|
98
|
+
|
99
|
+
/* Public: Compare values contained in the data array with those contained in
|
100
|
+
* another IntArray object, return a new IntArray with each element being -1
|
101
|
+
* if the data array's value is greater, and 0 otherwise. */
|
102
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_gt));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another IntArray object, return a new IntArray with each element being -1 if
|
109
|
+
* the data array's value is less, and 0 otherwise. */
|
110
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_lt));
|
98
113
|
}
|
99
114
|
|
100
115
|
/* Public: Subtract values contained in another IntArray object from those
|
101
116
|
* contained in the current data array object, returning a new IntArray. */
|
102
117
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
118
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
119
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
|
105
120
|
}
|
106
121
|
|
107
122
|
/* Public: Return a Ruby Array containing the integers within the data array. */
|
108
123
|
static VALUE method_to_a(VALUE self)
|
109
124
|
{
|
110
|
-
unsigned long i;
|
125
|
+
unsigned long long int i;
|
111
126
|
vector_t *vector;
|
112
127
|
i4v_t *data;
|
113
128
|
VALUE rb_array = rb_ary_new();
|
@@ -122,64 +137,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
137
|
return(rb_array);
|
123
138
|
}
|
124
139
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
i4v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_IntArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 3) / 4;
|
144
|
-
|
145
|
-
d1 = (i4v_t *)v1->data;
|
146
|
-
d2 = (i4v_t *)v2->data;
|
147
|
-
r = (i4v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 4))
|
173
|
-
{
|
174
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
175
|
-
{
|
176
|
-
r[size].f[i] = 1;
|
177
|
-
}
|
178
|
-
}
|
179
|
-
|
180
|
-
return(result_obj);
|
181
|
-
}
|
182
|
-
|
183
140
|
/* Function: Multiply two vectors. */
|
184
141
|
static void func_multiply(void *v1, void *v2, void *r)
|
185
142
|
{
|
@@ -221,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
|
|
221
178
|
{
|
222
179
|
*(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
|
223
180
|
}
|
181
|
+
|
182
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
183
|
+
static void func_gt(void *v1, void *v2, void *r)
|
184
|
+
{
|
185
|
+
*(i4v *)r = (*(i4v *)v1 > *(i4v *)v2);
|
186
|
+
}
|
187
|
+
|
188
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
189
|
+
static void func_lt(void *v1, void *v2, void *r)
|
190
|
+
{
|
191
|
+
*(i4v *)r = (*(i4v *)v1 < *(i4v *)v2);
|
192
|
+
}
|
data/ext/simd/simd_intarray.h
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
#include "ruby.h"
|
2
2
|
#include "simd_common.h"
|
3
3
|
|
4
|
+
extern VALUE SIMD_IntArray;
|
5
|
+
|
4
6
|
static VALUE method_initialize(VALUE self, VALUE rb_array);
|
5
7
|
static VALUE method_multiply(VALUE self, VALUE obj);
|
6
8
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
9
|
static VALUE method_add(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_subtract(VALUE self, VALUE obj);
|
8
11
|
static VALUE method_and(VALUE self, VALUE obj);
|
9
12
|
static VALUE method_or(VALUE self, VALUE obj);
|
10
13
|
static VALUE method_xor(VALUE self, VALUE obj);
|
11
|
-
static VALUE
|
14
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
15
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
12
16
|
static VALUE method_to_a(VALUE self);
|
13
17
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
18
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
19
|
static void func_divide(void *v1, void *v2, void *r);
|
18
20
|
static void func_add(void *v1, void *v2, void *r);
|
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
|
|
20
22
|
static void func_and(void *v1, void *v2, void *r);
|
21
23
|
static void func_or(void *v1, void *v2, void *r);
|
22
24
|
static void func_xor(void *v1, void *v2, void *r);
|
25
|
+
static void func_gt(void *v1, void *v2, void *r);
|
26
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_longarray.c
CHANGED
@@ -15,6 +15,10 @@ void Init_SIMD_LongArray(VALUE parent)
|
|
15
15
|
rb_define_method(SIMD_LongArray, "&", method_and, 1);
|
16
16
|
rb_define_method(SIMD_LongArray, "|", method_or, 1);
|
17
17
|
rb_define_method(SIMD_LongArray, "^", method_xor, 1);
|
18
|
+
rb_define_method(SIMD_LongArray, "gt", method_gt, 1);
|
19
|
+
rb_define_method(SIMD_LongArray, "lt", method_lt, 1);
|
20
|
+
rb_define_method(SIMD_LongArray, ">", method_gt, 1);
|
21
|
+
rb_define_method(SIMD_LongArray, "<", method_lt, 1);
|
18
22
|
rb_define_method(SIMD_LongArray, "length", method_length, 0);
|
19
23
|
rb_define_method(SIMD_LongArray, "to_a", method_to_a, 0);
|
20
24
|
}
|
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
29
|
{
|
26
30
|
vector_t *vector;
|
27
31
|
l2v_t *data;
|
28
|
-
unsigned long n,
|
32
|
+
unsigned long long int n,i;
|
29
33
|
|
30
34
|
Check_Type(rb_array, T_ARRAY);
|
31
35
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
41
|
rb_raise(rb_eArgError, "Vectors must be at least 2 long");
|
38
42
|
}
|
39
43
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
44
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
45
|
|
42
46
|
data = (l2v_t *)vector->data;
|
43
47
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
49
|
data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
|
46
50
|
}
|
47
51
|
|
48
|
-
|
49
|
-
m = n + (n % 2);
|
50
|
-
for(i = n % 2; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/2].f[i] = 1;
|
53
|
-
}
|
52
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(long long int));
|
54
53
|
|
55
54
|
return(self);
|
56
55
|
}
|
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
58
|
* another LongArray object, returning a new LongArray. */
|
60
59
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
60
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_multiply));
|
63
62
|
}
|
64
63
|
|
65
64
|
/* Public: Divide values contained in the data array by those contained in
|
66
65
|
* another LongArray object, returning a new LongArray. */
|
67
66
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
67
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_divide));
|
70
69
|
}
|
71
70
|
|
72
71
|
/* Public: add values contained in the data array with those contained in
|
73
72
|
* another LongArray object, returning a new LongArray. */
|
74
73
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
74
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_add));
|
77
76
|
}
|
78
77
|
|
79
78
|
/* Public: and values contained in the data array with those contained in
|
80
79
|
* another LongArray object, returning a new LongArray. */
|
81
80
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
81
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
82
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_and));
|
84
83
|
}
|
85
84
|
|
86
85
|
/* Public: or values contained in the data array with those contained in
|
87
86
|
* another LongArray object, returning a new LongArray. */
|
88
87
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
88
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
89
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_or));
|
91
90
|
}
|
92
91
|
|
93
92
|
/* Public: xor values contained in the data array with those contained in
|
94
93
|
* another LongArray object, returning a new LongArray. */
|
95
94
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
95
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
96
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_xor));
|
97
|
+
}
|
98
|
+
|
99
|
+
/* Public: Compare values contained in the data array with those contained in
|
100
|
+
* another LongArray object, return a new LongArray with each element being -1
|
101
|
+
* if the data array's value is greater, and 0 otherwise. */
|
102
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_gt));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another LongArray object, return a new LongArray with each element being -1 if
|
109
|
+
* the data array's value is less, and 0 otherwise. */
|
110
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_lt));
|
98
113
|
}
|
99
114
|
|
100
115
|
/* Public: Subtract values contained in another LongArray object from those
|
101
116
|
* contained in the current data array object, returning a new LongArray. */
|
102
117
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
118
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
119
|
+
return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_subtract));
|
105
120
|
}
|
106
121
|
|
107
122
|
/* Public: Return a Ruby Array containing the long integers within the data array. */
|
108
123
|
static VALUE method_to_a(VALUE self)
|
109
124
|
{
|
110
|
-
unsigned long i;
|
125
|
+
unsigned long long int i;
|
111
126
|
vector_t *vector;
|
112
127
|
l2v_t *data;
|
113
128
|
VALUE rb_array = rb_ary_new();
|
@@ -122,61 +137,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
137
|
return(rb_array);
|
123
138
|
}
|
124
139
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
l2v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_LongArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 1) / 2;
|
144
|
-
|
145
|
-
d1 = (l2v_t *)v1->data;
|
146
|
-
d2 = (l2v_t *)v2->data;
|
147
|
-
r = (l2v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 2))
|
173
|
-
{
|
174
|
-
r[size].f[1] = 1;
|
175
|
-
}
|
176
|
-
|
177
|
-
return(result_obj);
|
178
|
-
}
|
179
|
-
|
180
140
|
/* Function: Multiply two vectors. */
|
181
141
|
static void func_multiply(void *v1, void *v2, void *r)
|
182
142
|
{
|
@@ -218,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
|
|
218
178
|
{
|
219
179
|
*(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
|
220
180
|
}
|
181
|
+
|
182
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
183
|
+
static void func_gt(void *v1, void *v2, void *r)
|
184
|
+
{
|
185
|
+
*(l2v *)r = (*(l2v *)v1 > *(l2v *)v2);
|
186
|
+
}
|
187
|
+
|
188
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
189
|
+
static void func_lt(void *v1, void *v2, void *r)
|
190
|
+
{
|
191
|
+
*(l2v *)r = (*(l2v *)v1 < *(l2v *)v2);
|
192
|
+
}
|
data/ext/simd/simd_longarray.h
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
#include "ruby.h"
|
2
2
|
#include "simd_common.h"
|
3
3
|
|
4
|
+
extern VALUE SIMD_LongArray;
|
5
|
+
|
4
6
|
static VALUE method_initialize(VALUE self, VALUE rb_array);
|
5
7
|
static VALUE method_multiply(VALUE self, VALUE obj);
|
6
8
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
9
|
static VALUE method_add(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_subtract(VALUE self, VALUE obj);
|
8
11
|
static VALUE method_and(VALUE self, VALUE obj);
|
9
12
|
static VALUE method_or(VALUE self, VALUE obj);
|
10
13
|
static VALUE method_xor(VALUE self, VALUE obj);
|
11
|
-
static VALUE
|
14
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
15
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
12
16
|
static VALUE method_to_a(VALUE self);
|
13
17
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
18
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
19
|
static void func_divide(void *v1, void *v2, void *r);
|
18
20
|
static void func_add(void *v1, void *v2, void *r);
|
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
|
|
20
22
|
static void func_and(void *v1, void *v2, void *r);
|
21
23
|
static void func_or(void *v1, void *v2, void *r);
|
22
24
|
static void func_xor(void *v1, void *v2, void *r);
|
25
|
+
static void func_gt(void *v1, void *v2, void *r);
|
26
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_smallfloatarray.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "simd_smallfloatarray.h"
|
2
|
+
#include "simd_intarray.h"
|
2
3
|
|
3
4
|
VALUE SIMD_SmallFloatArray = Qnil;
|
4
5
|
|
@@ -12,6 +13,13 @@ void Init_SIMD_SmallFloatArray(VALUE parent)
|
|
12
13
|
rb_define_method(SIMD_SmallFloatArray, "/", method_divide, 1);
|
13
14
|
rb_define_method(SIMD_SmallFloatArray, "+", method_add, 1);
|
14
15
|
rb_define_method(SIMD_SmallFloatArray, "-", method_subtract, 1);
|
16
|
+
rb_define_method(SIMD_SmallFloatArray, "&", method_and, 1);
|
17
|
+
rb_define_method(SIMD_SmallFloatArray, "|", method_or, 1);
|
18
|
+
rb_define_method(SIMD_SmallFloatArray, "^", method_xor, 1);
|
19
|
+
rb_define_method(SIMD_SmallFloatArray, "gt", method_gt, 1);
|
20
|
+
rb_define_method(SIMD_SmallFloatArray, "lt", method_lt, 1);
|
21
|
+
rb_define_method(SIMD_SmallFloatArray, ">", method_gt, 1);
|
22
|
+
rb_define_method(SIMD_SmallFloatArray, "<", method_lt, 1);
|
15
23
|
rb_define_method(SIMD_SmallFloatArray, "length", method_length, 0);
|
16
24
|
rb_define_method(SIMD_SmallFloatArray, "to_a", method_to_a, 0);
|
17
25
|
}
|
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
30
|
{
|
23
31
|
vector_t *vector;
|
24
32
|
f4v_t *data;
|
25
|
-
unsigned long n,
|
33
|
+
unsigned long long int n,i;
|
26
34
|
|
27
35
|
Check_Type(rb_array, T_ARRAY);
|
28
36
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
42
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
43
|
}
|
36
44
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
45
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
46
|
|
39
47
|
data = (f4v_t *)vector->data;
|
40
48
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
50
|
data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
|
43
51
|
}
|
44
52
|
|
45
|
-
|
46
|
-
m = n + (n % 4);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/4].f[i] = 1.0;
|
50
|
-
}
|
53
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(float));
|
51
54
|
|
52
55
|
return(self);
|
53
56
|
}
|
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
59
|
* another FloatArray object, returning a new FloatArray. */
|
57
60
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
61
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
62
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
|
60
63
|
}
|
61
64
|
|
62
65
|
/* Public: Divide values contained in the data array by those contained in
|
63
66
|
* another FloatArray object, returning a new FloatArray. */
|
64
67
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
68
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
69
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
|
67
70
|
}
|
68
71
|
|
69
72
|
/* Public: add values contained in the data array with those contained in
|
70
73
|
* another FloatArray object, returning a new FloatArray. */
|
71
74
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
75
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
76
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
|
74
77
|
}
|
75
78
|
|
76
79
|
/* Public: Subtract values contained in another FloatArray object from those
|
77
80
|
* contained in the current data array object, returning a new FloatArray. */
|
78
81
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
82
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
83
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
|
84
|
+
}
|
85
|
+
|
86
|
+
/* Public: and values contained in the data array with those contained in
|
87
|
+
* another FloatArray object, returning a new FloatArray. */
|
88
|
+
static VALUE method_and(VALUE self, VALUE obj)
|
89
|
+
{
|
90
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_and));
|
91
|
+
}
|
92
|
+
|
93
|
+
/* Public: or values contained in the data array with those contained in
|
94
|
+
* another FloatArray object, returning a new FloatArray. */
|
95
|
+
static VALUE method_or(VALUE self, VALUE obj)
|
96
|
+
{
|
97
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_or));
|
98
|
+
}
|
99
|
+
|
100
|
+
/* Public: xor values contained in the data array with those contained in
|
101
|
+
* another FloatArray object, returning a new FloatArray. */
|
102
|
+
static VALUE method_xor(VALUE self, VALUE obj)
|
103
|
+
{
|
104
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_xor));
|
105
|
+
}
|
106
|
+
|
107
|
+
/* Public: Compare values contained in the data array with those contained in
|
108
|
+
* another SmallFloatArray object, return a new IntArray with each element being
|
109
|
+
* -1 if the data array's value is greater, and 0 otherwise. */
|
110
|
+
static VALUE method_gt(VALUE self, VALUE obj)
|
111
|
+
{
|
112
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_gt));
|
113
|
+
}
|
114
|
+
|
115
|
+
/* Public: Compare values contained in the data array with those contained in
|
116
|
+
* another SmallFloatArray object, return a new IntArray with each element being
|
117
|
+
* -1 if the data array's value is less, and 0 otherwise. */
|
118
|
+
static VALUE method_lt(VALUE self, VALUE obj)
|
119
|
+
{
|
120
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_lt));
|
81
121
|
}
|
82
122
|
|
83
123
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
84
124
|
static VALUE method_to_a(VALUE self)
|
85
125
|
{
|
86
|
-
unsigned long i;
|
126
|
+
unsigned long long int i;
|
87
127
|
vector_t *vector;
|
88
128
|
f4v_t *data;
|
89
129
|
VALUE rb_array = rb_ary_new();
|
@@ -98,64 +138,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
138
|
return(rb_array);
|
99
139
|
}
|
100
140
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
f4v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_SmallFloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 3) / 4;
|
120
|
-
|
121
|
-
d1 = (f4v_t *)v1->data;
|
122
|
-
d2 = (f4v_t *)v2->data;
|
123
|
-
r = (f4v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 4))
|
149
|
-
{
|
150
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
151
|
-
{
|
152
|
-
r[size].f[i] = 1;
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
|
-
return(result_obj);
|
157
|
-
}
|
158
|
-
|
159
141
|
/* Function: Multiply two vectors. */
|
160
142
|
static void func_multiply(void *v1, void *v2, void *r)
|
161
143
|
{
|
@@ -179,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
|
|
179
161
|
{
|
180
162
|
*(f4v *)r = *(f4v *)v1 - *(f4v *)v2;
|
181
163
|
}
|
164
|
+
|
165
|
+
/* Function: Perform a binary AND on two vectors. */
|
166
|
+
static void func_and(void *v1, void *v2, void *r)
|
167
|
+
{
|
168
|
+
*(i4v *)r = *(i4v *)v1 & *(i4v *)v2;
|
169
|
+
}
|
170
|
+
|
171
|
+
/* Function: Perform a binary OR on two vectors. */
|
172
|
+
static void func_or(void *v1, void *v2, void *r)
|
173
|
+
{
|
174
|
+
*(i4v *)r = *(i4v *)v1 | *(i4v *)v2;
|
175
|
+
}
|
176
|
+
|
177
|
+
/* Function: Perform a binary XOR on two vectors. */
|
178
|
+
static void func_xor(void *v1, void *v2, void *r)
|
179
|
+
{
|
180
|
+
*(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
|
181
|
+
}
|
182
|
+
|
183
|
+
/* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
|
184
|
+
static void func_gt(void *v1, void *v2, void *r)
|
185
|
+
{
|
186
|
+
*(i4v *)r = (*(f4v *)v1 > *(f4v *)v2);
|
187
|
+
}
|
188
|
+
|
189
|
+
/* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
|
190
|
+
static void func_lt(void *v1, void *v2, void *r)
|
191
|
+
{
|
192
|
+
*(i4v *)r = (*(f4v *)v1 < *(f4v *)v2);
|
193
|
+
}
|
data/ext/simd/simd_smallfloatarray.h
CHANGED
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
|
|
6
6
|
static VALUE method_divide(VALUE self, VALUE obj);
|
7
7
|
static VALUE method_add(VALUE self, VALUE obj);
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
|
+
static VALUE method_and(VALUE self, VALUE obj);
|
10
|
+
static VALUE method_or(VALUE self, VALUE obj);
|
11
|
+
static VALUE method_xor(VALUE self, VALUE obj);
|
12
|
+
static VALUE method_gt(VALUE self, VALUE obj);
|
13
|
+
static VALUE method_lt(VALUE self, VALUE obj);
|
9
14
|
static VALUE method_to_a(VALUE self);
|
10
15
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
16
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
17
|
static void func_divide(void *v1, void *v2, void *r);
|
15
18
|
static void func_add(void *v1, void *v2, void *r);
|
16
19
|
static void func_subtract(void *v1, void *v2, void *r);
|
20
|
+
static void func_and(void *v1, void *v2, void *r);
|
21
|
+
static void func_or(void *v1, void *v2, void *r);
|
22
|
+
static void func_xor(void *v1, void *v2, void *r);
|
23
|
+
static void func_gt(void *v1, void *v2, void *r);
|
24
|
+
static void func_lt(void *v1, void *v2, void *r);
|
data/ext/simd/simd_types.h
CHANGED
@@ -40,17 +40,17 @@ typedef union i4v_t
|
|
40
40
|
/*
|
41
41
|
* Types for LongArray
|
42
42
|
*/
|
43
|
-
typedef long int __attribute__ ((vector_size (16))) l2v;
|
43
|
+
typedef long long int __attribute__ ((vector_size (16))) l2v;
|
44
44
|
typedef union l2v_t
|
45
45
|
{
|
46
46
|
l2v v;
|
47
|
-
long int f[2];
|
47
|
+
long long int f[2];
|
48
48
|
} l2v_t;
|
49
49
|
|
50
50
|
typedef struct vector_t
|
51
51
|
{
|
52
52
|
void *data;
|
53
|
-
unsigned long len;
|
53
|
+
unsigned long long int len;
|
54
54
|
} vector_t;
|
55
55
|
|
56
56
|
typedef void (b_operation)(void *v1, void *v2, void *r);
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tina Wuest
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.2'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1.2'
|
27
27
|
description: Access to SIMD (Single Instruction Multiple Data) instructions in Ruby
|
28
28
|
email: tina@wuest.me
|
29
29
|
executables: []
|
@@ -48,7 +48,7 @@ files:
|
|
48
48
|
homepage: https://gitlab.com/wuest/simd-ruby
|
49
49
|
licenses: []
|
50
50
|
metadata: {}
|
51
|
-
post_install_message:
|
51
|
+
post_install_message:
|
52
52
|
rdoc_options: []
|
53
53
|
require_paths:
|
54
54
|
- lib
|
@@ -63,9 +63,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
63
|
- !ruby/object:Gem::Version
|
64
64
|
version: '0'
|
65
65
|
requirements: []
|
66
|
-
|
67
|
-
|
68
|
-
signing_key:
|
66
|
+
rubygems_version: 3.3.7
|
67
|
+
signing_key:
|
69
68
|
specification_version: 4
|
70
69
|
summary: SIMD instructions in ruby
|
71
70
|
test_files: []
|