simd 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/simd/simd_common.c +81 -3
- data/ext/simd/simd_common.h +3 -1
- data/ext/simd/simd_floatarray.c +7 -67
- data/ext/simd/simd_floatarray.h +0 -2
- data/ext/simd/simd_intarray.c +10 -73
- data/ext/simd/simd_intarray.h +0 -2
- data/ext/simd/simd_longarray.c +10 -70
- data/ext/simd/simd_longarray.h +0 -2
- data/ext/simd/simd_smallfloatarray.c +7 -70
- data/ext/simd/simd_smallfloatarray.h +0 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91f6ffe0659b15461fbf3bfc7657bd72b8e0d6eb
|
4
|
+
data.tar.gz: 289421206f7343fed00e5fbcf214d0908e835882
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c2353151fa088f7ebcb535d095211e83517fd2436e2acee2b5d42e43f776d8df4931007e2a50c0f11026bed64c99be20d25f6e3598e52f10b30301e532420ea
|
7
|
+
data.tar.gz: 1e0ef0af7c81a176fc908163f6c7d68a09ae651a9d852fecfdf0185a3aed1d1732d4636670576964537f27c687f1c9765d214b75f8d15bcab8e89e1124bcf224
|
data/ext/simd/simd_common.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include "simd_common.h"
|
2
2
|
|
3
|
+
#define XMM_BYTES 16 /* Width of the xmm1,2... registers */
|
4
|
+
|
3
5
|
/* Internal: Allocate memory for the vector container. */
|
4
6
|
VALUE allocate(VALUE klass)
|
5
7
|
{
|
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
|
|
36
38
|
}
|
37
39
|
|
38
40
|
/* Internal: Allocate memory for the data array. */
|
39
|
-
void *internal_allocate_vector_array(unsigned long count
|
41
|
+
void *internal_allocate_vector_array(unsigned long count)
|
40
42
|
{
|
41
|
-
|
42
|
-
void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
|
43
|
+
void *vector = malloc((count + 1) * XMM_BYTES);
|
43
44
|
if(vector == NULL)
|
44
45
|
{
|
45
46
|
rb_raise(rb_eNoMemError, "Unable to allocate memory");
|
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
|
|
74
75
|
/* Never reached */
|
75
76
|
return(-1);
|
76
77
|
}
|
78
|
+
|
79
|
+
/* Internal: Given another object, perform an action specified via a function
|
80
|
+
* pointer against both.
|
81
|
+
*
|
82
|
+
* Since arithmetic is purposefully performed against void pointers, disable
|
83
|
+
* warnings regarding this for the current function. */
|
84
|
+
#pragma GCC diagnostic push
|
85
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
86
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
|
87
|
+
{
|
88
|
+
unsigned long length, i, j;
|
89
|
+
int align;
|
90
|
+
vector_t *v1, *v2, *rv;
|
91
|
+
void *data;
|
92
|
+
VALUE result_obj = allocate(klass);
|
93
|
+
|
94
|
+
Data_Get_Struct(self, vector_t, v1);
|
95
|
+
Data_Get_Struct(obj, vector_t, v2);
|
96
|
+
Data_Get_Struct(result_obj, vector_t, rv);
|
97
|
+
rv->data = internal_allocate_vector_array(v1->len);
|
98
|
+
|
99
|
+
align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
|
100
|
+
|
101
|
+
length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
|
102
|
+
rv->len = v1->len;
|
103
|
+
|
104
|
+
switch(align)
|
105
|
+
{
|
106
|
+
case 0: /* Same size arrays */
|
107
|
+
for(i = 0; i < length; i++)
|
108
|
+
{
|
109
|
+
func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
|
110
|
+
}
|
111
|
+
break;
|
112
|
+
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
113
|
+
for(i = 0; i < length; i++)
|
114
|
+
{
|
115
|
+
func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
|
116
|
+
}
|
117
|
+
break;
|
118
|
+
default: /* Self is a multiple of operand's length long */
|
119
|
+
for(j = 0; j < v2->len; j++)
|
120
|
+
{
|
121
|
+
data = v2->data + XMM_BYTES * j;
|
122
|
+
for(i = j; i < length; i+=v2->len)
|
123
|
+
{
|
124
|
+
func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
|
125
|
+
}
|
126
|
+
}
|
127
|
+
}
|
128
|
+
internal_sanitize_unaligned_final_vector(rv, size);
|
129
|
+
|
130
|
+
return(result_obj);
|
131
|
+
}
|
132
|
+
#pragma GCC diagnostic pop
|
133
|
+
|
134
|
+
/* Internal: Make sure that no null bytes exist beyond the boundary of
|
135
|
+
* unaligned vectors. This function should be called after any operation that
|
136
|
+
* results in the mutation or creation of a vector array.
|
137
|
+
*
|
138
|
+
* Since arithmetic is purposefully performed against void pointers, disable
|
139
|
+
* warnings regarding this for the current function. */
|
140
|
+
#pragma GCC diagnostic push
|
141
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
142
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
|
143
|
+
{
|
144
|
+
unsigned long i;
|
145
|
+
|
146
|
+
if((rv->len * size) % XMM_BYTES)
|
147
|
+
{
|
148
|
+
for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
|
149
|
+
{
|
150
|
+
*(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
#pragma GCC diagnostic pop
|
data/ext/simd/simd_common.h
CHANGED
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
|
|
8
8
|
|
9
9
|
VALUE method_length(VALUE self);
|
10
10
|
|
11
|
-
void *internal_allocate_vector_array(unsigned long count
|
11
|
+
void *internal_allocate_vector_array(unsigned long count);
|
12
12
|
int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
|
13
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
|
14
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
|
data/ext/simd/simd_floatarray.c
CHANGED
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
22
|
{
|
23
23
|
vector_t *vector;
|
24
24
|
d2v_t *data;
|
25
|
-
unsigned long n,
|
25
|
+
unsigned long n,i;
|
26
26
|
|
27
27
|
Check_Type(rb_array, T_ARRAY);
|
28
28
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
34
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
35
|
}
|
36
36
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
37
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
38
|
|
39
39
|
data = (d2v_t *)vector->data;
|
40
40
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
42
|
data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
|
43
43
|
}
|
44
44
|
|
45
|
-
|
46
|
-
m = n + (n % 2);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/2].f[1] = 1.0;
|
50
|
-
}
|
45
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(double));
|
51
46
|
|
52
47
|
return(self);
|
53
48
|
}
|
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
51
|
* another FloatArray object, returning a new FloatArray. */
|
57
52
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
53
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
54
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
|
60
55
|
}
|
61
56
|
|
62
57
|
/* Public: Divide values contained in the data array by those contained in
|
63
58
|
* another FloatArray object, returning a new FloatArray. */
|
64
59
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
60
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
|
67
62
|
}
|
68
63
|
|
69
64
|
/* Public: add values contained in the data array with those contained in
|
70
65
|
* another FloatArray object, returning a new FloatArray. */
|
71
66
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
67
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
|
74
69
|
}
|
75
70
|
|
76
71
|
/* Public: Subtract values contained in another FloatArray object from those
|
77
72
|
* contained in the current data array object, returning a new FloatArray. */
|
78
73
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
74
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
|
81
76
|
}
|
82
77
|
|
83
78
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -98,61 +93,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
93
|
return(rb_array);
|
99
94
|
}
|
100
95
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
d2v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_FloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 1) / 2;
|
120
|
-
|
121
|
-
d1 = (d2v_t *)v1->data;
|
122
|
-
d2 = (d2v_t *)v2->data;
|
123
|
-
r = (d2v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 2 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 2))
|
149
|
-
{
|
150
|
-
r[size].f[1] = 1;
|
151
|
-
}
|
152
|
-
|
153
|
-
return(result_obj);
|
154
|
-
}
|
155
|
-
|
156
96
|
/* Function: Multiply two vectors. */
|
157
97
|
static void func_multiply(void *v1, void *v2, void *r)
|
158
98
|
{
|
data/ext/simd/simd_floatarray.h
CHANGED
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
|
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
9
|
static VALUE method_to_a(VALUE self);
|
10
10
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
11
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
12
|
static void func_divide(void *v1, void *v2, void *r);
|
15
13
|
static void func_add(void *v1, void *v2, void *r);
|
data/ext/simd/simd_intarray.c
CHANGED
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
25
|
{
|
26
26
|
vector_t *vector;
|
27
27
|
i4v_t *data;
|
28
|
-
unsigned long n,
|
28
|
+
unsigned long n,i;
|
29
29
|
|
30
30
|
Check_Type(rb_array, T_ARRAY);
|
31
31
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
37
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
38
38
|
}
|
39
39
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
40
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
41
|
|
42
42
|
data = (i4v_t *)vector->data;
|
43
43
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
45
|
data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
|
46
46
|
}
|
47
47
|
|
48
|
-
|
49
|
-
m = n + (n % 4);
|
50
|
-
for(i = n % 4; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/4].f[i] = 1.0;
|
53
|
-
}
|
48
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(int));
|
54
49
|
|
55
50
|
return(self);
|
56
51
|
}
|
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
54
|
* another FloatArray object, returning a new FloatArray. */
|
60
55
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
56
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
57
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
|
63
58
|
}
|
64
59
|
|
65
60
|
/* Public: Divide values contained in the data array by those contained in
|
66
61
|
* another FloatArray object, returning a new FloatArray. */
|
67
62
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
63
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
64
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
|
70
65
|
}
|
71
66
|
|
72
67
|
/* Public: add values contained in the data array with those contained in
|
73
68
|
* another FloatArray object, returning a new FloatArray. */
|
74
69
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
70
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
71
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
|
77
72
|
}
|
78
73
|
|
79
74
|
/* Public: and values contained in the data array with those contained in
|
80
75
|
* another FloatArray object, returning a new FloatArray. */
|
81
76
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
77
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
78
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
|
84
79
|
}
|
85
80
|
|
86
81
|
/* Public: or values contained in the data array with those contained in
|
87
82
|
* another FloatArray object, returning a new FloatArray. */
|
88
83
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
84
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
85
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
|
91
86
|
}
|
92
87
|
|
93
88
|
/* Public: xor values contained in the data array with those contained in
|
94
89
|
* another FloatArray object, returning a new FloatArray. */
|
95
90
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
91
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
92
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
|
98
93
|
}
|
99
94
|
|
100
95
|
/* Public: Subtract values contained in another FloatArray object from those
|
101
96
|
* contained in the current data array object, returning a new FloatArray. */
|
102
97
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
98
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
99
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
|
105
100
|
}
|
106
101
|
|
107
102
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -122,64 +117,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
117
|
return(rb_array);
|
123
118
|
}
|
124
119
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
i4v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_IntArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 3) / 4;
|
144
|
-
|
145
|
-
d1 = (i4v_t *)v1->data;
|
146
|
-
d2 = (i4v_t *)v2->data;
|
147
|
-
r = (i4v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 4))
|
173
|
-
{
|
174
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
175
|
-
{
|
176
|
-
r[size].f[i] = 1;
|
177
|
-
}
|
178
|
-
}
|
179
|
-
|
180
|
-
return(result_obj);
|
181
|
-
}
|
182
|
-
|
183
120
|
/* Function: Multiply two vectors. */
|
184
121
|
static void func_multiply(void *v1, void *v2, void *r)
|
185
122
|
{
|
data/ext/simd/simd_intarray.h
CHANGED
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
|
|
11
11
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
12
12
|
static VALUE method_to_a(VALUE self);
|
13
13
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
14
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
15
|
static void func_divide(void *v1, void *v2, void *r);
|
18
16
|
static void func_add(void *v1, void *v2, void *r);
|
data/ext/simd/simd_longarray.c
CHANGED
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
25
|
{
|
26
26
|
vector_t *vector;
|
27
27
|
l2v_t *data;
|
28
|
-
unsigned long n,
|
28
|
+
unsigned long n,i;
|
29
29
|
|
30
30
|
Check_Type(rb_array, T_ARRAY);
|
31
31
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
37
|
rb_raise(rb_eArgError, "Vectors must be at least 2 long");
|
38
38
|
}
|
39
39
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
40
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
41
|
|
42
42
|
data = (l2v_t *)vector->data;
|
43
43
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
45
|
data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
|
46
46
|
}
|
47
47
|
|
48
|
-
|
49
|
-
m = n + (n % 2);
|
50
|
-
for(i = n % 2; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/2].f[i] = 1;
|
53
|
-
}
|
48
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(long));
|
54
49
|
|
55
50
|
return(self);
|
56
51
|
}
|
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
54
|
* another FloatArray object, returning a new FloatArray. */
|
60
55
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
56
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
57
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_multiply));
|
63
58
|
}
|
64
59
|
|
65
60
|
/* Public: Divide values contained in the data array by those contained in
|
66
61
|
* another FloatArray object, returning a new FloatArray. */
|
67
62
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
63
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
64
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_divide));
|
70
65
|
}
|
71
66
|
|
72
67
|
/* Public: add values contained in the data array with those contained in
|
73
68
|
* another FloatArray object, returning a new FloatArray. */
|
74
69
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
70
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
71
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_add));
|
77
72
|
}
|
78
73
|
|
79
74
|
/* Public: and values contained in the data array with those contained in
|
80
75
|
* another FloatArray object, returning a new FloatArray. */
|
81
76
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
77
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
78
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_and));
|
84
79
|
}
|
85
80
|
|
86
81
|
/* Public: or values contained in the data array with those contained in
|
87
82
|
* another FloatArray object, returning a new FloatArray. */
|
88
83
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
84
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
85
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_or));
|
91
86
|
}
|
92
87
|
|
93
88
|
/* Public: xor values contained in the data array with those contained in
|
94
89
|
* another FloatArray object, returning a new FloatArray. */
|
95
90
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
91
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
92
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_xor));
|
98
93
|
}
|
99
94
|
|
100
95
|
/* Public: Subtract values contained in another FloatArray object from those
|
101
96
|
* contained in the current data array object, returning a new FloatArray. */
|
102
97
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
98
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
99
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_subtract));
|
105
100
|
}
|
106
101
|
|
107
102
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -122,61 +117,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
117
|
return(rb_array);
|
123
118
|
}
|
124
119
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
l2v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_LongArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 1) / 2;
|
144
|
-
|
145
|
-
d1 = (l2v_t *)v1->data;
|
146
|
-
d2 = (l2v_t *)v2->data;
|
147
|
-
r = (l2v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 2))
|
173
|
-
{
|
174
|
-
r[size].f[1] = 1;
|
175
|
-
}
|
176
|
-
|
177
|
-
return(result_obj);
|
178
|
-
}
|
179
|
-
|
180
120
|
/* Function: Multiply two vectors. */
|
181
121
|
static void func_multiply(void *v1, void *v2, void *r)
|
182
122
|
{
|
data/ext/simd/simd_longarray.h
CHANGED
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
|
|
11
11
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
12
12
|
static VALUE method_to_a(VALUE self);
|
13
13
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
14
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
15
|
static void func_divide(void *v1, void *v2, void *r);
|
18
16
|
static void func_add(void *v1, void *v2, void *r);
|
data/ext/simd/simd_smallfloatarray.c
CHANGED
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
22
|
{
|
23
23
|
vector_t *vector;
|
24
24
|
f4v_t *data;
|
25
|
-
unsigned long n,
|
25
|
+
unsigned long n,i;
|
26
26
|
|
27
27
|
Check_Type(rb_array, T_ARRAY);
|
28
28
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
34
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
35
|
}
|
36
36
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
37
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
38
|
|
39
39
|
data = (f4v_t *)vector->data;
|
40
40
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
42
|
data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
|
43
43
|
}
|
44
44
|
|
45
|
-
|
46
|
-
m = n + (n % 4);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/4].f[i] = 1.0;
|
50
|
-
}
|
45
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(float));
|
51
46
|
|
52
47
|
return(self);
|
53
48
|
}
|
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
51
|
* another FloatArray object, returning a new FloatArray. */
|
57
52
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
53
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
54
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
|
60
55
|
}
|
61
56
|
|
62
57
|
/* Public: Divide values contained in the data array by those contained in
|
63
58
|
* another FloatArray object, returning a new FloatArray. */
|
64
59
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
60
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
|
67
62
|
}
|
68
63
|
|
69
64
|
/* Public: add values contained in the data array with those contained in
|
70
65
|
* another FloatArray object, returning a new FloatArray. */
|
71
66
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
67
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
|
74
69
|
}
|
75
70
|
|
76
71
|
/* Public: Subtract values contained in another FloatArray object from those
|
77
72
|
* contained in the current data array object, returning a new FloatArray. */
|
78
73
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
74
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
|
81
76
|
}
|
82
77
|
|
83
78
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -98,64 +93,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
93
|
return(rb_array);
|
99
94
|
}
|
100
95
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
f4v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_SmallFloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 3) / 4;
|
120
|
-
|
121
|
-
d1 = (f4v_t *)v1->data;
|
122
|
-
d2 = (f4v_t *)v2->data;
|
123
|
-
r = (f4v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 4))
|
149
|
-
{
|
150
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
151
|
-
{
|
152
|
-
r[size].f[i] = 1;
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
|
-
return(result_obj);
|
157
|
-
}
|
158
|
-
|
159
96
|
/* Function: Multiply two vectors. */
|
160
97
|
static void func_multiply(void *v1, void *v2, void *r)
|
161
98
|
{
|
data/ext/simd/simd_smallfloatarray.h
CHANGED
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
|
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
9
|
static VALUE method_to_a(VALUE self);
|
10
10
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
11
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
12
|
static void func_divide(void *v1, void *v2, void *r);
|
15
13
|
static void func_add(void *v1, void *v2, void *r);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tina Wuest
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
64
64
|
version: '0'
|
65
65
|
requirements: []
|
66
66
|
rubyforge_project:
|
67
|
-
rubygems_version: 2.
|
67
|
+
rubygems_version: 2.2.2
|
68
68
|
signing_key:
|
69
69
|
specification_version: 4
|
70
70
|
summary: SIMD instructions in ruby
|