simd 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/simd/simd_common.c +81 -3
- data/ext/simd/simd_common.h +3 -1
- data/ext/simd/simd_floatarray.c +7 -67
- data/ext/simd/simd_floatarray.h +0 -2
- data/ext/simd/simd_intarray.c +10 -73
- data/ext/simd/simd_intarray.h +0 -2
- data/ext/simd/simd_longarray.c +10 -70
- data/ext/simd/simd_longarray.h +0 -2
- data/ext/simd/simd_smallfloatarray.c +7 -70
- data/ext/simd/simd_smallfloatarray.h +0 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91f6ffe0659b15461fbf3bfc7657bd72b8e0d6eb
|
4
|
+
data.tar.gz: 289421206f7343fed00e5fbcf214d0908e835882
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c2353151fa088f7ebcb535d095211e83517fd2436e2acee2b5d42e43f776d8df4931007e2a50c0f11026bed64c99be20d25f6e3598e52f10b30301e532420ea
|
7
|
+
data.tar.gz: 1e0ef0af7c81a176fc908163f6c7d68a09ae651a9d852fecfdf0185a3aed1d1732d4636670576964537f27c687f1c9765d214b75f8d15bcab8e89e1124bcf224
|
data/ext/simd/simd_common.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include "simd_common.h"
|
2
2
|
|
3
|
+
#define XMM_BYTES 16 /* Width of the xmm1,2... registers */
|
4
|
+
|
3
5
|
/* Internal: Allocate memory for the vector container. */
|
4
6
|
VALUE allocate(VALUE klass)
|
5
7
|
{
|
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
|
|
36
38
|
}
|
37
39
|
|
38
40
|
/* Internal: Allocate memory for the data array. */
|
39
|
-
void *internal_allocate_vector_array(unsigned long count
|
41
|
+
void *internal_allocate_vector_array(unsigned long count)
|
40
42
|
{
|
41
|
-
|
42
|
-
void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
|
43
|
+
void *vector = malloc((count + 1) * XMM_BYTES);
|
43
44
|
if(vector == NULL)
|
44
45
|
{
|
45
46
|
rb_raise(rb_eNoMemError, "Unable to allocate memory");
|
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
|
|
74
75
|
/* Never reached */
|
75
76
|
return(-1);
|
76
77
|
}
|
78
|
+
|
79
|
+
/* Internal: Given another object, perform an action specified via a function
|
80
|
+
* pointer against both.
|
81
|
+
*
|
82
|
+
* Since arithmetic is purposefully performed against a void pointers, disable
|
83
|
+
* warnings regarding this for the current function. */
|
84
|
+
#pragma GCC diagnostic push
|
85
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
86
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
|
87
|
+
{
|
88
|
+
unsigned long length, i, j;
|
89
|
+
int align;
|
90
|
+
vector_t *v1, *v2, *rv;
|
91
|
+
void *data;
|
92
|
+
VALUE result_obj = allocate(klass);
|
93
|
+
|
94
|
+
Data_Get_Struct(self, vector_t, v1);
|
95
|
+
Data_Get_Struct(obj, vector_t, v2);
|
96
|
+
Data_Get_Struct(result_obj, vector_t, rv);
|
97
|
+
rv->data = internal_allocate_vector_array(v1->len);
|
98
|
+
|
99
|
+
align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
|
100
|
+
|
101
|
+
length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
|
102
|
+
rv->len = v1->len;
|
103
|
+
|
104
|
+
switch(align)
|
105
|
+
{
|
106
|
+
case 0: /* Same size arrays */
|
107
|
+
for(i = 0; i < length; i++)
|
108
|
+
{
|
109
|
+
func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
|
110
|
+
}
|
111
|
+
break;
|
112
|
+
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
113
|
+
for(i = 0; i < length; i++)
|
114
|
+
{
|
115
|
+
func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
|
116
|
+
}
|
117
|
+
break;
|
118
|
+
default: /* Self is a multiple of operand's length long */
|
119
|
+
for(j = 0; j < v2->len; j++)
|
120
|
+
{
|
121
|
+
data = v2->data + XMM_BYTES * j;
|
122
|
+
for(i = j; i < length; i+=v2->len)
|
123
|
+
{
|
124
|
+
func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
|
125
|
+
}
|
126
|
+
}
|
127
|
+
}
|
128
|
+
internal_sanitize_unaligned_final_vector(rv, size);
|
129
|
+
|
130
|
+
return(result_obj);
|
131
|
+
}
|
132
|
+
#pragma GCC diagnostic pop
|
133
|
+
|
134
|
+
/* Internal: Make sure that no null bytes exist beyond the boundary of
|
135
|
+
* unaligned vectors. This function should be called after any operation that
|
136
|
+
* results in the mutation or creation of a vector array.
|
137
|
+
*
|
138
|
+
* Since arithmetic is purposefully performed against a void pointers, disable
|
139
|
+
* warnings regarding this for the current function. */
|
140
|
+
#pragma GCC diagnostic push
|
141
|
+
#pragma GCC diagnostic ignored "-Wpointer-arith"
|
142
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
|
143
|
+
{
|
144
|
+
unsigned long i;
|
145
|
+
|
146
|
+
if((rv->len * size) % XMM_BYTES)
|
147
|
+
{
|
148
|
+
for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
|
149
|
+
{
|
150
|
+
*(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
#pragma GCC diagnostic pop
|
data/ext/simd/simd_common.h
CHANGED
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
|
|
8
8
|
|
9
9
|
VALUE method_length(VALUE self);
|
10
10
|
|
11
|
-
void *internal_allocate_vector_array(unsigned long count
|
11
|
+
void *internal_allocate_vector_array(unsigned long count);
|
12
12
|
int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
|
13
|
+
VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
|
14
|
+
void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
|
data/ext/simd/simd_floatarray.c
CHANGED
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
22
|
{
|
23
23
|
vector_t *vector;
|
24
24
|
d2v_t *data;
|
25
|
-
unsigned long n,
|
25
|
+
unsigned long n,i;
|
26
26
|
|
27
27
|
Check_Type(rb_array, T_ARRAY);
|
28
28
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
34
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
35
|
}
|
36
36
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
37
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
38
|
|
39
39
|
data = (d2v_t *)vector->data;
|
40
40
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
42
|
data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
|
43
43
|
}
|
44
44
|
|
45
|
-
|
46
|
-
m = n + (n % 2);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/2].f[1] = 1.0;
|
50
|
-
}
|
45
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(double));
|
51
46
|
|
52
47
|
return(self);
|
53
48
|
}
|
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
51
|
* another FloatArray object, returning a new FloatArray. */
|
57
52
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
53
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
54
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
|
60
55
|
}
|
61
56
|
|
62
57
|
/* Public: Divide values contained in the data array by those contained in
|
63
58
|
* another FloatArray object, returning a new FloatArray. */
|
64
59
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
60
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
|
67
62
|
}
|
68
63
|
|
69
64
|
/* Public: add values contained in the data array with those contained in
|
70
65
|
* another FloatArray object, returning a new FloatArray. */
|
71
66
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
67
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
|
74
69
|
}
|
75
70
|
|
76
71
|
/* Public: Subtract values contained in another FloatArray object from those
|
77
72
|
* contained in the current data array object, returning a new FloatArray. */
|
78
73
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
74
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
|
81
76
|
}
|
82
77
|
|
83
78
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -98,61 +93,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
93
|
return(rb_array);
|
99
94
|
}
|
100
95
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
d2v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_FloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 1) / 2;
|
120
|
-
|
121
|
-
d1 = (d2v_t *)v1->data;
|
122
|
-
d2 = (d2v_t *)v2->data;
|
123
|
-
r = (d2v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 2 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 2))
|
149
|
-
{
|
150
|
-
r[size].f[1] = 1;
|
151
|
-
}
|
152
|
-
|
153
|
-
return(result_obj);
|
154
|
-
}
|
155
|
-
|
156
96
|
/* Function: Multiply two vectors. */
|
157
97
|
static void func_multiply(void *v1, void *v2, void *r)
|
158
98
|
{
|
data/ext/simd/simd_floatarray.h
CHANGED
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
|
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
9
|
static VALUE method_to_a(VALUE self);
|
10
10
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
11
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
12
|
static void func_divide(void *v1, void *v2, void *r);
|
15
13
|
static void func_add(void *v1, void *v2, void *r);
|
data/ext/simd/simd_intarray.c
CHANGED
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
25
|
{
|
26
26
|
vector_t *vector;
|
27
27
|
i4v_t *data;
|
28
|
-
unsigned long n,
|
28
|
+
unsigned long n,i;
|
29
29
|
|
30
30
|
Check_Type(rb_array, T_ARRAY);
|
31
31
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
37
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
38
38
|
}
|
39
39
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
40
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
41
|
|
42
42
|
data = (i4v_t *)vector->data;
|
43
43
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
45
|
data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
|
46
46
|
}
|
47
47
|
|
48
|
-
|
49
|
-
m = n + (n % 4);
|
50
|
-
for(i = n % 4; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/4].f[i] = 1.0;
|
53
|
-
}
|
48
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(int));
|
54
49
|
|
55
50
|
return(self);
|
56
51
|
}
|
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
54
|
* another FloatArray object, returning a new FloatArray. */
|
60
55
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
56
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
57
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
|
63
58
|
}
|
64
59
|
|
65
60
|
/* Public: Divide values contained in the data array by those contained in
|
66
61
|
* another FloatArray object, returning a new FloatArray. */
|
67
62
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
63
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
64
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
|
70
65
|
}
|
71
66
|
|
72
67
|
/* Public: add values contained in the data array with those contained in
|
73
68
|
* another FloatArray object, returning a new FloatArray. */
|
74
69
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
70
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
71
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
|
77
72
|
}
|
78
73
|
|
79
74
|
/* Public: and values contained in the data array with those contained in
|
80
75
|
* another FloatArray object, returning a new FloatArray. */
|
81
76
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
77
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
78
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
|
84
79
|
}
|
85
80
|
|
86
81
|
/* Public: or values contained in the data array with those contained in
|
87
82
|
* another FloatArray object, returning a new FloatArray. */
|
88
83
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
84
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
85
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
|
91
86
|
}
|
92
87
|
|
93
88
|
/* Public: xor values contained in the data array with those contained in
|
94
89
|
* another FloatArray object, returning a new FloatArray. */
|
95
90
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
91
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
92
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
|
98
93
|
}
|
99
94
|
|
100
95
|
/* Public: Subtract values contained in another FloatArray object from those
|
101
96
|
* contained in the current data array object, returning a new FloatArray. */
|
102
97
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
98
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
99
|
+
return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
|
105
100
|
}
|
106
101
|
|
107
102
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -122,64 +117,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
117
|
return(rb_array);
|
123
118
|
}
|
124
119
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
i4v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_IntArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 3) / 4;
|
144
|
-
|
145
|
-
d1 = (i4v_t *)v1->data;
|
146
|
-
d2 = (i4v_t *)v2->data;
|
147
|
-
r = (i4v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 4))
|
173
|
-
{
|
174
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
175
|
-
{
|
176
|
-
r[size].f[i] = 1;
|
177
|
-
}
|
178
|
-
}
|
179
|
-
|
180
|
-
return(result_obj);
|
181
|
-
}
|
182
|
-
|
183
120
|
/* Function: Multiply two vectors. */
|
184
121
|
static void func_multiply(void *v1, void *v2, void *r)
|
185
122
|
{
|
data/ext/simd/simd_intarray.h
CHANGED
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
|
|
11
11
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
12
12
|
static VALUE method_to_a(VALUE self);
|
13
13
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
14
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
15
|
static void func_divide(void *v1, void *v2, void *r);
|
18
16
|
static void func_add(void *v1, void *v2, void *r);
|
data/ext/simd/simd_longarray.c
CHANGED
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
25
25
|
{
|
26
26
|
vector_t *vector;
|
27
27
|
l2v_t *data;
|
28
|
-
unsigned long n,
|
28
|
+
unsigned long n,i;
|
29
29
|
|
30
30
|
Check_Type(rb_array, T_ARRAY);
|
31
31
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
37
37
|
rb_raise(rb_eArgError, "Vectors must be at least 2 long");
|
38
38
|
}
|
39
39
|
|
40
|
-
vector->data = internal_allocate_vector_array(vector->len
|
40
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
41
41
|
|
42
42
|
data = (l2v_t *)vector->data;
|
43
43
|
for(i = 0; i < vector->len; i++)
|
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
45
45
|
data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
|
46
46
|
}
|
47
47
|
|
48
|
-
|
49
|
-
m = n + (n % 2);
|
50
|
-
for(i = n % 2; i > 0; i--)
|
51
|
-
{
|
52
|
-
data[m/2].f[i] = 1;
|
53
|
-
}
|
48
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(long));
|
54
49
|
|
55
50
|
return(self);
|
56
51
|
}
|
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
59
54
|
* another FloatArray object, returning a new FloatArray. */
|
60
55
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
61
56
|
{
|
62
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
57
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_multiply));
|
63
58
|
}
|
64
59
|
|
65
60
|
/* Public: Divide values contained in the data array by those contained in
|
66
61
|
* another FloatArray object, returning a new FloatArray. */
|
67
62
|
static VALUE method_divide(VALUE self, VALUE obj)
|
68
63
|
{
|
69
|
-
return(internal_apply_operation(self, obj, func_divide));
|
64
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_divide));
|
70
65
|
}
|
71
66
|
|
72
67
|
/* Public: add values contained in the data array with those contained in
|
73
68
|
* another FloatArray object, returning a new FloatArray. */
|
74
69
|
static VALUE method_add(VALUE self, VALUE obj)
|
75
70
|
{
|
76
|
-
return(internal_apply_operation(self, obj, func_add));
|
71
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_add));
|
77
72
|
}
|
78
73
|
|
79
74
|
/* Public: and values contained in the data array with those contained in
|
80
75
|
* another FloatArray object, returning a new FloatArray. */
|
81
76
|
static VALUE method_and(VALUE self, VALUE obj)
|
82
77
|
{
|
83
|
-
return(internal_apply_operation(self, obj, func_and));
|
78
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_and));
|
84
79
|
}
|
85
80
|
|
86
81
|
/* Public: or values contained in the data array with those contained in
|
87
82
|
* another FloatArray object, returning a new FloatArray. */
|
88
83
|
static VALUE method_or(VALUE self, VALUE obj)
|
89
84
|
{
|
90
|
-
return(internal_apply_operation(self, obj, func_or));
|
85
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_or));
|
91
86
|
}
|
92
87
|
|
93
88
|
/* Public: xor values contained in the data array with those contained in
|
94
89
|
* another FloatArray object, returning a new FloatArray. */
|
95
90
|
static VALUE method_xor(VALUE self, VALUE obj)
|
96
91
|
{
|
97
|
-
return(internal_apply_operation(self, obj, func_xor));
|
92
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_xor));
|
98
93
|
}
|
99
94
|
|
100
95
|
/* Public: Subtract values contained in another FloatArray object from those
|
101
96
|
* contained in the current data array object, returning a new FloatArray. */
|
102
97
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
103
98
|
{
|
104
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
99
|
+
return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_subtract));
|
105
100
|
}
|
106
101
|
|
107
102
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -122,61 +117,6 @@ static VALUE method_to_a(VALUE self)
|
|
122
117
|
return(rb_array);
|
123
118
|
}
|
124
119
|
|
125
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
126
|
-
* function pointer against both. */
|
127
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
128
|
-
{
|
129
|
-
unsigned long size, i;
|
130
|
-
int align;
|
131
|
-
vector_t *v1, *v2, *rv;
|
132
|
-
l2v_t *d1, *d2, *r;
|
133
|
-
VALUE result_obj = allocate(SIMD_LongArray);
|
134
|
-
|
135
|
-
Data_Get_Struct(self, vector_t, v1);
|
136
|
-
Data_Get_Struct(obj, vector_t, v2);
|
137
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
138
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
|
139
|
-
|
140
|
-
align = internal_align_vectors(v1->len, v2->len, 2);
|
141
|
-
|
142
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
143
|
-
size = (v1->len + 1) / 2;
|
144
|
-
|
145
|
-
d1 = (l2v_t *)v1->data;
|
146
|
-
d2 = (l2v_t *)v2->data;
|
147
|
-
r = (l2v_t *)rv->data;
|
148
|
-
|
149
|
-
rv->len = v1->len;
|
150
|
-
|
151
|
-
switch(align)
|
152
|
-
{
|
153
|
-
case 0: /* Same size arrays */
|
154
|
-
for(i = 0; i < size; i++)
|
155
|
-
{
|
156
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
157
|
-
}
|
158
|
-
break;
|
159
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
160
|
-
for(i = 0; i < size; i++)
|
161
|
-
{
|
162
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
default: /* Self is a multiple of operand's length long */
|
166
|
-
for(i = 0; i < size; i++)
|
167
|
-
{
|
168
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
if(rv->len != rv->len + (rv->len % 2))
|
173
|
-
{
|
174
|
-
r[size].f[1] = 1;
|
175
|
-
}
|
176
|
-
|
177
|
-
return(result_obj);
|
178
|
-
}
|
179
|
-
|
180
120
|
/* Function: Multiply two vectors. */
|
181
121
|
static void func_multiply(void *v1, void *v2, void *r)
|
182
122
|
{
|
data/ext/simd/simd_longarray.h
CHANGED
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
|
|
11
11
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
12
12
|
static VALUE method_to_a(VALUE self);
|
13
13
|
|
14
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
15
|
-
|
16
14
|
static void func_multiply(void *v1, void *v2, void *r);
|
17
15
|
static void func_divide(void *v1, void *v2, void *r);
|
18
16
|
static void func_add(void *v1, void *v2, void *r);
|
data/ext/simd/simd_smallfloatarray.c
CHANGED
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
22
22
|
{
|
23
23
|
vector_t *vector;
|
24
24
|
f4v_t *data;
|
25
|
-
unsigned long n,
|
25
|
+
unsigned long n,i;
|
26
26
|
|
27
27
|
Check_Type(rb_array, T_ARRAY);
|
28
28
|
Data_Get_Struct(self, vector_t, vector);
|
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
34
34
|
rb_raise(rb_eArgError, "Vectors must be at least 4 long");
|
35
35
|
}
|
36
36
|
|
37
|
-
vector->data = internal_allocate_vector_array(vector->len
|
37
|
+
vector->data = internal_allocate_vector_array(vector->len);
|
38
38
|
|
39
39
|
data = (f4v_t *)vector->data;
|
40
40
|
for(i = 0; i < vector->len; i++)
|
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
42
42
|
data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
|
43
43
|
}
|
44
44
|
|
45
|
-
|
46
|
-
m = n + (n % 4);
|
47
|
-
for(i = n % 4; i > 0; i--)
|
48
|
-
{
|
49
|
-
data[m/4].f[i] = 1.0;
|
50
|
-
}
|
45
|
+
internal_sanitize_unaligned_final_vector(vector, sizeof(float));
|
51
46
|
|
52
47
|
return(self);
|
53
48
|
}
|
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
|
|
56
51
|
* another FloatArray object, returning a new FloatArray. */
|
57
52
|
static VALUE method_multiply(VALUE self, VALUE obj)
|
58
53
|
{
|
59
|
-
return(internal_apply_operation(self, obj, func_multiply));
|
54
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
|
60
55
|
}
|
61
56
|
|
62
57
|
/* Public: Divide values contained in the data array by those contained in
|
63
58
|
* another FloatArray object, returning a new FloatArray. */
|
64
59
|
static VALUE method_divide(VALUE self, VALUE obj)
|
65
60
|
{
|
66
|
-
return(internal_apply_operation(self, obj, func_divide));
|
61
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
|
67
62
|
}
|
68
63
|
|
69
64
|
/* Public: add values contained in the data array with those contained in
|
70
65
|
* another FloatArray object, returning a new FloatArray. */
|
71
66
|
static VALUE method_add(VALUE self, VALUE obj)
|
72
67
|
{
|
73
|
-
return(internal_apply_operation(self, obj, func_add));
|
68
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
|
74
69
|
}
|
75
70
|
|
76
71
|
/* Public: Subtract values contained in another FloatArray object from those
|
77
72
|
* contained in the current data array object, returning a new FloatArray. */
|
78
73
|
static VALUE method_subtract(VALUE self, VALUE obj)
|
79
74
|
{
|
80
|
-
return(internal_apply_operation(self, obj, func_subtract));
|
75
|
+
return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
|
81
76
|
}
|
82
77
|
|
83
78
|
/* Public: Return a Ruby Array containing the doubles within the data array. */
|
@@ -98,64 +93,6 @@ static VALUE method_to_a(VALUE self)
|
|
98
93
|
return(rb_array);
|
99
94
|
}
|
100
95
|
|
101
|
-
/* Internal: Given another FloatArray object, perform an action specified via a
|
102
|
-
* function pointer against both. */
|
103
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
|
104
|
-
{
|
105
|
-
unsigned long size, i;
|
106
|
-
int align;
|
107
|
-
vector_t *v1, *v2, *rv;
|
108
|
-
f4v_t *d1, *d2, *r;
|
109
|
-
VALUE result_obj = allocate(SIMD_SmallFloatArray);
|
110
|
-
|
111
|
-
Data_Get_Struct(self, vector_t, v1);
|
112
|
-
Data_Get_Struct(obj, vector_t, v2);
|
113
|
-
Data_Get_Struct(result_obj, vector_t, rv);
|
114
|
-
rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
|
115
|
-
|
116
|
-
align = internal_align_vectors(v1->len, v2->len, 4);
|
117
|
-
|
118
|
-
/* Ensure that size will be the result of ceil(len / 4.0) */
|
119
|
-
size = (v1->len + 3) / 4;
|
120
|
-
|
121
|
-
d1 = (f4v_t *)v1->data;
|
122
|
-
d2 = (f4v_t *)v2->data;
|
123
|
-
r = (f4v_t *)rv->data;
|
124
|
-
|
125
|
-
rv->len = v1->len;
|
126
|
-
|
127
|
-
switch(align)
|
128
|
-
{
|
129
|
-
case 0: /* Same size arrays */
|
130
|
-
for(i = 0; i < size; i++)
|
131
|
-
{
|
132
|
-
func(&d1[i].v, &d2[i].v, &r[i].v);
|
133
|
-
}
|
134
|
-
break;
|
135
|
-
case 1: /* Operand is exactly 4 long (size of 1 sse register) */
|
136
|
-
for(i = 0; i < size; i++)
|
137
|
-
{
|
138
|
-
func(&d1[i].v, &d2[0].v, &r[i].v);
|
139
|
-
}
|
140
|
-
break;
|
141
|
-
default: /* Self is a multiple of operand's length long */
|
142
|
-
for(i = 0; i < size; i++)
|
143
|
-
{
|
144
|
-
func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
if(rv->len != rv->len + (rv->len % 4))
|
149
|
-
{
|
150
|
-
for(i = 3; i > rv->len + (rv->len % 4); i--)
|
151
|
-
{
|
152
|
-
r[size].f[i] = 1;
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
|
-
return(result_obj);
|
157
|
-
}
|
158
|
-
|
159
96
|
/* Function: Multiply two vectors. */
|
160
97
|
static void func_multiply(void *v1, void *v2, void *r)
|
161
98
|
{
|
data/ext/simd/simd_smallfloatarray.h
CHANGED
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
|
|
8
8
|
static VALUE method_subtract(VALUE self, VALUE obj);
|
9
9
|
static VALUE method_to_a(VALUE self);
|
10
10
|
|
11
|
-
static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
|
12
|
-
|
13
11
|
static void func_multiply(void *v1, void *v2, void *r);
|
14
12
|
static void func_divide(void *v1, void *v2, void *r);
|
15
13
|
static void func_add(void *v1, void *v2, void *r);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tina Wuest
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
64
64
|
version: '0'
|
65
65
|
requirements: []
|
66
66
|
rubyforge_project:
|
67
|
-
rubygems_version: 2.
|
67
|
+
rubygems_version: 2.2.2
|
68
68
|
signing_key:
|
69
69
|
specification_version: 4
|
70
70
|
summary: SIMD instructions in ruby
|