simd 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c15d248c872e4369c45e4151faf81e9761ea1587
4
- data.tar.gz: cdfc5170454be15be1bc75ccc5eba08aa96c884d
3
+ metadata.gz: 91f6ffe0659b15461fbf3bfc7657bd72b8e0d6eb
4
+ data.tar.gz: 289421206f7343fed00e5fbcf214d0908e835882
5
5
  SHA512:
6
- metadata.gz: 38df7b30a113b325bba7be834fcbe24c44738706495579af5a9e4b508e4db4375e2f36b16a97612eab4dd8da379c450d062c6e0bceb6e8ddf308a952ecf6419c
7
- data.tar.gz: 342680d7f4f6690c8b3338de32ef192aaadb2952461e62e86a848650414b09d793d71e70c5b38909e0c882f062e346995f86f40d3dbf1de692cbe3eb0cfb0c73
6
+ metadata.gz: 1c2353151fa088f7ebcb535d095211e83517fd2436e2acee2b5d42e43f776d8df4931007e2a50c0f11026bed64c99be20d25f6e3598e52f10b30301e532420ea
7
+ data.tar.gz: 1e0ef0af7c81a176fc908163f6c7d68a09ae651a9d852fecfdf0185a3aed1d1732d4636670576964537f27c687f1c9765d214b75f8d15bcab8e89e1124bcf224
@@ -1,5 +1,7 @@
1
1
  #include "simd_common.h"
2
2
 
3
+ #define XMM_BYTES 16 /* Width of the xmm1,2... registers */
4
+
3
5
  /* Internal: Allocate memory for the vector container. */
4
6
  VALUE allocate(VALUE klass)
5
7
  {
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
36
38
  }
37
39
 
38
40
  /* Internal: Allocate memory for the data array. */
39
- void *internal_allocate_vector_array(unsigned long count, size_t size)
41
+ void *internal_allocate_vector_array(unsigned long count)
40
42
  {
41
- unsigned int modulo = 16 / size;
42
- void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
43
+ void *vector = malloc((count + 1) * XMM_BYTES);
43
44
  if(vector == NULL)
44
45
  {
45
46
  rb_raise(rb_eNoMemError, "Unable to allocate memory");
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
74
75
  /* Never reached */
75
76
  return(-1);
76
77
  }
78
+
79
+ /* Internal: Given another object, perform an action specified via a function
80
+ * pointer against both.
81
+ *
82
+ * Since arithmetic is purposefully performed against void pointers, disable
83
+ * warnings regarding this for the current function. */
84
+ #pragma GCC diagnostic push
85
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
86
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
87
+ {
88
+ unsigned long length, i, j;
89
+ int align;
90
+ vector_t *v1, *v2, *rv;
91
+ void *data;
92
+ VALUE result_obj = allocate(klass);
93
+
94
+ Data_Get_Struct(self, vector_t, v1);
95
+ Data_Get_Struct(obj, vector_t, v2);
96
+ Data_Get_Struct(result_obj, vector_t, rv);
97
+ rv->data = internal_allocate_vector_array(v1->len);
98
+
99
+ align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
100
+
101
+ length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
102
+ rv->len = v1->len;
103
+
104
+ switch(align)
105
+ {
106
+ case 0: /* Same size arrays */
107
+ for(i = 0; i < length; i++)
108
+ {
109
+ func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
110
+ }
111
+ break;
112
+ case 1: /* Operand is exactly 4 long (size of 1 sse register) */
113
+ for(i = 0; i < length; i++)
114
+ {
115
+ func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
116
+ }
117
+ break;
118
+ default: /* Self is a multiple of operand's length long */
119
+ for(j = 0; j < v2->len; j++)
120
+ {
121
+ data = v2->data + XMM_BYTES * j;
122
+ for(i = j; i < length; i+=v2->len)
123
+ {
124
+ func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
125
+ }
126
+ }
127
+ }
128
+ internal_sanitize_unaligned_final_vector(rv, size);
129
+
130
+ return(result_obj);
131
+ }
132
+ #pragma GCC diagnostic pop
133
+
134
+ /* Internal: Make sure that no zero-valued elements exist beyond the boundary of
135
+ * unaligned vectors. This function should be called after any operation that
136
+ * results in the mutation or creation of a vector array.
137
+ *
138
+ * Since arithmetic is purposefully performed against void pointers, disable
139
+ * warnings regarding this for the current function. */
140
+ #pragma GCC diagnostic push
141
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
142
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
143
+ {
144
+ unsigned long i;
145
+
146
+ if((rv->len * size) % XMM_BYTES)
147
+ {
148
+ for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
149
+ {
150
+ *(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
151
+ }
152
+ }
153
+ }
154
+ #pragma GCC diagnostic pop
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
8
8
 
9
9
  VALUE method_length(VALUE self);
10
10
 
11
- void *internal_allocate_vector_array(unsigned long count, size_t size);
11
+ void *internal_allocate_vector_array(unsigned long count);
12
12
  int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
13
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
14
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
22
  {
23
23
  vector_t *vector;
24
24
  d2v_t *data;
25
- unsigned long n,m,i;
25
+ unsigned long n,i;
26
26
 
27
27
  Check_Type(rb_array, T_ARRAY);
28
28
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
34
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
35
  }
36
36
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(d2v_t));
37
+ vector->data = internal_allocate_vector_array(vector->len);
38
38
 
39
39
  data = (d2v_t *)vector->data;
40
40
  for(i = 0; i < vector->len; i++)
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
42
  data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
43
43
  }
44
44
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 2);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/2].f[1] = 1.0;
50
- }
45
+ internal_sanitize_unaligned_final_vector(vector, sizeof(double));
51
46
 
52
47
  return(self);
53
48
  }
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
51
  * another FloatArray object, returning a new FloatArray. */
57
52
  static VALUE method_multiply(VALUE self, VALUE obj)
58
53
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
54
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
60
55
  }
61
56
 
62
57
  /* Public: Divide values contained in the data array by those contained in
63
58
  * another FloatArray object, returning a new FloatArray. */
64
59
  static VALUE method_divide(VALUE self, VALUE obj)
65
60
  {
66
- return(internal_apply_operation(self, obj, func_divide));
61
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
67
62
  }
68
63
 
69
64
  /* Public: add values contained in the data array with those contained in
70
65
  * another FloatArray object, returning a new FloatArray. */
71
66
  static VALUE method_add(VALUE self, VALUE obj)
72
67
  {
73
- return(internal_apply_operation(self, obj, func_add));
68
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
74
69
  }
75
70
 
76
71
  /* Public: Subtract values contained in another FloatArray object from those
77
72
  * contained in the current data array object, returning a new FloatArray. */
78
73
  static VALUE method_subtract(VALUE self, VALUE obj)
79
74
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
75
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
81
76
  }
82
77
 
83
78
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -98,61 +93,6 @@ static VALUE method_to_a(VALUE self)
98
93
  return(rb_array);
99
94
  }
100
95
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- d2v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_FloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 2);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 1) / 2;
120
-
121
- d1 = (d2v_t *)v1->data;
122
- d2 = (d2v_t *)v2->data;
123
- r = (d2v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 2 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 2))
149
- {
150
- r[size].f[1] = 1;
151
- }
152
-
153
- return(result_obj);
154
- }
155
-
156
96
  /* Function: Multiply two vectors. */
157
97
  static void func_multiply(void *v1, void *v2, void *r)
158
98
  {
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
9
  static VALUE method_to_a(VALUE self);
10
10
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
11
  static void func_multiply(void *v1, void *v2, void *r);
14
12
  static void func_divide(void *v1, void *v2, void *r);
15
13
  static void func_add(void *v1, void *v2, void *r);
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
25
  {
26
26
  vector_t *vector;
27
27
  i4v_t *data;
28
- unsigned long n,m,i;
28
+ unsigned long n,i;
29
29
 
30
30
  Check_Type(rb_array, T_ARRAY);
31
31
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
37
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
38
38
  }
39
39
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(i4v_t));
40
+ vector->data = internal_allocate_vector_array(vector->len);
41
41
 
42
42
  data = (i4v_t *)vector->data;
43
43
  for(i = 0; i < vector->len; i++)
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
45
  data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
46
46
  }
47
47
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 4);
50
- for(i = n % 4; i > 0; i--)
51
- {
52
- data[m/4].f[i] = 1.0;
53
- }
48
+ internal_sanitize_unaligned_final_vector(vector, sizeof(int));
54
49
 
55
50
  return(self);
56
51
  }
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
54
  * another IntArray object, returning a new IntArray. */
60
55
  static VALUE method_multiply(VALUE self, VALUE obj)
61
56
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
57
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
63
58
  }
64
59
 
65
60
  /* Public: Divide values contained in the data array by those contained in
66
61
  * another IntArray object, returning a new IntArray. */
67
62
  static VALUE method_divide(VALUE self, VALUE obj)
68
63
  {
69
- return(internal_apply_operation(self, obj, func_divide));
64
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
70
65
  }
71
66
 
72
67
  /* Public: add values contained in the data array with those contained in
73
68
  * another IntArray object, returning a new IntArray. */
74
69
  static VALUE method_add(VALUE self, VALUE obj)
75
70
  {
76
- return(internal_apply_operation(self, obj, func_add));
71
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
77
72
  }
78
73
 
79
74
  /* Public: and values contained in the data array with those contained in
80
75
  * another IntArray object, returning a new IntArray. */
81
76
  static VALUE method_and(VALUE self, VALUE obj)
82
77
  {
83
- return(internal_apply_operation(self, obj, func_and));
78
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
84
79
  }
85
80
 
86
81
  /* Public: or values contained in the data array with those contained in
87
82
  * another IntArray object, returning a new IntArray. */
88
83
  static VALUE method_or(VALUE self, VALUE obj)
89
84
  {
90
- return(internal_apply_operation(self, obj, func_or));
85
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
91
86
  }
92
87
 
93
88
  /* Public: xor values contained in the data array with those contained in
94
89
  * another IntArray object, returning a new IntArray. */
95
90
  static VALUE method_xor(VALUE self, VALUE obj)
96
91
  {
97
- return(internal_apply_operation(self, obj, func_xor));
92
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
98
93
  }
99
94
 
100
95
  /* Public: Subtract values contained in another IntArray object from those
101
96
  * contained in the current data array object, returning a new IntArray. */
102
97
  static VALUE method_subtract(VALUE self, VALUE obj)
103
98
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
99
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
105
100
  }
106
101
 
107
102
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -122,64 +117,6 @@ static VALUE method_to_a(VALUE self)
122
117
  return(rb_array);
123
118
  }
124
119
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- i4v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_IntArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 4);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 3) / 4;
144
-
145
- d1 = (i4v_t *)v1->data;
146
- d2 = (i4v_t *)v2->data;
147
- r = (i4v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 4))
173
- {
174
- for(i = 3; i > rv->len + (rv->len % 4); i--)
175
- {
176
- r[size].f[i] = 1;
177
- }
178
- }
179
-
180
- return(result_obj);
181
- }
182
-
183
120
  /* Function: Multiply two vectors. */
184
121
  static void func_multiply(void *v1, void *v2, void *r)
185
122
  {
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
11
11
  static VALUE method_subtract(VALUE self, VALUE obj);
12
12
  static VALUE method_to_a(VALUE self);
13
13
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
14
  static void func_multiply(void *v1, void *v2, void *r);
17
15
  static void func_divide(void *v1, void *v2, void *r);
18
16
  static void func_add(void *v1, void *v2, void *r);
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
25
  {
26
26
  vector_t *vector;
27
27
  l2v_t *data;
28
- unsigned long n,m,i;
28
+ unsigned long n,i;
29
29
 
30
30
  Check_Type(rb_array, T_ARRAY);
31
31
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
37
  rb_raise(rb_eArgError, "Vectors must be at least 2 long");
38
38
  }
39
39
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(l2v_t));
40
+ vector->data = internal_allocate_vector_array(vector->len);
41
41
 
42
42
  data = (l2v_t *)vector->data;
43
43
  for(i = 0; i < vector->len; i++)
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
45
  data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
46
46
  }
47
47
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 2);
50
- for(i = n % 2; i > 0; i--)
51
- {
52
- data[m/2].f[i] = 1;
53
- }
48
+ internal_sanitize_unaligned_final_vector(vector, sizeof(long));
54
49
 
55
50
  return(self);
56
51
  }
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
54
  * another LongArray object, returning a new LongArray. */
60
55
  static VALUE method_multiply(VALUE self, VALUE obj)
61
56
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
57
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_multiply));
63
58
  }
64
59
 
65
60
  /* Public: Divide values contained in the data array by those contained in
66
61
  * another LongArray object, returning a new LongArray. */
67
62
  static VALUE method_divide(VALUE self, VALUE obj)
68
63
  {
69
- return(internal_apply_operation(self, obj, func_divide));
64
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_divide));
70
65
  }
71
66
 
72
67
  /* Public: add values contained in the data array with those contained in
73
68
  * another LongArray object, returning a new LongArray. */
74
69
  static VALUE method_add(VALUE self, VALUE obj)
75
70
  {
76
- return(internal_apply_operation(self, obj, func_add));
71
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_add));
77
72
  }
78
73
 
79
74
  /* Public: and values contained in the data array with those contained in
80
75
  * another LongArray object, returning a new LongArray. */
81
76
  static VALUE method_and(VALUE self, VALUE obj)
82
77
  {
83
- return(internal_apply_operation(self, obj, func_and));
78
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_and));
84
79
  }
85
80
 
86
81
  /* Public: or values contained in the data array with those contained in
87
82
  * another LongArray object, returning a new LongArray. */
88
83
  static VALUE method_or(VALUE self, VALUE obj)
89
84
  {
90
- return(internal_apply_operation(self, obj, func_or));
85
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_or));
91
86
  }
92
87
 
93
88
  /* Public: xor values contained in the data array with those contained in
94
89
  * another LongArray object, returning a new LongArray. */
95
90
  static VALUE method_xor(VALUE self, VALUE obj)
96
91
  {
97
- return(internal_apply_operation(self, obj, func_xor));
92
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_xor));
98
93
  }
99
94
 
100
95
  /* Public: Subtract values contained in another LongArray object from those
101
96
  * contained in the current data array object, returning a new LongArray. */
102
97
  static VALUE method_subtract(VALUE self, VALUE obj)
103
98
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
99
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_subtract));
105
100
  }
106
101
 
107
102
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -122,61 +117,6 @@ static VALUE method_to_a(VALUE self)
122
117
  return(rb_array);
123
118
  }
124
119
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- l2v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_LongArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 2);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 1) / 2;
144
-
145
- d1 = (l2v_t *)v1->data;
146
- d2 = (l2v_t *)v2->data;
147
- r = (l2v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 2))
173
- {
174
- r[size].f[1] = 1;
175
- }
176
-
177
- return(result_obj);
178
- }
179
-
180
120
  /* Function: Multiply two vectors. */
181
121
  static void func_multiply(void *v1, void *v2, void *r)
182
122
  {
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
11
11
  static VALUE method_subtract(VALUE self, VALUE obj);
12
12
  static VALUE method_to_a(VALUE self);
13
13
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
14
  static void func_multiply(void *v1, void *v2, void *r);
17
15
  static void func_divide(void *v1, void *v2, void *r);
18
16
  static void func_add(void *v1, void *v2, void *r);
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
22
  {
23
23
  vector_t *vector;
24
24
  f4v_t *data;
25
- unsigned long n,m,i;
25
+ unsigned long n,i;
26
26
 
27
27
  Check_Type(rb_array, T_ARRAY);
28
28
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
34
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
35
  }
36
36
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(f4v_t));
37
+ vector->data = internal_allocate_vector_array(vector->len);
38
38
 
39
39
  data = (f4v_t *)vector->data;
40
40
  for(i = 0; i < vector->len; i++)
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
42
  data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
43
43
  }
44
44
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 4);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/4].f[i] = 1.0;
50
- }
45
+ internal_sanitize_unaligned_final_vector(vector, sizeof(float));
51
46
 
52
47
  return(self);
53
48
  }
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
51
  * another SmallFloatArray object, returning a new SmallFloatArray. */
57
52
  static VALUE method_multiply(VALUE self, VALUE obj)
58
53
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
54
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
60
55
  }
61
56
 
62
57
  /* Public: Divide values contained in the data array by those contained in
63
58
  * another SmallFloatArray object, returning a new SmallFloatArray. */
64
59
  static VALUE method_divide(VALUE self, VALUE obj)
65
60
  {
66
- return(internal_apply_operation(self, obj, func_divide));
61
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
67
62
  }
68
63
 
69
64
  /* Public: add values contained in the data array with those contained in
70
65
  * another SmallFloatArray object, returning a new SmallFloatArray. */
71
66
  static VALUE method_add(VALUE self, VALUE obj)
72
67
  {
73
- return(internal_apply_operation(self, obj, func_add));
68
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
74
69
  }
75
70
 
76
71
  /* Public: Subtract values contained in another SmallFloatArray object from those
77
72
  * contained in the current data array object, returning a new SmallFloatArray. */
78
73
  static VALUE method_subtract(VALUE self, VALUE obj)
79
74
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
75
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
81
76
  }
82
77
 
83
78
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -98,64 +93,6 @@ static VALUE method_to_a(VALUE self)
98
93
  return(rb_array);
99
94
  }
100
95
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- f4v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_SmallFloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 4);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 3) / 4;
120
-
121
- d1 = (f4v_t *)v1->data;
122
- d2 = (f4v_t *)v2->data;
123
- r = (f4v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 4))
149
- {
150
- for(i = 3; i > rv->len + (rv->len % 4); i--)
151
- {
152
- r[size].f[i] = 1;
153
- }
154
- }
155
-
156
- return(result_obj);
157
- }
158
-
159
96
  /* Function: Multiply two vectors. */
160
97
  static void func_multiply(void *v1, void *v2, void *r)
161
98
  {
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
9
  static VALUE method_to_a(VALUE self);
10
10
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
11
  static void func_multiply(void *v1, void *v2, void *r);
14
12
  static void func_divide(void *v1, void *v2, void *r);
15
13
  static void func_add(void *v1, void *v2, void *r);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tina Wuest
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-12 00:00:00.000000000 Z
11
+ date: 2014-12-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
64
64
  version: '0'
65
65
  requirements: []
66
66
  rubyforge_project:
67
- rubygems_version: 2.4.4
67
+ rubygems_version: 2.2.2
68
68
  signing_key:
69
69
  specification_version: 4
70
70
  summary: SIMD instructions in ruby