simd 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c15d248c872e4369c45e4151faf81e9761ea1587
4
- data.tar.gz: cdfc5170454be15be1bc75ccc5eba08aa96c884d
3
+ metadata.gz: 91f6ffe0659b15461fbf3bfc7657bd72b8e0d6eb
4
+ data.tar.gz: 289421206f7343fed00e5fbcf214d0908e835882
5
5
  SHA512:
6
- metadata.gz: 38df7b30a113b325bba7be834fcbe24c44738706495579af5a9e4b508e4db4375e2f36b16a97612eab4dd8da379c450d062c6e0bceb6e8ddf308a952ecf6419c
7
- data.tar.gz: 342680d7f4f6690c8b3338de32ef192aaadb2952461e62e86a848650414b09d793d71e70c5b38909e0c882f062e346995f86f40d3dbf1de692cbe3eb0cfb0c73
6
+ metadata.gz: 1c2353151fa088f7ebcb535d095211e83517fd2436e2acee2b5d42e43f776d8df4931007e2a50c0f11026bed64c99be20d25f6e3598e52f10b30301e532420ea
7
+ data.tar.gz: 1e0ef0af7c81a176fc908163f6c7d68a09ae651a9d852fecfdf0185a3aed1d1732d4636670576964537f27c687f1c9765d214b75f8d15bcab8e89e1124bcf224
@@ -1,5 +1,7 @@
1
1
  #include "simd_common.h"
2
2
 
3
+ #define XMM_BYTES 16 /* Width of the xmm1,2... registers */
4
+
3
5
  /* Internal: Allocate memory for the vector container. */
4
6
  VALUE allocate(VALUE klass)
5
7
  {
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
36
38
  }
37
39
 
38
40
  /* Internal: Allocate memory for the data array. */
39
- void *internal_allocate_vector_array(unsigned long count, size_t size)
41
+ void *internal_allocate_vector_array(unsigned long count)
40
42
  {
41
- unsigned int modulo = 16 / size;
42
- void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
43
+ void *vector = malloc((count + 1) * XMM_BYTES);
43
44
  if(vector == NULL)
44
45
  {
45
46
  rb_raise(rb_eNoMemError, "Unable to allocate memory");
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
74
75
  /* Never reached */
75
76
  return(-1);
76
77
  }
78
+
79
+ /* Internal: Given another object, perform an action specified via a function
80
+ * pointer against both.
81
+ *
82
+ * Since arithmetic is purposefully performed against a void pointers, disable
83
+ * warnings regarding this for the current function. */
84
+ #pragma GCC diagnostic push
85
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
86
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
87
+ {
88
+ unsigned long length, i, j;
89
+ int align;
90
+ vector_t *v1, *v2, *rv;
91
+ void *data;
92
+ VALUE result_obj = allocate(klass);
93
+
94
+ Data_Get_Struct(self, vector_t, v1);
95
+ Data_Get_Struct(obj, vector_t, v2);
96
+ Data_Get_Struct(result_obj, vector_t, rv);
97
+ rv->data = internal_allocate_vector_array(v1->len);
98
+
99
+ align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
100
+
101
+ length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
102
+ rv->len = v1->len;
103
+
104
+ switch(align)
105
+ {
106
+ case 0: /* Same size arrays */
107
+ for(i = 0; i < length; i++)
108
+ {
109
+ func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
110
+ }
111
+ break;
112
+ case 1: /* Operand is exactly 4 long (size of 1 sse register) */
113
+ for(i = 0; i < length; i++)
114
+ {
115
+ func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
116
+ }
117
+ break;
118
+ default: /* Self is a multiple of operand's length long */
119
+ for(j = 0; j < v2->len; j++)
120
+ {
121
+ data = v2->data + XMM_BYTES * j;
122
+ for(i = j; i < length; i+=v2->len)
123
+ {
124
+ func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
125
+ }
126
+ }
127
+ }
128
+ internal_sanitize_unaligned_final_vector(rv, size);
129
+
130
+ return(result_obj);
131
+ }
132
+ #pragma GCC diagnostic pop
133
+
134
+ /* Internal: Make sure that no null bytes exist beyond the boundary of
135
+ * unaligned vectors. This function should be called after any operation that
136
+ * results in the mutation or creation of a vector array.
137
+ *
138
+ * Since arithmetic is purposefully performed against a void pointers, disable
139
+ * warnings regarding this for the current function. */
140
+ #pragma GCC diagnostic push
141
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
142
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
143
+ {
144
+ unsigned long i;
145
+
146
+ if((rv->len * size) % XMM_BYTES)
147
+ {
148
+ for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
149
+ {
150
+ *(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
151
+ }
152
+ }
153
+ }
154
+ #pragma GCC diagnostic pop
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
8
8
 
9
9
  VALUE method_length(VALUE self);
10
10
 
11
- void *internal_allocate_vector_array(unsigned long count, size_t size);
11
+ void *internal_allocate_vector_array(unsigned long count);
12
12
  int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
13
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
14
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
22
  {
23
23
  vector_t *vector;
24
24
  d2v_t *data;
25
- unsigned long n,m,i;
25
+ unsigned long n,i;
26
26
 
27
27
  Check_Type(rb_array, T_ARRAY);
28
28
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
34
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
35
  }
36
36
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(d2v_t));
37
+ vector->data = internal_allocate_vector_array(vector->len);
38
38
 
39
39
  data = (d2v_t *)vector->data;
40
40
  for(i = 0; i < vector->len; i++)
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
42
  data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
43
43
  }
44
44
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 2);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/2].f[1] = 1.0;
50
- }
45
+ internal_sanitize_unaligned_final_vector(vector, sizeof(double));
51
46
 
52
47
  return(self);
53
48
  }
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
51
  * another FloatArray object, returning a new FloatArray. */
57
52
  static VALUE method_multiply(VALUE self, VALUE obj)
58
53
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
54
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
60
55
  }
61
56
 
62
57
  /* Public: Divide values contained in the data array by those contained in
63
58
  * another FloatArray object, returning a new FloatArray. */
64
59
  static VALUE method_divide(VALUE self, VALUE obj)
65
60
  {
66
- return(internal_apply_operation(self, obj, func_divide));
61
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
67
62
  }
68
63
 
69
64
  /* Public: add values contained in the data array with those contained in
70
65
  * another FloatArray object, returning a new FloatArray. */
71
66
  static VALUE method_add(VALUE self, VALUE obj)
72
67
  {
73
- return(internal_apply_operation(self, obj, func_add));
68
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
74
69
  }
75
70
 
76
71
  /* Public: Subtract values contained in another FloatArray object from those
77
72
  * contained in the current data array object, returning a new FloatArray. */
78
73
  static VALUE method_subtract(VALUE self, VALUE obj)
79
74
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
75
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
81
76
  }
82
77
 
83
78
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -98,61 +93,6 @@ static VALUE method_to_a(VALUE self)
98
93
  return(rb_array);
99
94
  }
100
95
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- d2v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_FloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 2);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 1) / 2;
120
-
121
- d1 = (d2v_t *)v1->data;
122
- d2 = (d2v_t *)v2->data;
123
- r = (d2v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 2 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 2))
149
- {
150
- r[size].f[1] = 1;
151
- }
152
-
153
- return(result_obj);
154
- }
155
-
156
96
  /* Function: Multiply two vectors. */
157
97
  static void func_multiply(void *v1, void *v2, void *r)
158
98
  {
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
9
  static VALUE method_to_a(VALUE self);
10
10
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
11
  static void func_multiply(void *v1, void *v2, void *r);
14
12
  static void func_divide(void *v1, void *v2, void *r);
15
13
  static void func_add(void *v1, void *v2, void *r);
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
25
  {
26
26
  vector_t *vector;
27
27
  i4v_t *data;
28
- unsigned long n,m,i;
28
+ unsigned long n,i;
29
29
 
30
30
  Check_Type(rb_array, T_ARRAY);
31
31
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
37
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
38
38
  }
39
39
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(i4v_t));
40
+ vector->data = internal_allocate_vector_array(vector->len);
41
41
 
42
42
  data = (i4v_t *)vector->data;
43
43
  for(i = 0; i < vector->len; i++)
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
45
  data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
46
46
  }
47
47
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 4);
50
- for(i = n % 4; i > 0; i--)
51
- {
52
- data[m/4].f[i] = 1.0;
53
- }
48
+ internal_sanitize_unaligned_final_vector(vector, sizeof(int));
54
49
 
55
50
  return(self);
56
51
  }
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
54
  * another FloatArray object, returning a new FloatArray. */
60
55
  static VALUE method_multiply(VALUE self, VALUE obj)
61
56
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
57
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
63
58
  }
64
59
 
65
60
  /* Public: Divide values contained in the data array by those contained in
66
61
  * another FloatArray object, returning a new FloatArray. */
67
62
  static VALUE method_divide(VALUE self, VALUE obj)
68
63
  {
69
- return(internal_apply_operation(self, obj, func_divide));
64
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
70
65
  }
71
66
 
72
67
  /* Public: add values contained in the data array with those contained in
73
68
  * another FloatArray object, returning a new FloatArray. */
74
69
  static VALUE method_add(VALUE self, VALUE obj)
75
70
  {
76
- return(internal_apply_operation(self, obj, func_add));
71
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
77
72
  }
78
73
 
79
74
  /* Public: and values contained in the data array with those contained in
80
75
  * another FloatArray object, returning a new FloatArray. */
81
76
  static VALUE method_and(VALUE self, VALUE obj)
82
77
  {
83
- return(internal_apply_operation(self, obj, func_and));
78
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
84
79
  }
85
80
 
86
81
  /* Public: or values contained in the data array with those contained in
87
82
  * another FloatArray object, returning a new FloatArray. */
88
83
  static VALUE method_or(VALUE self, VALUE obj)
89
84
  {
90
- return(internal_apply_operation(self, obj, func_or));
85
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
91
86
  }
92
87
 
93
88
  /* Public: xor values contained in the data array with those contained in
94
89
  * another FloatArray object, returning a new FloatArray. */
95
90
  static VALUE method_xor(VALUE self, VALUE obj)
96
91
  {
97
- return(internal_apply_operation(self, obj, func_xor));
92
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
98
93
  }
99
94
 
100
95
  /* Public: Subtract values contained in another FloatArray object from those
101
96
  * contained in the current data array object, returning a new FloatArray. */
102
97
  static VALUE method_subtract(VALUE self, VALUE obj)
103
98
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
99
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
105
100
  }
106
101
 
107
102
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -122,64 +117,6 @@ static VALUE method_to_a(VALUE self)
122
117
  return(rb_array);
123
118
  }
124
119
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- i4v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_IntArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 4);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 3) / 4;
144
-
145
- d1 = (i4v_t *)v1->data;
146
- d2 = (i4v_t *)v2->data;
147
- r = (i4v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 4))
173
- {
174
- for(i = 3; i > rv->len + (rv->len % 4); i--)
175
- {
176
- r[size].f[i] = 1;
177
- }
178
- }
179
-
180
- return(result_obj);
181
- }
182
-
183
120
  /* Function: Multiply two vectors. */
184
121
  static void func_multiply(void *v1, void *v2, void *r)
185
122
  {
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
11
11
  static VALUE method_subtract(VALUE self, VALUE obj);
12
12
  static VALUE method_to_a(VALUE self);
13
13
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
14
  static void func_multiply(void *v1, void *v2, void *r);
17
15
  static void func_divide(void *v1, void *v2, void *r);
18
16
  static void func_add(void *v1, void *v2, void *r);
@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
25
  {
26
26
  vector_t *vector;
27
27
  l2v_t *data;
28
- unsigned long n,m,i;
28
+ unsigned long n,i;
29
29
 
30
30
  Check_Type(rb_array, T_ARRAY);
31
31
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
37
  rb_raise(rb_eArgError, "Vectors must be at least 2 long");
38
38
  }
39
39
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(l2v_t));
40
+ vector->data = internal_allocate_vector_array(vector->len);
41
41
 
42
42
  data = (l2v_t *)vector->data;
43
43
  for(i = 0; i < vector->len; i++)
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
45
  data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
46
46
  }
47
47
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 2);
50
- for(i = n % 2; i > 0; i--)
51
- {
52
- data[m/2].f[i] = 1;
53
- }
48
+ internal_sanitize_unaligned_final_vector(vector, sizeof(long));
54
49
 
55
50
  return(self);
56
51
  }
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
54
  * another FloatArray object, returning a new FloatArray. */
60
55
  static VALUE method_multiply(VALUE self, VALUE obj)
61
56
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
57
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_multiply));
63
58
  }
64
59
 
65
60
  /* Public: Divide values contained in the data array by those contained in
66
61
  * another FloatArray object, returning a new FloatArray. */
67
62
  static VALUE method_divide(VALUE self, VALUE obj)
68
63
  {
69
- return(internal_apply_operation(self, obj, func_divide));
64
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_divide));
70
65
  }
71
66
 
72
67
  /* Public: add values contained in the data array with those contained in
73
68
  * another FloatArray object, returning a new FloatArray. */
74
69
  static VALUE method_add(VALUE self, VALUE obj)
75
70
  {
76
- return(internal_apply_operation(self, obj, func_add));
71
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_add));
77
72
  }
78
73
 
79
74
  /* Public: and values contained in the data array with those contained in
80
75
  * another FloatArray object, returning a new FloatArray. */
81
76
  static VALUE method_and(VALUE self, VALUE obj)
82
77
  {
83
- return(internal_apply_operation(self, obj, func_and));
78
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_and));
84
79
  }
85
80
 
86
81
  /* Public: or values contained in the data array with those contained in
87
82
  * another FloatArray object, returning a new FloatArray. */
88
83
  static VALUE method_or(VALUE self, VALUE obj)
89
84
  {
90
- return(internal_apply_operation(self, obj, func_or));
85
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_or));
91
86
  }
92
87
 
93
88
  /* Public: xor values contained in the data array with those contained in
94
89
  * another FloatArray object, returning a new FloatArray. */
95
90
  static VALUE method_xor(VALUE self, VALUE obj)
96
91
  {
97
- return(internal_apply_operation(self, obj, func_xor));
92
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_xor));
98
93
  }
99
94
 
100
95
  /* Public: Subtract values contained in another FloatArray object from those
101
96
  * contained in the current data array object, returning a new FloatArray. */
102
97
  static VALUE method_subtract(VALUE self, VALUE obj)
103
98
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
99
+ return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_subtract));
105
100
  }
106
101
 
107
102
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -122,61 +117,6 @@ static VALUE method_to_a(VALUE self)
122
117
  return(rb_array);
123
118
  }
124
119
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- l2v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_LongArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 2);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 1) / 2;
144
-
145
- d1 = (l2v_t *)v1->data;
146
- d2 = (l2v_t *)v2->data;
147
- r = (l2v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 2))
173
- {
174
- r[size].f[1] = 1;
175
- }
176
-
177
- return(result_obj);
178
- }
179
-
180
120
  /* Function: Multiply two vectors. */
181
121
  static void func_multiply(void *v1, void *v2, void *r)
182
122
  {
@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
11
11
  static VALUE method_subtract(VALUE self, VALUE obj);
12
12
  static VALUE method_to_a(VALUE self);
13
13
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
14
  static void func_multiply(void *v1, void *v2, void *r);
17
15
  static void func_divide(void *v1, void *v2, void *r);
18
16
  static void func_add(void *v1, void *v2, void *r);
@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
22
  {
23
23
  vector_t *vector;
24
24
  f4v_t *data;
25
- unsigned long n,m,i;
25
+ unsigned long n,i;
26
26
 
27
27
  Check_Type(rb_array, T_ARRAY);
28
28
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
34
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
35
  }
36
36
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(f4v_t));
37
+ vector->data = internal_allocate_vector_array(vector->len);
38
38
 
39
39
  data = (f4v_t *)vector->data;
40
40
  for(i = 0; i < vector->len; i++)
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
42
  data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
43
43
  }
44
44
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 4);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/4].f[i] = 1.0;
50
- }
45
+ internal_sanitize_unaligned_final_vector(vector, sizeof(float));
51
46
 
52
47
  return(self);
53
48
  }
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
51
  * another FloatArray object, returning a new FloatArray. */
57
52
  static VALUE method_multiply(VALUE self, VALUE obj)
58
53
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
54
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
60
55
  }
61
56
 
62
57
  /* Public: Divide values contained in the data array by those contained in
63
58
  * another FloatArray object, returning a new FloatArray. */
64
59
  static VALUE method_divide(VALUE self, VALUE obj)
65
60
  {
66
- return(internal_apply_operation(self, obj, func_divide));
61
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
67
62
  }
68
63
 
69
64
  /* Public: add values contained in the data array with those contained in
70
65
  * another FloatArray object, returning a new FloatArray. */
71
66
  static VALUE method_add(VALUE self, VALUE obj)
72
67
  {
73
- return(internal_apply_operation(self, obj, func_add));
68
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
74
69
  }
75
70
 
76
71
  /* Public: Subtract values contained in another FloatArray object from those
77
72
  * contained in the current data array object, returning a new FloatArray. */
78
73
  static VALUE method_subtract(VALUE self, VALUE obj)
79
74
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
75
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
81
76
  }
82
77
 
83
78
  /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -98,64 +93,6 @@ static VALUE method_to_a(VALUE self)
98
93
  return(rb_array);
99
94
  }
100
95
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- f4v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_SmallFloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 4);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 3) / 4;
120
-
121
- d1 = (f4v_t *)v1->data;
122
- d2 = (f4v_t *)v2->data;
123
- r = (f4v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 4))
149
- {
150
- for(i = 3; i > rv->len + (rv->len % 4); i--)
151
- {
152
- r[size].f[i] = 1;
153
- }
154
- }
155
-
156
- return(result_obj);
157
- }
158
-
159
96
  /* Function: Multiply two vectors. */
160
97
  static void func_multiply(void *v1, void *v2, void *r)
161
98
  {
@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
9
  static VALUE method_to_a(VALUE self);
10
10
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
11
  static void func_multiply(void *v1, void *v2, void *r);
14
12
  static void func_divide(void *v1, void *v2, void *r);
15
13
  static void func_add(void *v1, void *v2, void *r);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tina Wuest
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-12 00:00:00.000000000 Z
11
+ date: 2014-12-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
64
64
  version: '0'
65
65
  requirements: []
66
66
  rubyforge_project:
67
- rubygems_version: 2.4.4
67
+ rubygems_version: 2.2.2
68
68
  signing_key:
69
69
  specification_version: 4
70
70
  summary: SIMD instructions in ruby