RubyGems - simd - Versions diffs - 0.4.0 → 0.5.0 - Mend

simd 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/ext/simd/simd_common.c +81 -3
data/ext/simd/simd_common.h +3 -1
data/ext/simd/simd_floatarray.c +7 -67
data/ext/simd/simd_floatarray.h +0 -2
data/ext/simd/simd_intarray.c +10 -73
data/ext/simd/simd_intarray.h +0 -2
data/ext/simd/simd_longarray.c +10 -70
data/ext/simd/simd_longarray.h +0 -2
data/ext/simd/simd_smallfloatarray.c +7 -70
data/ext/simd/simd_smallfloatarray.h +0 -2
metadata +3 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c15d248c872e4369c45e4151faf81e9761ea1587
-  data.tar.gz: cdfc5170454be15be1bc75ccc5eba08aa96c884d
+  metadata.gz: 91f6ffe0659b15461fbf3bfc7657bd72b8e0d6eb
+  data.tar.gz: 289421206f7343fed00e5fbcf214d0908e835882
 SHA512:
-  metadata.gz: 38df7b30a113b325bba7be834fcbe24c44738706495579af5a9e4b508e4db4375e2f36b16a97612eab4dd8da379c450d062c6e0bceb6e8ddf308a952ecf6419c
-  data.tar.gz: 342680d7f4f6690c8b3338de32ef192aaadb2952461e62e86a848650414b09d793d71e70c5b38909e0c882f062e346995f86f40d3dbf1de692cbe3eb0cfb0c73
+  metadata.gz: 1c2353151fa088f7ebcb535d095211e83517fd2436e2acee2b5d42e43f776d8df4931007e2a50c0f11026bed64c99be20d25f6e3598e52f10b30301e532420ea
+  data.tar.gz: 1e0ef0af7c81a176fc908163f6c7d68a09ae651a9d852fecfdf0185a3aed1d1732d4636670576964537f27c687f1c9765d214b75f8d15bcab8e89e1124bcf224

data/ext/simd/simd_common.c CHANGED

@@ -1,5 +1,7 @@
 #include "simd_common.h"
+#define XMM_BYTES 16 /* Width of the xmm1,2... registers */
 /* Internal: Allocate memory for the vector container. */
 VALUE allocate(VALUE klass)
 {
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
 }
 /* Internal: Allocate memory for the data array. */
-void *internal_allocate_vector_array(unsigned long count, size_t size)
+void *internal_allocate_vector_array(unsigned long count)
 {
-	unsigned int modulo = 16 / size;
-	void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
+	void *vector = malloc((count + 1) * XMM_BYTES);
 	if(vector == NULL)
 	{
 		rb_raise(rb_eNoMemError, "Unable to allocate memory");
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
 	/* Never reached */
 	return(-1);
 }
+/* Internal: Given another object, perform an action specified via a function
+ * pointer against both.
+ *
+ * Since arithmetic is purposefully performed against void pointers, disable
+ * warnings regarding this for the current function. */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpointer-arith"
+VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
+{
+	unsigned long length, i, j;
+	int align;
+	vector_t *v1, *v2, *rv;
+	void *data;
+	VALUE result_obj = allocate(klass);
+	Data_Get_Struct(self, vector_t, v1);
+	Data_Get_Struct(obj, vector_t, v2);
+	Data_Get_Struct(result_obj, vector_t, rv);
+	rv->data = internal_allocate_vector_array(v1->len);
+	align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
+	length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
+	rv->len = v1->len;
+	switch(align)
+	{
+		case 0: /* Same size arrays */
+			for(i = 0; i < length; i++)
+			{
+				func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
+			}
+			break;
+		case 1: /* Operand is exactly 4 long (size of 1 sse register) */
+			for(i = 0; i < length; i++)
+			{
+				func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
+			}
+			break;
+		default: /* Self is a multiple of operand's length long */
+			for(j = 0; j < v2->len; j++)
+			{
+				data = v2->data + XMM_BYTES * j;
+				for(i = j; i < length; i+=v2->len)
+				{
+					func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
+				}
+			}
+	}
+	internal_sanitize_unaligned_final_vector(rv, size);
+	return(result_obj);
+}
+#pragma GCC diagnostic pop
+/* Internal: Make sure that no null bytes exist beyond the boundary of
+ * unaligned vectors.  This function should be called after any operation that
+ * results in the mutation or creation of a vector array.
+ *
+ * Since arithmetic is purposefully performed against void pointers, disable
+ * warnings regarding this for the current function. */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpointer-arith"
+void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
+{
+	unsigned long i;
+	if((rv->len * size) % XMM_BYTES)
+	{
+		for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
+		{
+			*(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
+		}
+	}
+}
+#pragma GCC diagnostic pop

data/ext/simd/simd_common.h CHANGED

@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
 VALUE method_length(VALUE self);
-void *internal_allocate_vector_array(unsigned long count, size_t size);
+void *internal_allocate_vector_array(unsigned long count);
 int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
+VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
+void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);

data/ext/simd/simd_floatarray.c CHANGED

@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 {
 	vector_t *vector;
 	d2v_t *data;
-	unsigned long n,m,i;
+	unsigned long n,i;
 	Check_Type(rb_array, T_ARRAY);
 	Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		rb_raise(rb_eArgError, "Vectors must be at least 4 long");
 	}
-	vector->data = internal_allocate_vector_array(vector->len, sizeof(d2v_t));
+	vector->data = internal_allocate_vector_array(vector->len);
 	data = (d2v_t *)vector->data;
 	for(i = 0; i < vector->len; i++)
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
 	}
-	/* If the array is an odd number of elements, set the final element to 1 */
-	m = n + (n % 2);
-	for(i = n % 4; i > 0; i--)
-	{
-		data[m/2].f[1] = 1.0;
-	}
+	internal_sanitize_unaligned_final_vector(vector, sizeof(double));
 	return(self);
 }
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_multiply(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_multiply));
+	return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
 }
 /* Public: Divide values contained in the data array by those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_divide(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_divide));
+	return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
 }
 /* Public: add values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_add(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_add));
+	return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
 }
 /* Public: Subtract values contained in another FloatArray object from those
  * contained in the current data array object, returning a new FloatArray. */
 static VALUE method_subtract(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_subtract));
+	return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
 }
 /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -98,61 +93,6 @@ static VALUE method_to_a(VALUE self)
 	return(rb_array);
 }
-/* Internal: Given another FloatArray object, perform an action specified via a
- * function pointer against both. */
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
-{
-	unsigned long size, i;
-	int align;
-	vector_t *v1, *v2, *rv;
-	d2v_t *d1, *d2, *r;
-	VALUE result_obj = allocate(SIMD_FloatArray);
-	Data_Get_Struct(self, vector_t, v1);
-	Data_Get_Struct(obj, vector_t, v2);
-	Data_Get_Struct(result_obj, vector_t, rv);
-	rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
-	align = internal_align_vectors(v1->len, v2->len, 2);
-	/* Ensure that size will be the result of ceil(len / 4.0) */
-	size = (v1->len + 1) / 2;
-	d1 = (d2v_t *)v1->data;
-	d2 = (d2v_t *)v2->data;
-	r  = (d2v_t *)rv->data;
-	rv->len = v1->len;
-	switch(align)
-	{
-		case 0: /* Same size arrays */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i].v, &r[i].v);
-			}
-			break;
-		case 1: /* Operand is exactly 2 long (size of 1 sse register) */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[0].v, &r[i].v);
-			}
-			break;
-		default: /* Self is a multiple of operand's length long */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
-			}
-	}
-	if(rv->len != rv->len + (rv->len % 2))
-	{
-		r[size].f[1] = 1;
-	}
-	return(result_obj);
-}
 /* Function: Multiply two vectors. */
 static void func_multiply(void *v1, void *v2, void *r)
 {

data/ext/simd/simd_floatarray.h CHANGED

@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
 static VALUE method_subtract(VALUE self, VALUE obj);
 static VALUE method_to_a(VALUE self);
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
 static void func_multiply(void *v1, void *v2, void *r);
 static void func_divide(void *v1, void *v2, void *r);
 static void func_add(void *v1, void *v2, void *r);

data/ext/simd/simd_intarray.c CHANGED

@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 {
 	vector_t *vector;
 	i4v_t *data;
-	unsigned long n,m,i;
+	unsigned long n,i;
 	Check_Type(rb_array, T_ARRAY);
 	Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		rb_raise(rb_eArgError, "Vectors must be at least 4 long");
 	}
-	vector->data = internal_allocate_vector_array(vector->len, sizeof(i4v_t));
+	vector->data = internal_allocate_vector_array(vector->len);
 	data = (i4v_t *)vector->data;
 	for(i = 0; i < vector->len; i++)
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
 	}
-	/* If the array is an odd number of elements, set the final element to 1 */
-	m = n + (n % 4);
-	for(i = n % 4; i > 0; i--)
-	{
-		data[m/4].f[i] = 1.0;
-	}
+	internal_sanitize_unaligned_final_vector(vector, sizeof(int));
 	return(self);
 }
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_multiply(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_multiply));
+	return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
 }
 /* Public: Divide values contained in the data array by those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_divide(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_divide));
+	return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
 }
 /* Public: add values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_add(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_add));
+	return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
 }
 /* Public: and values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_and(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_and));
+	return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
 }
 /* Public: or values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_or(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_or));
+	return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
 }
 /* Public: xor values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_xor(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_xor));
+	return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
 }
 /* Public: Subtract values contained in another FloatArray object from those
  * contained in the current data array object, returning a new FloatArray. */
 static VALUE method_subtract(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_subtract));
+	return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
 }
 /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -122,64 +117,6 @@ static VALUE method_to_a(VALUE self)
 	return(rb_array);
 }
-/* Internal: Given another FloatArray object, perform an action specified via a
- * function pointer against both. */
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
-{
-	unsigned long size, i;
-	int align;
-	vector_t *v1, *v2, *rv;
-	i4v_t *d1, *d2, *r;
-	VALUE result_obj = allocate(SIMD_IntArray);
-	Data_Get_Struct(self, vector_t, v1);
-	Data_Get_Struct(obj, vector_t, v2);
-	Data_Get_Struct(result_obj, vector_t, rv);
-	rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
-	align = internal_align_vectors(v1->len, v2->len, 4);
-	/* Ensure that size will be the result of ceil(len / 4.0) */
-	size = (v1->len + 3) / 4;
-	d1 = (i4v_t *)v1->data;
-	d2 = (i4v_t *)v2->data;
-	r  = (i4v_t *)rv->data;
-	rv->len = v1->len;
-	switch(align)
-	{
-		case 0: /* Same size arrays */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i].v, &r[i].v);
-			}
-			break;
-		case 1: /* Operand is exactly 4 long (size of 1 sse register) */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[0].v, &r[i].v);
-			}
-			break;
-		default: /* Self is a multiple of operand's length long */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
-			}
-	}
-	if(rv->len != rv->len + (rv->len % 4))
-	{
-		for(i = 3; i > rv->len + (rv->len % 4); i--)
-		{
-			r[size].f[i] = 1;
-		}
-	}
-	return(result_obj);
-}
 /* Function: Multiply two vectors. */
 static void func_multiply(void *v1, void *v2, void *r)
 {

data/ext/simd/simd_intarray.h CHANGED

@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
 static VALUE method_subtract(VALUE self, VALUE obj);
 static VALUE method_to_a(VALUE self);
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
 static void func_multiply(void *v1, void *v2, void *r);
 static void func_divide(void *v1, void *v2, void *r);
 static void func_add(void *v1, void *v2, void *r);

data/ext/simd/simd_longarray.c CHANGED

@@ -25,7 +25,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 {
 	vector_t *vector;
 	l2v_t *data;
-	unsigned long n,m,i;
+	unsigned long n,i;
 	Check_Type(rb_array, T_ARRAY);
 	Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +37,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		rb_raise(rb_eArgError, "Vectors must be at least 2 long");
 	}
-	vector->data = internal_allocate_vector_array(vector->len, sizeof(l2v_t));
+	vector->data = internal_allocate_vector_array(vector->len);
 	data = (l2v_t *)vector->data;
 	for(i = 0; i < vector->len; i++)
@@ -45,12 +45,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
 	}
-	/* If the array is an odd number of elements, set the final element to 1 */
-	m = n + (n % 2);
-	for(i = n % 2; i > 0; i--)
-	{
-		data[m/2].f[i] = 1;
-	}
+	internal_sanitize_unaligned_final_vector(vector, sizeof(long));
 	return(self);
 }
@@ -59,49 +54,49 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_multiply(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_multiply));
+	return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_multiply));
 }
 /* Public: Divide values contained in the data array by those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_divide(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_divide));
+	return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_divide));
 }
 /* Public: add values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_add(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_add));
+	return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_add));
 }
 /* Public: and values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_and(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_and));
+	return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_and));
 }
 /* Public: or values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_or(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_or));
+	return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_or));
 }
 /* Public: xor values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_xor(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_xor));
+	return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_xor));
 }
 /* Public: Subtract values contained in another FloatArray object from those
  * contained in the current data array object, returning a new FloatArray. */
 static VALUE method_subtract(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_subtract));
+	return(internal_apply_operation(self, obj, sizeof(long), SIMD_LongArray, func_subtract));
 }
 /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -122,61 +117,6 @@ static VALUE method_to_a(VALUE self)
 	return(rb_array);
 }
-/* Internal: Given another FloatArray object, perform an action specified via a
- * function pointer against both. */
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
-{
-	unsigned long size, i;
-	int align;
-	vector_t *v1, *v2, *rv;
-	l2v_t *d1, *d2, *r;
-	VALUE result_obj = allocate(SIMD_LongArray);
-	Data_Get_Struct(self, vector_t, v1);
-	Data_Get_Struct(obj, vector_t, v2);
-	Data_Get_Struct(result_obj, vector_t, rv);
-	rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
-	align = internal_align_vectors(v1->len, v2->len, 2);
-	/* Ensure that size will be the result of ceil(len / 4.0) */
-	size = (v1->len + 1) / 2;
-	d1 = (l2v_t *)v1->data;
-	d2 = (l2v_t *)v2->data;
-	r  = (l2v_t *)rv->data;
-	rv->len = v1->len;
-	switch(align)
-	{
-		case 0: /* Same size arrays */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i].v, &r[i].v);
-			}
-			break;
-		case 1: /* Operand is exactly 4 long (size of 1 sse register) */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[0].v, &r[i].v);
-			}
-			break;
-		default: /* Self is a multiple of operand's length long */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
-			}
-	}
-	if(rv->len != rv->len + (rv->len % 2))
-	{
-		r[size].f[1] = 1;
-	}
-	return(result_obj);
-}
 /* Function: Multiply two vectors. */
 static void func_multiply(void *v1, void *v2, void *r)
 {

data/ext/simd/simd_longarray.h CHANGED

@@ -11,8 +11,6 @@ static VALUE method_xor(VALUE self, VALUE obj);
 static VALUE method_subtract(VALUE self, VALUE obj);
 static VALUE method_to_a(VALUE self);
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
 static void func_multiply(void *v1, void *v2, void *r);
 static void func_divide(void *v1, void *v2, void *r);
 static void func_add(void *v1, void *v2, void *r);

data/ext/simd/simd_smallfloatarray.c CHANGED

@@ -22,7 +22,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 {
 	vector_t *vector;
 	f4v_t *data;
-	unsigned long n,m,i;
+	unsigned long n,i;
 	Check_Type(rb_array, T_ARRAY);
 	Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +34,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		rb_raise(rb_eArgError, "Vectors must be at least 4 long");
 	}
-	vector->data = internal_allocate_vector_array(vector->len, sizeof(f4v_t));
+	vector->data = internal_allocate_vector_array(vector->len);
 	data = (f4v_t *)vector->data;
 	for(i = 0; i < vector->len; i++)
@@ -42,12 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
 		data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
 	}
-	/* If the array is an odd number of elements, set the final element to 1 */
-	m = n + (n % 4);
-	for(i = n % 4; i > 0; i--)
-	{
-		data[m/4].f[i] = 1.0;
-	}
+	internal_sanitize_unaligned_final_vector(vector, sizeof(float));
 	return(self);
 }
@@ -56,28 +51,28 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_multiply(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_multiply));
+	return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
 }
 /* Public: Divide values contained in the data array by those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_divide(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_divide));
+	return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
 }
 /* Public: add values contained in the data array with those contained in
  * another FloatArray object, returning a new FloatArray. */
 static VALUE method_add(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_add));
+	return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
 }
 /* Public: Subtract values contained in another FloatArray object from those
  * contained in the current data array object, returning a new FloatArray. */
 static VALUE method_subtract(VALUE self, VALUE obj)
 {
-	return(internal_apply_operation(self, obj, func_subtract));
+	return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
 }
 /* Public: Return a Ruby Array containing the doubles within the data array. */
@@ -98,64 +93,6 @@ static VALUE method_to_a(VALUE self)
 	return(rb_array);
 }
-/* Internal: Given another FloatArray object, perform an action specified via a
- * function pointer against both. */
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
-{
-	unsigned long size, i;
-	int align;
-	vector_t *v1, *v2, *rv;
-	f4v_t *d1, *d2, *r;
-	VALUE result_obj = allocate(SIMD_SmallFloatArray);
-	Data_Get_Struct(self, vector_t, v1);
-	Data_Get_Struct(obj, vector_t, v2);
-	Data_Get_Struct(result_obj, vector_t, rv);
-	rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
-	align = internal_align_vectors(v1->len, v2->len, 4);
-	/* Ensure that size will be the result of ceil(len / 4.0) */
-	size = (v1->len + 3) / 4;
-	d1 = (f4v_t *)v1->data;
-	d2 = (f4v_t *)v2->data;
-	r  = (f4v_t *)rv->data;
-	rv->len = v1->len;
-	switch(align)
-	{
-		case 0: /* Same size arrays */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i].v, &r[i].v);
-			}
-			break;
-		case 1: /* Operand is exactly 4 long (size of 1 sse register) */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[0].v, &r[i].v);
-			}
-			break;
-		default: /* Self is a multiple of operand's length long */
-			for(i = 0; i < size; i++)
-			{
-				func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
-			}
-	}
-	if(rv->len != rv->len + (rv->len % 4))
-	{
-		for(i = 3; i > rv->len + (rv->len % 4); i--)
-		{
-			r[size].f[i] = 1;
-		}
-	}
-	return(result_obj);
-}
 /* Function: Multiply two vectors. */
 static void func_multiply(void *v1, void *v2, void *r)
 {

data/ext/simd/simd_smallfloatarray.h CHANGED

@@ -8,8 +8,6 @@ static VALUE method_add(VALUE self, VALUE obj);
 static VALUE method_subtract(VALUE self, VALUE obj);
 static VALUE method_to_a(VALUE self);
-static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
 static void func_multiply(void *v1, void *v2, void *r);
 static void func_divide(void *v1, void *v2, void *r);
 static void func_add(void *v1, void *v2, void *r);

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: simd
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Tina Wuest
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-12-12 00:00:00.000000000 Z
+date: 2014-12-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake-compiler
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.4
+rubygems_version: 2.2.2
 signing_key:
 specification_version: 4
 summary: SIMD instructions in ruby