RubyGems - simd - Versions diffs - 0.1.0 → 0.2.0 - Mend

simd 0.1.0 → 0.2.0

Files changed (11) hide show

checksums.yaml +4 -4
data/ext/simd/simd.c +1 -0
data/ext/simd/simd.h +1 -0
data/ext/simd/simd_common.c +76 -0
data/ext/simd/simd_common.h +12 -0
data/ext/simd/simd_floatarray.c +34 -96
data/ext/simd/simd_floatarray.h +5 -2
data/ext/simd/simd_smallfloatarray.c +173 -0
data/ext/simd/simd_smallfloatarray.h +25 -0
data/ext/simd/simd_types.h +20 -3
metadata +6 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 8f0bff704f3580fee55205c32ada6a1844dde4ae
-  data.tar.gz: 9e4f9d097fa20a349b77279535d39ea695a881a4
+  metadata.gz: 3acc0dcd98951246c84b35036f47dedd69d798cb
+  data.tar.gz: 323dbca8597e5993d0bd48ebb8805febdf064c47
 SHA512:
-  metadata.gz: ccaa110b5ff7708c5e14dd5355f4361544832b412c2d150e9849f8106c55e1183252cbe540163941b65968037369b8618b5e66c9267e75807553aa2e7e8ccda5
-  data.tar.gz: e107fe3e242f6baa6d93ab6edf2886f58499d70fd690f6f1c72d7bff165932ce953026d480b0ac3a41c5e35d1ec4dc8b96c19d38d19857f0fdc8c7e759c3fa44
+  metadata.gz: e05f61501a47ad89bf55a6a25fcda70add8aa004701336e0543c4421ca0b4a9628aab30281e0aa9c30bb79a994fad2152c7da853dc6989adea400a79c9ada05c
+  data.tar.gz: 45f06b3ead4a140d992f34c2135724f649143e58035945e9e8f8de9e0595b8fda54538cab233a494ddcb0eb1e6e1a6d0f5948084b6e29e91d461c34d901eb8ef

data/ext/simd/simd.c CHANGED

@@ -5,4 +5,5 @@ void Init_simd()
 {
 	SIMD = rb_define_module("SIMD");
 	Init_SIMD_FloatArray(SIMD);
+	Init_SIMD_SmallFloatArray(SIMD);
 }

data/ext/simd/simd.h CHANGED

@@ -2,3 +2,4 @@
 void Init_simd();
 void Init_SIMD_FloatArray(VALUE parent);
+void Init_SIMD_SmallFloatArray(VALUE parent);

data/ext/simd/simd_common.c ADDED

@@ -0,0 +1,76 @@
+#include "simd_common.h"
+/* Internal: Allocate memory for the vector container.
+ *
+ * klass - The Ruby class the new object is wrapped as.
+ *
+ * Returns a new object of klass wrapping an empty vector_t (no data array,
+ * length 0) with deallocate registered as its free function.
+ * Raises NoMemoryError if malloc() fails. */
+VALUE allocate(VALUE klass)
+{
+	vector_t *vector = malloc(sizeof(*vector));
+	if(vector == NULL)
+	{
+		rb_raise(rb_eNoMemError, "Unable to allocate memory");
+	}
+	vector->data = NULL; /* Avoid potentially freeing uninitialized memory. */
+	vector->len = 0;     /* Keep the length defined until a caller sets it. */
+	return(Data_Wrap_Struct(klass, NULL, deallocate, vector));
+}
+/* Internal: Release the data array and then the vector container itself.
+ * Registered by allocate as the free function of the wrapped object.
+ *
+ * vector - Container to release; a NULL pointer is ignored. */
+void deallocate(vector_t *vector)
+{
+	if(vector == NULL)
+	{
+		return;
+	}
+	free(vector->data); /* free(NULL) is a no-op, so no separate check. */
+	free(vector);
+}
+/* Public: Return the number of elements in the Array.
+ *
+ * Returns the stored element count as a Ruby Integer.  The count is an
+ * unsigned long, so it is converted with ULONG2NUM; INT2NUM would narrow it
+ * to int first and misreport very large lengths. */
+VALUE method_length(VALUE self)
+{
+	vector_t *vector;
+	Data_Get_Struct(self, vector_t, vector);
+	return(ULONG2NUM(vector->len));
+}
+/* Internal: Allocate memory for the data array.
+ *
+ * count - Number of elements the array must hold.
+ * size  - Size in bytes of one storage unit; must be between 1 and 16.
+ *
+ * Returns a malloc()ed buffer of ((count + count % modulo) / modulo + 1)
+ * units of size bytes, where modulo = 16 / size.  The buffer is released
+ * with free().
+ * Raises ArgumentError when size is 0 or larger than 16 (modulo would be
+ * undefined or zero) and NoMemoryError when the byte count does not fit or
+ * malloc() fails. */
+void *internal_allocate_vector_array(unsigned long count, size_t size)
+{
+	unsigned long modulo, units;
+	void *vector;
+	if(size == 0 || size > 16)
+	{
+		rb_raise(rb_eArgError, "Element size must be between 1 and 16 bytes.");
+	}
+	modulo = 16 / size;
+	/* count + (count % modulo) must not wrap around. */
+	if(count > (unsigned long)-1 - modulo)
+	{
+		rb_raise(rb_eNoMemError, "Unable to allocate memory");
+	}
+	units = (count + (count % modulo)) / modulo + 1;
+	/* units * size must fit in a size_t. */
+	if(units > (size_t)-1 / size)
+	{
+		rb_raise(rb_eNoMemError, "Unable to allocate memory");
+	}
+	vector = malloc(units * size);
+	if(vector == NULL)
+	{
+		rb_raise(rb_eNoMemError, "Unable to allocate memory");
+	}
+	return(vector);
+}
+/* Internal: Determine if two arrays can be acted upon, by being of equal
+ * lengths or with the data array's length being a multiple of the operand's.
+ *
+ * v1     - Length of the data array (left-hand side).
+ * v2     - Length of the operand (right-hand side).
+ * modulo - Number of elements per vector register; must be non-zero.
+ *
+ * Returns 0 when both lengths are equal, 1 when the operand is exactly
+ * modulo long, and 2 when v1 is a multiple of v2 and v2 is a multiple of
+ * modulo.
+ * Raises ArgumentError when the lengths leave different remainders modulo
+ * `modulo`, or when none of the cases above applies (including an empty
+ * operand, which would otherwise divide by zero). */
+int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo)
+{
+	if((v1 % modulo) != (v2 % modulo))
+	{
+		rb_raise(rb_eArgError, "Both Vectors must be of even or odd length.");
+	}
+	if(v1 == v2)
+	{
+		return(0);
+	}
+	if(v2 == modulo)
+	{
+		return(1);
+	}
+	/* v2 != 0 is tested first so that v1 % v2 is never a division by zero. */
+	if(v2 != 0 && v1 % v2 == 0 && v2 % modulo == 0)
+	{
+		return(2);
+	}
+	rb_raise(rb_eArgError, "Vector lengths misaligned.");
+	/* Never reached */
+	return(-1);
+}

data/ext/simd/simd_common.h ADDED

@@ -0,0 +1,12 @@
+#pragma once
+#include "ruby.h"
+#include "simd_types.h"
+VALUE allocate(VALUE klass);
+void deallocate(vector_t *vector);
+VALUE method_length(VALUE self);
+void *internal_allocate_vector_array(unsigned long count, size_t size);
+int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);

data/ext/simd/simd_floatarray.c CHANGED

@@ -16,59 +16,37 @@ void Init_SIMD_FloatArray(VALUE parent)
 	rb_define_method(SIMD_FloatArray, "to_a", method_to_a, 0);
 }
-/* Internal: Allocate memory for the vector container. */
-static VALUE allocate(VALUE klass)
-{
-	d2v_container *vector = malloc(sizeof(d2v_container));
-	if(vector == NULL)
-	{
-		rb_raise(rb_eNoMemError, "Unable to allocate memory");
-	}
-	vector->data = NULL; /* Avoid potentially freeing unitialized memory. */
-	return(Data_Wrap_Struct(klass, NULL, deallocate, vector));
-}
-/* Internal: Free memory from the vector container and the data array. */
-static void deallocate(d2v_container *vector)
-{
-	if(vector)
-	{
-		if(vector->data)
-		{
-			free(vector->data);
-		}
-		free(vector);
-	}
-}
 /* Public: Initialize the FloatArray object given a Ruby Array of values
  * which can be cast to a double. */
 static VALUE method_initialize(VALUE self, VALUE rb_array)
 {
-	d2v_container *vector;
+	vector_t *vector;
+	d2v_t *data;
 	unsigned long n,m,i;
 	Check_Type(rb_array, T_ARRAY);
-	Data_Get_Struct(self, d2v_container, vector);
+	Data_Get_Struct(self, vector_t, vector);
 	vector->len = n = RARRAY_LEN(rb_array);
 	if(vector->len < 2)
 	{
-		rb_raise(rb_eArgError, "Vectors must be at least 2 long");
+		rb_raise(rb_eArgError, "Vectors must be at least 4 long");
 	}
-	vector->data = internal_allocate_vector_array(vector->len);
+	vector->data = internal_allocate_vector_array(vector->len, sizeof(d2v_t));
-	for(i = 0; i < n; i++)
-		vector->data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
+	data = (d2v_t *)vector->data;
+	for(i = 0; i < vector->len; i++)
+	{
+		data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
+	}
 	/* If the array is an odd number of elements, set the final element to 1 */
 	m = n + (n % 2);
-	if(n < m)
+	for(i = n % 4; i > 0; i--)
 	{
-		vector->data[m/2].f[1] = 1.0;
+		data[m/2].f[1] = 1.0;
 	}
 	return(self);
@@ -102,26 +80,19 @@ static VALUE method_subtract(VALUE self, VALUE obj)
 	return(internal_apply_operation(self, obj, func_subtract));
 }
-/* Public: Return the number of elements in the Array. */
-static VALUE method_length(VALUE self)
-{
-	d2v_container *vector;
-	Data_Get_Struct(self, d2v_container, vector);
-	return(INT2NUM(vector->len));
-}
 /* Public: Return a Ruby Array containing the doubles within the data array. */
 static VALUE method_to_a(VALUE self)
 {
 	unsigned long i;
-	d2v_container *vector;
+	vector_t *vector;
+	d2v_t *data;
 	VALUE rb_array = rb_ary_new();
-	Data_Get_Struct(self, d2v_container, vector);
+	Data_Get_Struct(self, vector_t, vector);
+	data = (d2v_t *)vector->data;
 	for(i = 0; i < vector->len; i++)
 	{
-		rb_ary_store(rb_array, i, DBL2NUM(vector->data[i/2].f[i%2]));
+		rb_ary_store(rb_array, i, DBL2NUM(data[i/2].f[i%2]));
 	}
 	return(rb_array);
@@ -133,83 +104,50 @@ static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
 {
 	unsigned long size, i;
 	int align;
-	d2v_container *v1, *v2, *r;
+	vector_t *v1, *v2, *rv;
+	d2v_t *d1, *d2, *r;
 	VALUE result_obj = allocate(SIMD_FloatArray);
-	Data_Get_Struct(self, d2v_container, v1);
-	Data_Get_Struct(obj, d2v_container, v2);
-	Data_Get_Struct(result_obj, d2v_container, r);
+	Data_Get_Struct(self, vector_t, v1);
+	Data_Get_Struct(obj, vector_t, v2);
+	Data_Get_Struct(result_obj, vector_t, rv);
+	rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
-	align = internal_align_vectors(v1->len, v2->len);
+	align = internal_align_vectors(v1->len, v2->len, 2);
-	/* Ensure that size will be the result of ceil(len / 2.0) */
+	/* Ensure that size will be the result of ceil(len / 4.0) */
 	size = (v1->len + 1) / 2;
-	r->data = internal_allocate_vector_array(v1->len);
-	r->len = v1->len;
+	d1 = (d2v_t *)v1->data;
+	d2 = (d2v_t *)v2->data;
+	r  = (d2v_t *)rv->data;
+	rv->len = v1->len;
 	switch(align)
 	{
 		case 0: /* Same size arrays */
 			for(i = 0; i < size; i++)
 			{
-				r->data[i].v = func(v1->data[i].v, v2->data[i].v);
+				r[i].v = func(d1[i].v, d2[i].v);
 			}
 			break;
 		case 1: /* Operand is exactly 2 long (size of 1 sse register) */
 			for(i = 0; i < size; i++)
 			{
-				r->data[i].v = func(v1->data[i].v, v2->data[0].v);
+				r[i].v = func(d1[i].v, d2[0].v);
 			}
 			break;
 		default: /* Self is a multiple of operand's length long */
 			for(i = 0; i < size; i++)
 			{
-				r->data[i].v = func(v1->data[i].v, v2->data[i % v2->len].v);
+				r[i].v = func(d1[i].v, d2[i % v2->len].v);
 			}
 	}
 	return(result_obj);
 }
-/* Internal: Allocate memory for the data array. */
-static d2v_t *internal_allocate_vector_array(unsigned long size)
-{
-	d2v_t *vector = malloc(((size + (size % 2)) / 2 + 1) * sizeof(d2v_t));
-	if(vector == NULL)
-	{
-		rb_raise(rb_eNoMemError, "Unable to allocate memory");
-	}
-	return(vector);
-}
-/* Internal: Determine if two arrays can be acted upon, by being of equal
- * lengths or with the operand's length being a multiple of the data array's. */
-static int internal_align_vectors(unsigned long v1, unsigned long v2)
-{
-	if((v1 % 2) != (v2 % 2))
-	{
-		rb_raise(rb_eArgError, "Both Vectors must be of even or odd length.");
-	}
-	if(v1 == v2)
-	{
-		return(0);
-	}
-	if(v2 == 2)
-	{
-		return(1);
-	}
-	if(v1 % v2 == 0)
-	{
-		return(2);
-	}
-	rb_raise(rb_eArgError, "Vector length must be evenly divisible by operand.");
-	/* Never reached */
-	return(-1);
-}
 /* Function: Multiply two vectors. */
 static d2v func_multiply(d2v v1, d2v v2)
 {

data/ext/simd/simd_floatarray.h CHANGED

@@ -1,19 +1,22 @@
 #include "ruby.h"
-#include "simd_types.h"
+#include "simd_common.h"
+/*
 static VALUE allocate(VALUE klass);
 static void deallocate(d2v_container *floatarray);
+*/
 static VALUE method_initialize(VALUE self, VALUE rb_array);
 static VALUE method_multiply(VALUE self, VALUE obj);
 static VALUE method_divide(VALUE self, VALUE obj);
 static VALUE method_add(VALUE self, VALUE obj);
 static VALUE method_subtract(VALUE self, VALUE obj);
-static VALUE method_length(VALUE self);
 static VALUE method_to_a(VALUE self);
+/*
 static d2v_t *internal_allocate_vector_array(unsigned long size);
 static int internal_align_vectors(unsigned long v1, unsigned long v2);
+*/
 static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
 static d2v func_multiply(d2v v1, d2v v2);

data/ext/simd/simd_smallfloatarray.c ADDED

@@ -0,0 +1,173 @@
+#include "simd_smallfloatarray.h"
+VALUE SIMD_SmallFloatArray = Qnil; /* Class handle; assigned by Init_SIMD_SmallFloatArray. */
+/* Internal: Create the SmallFloatArray class under parent.
+ *
+ * parent - Module or class the new class is defined under.
+ *
+ * Registers allocate as the allocation function and binds initialize, the
+ * four arithmetic operators, length and to_a to their C implementations. */
+void Init_SIMD_SmallFloatArray(VALUE parent)
+{
+	SIMD_SmallFloatArray = rb_define_class_under(parent, "SmallFloatArray", rb_cObject);
+	rb_define_alloc_func(SIMD_SmallFloatArray, allocate);
+	rb_define_method(SIMD_SmallFloatArray, "initialize", method_initialize, 1);
+	rb_define_method(SIMD_SmallFloatArray, "*", method_multiply, 1);
+	rb_define_method(SIMD_SmallFloatArray, "/", method_divide, 1);
+	rb_define_method(SIMD_SmallFloatArray, "+", method_add, 1);
+	rb_define_method(SIMD_SmallFloatArray, "-", method_subtract, 1);
+	rb_define_method(SIMD_SmallFloatArray, "length", method_length, 0);
+	rb_define_method(SIMD_SmallFloatArray, "to_a", method_to_a, 0);
+}
+/* Public: Initialize the SmallFloatArray object given a Ruby Array of values
+ * which can be converted to a float.
+ *
+ * rb_array - Ruby Array holding at least 4 numeric values.
+ *
+ * Each value is read with NUM2DBL and narrowed to a single-precision float.
+ * When the length is not a multiple of 4, the unused lanes of the last
+ * 4-float group are set to 1.0 so that every lane processed by the
+ * arithmetic methods holds a defined, non-zero value.
+ *
+ * Returns self.  Raises ArgumentError if rb_array has fewer than 4 elements. */
+static VALUE method_initialize(VALUE self, VALUE rb_array)
+{
+	vector_t *vector;
+	f4v_t *data;
+	unsigned long n, padded, i;
+	Check_Type(rb_array, T_ARRAY);
+	Data_Get_Struct(self, vector_t, vector);
+	n = RARRAY_LEN(rb_array);
+	if(n < 4)
+	{
+		rb_raise(rb_eArgError, "Vectors must be at least 4 long");
+	}
+	/* Allocate first: if this raises, the object keeps its previous state. */
+	data = (f4v_t *)internal_allocate_vector_array(n, sizeof(f4v_t));
+	free(vector->data); /* Release the array of an earlier initialize call. */
+	vector->data = data;
+	vector->len = n;
+	for(i = 0; i < n; i++)
+	{
+		data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
+	}
+	/* Pad lanes n .. padded-1 of the last group; to_a never returns them but
+	 * the vector operations still compute on them. */
+	padded = (n + 3) / 4 * 4;
+	for(i = n; i < padded; i++)
+	{
+		data[i/4].f[i%4] = 1.0f;
+	}
+	return(self);
+}
+/* Public: Multiply values contained in the data array with those contained in
+ * another SmallFloatArray object, returning a new SmallFloatArray.
+ *
+ * obj - The operand; passed unchanged to internal_apply_operation together
+ *       with func_multiply. */
+static VALUE method_multiply(VALUE self, VALUE obj)
+{
+	return(internal_apply_operation(self, obj, func_multiply));
+}
+/* Public: Divide values contained in the data array by those contained in
+ * another SmallFloatArray object, returning a new SmallFloatArray.
+ *
+ * obj - The divisor array; passed unchanged to internal_apply_operation
+ *       together with func_divide. */
+static VALUE method_divide(VALUE self, VALUE obj)
+{
+	return(internal_apply_operation(self, obj, func_divide));
+}
+/* Public: Add values contained in the data array to those contained in
+ * another SmallFloatArray object, returning a new SmallFloatArray.
+ *
+ * obj - The operand; passed unchanged to internal_apply_operation together
+ *       with func_add. */
+static VALUE method_add(VALUE self, VALUE obj)
+{
+	return(internal_apply_operation(self, obj, func_add));
+}
+/* Public: Subtract values contained in another SmallFloatArray object from
+ * those contained in the current data array, returning a new SmallFloatArray.
+ *
+ * obj - The array to subtract; passed unchanged to internal_apply_operation
+ *       together with func_subtract. */
+static VALUE method_subtract(VALUE self, VALUE obj)
+{
+	return(internal_apply_operation(self, obj, func_subtract));
+}
+/* Public: Build a Ruby Array from the values held in the data array.
+ *
+ * Element idx is read from lane idx % 4 of 4-float group idx / 4 and
+ * converted with DBL2NUM.
+ *
+ * Returns a new Ruby Array with one entry per stored element. */
+static VALUE method_to_a(VALUE self)
+{
+	VALUE result = rb_ary_new();
+	vector_t *vec;
+	f4v_t *groups;
+	unsigned long idx = 0;
+	Data_Get_Struct(self, vector_t, vec);
+	groups = (f4v_t *)vec->data;
+	while(idx < vec->len)
+	{
+		float lane = groups[idx / 4].f[idx % 4];
+		rb_ary_store(result, idx, DBL2NUM(lane));
+		idx++;
+	}
+	return(result);
+}
+/* Internal: Given another SmallFloatArray object, perform an action specified
+ * via a function pointer against both.
+ *
+ * self - The left-hand SmallFloatArray.
+ * obj  - The right-hand operand; must be a SmallFloatArray.
+ * func - Operation applied to each pair of 4-float groups.
+ *
+ * The operand is used group-for-group when both lengths match, reused for
+ * every group when it is exactly 4 long, and otherwise cycled over its own
+ * groups (its length / 4 of them).
+ *
+ * Returns a new SmallFloatArray of the same length as self.
+ * Raises TypeError if obj is not a SmallFloatArray, and ArgumentError if
+ * either array has no data or internal_align_vectors rejects the lengths. */
+static VALUE internal_apply_operation(VALUE self, VALUE obj, bf_operation func)
+{
+	unsigned long size, period, i;
+	int align;
+	vector_t *v1, *v2, *rv;
+	f4v_t *d1, *d2, *r;
+	VALUE result_obj;
+	/* Data_Get_Struct cannot tell which struct a data object wraps, so reject
+	 * foreign operands before reading them as a vector_t. */
+	if(!RTEST(rb_obj_is_kind_of(obj, SIMD_SmallFloatArray)))
+	{
+		rb_raise(rb_eTypeError, "Operand must be a SIMD::SmallFloatArray");
+	}
+	result_obj = allocate(SIMD_SmallFloatArray);
+	Data_Get_Struct(self, vector_t, v1);
+	Data_Get_Struct(obj, vector_t, v2);
+	Data_Get_Struct(result_obj, vector_t, rv);
+	if(v1->data == NULL || v2->data == NULL)
+	{
+		rb_raise(rb_eArgError, "Vector is not initialized.");
+	}
+	align = internal_align_vectors(v1->len, v2->len, 4);
+	rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
+	rv->len = v1->len;
+	/* Ensure that size will be the result of ceil(len / 4.0) */
+	size = (v1->len + 3) / 4;
+	d1 = (f4v_t *)v1->data;
+	d2 = (f4v_t *)v2->data;
+	r  = (f4v_t *)rv->data;
+	switch(align)
+	{
+		case 0: /* Same size arrays */
+			for(i = 0; i < size; i++)
+			{
+				r[i].v = func(d1[i].v, d2[i].v);
+			}
+			break;
+		case 1: /* Operand is exactly 4 long (size of 1 sse register) */
+			for(i = 0; i < size; i++)
+			{
+				r[i].v = func(d1[i].v, d2[0].v);
+			}
+			break;
+		default: /* Self is a multiple of operand's length long */
+			/* Cycle over the operand's groups, not its element count:
+			 * the operand holds len / 4 groups of 4 floats. */
+			period = v2->len / 4;
+			for(i = 0; i < size; i++)
+			{
+				r[i].v = func(d1[i].v, d2[i % period].v);
+			}
+	}
+	return(result_obj);
+}
+/* Function: Multiply two vectors.
+ *
+ * Returns v1 * v2, computed with the compiler's vector-type operator on the
+ * two 16-byte float vectors. */
+static f4v func_multiply(f4v v1, f4v v2)
+{
+	return(v1 * v2);
+}
+/* Function: Divide two vectors.
+ *
+ * Returns v1 / v2, computed with the compiler's vector-type operator on the
+ * two 16-byte float vectors. */
+static f4v func_divide(f4v v1, f4v v2)
+{
+	return(v1 / v2);
+}
+/* Function: Add two vectors.
+ *
+ * Returns v1 + v2, computed with the compiler's vector-type operator on the
+ * two 16-byte float vectors. */
+static f4v func_add(f4v v1, f4v v2)
+{
+	return(v1 + v2);
+}
+/* Function: Subtract two vectors.
+ *
+ * Returns v1 - v2, computed with the compiler's vector-type operator on the
+ * two 16-byte float vectors. */
+static f4v func_subtract(f4v v1, f4v v2)
+{
+	return(v1 - v2);
+}

data/ext/simd/simd_smallfloatarray.h ADDED

@@ -0,0 +1,25 @@
+#include "ruby.h"
+#include "simd_common.h"
+/*
+static VALUE allocate(VALUE klass);
+static void deallocate(vector_t *vector);
+*/
+static VALUE method_initialize(VALUE self, VALUE rb_array);
+static VALUE method_multiply(VALUE self, VALUE obj);
+static VALUE method_divide(VALUE self, VALUE obj);
+static VALUE method_add(VALUE self, VALUE obj);
+static VALUE method_subtract(VALUE self, VALUE obj);
+static VALUE method_to_a(VALUE self);
+/*
+static f4v_t *internal_allocate_vector_array(unsigned long size);
+static int internal_align_vectors(unsigned long v1, unsigned long v2);
+*/
+static VALUE internal_apply_operation(VALUE self, VALUE obj, bf_operation func);
+static f4v func_multiply(f4v v1, f4v v2);
+static f4v func_divide(f4v v1, f4v v2);
+static f4v func_add(f4v v1, f4v v2);
+static f4v func_subtract(f4v v1, f4v v2);

data/ext/simd/simd_types.h CHANGED

@@ -1,3 +1,5 @@
+#pragma once
 /*
  * Types for FloatArray
  *
@@ -11,10 +13,25 @@ typedef union d2v_t
 	double f[2];
 } d2v_t;
-typedef struct d2v_container
+/*
+ * Types for SmallFloatArray
+ *
+ * Since ruby internally uses doubles for the Float type, SIMD::SmallFloatArray
+ * provides reduced-size floats for faster operation when it's known that the
+ * lack of precision and range will not be a detriment.
+ */
+typedef float __attribute__ ((vector_size (16))) f4v;
+typedef union f4v_t
+{
+	f4v v;
+	float f[4];
+} f4v_t;
+typedef struct vector_t
 {
-	d2v_t *data;
+	void *data;
 	unsigned long len;
-} d2v_container;
+} vector_t;
 typedef d2v (*b_operation)(d2v v1, d2v v2);
+typedef f4v (*bf_operation)(f4v v1, f4v v2);

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: simd
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Tina Wuest
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-12-11 00:00:00.000000000 Z
+date: 2014-12-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake-compiler
@@ -34,8 +34,12 @@ files:
 - ext/simd/extconf.rb
 - ext/simd/simd.c
 - ext/simd/simd.h
+- ext/simd/simd_common.c
+- ext/simd/simd_common.h
 - ext/simd/simd_floatarray.c
 - ext/simd/simd_floatarray.h
+- ext/simd/simd_smallfloatarray.c
+- ext/simd/simd_smallfloatarray.h
 - ext/simd/simd_types.h
 homepage: https://gitlab.com/wuest/simd-ruby
 licenses: []