simd 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3acc0dcd98951246c84b35036f47dedd69d798cb
4
- data.tar.gz: 323dbca8597e5993d0bd48ebb8805febdf064c47
3
+ metadata.gz: e2917ec7ede39c14aaac0a85f4e0c6cd6cf204cd
4
+ data.tar.gz: 8bf4b61b6337ba5e5d833797dcffec0e2de32480
5
5
  SHA512:
6
- metadata.gz: e05f61501a47ad89bf55a6a25fcda70add8aa004701336e0543c4421ca0b4a9628aab30281e0aa9c30bb79a994fad2152c7da853dc6989adea400a79c9ada05c
7
- data.tar.gz: 45f06b3ead4a140d992f34c2135724f649143e58035945e9e8f8de9e0595b8fda54538cab233a494ddcb0eb1e6e1a6d0f5948084b6e29e91d461c34d901eb8ef
6
+ metadata.gz: e7ffa5a5ca85d1affc0326871517ba1dfde05d46544c0c8d803edf307d10ffe87dfdd8f78bbfd74b672f488d266037718dfd0c7d59b96967cb69ab3d8943b64e
7
+ data.tar.gz: 69a5db3a4baf9fa3b68ff13a7d418337b20a237b150253d2cfd09421b8f63f2a11a50f51cd86d5412ab5a71bc46841143132cbe6af74f31d618408b9b1040d03
data/ext/simd/simd.c CHANGED
@@ -6,4 +6,5 @@ void Init_simd()
6
6
  SIMD = rb_define_module("SIMD");
7
7
  Init_SIMD_FloatArray(SIMD);
8
8
  Init_SIMD_SmallFloatArray(SIMD);
9
+ Init_SIMD_IntArray(SIMD);
9
10
  }
data/ext/simd/simd.h CHANGED
@@ -3,3 +3,4 @@
3
3
  void Init_simd();
4
4
  void Init_SIMD_FloatArray(VALUE parent);
5
5
  void Init_SIMD_SmallFloatArray(VALUE parent);
6
+ void Init_SIMD_IntArray(VALUE parent);
@@ -129,45 +129,50 @@ static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
129
129
  case 0: /* Same size arrays */
130
130
  for(i = 0; i < size; i++)
131
131
  {
132
- r[i].v = func(d1[i].v, d2[i].v);
132
+ func(&d1[i].v, &d2[i].v, &r[i].v);
133
133
  }
134
134
  break;
135
135
  case 1: /* Operand is exactly 2 long (size of 1 sse register) */
136
136
  for(i = 0; i < size; i++)
137
137
  {
138
- r[i].v = func(d1[i].v, d2[0].v);
138
+ func(&d1[i].v, &d2[0].v, &r[i].v);
139
139
  }
140
140
  break;
141
141
  default: /* Self is a multiple of operand's length long */
142
142
  for(i = 0; i < size; i++)
143
143
  {
144
- r[i].v = func(d1[i].v, d2[i % v2->len].v);
144
+ func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
145
  }
146
146
  }
147
147
 
148
+ if(rv->len != rv->len + (rv->len % 2))
149
+ {
150
+ r[size].f[1] = 1;
151
+ }
152
+
148
153
  return(result_obj);
149
154
  }
150
155
 
151
156
  /* Function: Multiply two vectors. */
152
- static d2v func_multiply(d2v v1, d2v v2)
157
+ static void func_multiply(void *v1, void *v2, void *r)
153
158
  {
154
- return(v1 * v2);
159
+ *(d2v *)r = *(d2v *)v1 * *(d2v *)v2;
155
160
  }
156
161
 
157
162
  /* Function: Divide two vectors. */
158
- static d2v func_divide(d2v v1, d2v v2)
163
+ static void func_divide(void *v1, void *v2, void *r)
159
164
  {
160
- return(v1 / v2);
165
+ *(d2v *)r = *(d2v *)v1 / *(d2v *)v2;
161
166
  }
162
167
 
163
168
  /* Function: Add two vectors. */
164
- static d2v func_add(d2v v1, d2v v2)
169
+ static void func_add(void *v1, void *v2, void *r)
165
170
  {
166
- return(v1 + v2);
171
+ *(d2v *)r = *(d2v *)v1 + *(d2v *)v2;
167
172
  }
168
173
 
169
174
  /* Function: Subtract two vectors. */
170
- static d2v func_subtract(d2v v1, d2v v2)
175
+ static void func_subtract(void *v1, void *v2, void *r)
171
176
  {
172
- return(v1 - v2);
177
+ *(d2v *)r = *(d2v *)v1 - *(d2v *)v2;
173
178
  }
@@ -1,11 +1,6 @@
1
1
  #include "ruby.h"
2
2
  #include "simd_common.h"
3
3
 
4
- /*
5
- static VALUE allocate(VALUE klass);
6
- static void deallocate(d2v_container *floatarray);
7
- */
8
-
9
4
  static VALUE method_initialize(VALUE self, VALUE rb_array);
10
5
  static VALUE method_multiply(VALUE self, VALUE obj);
11
6
  static VALUE method_divide(VALUE self, VALUE obj);
@@ -13,13 +8,9 @@ static VALUE method_add(VALUE self, VALUE obj);
13
8
  static VALUE method_subtract(VALUE self, VALUE obj);
14
9
  static VALUE method_to_a(VALUE self);
15
10
 
16
- /*
17
- static d2v_t *internal_allocate_vector_array(unsigned long size);
18
- static int internal_align_vectors(unsigned long v1, unsigned long v2);
19
- */
20
11
  static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
21
12
 
22
- static d2v func_multiply(d2v v1, d2v v2);
23
- static d2v func_divide(d2v v1, d2v v2);
24
- static d2v func_add(d2v v1, d2v v2);
25
- static d2v func_subtract(d2v v1, d2v v2);
13
+ static void func_multiply(void *v1, void *v2, void *r);
14
+ static void func_divide(void *v1, void *v2, void *r);
15
+ static void func_add(void *v1, void *v2, void *r);
16
+ static void func_subtract(void *v1, void *v2, void *r);
@@ -0,0 +1,181 @@
1
+ #include "simd_intarray.h"
2
+
3
+ VALUE SIMD_IntArray = Qnil;
4
+
5
+ /* Internal: Create the SIMD::IntArray class. */
6
+ void Init_SIMD_IntArray(VALUE parent)
7
+ {
8
+ SIMD_IntArray = rb_define_class_under(parent, "IntArray", rb_cObject);
9
+ rb_define_alloc_func(SIMD_IntArray, allocate);
10
+ rb_define_method(SIMD_IntArray, "initialize", method_initialize, 1);
11
+ rb_define_method(SIMD_IntArray, "*", method_multiply, 1);
12
+ rb_define_method(SIMD_IntArray, "/", method_divide, 1);
13
+ rb_define_method(SIMD_IntArray, "+", method_add, 1);
14
+ rb_define_method(SIMD_IntArray, "-", method_subtract, 1);
15
+ rb_define_method(SIMD_IntArray, "length", method_length, 0);
16
+ rb_define_method(SIMD_IntArray, "to_a", method_to_a, 0);
17
+ }
18
+
19
+ /* Public: Initialize the IntArray object given a Ruby Array of values
20
+ * which can be converted to a C int. */
21
+ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
+ {
23
+ vector_t *vector;
24
+ i4v_t *data;
25
+ unsigned long n,m,i;
26
+
27
+ Check_Type(rb_array, T_ARRAY);
28
+ Data_Get_Struct(self, vector_t, vector);
29
+
30
+ vector->len = n = RARRAY_LEN(rb_array);
31
+
32
+ if(vector->len < 4)
33
+ {
34
+ rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
+ }
36
+
37
+ vector->data = internal_allocate_vector_array(vector->len, sizeof(i4v_t));
38
+
39
+ data = (i4v_t *)vector->data;
40
+ for(i = 0; i < vector->len; i++)
41
+ {
42
+ data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
43
+ }
44
+
45
+ /* If the length is not a multiple of 4, set the leftover lanes to 1 */
46
+ m = n + (n % 4);
47
+ for(i = n % 4; i > 0; i--)
48
+ {
49
+ data[m/4].f[i] = 1.0;
50
+ }
51
+
52
+ return(self);
53
+ }
54
+
55
+ /* Public: Multiply values contained in the data array with those contained in
56
+ * another IntArray object, returning a new IntArray. */
57
+ static VALUE method_multiply(VALUE self, VALUE obj)
58
+ {
59
+ return(internal_apply_operation(self, obj, func_multiply));
60
+ }
61
+
62
+ /* Public: Divide values contained in the data array by those contained in
63
+ * another IntArray object, returning a new IntArray. */
64
+ static VALUE method_divide(VALUE self, VALUE obj)
65
+ {
66
+ return(internal_apply_operation(self, obj, func_divide));
67
+ }
68
+
69
+ /* Public: Add values contained in the data array to those contained in
70
+ * another IntArray object, returning a new IntArray. */
71
+ static VALUE method_add(VALUE self, VALUE obj)
72
+ {
73
+ return(internal_apply_operation(self, obj, func_add));
74
+ }
75
+
76
+ /* Public: Subtract values contained in another IntArray object from those
77
+ * contained in the current data array object, returning a new IntArray. */
78
+ static VALUE method_subtract(VALUE self, VALUE obj)
79
+ {
80
+ return(internal_apply_operation(self, obj, func_subtract));
81
+ }
82
+
83
+ /* Public: Return a Ruby Array containing the integers within the data array. */
84
+ static VALUE method_to_a(VALUE self)
85
+ {
86
+ unsigned long i;
87
+ vector_t *vector;
88
+ i4v_t *data;
89
+ VALUE rb_array = rb_ary_new();
90
+
91
+ Data_Get_Struct(self, vector_t, vector);
92
+ data = (i4v_t *)vector->data;
93
+ for(i = 0; i < vector->len; i++)
94
+ {
95
+ rb_ary_store(rb_array, i, INT2NUM(data[i/4].f[i%4]));
96
+ }
97
+
98
+ return(rb_array);
99
+ }
100
+
101
+ /* Internal: Given another IntArray object, perform an action specified via a
102
+ * function pointer against both. */
103
+ static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
+ {
105
+ unsigned long size, i;
106
+ int align;
107
+ vector_t *v1, *v2, *rv;
108
+ i4v_t *d1, *d2, *r;
109
+ VALUE result_obj = allocate(SIMD_IntArray);
110
+
111
+ Data_Get_Struct(self, vector_t, v1);
112
+ Data_Get_Struct(obj, vector_t, v2);
113
+ Data_Get_Struct(result_obj, vector_t, rv);
114
+ rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
115
+
116
+ align = internal_align_vectors(v1->len, v2->len, 4);
117
+
118
+ /* Ensure that size will be the result of ceil(len / 4.0) */
119
+ size = (v1->len + 3) / 4;
120
+
121
+ d1 = (i4v_t *)v1->data;
122
+ d2 = (i4v_t *)v2->data;
123
+ r = (i4v_t *)rv->data;
124
+
125
+ rv->len = v1->len;
126
+
127
+ switch(align)
128
+ {
129
+ case 0: /* Same size arrays */
130
+ for(i = 0; i < size; i++)
131
+ {
132
+ func(&d1[i].v, &d2[i].v, &r[i].v);
133
+ }
134
+ break;
135
+ case 1: /* Operand is exactly 4 long (size of 1 sse register) */
136
+ for(i = 0; i < size; i++)
137
+ {
138
+ func(&d1[i].v, &d2[0].v, &r[i].v);
139
+ }
140
+ break;
141
+ default: /* Self is a multiple of operand's length long */
142
+ for(i = 0; i < size; i++)
143
+ {
144
+ func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
+ }
146
+ }
147
+
148
+ if(rv->len != rv->len + (rv->len % 4))
149
+ {
150
+ for(i = 3; i > rv->len + (rv->len % 4); i--)
151
+ {
152
+ r[size].f[i] = 1;
153
+ }
154
+ }
155
+
156
+ return(result_obj);
157
+ }
158
+
159
+ /* Function: Multiply two vectors. */
160
+ static void func_multiply(void *v1, void *v2, void *r)
161
+ {
162
+ *(i4v *)r = *(i4v *)v1 * *(i4v *)v2;
163
+ }
164
+
165
+ /* Function: Divide two vectors. */
166
+ static void func_divide(void *v1, void *v2, void *r)
167
+ {
168
+ *(i4v *)r = *(i4v *)v1 / *(i4v *)v2;
169
+ }
170
+
171
+ /* Function: Add two vectors. */
172
+ static void func_add(void *v1, void *v2, void *r)
173
+ {
174
+ *(i4v *)r = *(i4v *)v1 + *(i4v *)v2;
175
+ }
176
+
177
+ /* Function: Subtract two vectors. */
178
+ static void func_subtract(void *v1, void *v2, void *r)
179
+ {
180
+ *(i4v *)r = *(i4v *)v1 - *(i4v *)v2;
181
+ }
@@ -0,0 +1,16 @@
1
+ #include "ruby.h"
2
+ #include "simd_common.h"
3
+
4
+ static VALUE method_initialize(VALUE self, VALUE rb_array);
5
+ static VALUE method_multiply(VALUE self, VALUE obj);
6
+ static VALUE method_divide(VALUE self, VALUE obj);
7
+ static VALUE method_add(VALUE self, VALUE obj);
8
+ static VALUE method_subtract(VALUE self, VALUE obj);
9
+ static VALUE method_to_a(VALUE self);
10
+
11
+ static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
+
13
+ static void func_multiply(void *v1, void *v2, void *r);
14
+ static void func_divide(void *v1, void *v2, void *r);
15
+ static void func_add(void *v1, void *v2, void *r);
16
+ static void func_subtract(void *v1, void *v2, void *r);
@@ -100,7 +100,7 @@ static VALUE method_to_a(VALUE self)
100
100
 
101
101
  /* Internal: Given another FloatArray object, perform an action specified via a
102
102
  * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, bf_operation func)
103
+ static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
104
  {
105
105
  unsigned long size, i;
106
106
  int align;
@@ -129,45 +129,53 @@ static VALUE internal_apply_operation(VALUE self, VALUE obj, bf_operation func)
129
129
  case 0: /* Same size arrays */
130
130
  for(i = 0; i < size; i++)
131
131
  {
132
- r[i].v = func(d1[i].v, d2[i].v);
132
+ func(&d1[i].v, &d2[i].v, &r[i].v);
133
133
  }
134
134
  break;
135
135
  case 1: /* Operand is exactly 4 long (size of 1 sse register) */
136
136
  for(i = 0; i < size; i++)
137
137
  {
138
- r[i].v = func(d1[i].v, d2[0].v);
138
+ func(&d1[i].v, &d2[0].v, &r[i].v);
139
139
  }
140
140
  break;
141
141
  default: /* Self is a multiple of operand's length long */
142
142
  for(i = 0; i < size; i++)
143
143
  {
144
- r[i].v = func(d1[i].v, d2[i % v2->len].v);
144
+ func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
145
  }
146
146
  }
147
147
 
148
+ if(rv->len != rv->len + (rv->len % 4))
149
+ {
150
+ for(i = 3; i > rv->len + (rv->len % 4); i--)
151
+ {
152
+ r[size].f[i] = 1;
153
+ }
154
+ }
155
+
148
156
  return(result_obj);
149
157
  }
150
158
 
151
159
  /* Function: Multiply two vectors. */
152
- static f4v func_multiply(f4v v1, f4v v2)
160
+ static void func_multiply(void *v1, void *v2, void *r)
153
161
  {
154
- return(v1 * v2);
162
+ *(f4v *)r = *(f4v *)v1 * *(f4v *)v2;
155
163
  }
156
164
 
157
165
  /* Function: Divide two vectors. */
158
- static f4v func_divide(f4v v1, f4v v2)
166
+ static void func_divide(void *v1, void *v2, void *r)
159
167
  {
160
- return(v1 / v2);
168
+ *(f4v *)r = *(f4v *)v1 / *(f4v *)v2;
161
169
  }
162
170
 
163
171
  /* Function: Add two vectors. */
164
- static f4v func_add(f4v v1, f4v v2)
172
+ static void func_add(void *v1, void *v2, void *r)
165
173
  {
166
- return(v1 + v2);
174
+ *(f4v *)r = *(f4v *)v1 + *(f4v *)v2;
167
175
  }
168
176
 
169
177
  /* Function: Subtract two vectors. */
170
- static f4v func_subtract(f4v v1, f4v v2)
178
+ static void func_subtract(void *v1, void *v2, void *r)
171
179
  {
172
- return(v1 - v2);
180
+ *(f4v *)r = *(f4v *)v1 - *(f4v *)v2;
173
181
  }
@@ -1,11 +1,6 @@
1
1
  #include "ruby.h"
2
2
  #include "simd_common.h"
3
3
 
4
- /*
5
- static VALUE allocate(VALUE klass);
6
- static void deallocate(vector_t *vector);
7
- */
8
-
9
4
  static VALUE method_initialize(VALUE self, VALUE rb_array);
10
5
  static VALUE method_multiply(VALUE self, VALUE obj);
11
6
  static VALUE method_divide(VALUE self, VALUE obj);
@@ -13,13 +8,9 @@ static VALUE method_add(VALUE self, VALUE obj);
13
8
  static VALUE method_subtract(VALUE self, VALUE obj);
14
9
  static VALUE method_to_a(VALUE self);
15
10
 
16
- /*
17
- static f4v_t *internal_allocate_vector_array(unsigned long size);
18
- static int internal_align_vectors(unsigned long v1, unsigned long v2);
19
- */
20
- static VALUE internal_apply_operation(VALUE self, VALUE obj, bf_operation func);
11
+ static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
21
12
 
22
- static f4v func_multiply(f4v v1, f4v v2);
23
- static f4v func_divide(f4v v1, f4v v2);
24
- static f4v func_add(f4v v1, f4v v2);
25
- static f4v func_subtract(f4v v1, f4v v2);
13
+ static void func_multiply(void *v1, void *v2, void *r);
14
+ static void func_divide(void *v1, void *v2, void *r);
15
+ static void func_add(void *v1, void *v2, void *r);
16
+ static void func_subtract(void *v1, void *v2, void *r);
@@ -1,6 +1,6 @@
1
1
  #pragma once
2
2
 
3
- /*
3
+ /*
4
4
  * Types for FloatArray
5
5
  *
6
6
  * Since ruby internally uses doubles for the Float type, SIMD::FloatArray will
@@ -13,7 +13,7 @@ typedef union d2v_t
13
13
  double f[2];
14
14
  } d2v_t;
15
15
 
16
- /*
16
+ /*
17
17
  * Types for SmallFloatArray
18
18
  *
19
19
  * Since ruby internally uses doubles for the Float type, SIMD::SmallFloatArray
@@ -27,11 +27,20 @@ typedef union f4v_t
27
27
  float f[4];
28
28
  } f4v_t;
29
29
 
30
+ /*
31
+ * Types for IntArray
32
+ */
33
+ typedef int __attribute__ ((vector_size (16))) i4v;
34
+ typedef union i4v_t
35
+ {
36
+ i4v v;
37
+ int f[4];
38
+ } i4v_t;
39
+
30
40
  typedef struct vector_t
31
41
  {
32
42
  void *data;
33
43
  unsigned long len;
34
44
  } vector_t;
35
45
 
36
- typedef d2v (*b_operation)(d2v v1, d2v v2);
37
- typedef f4v (*bf_operation)(f4v v1, f4v v2);
46
+ typedef void (b_operation)(void *v1, void *v2, void *r);
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tina Wuest
@@ -38,6 +38,8 @@ files:
38
38
  - ext/simd/simd_common.h
39
39
  - ext/simd/simd_floatarray.c
40
40
  - ext/simd/simd_floatarray.h
41
+ - ext/simd/simd_intarray.c
42
+ - ext/simd/simd_intarray.h
41
43
  - ext/simd/simd_smallfloatarray.c
42
44
  - ext/simd/simd_smallfloatarray.h
43
45
  - ext/simd/simd_types.h