simd 0.4.0 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c15d248c872e4369c45e4151faf81e9761ea1587
4
- data.tar.gz: cdfc5170454be15be1bc75ccc5eba08aa96c884d
2
+ SHA256:
3
+ metadata.gz: d43585672169c7a727e7502dea99cb56e8103c2e6a8effe4f32521ee4cae6e5d
4
+ data.tar.gz: c404674a578002bdf5933baee75e54680717c562b1e8c839a0661e04dfee27a6
5
5
  SHA512:
6
- metadata.gz: 38df7b30a113b325bba7be834fcbe24c44738706495579af5a9e4b508e4db4375e2f36b16a97612eab4dd8da379c450d062c6e0bceb6e8ddf308a952ecf6419c
7
- data.tar.gz: 342680d7f4f6690c8b3338de32ef192aaadb2952461e62e86a848650414b09d793d71e70c5b38909e0c882f062e346995f86f40d3dbf1de692cbe3eb0cfb0c73
6
+ metadata.gz: 86c6b9e43f219190f4bd2a34d0ad1c309eacb56068dbdb0d44f46894b1ceee19bc68c77d4f682d42b335039250730c9637a1d19c5075861c9731fd13a48d71d0
7
+ data.tar.gz: a71afed09b9ead560531c9ec2a9ffb0260843398255963b549abe44db0b45dc036fb604a945c9caff7f5d54dd51626f78ed4f2877803a9a5d2bc455e68c5c332
data/ext/simd/extconf.rb CHANGED
@@ -1,6 +1,12 @@
1
1
  # Makes Makefiles for Ruby extensions.
2
2
  require 'mkmf'
3
3
 
4
+ cpu = RbConfig::CONFIG['arch'].downcase
5
+ if cpu.include?('arm')
6
+ ver = cpu.gsub(/[^\d]*(\d+).*/, '\\1').to_i
7
+ $CFLAGS << ' -mfpu=neon' if ver >= 6
8
+ end
9
+
4
10
  extension_name = 'simd'
5
11
  dir_config(extension_name)
6
12
  create_makefile(extension_name)
@@ -1,5 +1,7 @@
1
1
  #include "simd_common.h"
2
2
 
3
+ #define XMM_BYTES 16 /* Width of the xmm1,2... registers */
4
+
3
5
  /* Internal: Allocate memory for the vector container. */
4
6
  VALUE allocate(VALUE klass)
5
7
  {
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
36
38
  }
37
39
 
38
40
  /* Internal: Allocate memory for the data array. */
39
- void *internal_allocate_vector_array(unsigned long count, size_t size)
41
+ void *internal_allocate_vector_array(unsigned long long int count)
40
42
  {
41
- unsigned int modulo = 16 / size;
42
- void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
43
+ void *vector = malloc((count + 1) * XMM_BYTES);
43
44
  if(vector == NULL)
44
45
  {
45
46
  rb_raise(rb_eNoMemError, "Unable to allocate memory");
@@ -50,7 +51,7 @@ void *internal_allocate_vector_array(unsigned long count, size_t size)
50
51
 
51
52
  /* Internal: Determine if two arrays can be acted upon, by being of equal
52
53
  * lengths or with the operand's length being a multiple of the data array's. */
53
- int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo)
54
+ int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo)
54
55
  {
55
56
  if((v1 % modulo) != (v2 % modulo))
56
57
  {
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
74
75
  /* Never reached */
75
76
  return(-1);
76
77
  }
78
+
79
+ /* Internal: Given another object, perform an action specified via a function
80
+ * pointer against both.
81
+ *
82
+ * Since arithmetic is purposefully performed against void pointers, disable
83
+ * warnings regarding this for the current function. */
84
+ #pragma GCC diagnostic push
85
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
86
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
87
+ {
88
+ unsigned long long int length, i, j;
89
+ int align;
90
+ vector_t *v1, *v2, *rv;
91
+ void *data;
92
+ VALUE result_obj = allocate(klass);
93
+
94
+ Data_Get_Struct(self, vector_t, v1);
95
+ Data_Get_Struct(obj, vector_t, v2);
96
+ Data_Get_Struct(result_obj, vector_t, rv);
97
+ rv->data = internal_allocate_vector_array(v1->len);
98
+
99
+ align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
100
+
101
+ length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
102
+ rv->len = v1->len;
103
+
104
+ switch(align)
105
+ {
106
+ case 0: /* Same size arrays */
107
+ for(i = 0; i < length; i++)
108
+ {
109
+ func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
110
+ }
111
+ break;
112
+ case 1: /* Operand fills exactly one SSE register (XMM_BYTES / size elements) */
113
+ for(i = 0; i < length; i++)
114
+ {
115
+ func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
116
+ }
117
+ break;
118
+ default: /* Self is a multiple of operand's length long */
119
+ for(j = 0; j < v2->len; j++)
120
+ {
121
+ data = v2->data + XMM_BYTES * j;
122
+ for(i = j; i < length; i+=v2->len)
123
+ {
124
+ func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
125
+ }
126
+ }
127
+ }
128
+ internal_sanitize_unaligned_final_vector(rv, size);
129
+
130
+ return(result_obj);
131
+ }
132
+ #pragma GCC diagnostic pop
133
+
134
+ /* Internal: Make sure that no null bytes exist beyond the boundary of
135
+ * unaligned vectors. This function should be called after any operation that
136
+ * results in the mutation or creation of a vector array.
137
+ *
138
+ * Since arithmetic is purposefully performed against void pointers, disable
139
+ * warnings regarding this for the current function. */
140
+ #pragma GCC diagnostic push
141
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
142
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
143
+ {
144
+ unsigned long long int i;
145
+
146
+ if((rv->len * size) % XMM_BYTES)
147
+ {
148
+ for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
149
+ {
150
+ *(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
151
+ }
152
+ }
153
+ }
154
+ #pragma GCC diagnostic pop
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
8
8
 
9
9
  VALUE method_length(VALUE self);
10
10
 
11
- void *internal_allocate_vector_array(unsigned long count, size_t size);
12
- int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
11
+ void *internal_allocate_vector_array(unsigned long long int count);
12
+ int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo);
13
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
14
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
@@ -1,4 +1,5 @@
1
1
  #include "simd_floatarray.h"
2
+ #include "simd_longarray.h"
2
3
 
3
4
  VALUE SIMD_FloatArray = Qnil;
4
5
 
@@ -12,6 +13,13 @@ void Init_SIMD_FloatArray(VALUE parent)
12
13
  rb_define_method(SIMD_FloatArray, "/", method_divide, 1);
13
14
  rb_define_method(SIMD_FloatArray, "+", method_add, 1);
14
15
  rb_define_method(SIMD_FloatArray, "-", method_subtract, 1);
16
+ rb_define_method(SIMD_FloatArray, "&", method_and, 1);
17
+ rb_define_method(SIMD_FloatArray, "|", method_or, 1);
18
+ rb_define_method(SIMD_FloatArray, "^", method_xor, 1);
19
+ rb_define_method(SIMD_FloatArray, "gt", method_gt, 1);
20
+ rb_define_method(SIMD_FloatArray, "lt", method_lt, 1);
21
+ rb_define_method(SIMD_FloatArray, ">", method_gt, 1);
22
+ rb_define_method(SIMD_FloatArray, "<", method_lt, 1);
15
23
  rb_define_method(SIMD_FloatArray, "length", method_length, 0);
16
24
  rb_define_method(SIMD_FloatArray, "to_a", method_to_a, 0);
17
25
  }
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
30
  {
23
31
  vector_t *vector;
24
32
  d2v_t *data;
25
- unsigned long n,m,i;
33
+ unsigned long long int n,i;
26
34
 
27
35
  Check_Type(rb_array, T_ARRAY);
28
36
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
42
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
43
  }
36
44
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(d2v_t));
45
+ vector->data = internal_allocate_vector_array(vector->len);
38
46
 
39
47
  data = (d2v_t *)vector->data;
40
48
  for(i = 0; i < vector->len; i++)
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
50
  data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
43
51
  }
44
52
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 2);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/2].f[1] = 1.0;
50
- }
53
+ internal_sanitize_unaligned_final_vector(vector, sizeof(double));
51
54
 
52
55
  return(self);
53
56
  }
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
59
  * another FloatArray object, returning a new FloatArray. */
57
60
  static VALUE method_multiply(VALUE self, VALUE obj)
58
61
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
62
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
60
63
  }
61
64
 
62
65
  /* Public: Divide values contained in the data array by those contained in
63
66
  * another FloatArray object, returning a new FloatArray. */
64
67
  static VALUE method_divide(VALUE self, VALUE obj)
65
68
  {
66
- return(internal_apply_operation(self, obj, func_divide));
69
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
67
70
  }
68
71
 
69
72
  /* Public: add values contained in the data array with those contained in
70
73
  * another FloatArray object, returning a new FloatArray. */
71
74
  static VALUE method_add(VALUE self, VALUE obj)
72
75
  {
73
- return(internal_apply_operation(self, obj, func_add));
76
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
74
77
  }
75
78
 
76
79
  /* Public: Subtract values contained in another FloatArray object from those
77
80
  * contained in the current data array object, returning a new FloatArray. */
78
81
  static VALUE method_subtract(VALUE self, VALUE obj)
79
82
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
83
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
84
+ }
85
+
86
+ /* Public: and values contained in the data array with those contained in
87
+ * another FloatArray object, returning a new FloatArray. */
88
+ static VALUE method_and(VALUE self, VALUE obj)
89
+ {
90
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_and));
91
+ }
92
+
93
+ /* Public: or values contained in the data array with those contained in
94
+ * another FloatArray object, returning a new FloatArray. */
95
+ static VALUE method_or(VALUE self, VALUE obj)
96
+ {
97
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_or));
98
+ }
99
+
100
+ /* Public: xor values contained in the data array with those contained in
101
+ * another FloatArray object, returning a new FloatArray. */
102
+ static VALUE method_xor(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_xor));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another FloatArray object, return a new LongArray with each element being
109
+ * -1 if the data array's value is greater, and 0 otherwise. */
110
+ static VALUE method_gt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_gt));
113
+ }
114
+
115
+ /* Public: Compare values contained in the data array with those contained in
116
+ * another FloatArray object, return a new LongArray with each element being
117
+ * -1 if the data array's value is less, and 0 otherwise. */
118
+ static VALUE method_lt(VALUE self, VALUE obj)
119
+ {
120
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_lt));
81
121
  }
82
122
 
83
123
  /* Public: Return a Ruby Array containing the doubles within the data array. */
84
124
  static VALUE method_to_a(VALUE self)
85
125
  {
86
- unsigned long i;
126
+ unsigned long long int i;
87
127
  vector_t *vector;
88
128
  d2v_t *data;
89
129
  VALUE rb_array = rb_ary_new();
@@ -98,61 +138,6 @@ static VALUE method_to_a(VALUE self)
98
138
  return(rb_array);
99
139
  }
100
140
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- d2v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_FloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 2);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 1) / 2;
120
-
121
- d1 = (d2v_t *)v1->data;
122
- d2 = (d2v_t *)v2->data;
123
- r = (d2v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 2 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 2))
149
- {
150
- r[size].f[1] = 1;
151
- }
152
-
153
- return(result_obj);
154
- }
155
-
156
141
  /* Function: Multiply two vectors. */
157
142
  static void func_multiply(void *v1, void *v2, void *r)
158
143
  {
@@ -176,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
176
161
  {
177
162
  *(d2v *)r = *(d2v *)v1 - *(d2v *)v2;
178
163
  }
164
+
165
+ /* Function: Perform a binary AND on two vectors. */
166
+ static void func_and(void *v1, void *v2, void *r)
167
+ {
168
+ *(l2v *)r = *(l2v *)v1 & *(l2v *)v2;
169
+ }
170
+
171
+ /* Function: Perform a binary OR on two vectors. */
172
+ static void func_or(void *v1, void *v2, void *r)
173
+ {
174
+ *(l2v *)r = *(l2v *)v1 | *(l2v *)v2;
175
+ }
176
+
177
+ /* Function: Perform a binary XOR on two vectors. */
178
+ static void func_xor(void *v1, void *v2, void *r)
179
+ {
180
+ *(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
181
+ }
182
+
183
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
184
+ static void func_gt(void *v1, void *v2, void *r)
185
+ {
186
+ *(l2v *)r = (*(d2v *)v1 > *(d2v *)v2);
187
+ }
188
+
189
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
190
+ static void func_lt(void *v1, void *v2, void *r)
191
+ {
192
+ *(l2v *)r = (*(d2v *)v1 < *(d2v *)v2);
193
+ }
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
6
6
  static VALUE method_divide(VALUE self, VALUE obj);
7
7
  static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
+ static VALUE method_and(VALUE self, VALUE obj);
10
+ static VALUE method_or(VALUE self, VALUE obj);
11
+ static VALUE method_xor(VALUE self, VALUE obj);
12
+ static VALUE method_gt(VALUE self, VALUE obj);
13
+ static VALUE method_lt(VALUE self, VALUE obj);
9
14
  static VALUE method_to_a(VALUE self);
10
15
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
16
  static void func_multiply(void *v1, void *v2, void *r);
14
17
  static void func_divide(void *v1, void *v2, void *r);
15
18
  static void func_add(void *v1, void *v2, void *r);
16
19
  static void func_subtract(void *v1, void *v2, void *r);
20
+ static void func_and(void *v1, void *v2, void *r);
21
+ static void func_or(void *v1, void *v2, void *r);
22
+ static void func_xor(void *v1, void *v2, void *r);
23
+ static void func_gt(void *v1, void *v2, void *r);
24
+ static void func_lt(void *v1, void *v2, void *r);
@@ -15,6 +15,10 @@ void Init_SIMD_IntArray(VALUE parent)
15
15
  rb_define_method(SIMD_IntArray, "&", method_and, 1);
16
16
  rb_define_method(SIMD_IntArray, "|", method_or, 1);
17
17
  rb_define_method(SIMD_IntArray, "^", method_xor, 1);
18
+ rb_define_method(SIMD_IntArray, "gt", method_gt, 1);
19
+ rb_define_method(SIMD_IntArray, "lt", method_lt, 1);
20
+ rb_define_method(SIMD_IntArray, ">", method_gt, 1);
21
+ rb_define_method(SIMD_IntArray, "<", method_lt, 1);
18
22
  rb_define_method(SIMD_IntArray, "length", method_length, 0);
19
23
  rb_define_method(SIMD_IntArray, "to_a", method_to_a, 0);
20
24
  }
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
29
  {
26
30
  vector_t *vector;
27
31
  i4v_t *data;
28
- unsigned long n,m,i;
32
+ unsigned long long int n,i;
29
33
 
30
34
  Check_Type(rb_array, T_ARRAY);
31
35
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
41
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
38
42
  }
39
43
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(i4v_t));
44
+ vector->data = internal_allocate_vector_array(vector->len);
41
45
 
42
46
  data = (i4v_t *)vector->data;
43
47
  for(i = 0; i < vector->len; i++)
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
49
  data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
46
50
  }
47
51
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 4);
50
- for(i = n % 4; i > 0; i--)
51
- {
52
- data[m/4].f[i] = 1.0;
53
- }
52
+ internal_sanitize_unaligned_final_vector(vector, sizeof(int));
54
53
 
55
54
  return(self);
56
55
  }
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
58
  * another FloatArray object, returning a new FloatArray. */
60
59
  static VALUE method_multiply(VALUE self, VALUE obj)
61
60
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
61
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
63
62
  }
64
63
 
65
64
  /* Public: Divide values contained in the data array by those contained in
66
65
  * another FloatArray object, returning a new FloatArray. */
67
66
  static VALUE method_divide(VALUE self, VALUE obj)
68
67
  {
69
- return(internal_apply_operation(self, obj, func_divide));
68
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
70
69
  }
71
70
 
72
71
  /* Public: add values contained in the data array with those contained in
73
72
  * another FloatArray object, returning a new FloatArray. */
74
73
  static VALUE method_add(VALUE self, VALUE obj)
75
74
  {
76
- return(internal_apply_operation(self, obj, func_add));
75
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
77
76
  }
78
77
 
79
78
  /* Public: and values contained in the data array with those contained in
80
79
  * another FloatArray object, returning a new FloatArray. */
81
80
  static VALUE method_and(VALUE self, VALUE obj)
82
81
  {
83
- return(internal_apply_operation(self, obj, func_and));
82
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
84
83
  }
85
84
 
86
85
  /* Public: or values contained in the data array with those contained in
87
86
  * another FloatArray object, returning a new FloatArray. */
88
87
  static VALUE method_or(VALUE self, VALUE obj)
89
88
  {
90
- return(internal_apply_operation(self, obj, func_or));
89
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
91
90
  }
92
91
 
93
92
  /* Public: xor values contained in the data array with those contained in
94
93
  * another FloatArray object, returning a new FloatArray. */
95
94
  static VALUE method_xor(VALUE self, VALUE obj)
96
95
  {
97
- return(internal_apply_operation(self, obj, func_xor));
96
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
97
+ }
98
+
99
+ /* Public: Compare values contained in the data array with those contained in
100
+ * another IntArray object, return a new IntArray with each element being -1
101
+ * if the data array's value is greater, and 0 otherwise. */
102
+ static VALUE method_gt(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_gt));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another IntArray object, return a new IntArray with each element being -1 if
109
+ * the data array's value is less, and 0 otherwise. */
110
+ static VALUE method_lt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_lt));
98
113
  }
99
114
 
100
115
  /* Public: Subtract values contained in another FloatArray object from those
101
116
  * contained in the current data array object, returning a new FloatArray. */
102
117
  static VALUE method_subtract(VALUE self, VALUE obj)
103
118
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
119
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
105
120
  }
106
121
 
107
122
  /* Public: Return a Ruby Array containing the doubles within the data array. */
108
123
  static VALUE method_to_a(VALUE self)
109
124
  {
110
- unsigned long i;
125
+ unsigned long long int i;
111
126
  vector_t *vector;
112
127
  i4v_t *data;
113
128
  VALUE rb_array = rb_ary_new();
@@ -122,64 +137,6 @@ static VALUE method_to_a(VALUE self)
122
137
  return(rb_array);
123
138
  }
124
139
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- i4v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_IntArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 4);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 3) / 4;
144
-
145
- d1 = (i4v_t *)v1->data;
146
- d2 = (i4v_t *)v2->data;
147
- r = (i4v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 4))
173
- {
174
- for(i = 3; i > rv->len + (rv->len % 4); i--)
175
- {
176
- r[size].f[i] = 1;
177
- }
178
- }
179
-
180
- return(result_obj);
181
- }
182
-
183
140
  /* Function: Multiply two vectors. */
184
141
  static void func_multiply(void *v1, void *v2, void *r)
185
142
  {
@@ -221,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
221
178
  {
222
179
  *(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
223
180
  }
181
+
182
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
183
+ static void func_gt(void *v1, void *v2, void *r)
184
+ {
185
+ *(i4v *)r = (*(i4v *)v1 > *(i4v *)v2);
186
+ }
187
+
188
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
189
+ static void func_lt(void *v1, void *v2, void *r)
190
+ {
191
+ *(i4v *)r = (*(i4v *)v1 < *(i4v *)v2);
192
+ }
@@ -1,18 +1,20 @@
1
1
  #include "ruby.h"
2
2
  #include "simd_common.h"
3
3
 
4
+ extern VALUE SIMD_IntArray;
5
+
4
6
  static VALUE method_initialize(VALUE self, VALUE rb_array);
5
7
  static VALUE method_multiply(VALUE self, VALUE obj);
6
8
  static VALUE method_divide(VALUE self, VALUE obj);
7
9
  static VALUE method_add(VALUE self, VALUE obj);
10
+ static VALUE method_subtract(VALUE self, VALUE obj);
8
11
  static VALUE method_and(VALUE self, VALUE obj);
9
12
  static VALUE method_or(VALUE self, VALUE obj);
10
13
  static VALUE method_xor(VALUE self, VALUE obj);
11
- static VALUE method_subtract(VALUE self, VALUE obj);
14
+ static VALUE method_gt(VALUE self, VALUE obj);
15
+ static VALUE method_lt(VALUE self, VALUE obj);
12
16
  static VALUE method_to_a(VALUE self);
13
17
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
18
  static void func_multiply(void *v1, void *v2, void *r);
17
19
  static void func_divide(void *v1, void *v2, void *r);
18
20
  static void func_add(void *v1, void *v2, void *r);
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
20
22
  static void func_and(void *v1, void *v2, void *r);
21
23
  static void func_or(void *v1, void *v2, void *r);
22
24
  static void func_xor(void *v1, void *v2, void *r);
25
+ static void func_gt(void *v1, void *v2, void *r);
26
+ static void func_lt(void *v1, void *v2, void *r);
@@ -15,6 +15,10 @@ void Init_SIMD_LongArray(VALUE parent)
15
15
  rb_define_method(SIMD_LongArray, "&", method_and, 1);
16
16
  rb_define_method(SIMD_LongArray, "|", method_or, 1);
17
17
  rb_define_method(SIMD_LongArray, "^", method_xor, 1);
18
+ rb_define_method(SIMD_LongArray, "gt", method_gt, 1);
19
+ rb_define_method(SIMD_LongArray, "lt", method_lt, 1);
20
+ rb_define_method(SIMD_LongArray, ">", method_gt, 1);
21
+ rb_define_method(SIMD_LongArray, "<", method_lt, 1);
18
22
  rb_define_method(SIMD_LongArray, "length", method_length, 0);
19
23
  rb_define_method(SIMD_LongArray, "to_a", method_to_a, 0);
20
24
  }
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
29
  {
26
30
  vector_t *vector;
27
31
  l2v_t *data;
28
- unsigned long n,m,i;
32
+ unsigned long long int n,i;
29
33
 
30
34
  Check_Type(rb_array, T_ARRAY);
31
35
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
41
  rb_raise(rb_eArgError, "Vectors must be at least 2 long");
38
42
  }
39
43
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(l2v_t));
44
+ vector->data = internal_allocate_vector_array(vector->len);
41
45
 
42
46
  data = (l2v_t *)vector->data;
43
47
  for(i = 0; i < vector->len; i++)
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
49
  data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
46
50
  }
47
51
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 2);
50
- for(i = n % 2; i > 0; i--)
51
- {
52
- data[m/2].f[i] = 1;
53
- }
52
+ internal_sanitize_unaligned_final_vector(vector, sizeof(long long int));
54
53
 
55
54
  return(self);
56
55
  }
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
58
  * another FloatArray object, returning a new FloatArray. */
60
59
  static VALUE method_multiply(VALUE self, VALUE obj)
61
60
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
61
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_multiply));
63
62
  }
64
63
 
65
64
  /* Public: Divide values contained in the data array by those contained in
66
65
  * another FloatArray object, returning a new FloatArray. */
67
66
  static VALUE method_divide(VALUE self, VALUE obj)
68
67
  {
69
- return(internal_apply_operation(self, obj, func_divide));
68
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_divide));
70
69
  }
71
70
 
72
71
  /* Public: add values contained in the data array with those contained in
73
72
  * another FloatArray object, returning a new FloatArray. */
74
73
  static VALUE method_add(VALUE self, VALUE obj)
75
74
  {
76
- return(internal_apply_operation(self, obj, func_add));
75
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_add));
77
76
  }
78
77
 
79
78
  /* Public: and values contained in the data array with those contained in
80
79
  * another FloatArray object, returning a new FloatArray. */
81
80
  static VALUE method_and(VALUE self, VALUE obj)
82
81
  {
83
- return(internal_apply_operation(self, obj, func_and));
82
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_and));
84
83
  }
85
84
 
86
85
  /* Public: or values contained in the data array with those contained in
87
86
  * another FloatArray object, returning a new FloatArray. */
88
87
  static VALUE method_or(VALUE self, VALUE obj)
89
88
  {
90
- return(internal_apply_operation(self, obj, func_or));
89
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_or));
91
90
  }
92
91
 
93
92
  /* Public: xor values contained in the data array with those contained in
94
93
  * another FloatArray object, returning a new FloatArray. */
95
94
  static VALUE method_xor(VALUE self, VALUE obj)
96
95
  {
97
- return(internal_apply_operation(self, obj, func_xor));
96
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_xor));
97
+ }
98
+
99
+ /* Public: Compare values contained in the data array with those contained in
100
+ * another LongArray object, return a new LongArray with each element being -1
101
+ * if the data array's value is greater, and 0 otherwise. */
102
+ static VALUE method_gt(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_gt));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another LongArray object, return a new LongArray with each element being -1 if
109
+ * the data array's value is less, and 0 otherwise. */
110
+ static VALUE method_lt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_lt));
98
113
  }
99
114
 
100
115
  /* Public: Subtract values contained in another FloatArray object from those
101
116
  * contained in the current data array object, returning a new FloatArray. */
102
117
  static VALUE method_subtract(VALUE self, VALUE obj)
103
118
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
119
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_subtract));
105
120
  }
106
121
 
107
122
  /* Public: Return a Ruby Array containing the doubles within the data array. */
108
123
  static VALUE method_to_a(VALUE self)
109
124
  {
110
- unsigned long i;
125
+ unsigned long long int i;
111
126
  vector_t *vector;
112
127
  l2v_t *data;
113
128
  VALUE rb_array = rb_ary_new();
@@ -122,61 +137,6 @@ static VALUE method_to_a(VALUE self)
122
137
  return(rb_array);
123
138
  }
124
139
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- l2v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_LongArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 2);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 1) / 2;
144
-
145
- d1 = (l2v_t *)v1->data;
146
- d2 = (l2v_t *)v2->data;
147
- r = (l2v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 2))
173
- {
174
- r[size].f[1] = 1;
175
- }
176
-
177
- return(result_obj);
178
- }
179
-
180
140
  /* Function: Multiply two vectors. */
181
141
  static void func_multiply(void *v1, void *v2, void *r)
182
142
  {
@@ -218,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
218
178
  {
219
179
  *(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
220
180
  }
181
+
182
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
183
+ static void func_gt(void *v1, void *v2, void *r)
184
+ {
185
+ *(l2v *)r = (*(l2v *)v1 > *(l2v *)v2);
186
+ }
187
+
188
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
189
+ static void func_lt(void *v1, void *v2, void *r)
190
+ {
191
+ *(l2v *)r = (*(l2v *)v1 < *(l2v *)v2);
192
+ }
@@ -1,18 +1,20 @@
1
1
  #include "ruby.h"
2
2
  #include "simd_common.h"
3
3
 
4
+ extern VALUE SIMD_LongArray;
5
+
4
6
  static VALUE method_initialize(VALUE self, VALUE rb_array);
5
7
  static VALUE method_multiply(VALUE self, VALUE obj);
6
8
  static VALUE method_divide(VALUE self, VALUE obj);
7
9
  static VALUE method_add(VALUE self, VALUE obj);
10
+ static VALUE method_subtract(VALUE self, VALUE obj);
8
11
  static VALUE method_and(VALUE self, VALUE obj);
9
12
  static VALUE method_or(VALUE self, VALUE obj);
10
13
  static VALUE method_xor(VALUE self, VALUE obj);
11
- static VALUE method_subtract(VALUE self, VALUE obj);
14
+ static VALUE method_gt(VALUE self, VALUE obj);
15
+ static VALUE method_lt(VALUE self, VALUE obj);
12
16
  static VALUE method_to_a(VALUE self);
13
17
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
18
  static void func_multiply(void *v1, void *v2, void *r);
17
19
  static void func_divide(void *v1, void *v2, void *r);
18
20
  static void func_add(void *v1, void *v2, void *r);
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
20
22
  static void func_and(void *v1, void *v2, void *r);
21
23
  static void func_or(void *v1, void *v2, void *r);
22
24
  static void func_xor(void *v1, void *v2, void *r);
25
+ static void func_gt(void *v1, void *v2, void *r);
26
+ static void func_lt(void *v1, void *v2, void *r);
@@ -1,4 +1,5 @@
1
1
  #include "simd_smallfloatarray.h"
2
+ #include "simd_intarray.h"
2
3
 
3
4
  VALUE SIMD_SmallFloatArray = Qnil;
4
5
 
@@ -12,6 +13,13 @@ void Init_SIMD_SmallFloatArray(VALUE parent)
12
13
  rb_define_method(SIMD_SmallFloatArray, "/", method_divide, 1);
13
14
  rb_define_method(SIMD_SmallFloatArray, "+", method_add, 1);
14
15
  rb_define_method(SIMD_SmallFloatArray, "-", method_subtract, 1);
16
+ rb_define_method(SIMD_SmallFloatArray, "&", method_and, 1);
17
+ rb_define_method(SIMD_SmallFloatArray, "|", method_or, 1);
18
+ rb_define_method(SIMD_SmallFloatArray, "^", method_xor, 1);
19
+ rb_define_method(SIMD_SmallFloatArray, "gt", method_gt, 1);
20
+ rb_define_method(SIMD_SmallFloatArray, "lt", method_lt, 1);
21
+ rb_define_method(SIMD_SmallFloatArray, ">", method_gt, 1);
22
+ rb_define_method(SIMD_SmallFloatArray, "<", method_lt, 1);
15
23
  rb_define_method(SIMD_SmallFloatArray, "length", method_length, 0);
16
24
  rb_define_method(SIMD_SmallFloatArray, "to_a", method_to_a, 0);
17
25
  }
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
30
  {
23
31
  vector_t *vector;
24
32
  f4v_t *data;
25
- unsigned long n,m,i;
33
+ unsigned long long int n,i;
26
34
 
27
35
  Check_Type(rb_array, T_ARRAY);
28
36
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
42
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
43
  }
36
44
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(f4v_t));
45
+ vector->data = internal_allocate_vector_array(vector->len);
38
46
 
39
47
  data = (f4v_t *)vector->data;
40
48
  for(i = 0; i < vector->len; i++)
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
50
  data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
43
51
  }
44
52
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 4);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/4].f[i] = 1.0;
50
- }
53
+ internal_sanitize_unaligned_final_vector(vector, sizeof(float));
51
54
 
52
55
  return(self);
53
56
  }
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
59
  * another FloatArray object, returning a new FloatArray. */
57
60
  static VALUE method_multiply(VALUE self, VALUE obj)
58
61
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
62
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
60
63
  }
61
64
 
62
65
  /* Public: Divide values contained in the data array by those contained in
63
66
  * another FloatArray object, returning a new FloatArray. */
64
67
  static VALUE method_divide(VALUE self, VALUE obj)
65
68
  {
66
- return(internal_apply_operation(self, obj, func_divide));
69
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
67
70
  }
68
71
 
69
72
  /* Public: add values contained in the data array with those contained in
70
73
  * another FloatArray object, returning a new FloatArray. */
71
74
  static VALUE method_add(VALUE self, VALUE obj)
72
75
  {
73
- return(internal_apply_operation(self, obj, func_add));
76
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
74
77
  }
75
78
 
76
79
  /* Public: Subtract values contained in another FloatArray object from those
77
80
  * contained in the current data array object, returning a new FloatArray. */
78
81
  static VALUE method_subtract(VALUE self, VALUE obj)
79
82
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
83
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
84
+ }
85
+
86
+ /* Public: and values contained in the data array with those contained in
87
+ * another FloatArray object, returning a new FloatArray. */
88
+ static VALUE method_and(VALUE self, VALUE obj)
89
+ {
90
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_and));
91
+ }
92
+
93
+ /* Public: or values contained in the data array with those contained in
94
+ * another FloatArray object, returning a new FloatArray. */
95
+ static VALUE method_or(VALUE self, VALUE obj)
96
+ {
97
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_or));
98
+ }
99
+
100
+ /* Public: xor values contained in the data array with those contained in
101
+ * another FloatArray object, returning a new FloatArray. */
102
+ static VALUE method_xor(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_xor));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another SmallFloatArray object, return a new IntArray with each element being
109
+ * -1 if the data array's value is greater, and 0 otherwise. */
110
+ static VALUE method_gt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_gt));
113
+ }
114
+
115
+ /* Public: Compare values contained in the data array with those contained in
116
+ * another SmallFloatArray object, return a new IntArray with each element being
117
+ * -1 if the data array's value is less, and 0 otherwise. */
118
+ static VALUE method_lt(VALUE self, VALUE obj)
119
+ {
120
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_lt));
81
121
  }
82
122
 
83
123
  /* Public: Return a Ruby Array containing the doubles within the data array. */
84
124
  static VALUE method_to_a(VALUE self)
85
125
  {
86
- unsigned long i;
126
+ unsigned long long int i;
87
127
  vector_t *vector;
88
128
  f4v_t *data;
89
129
  VALUE rb_array = rb_ary_new();
@@ -98,64 +138,6 @@ static VALUE method_to_a(VALUE self)
98
138
  return(rb_array);
99
139
  }
100
140
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- f4v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_SmallFloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 4);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 3) / 4;
120
-
121
- d1 = (f4v_t *)v1->data;
122
- d2 = (f4v_t *)v2->data;
123
- r = (f4v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 4))
149
- {
150
- for(i = 3; i > rv->len + (rv->len % 4); i--)
151
- {
152
- r[size].f[i] = 1;
153
- }
154
- }
155
-
156
- return(result_obj);
157
- }
158
-
159
141
  /* Function: Multiply two vectors. */
160
142
  static void func_multiply(void *v1, void *v2, void *r)
161
143
  {
@@ -179,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
179
161
  {
180
162
  *(f4v *)r = *(f4v *)v1 - *(f4v *)v2;
181
163
  }
164
+
165
+ /* Function: Perform a binary AND on two vectors. */
166
+ static void func_and(void *v1, void *v2, void *r)
167
+ {
168
+ *(i4v *)r = *(i4v *)v1 & *(i4v *)v2;
169
+ }
170
+
171
+ /* Function: Perform a binary OR on two vectors. */
172
+ static void func_or(void *v1, void *v2, void *r)
173
+ {
174
+ *(i4v *)r = *(i4v *)v1 | *(i4v *)v2;
175
+ }
176
+
177
+ /* Function: Perform a binary XOR on two vectors. */
178
+ static void func_xor(void *v1, void *v2, void *r)
179
+ {
180
+ *(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
181
+ }
182
+
183
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
184
+ static void func_gt(void *v1, void *v2, void *r)
185
+ {
186
+ *(i4v *)r = (*(f4v *)v1 > *(f4v *)v2);
187
+ }
188
+
189
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
190
+ static void func_lt(void *v1, void *v2, void *r)
191
+ {
192
+ *(i4v *)r = (*(f4v *)v1 < *(f4v *)v2);
193
+ }
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
6
6
  static VALUE method_divide(VALUE self, VALUE obj);
7
7
  static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
+ static VALUE method_and(VALUE self, VALUE obj);
10
+ static VALUE method_or(VALUE self, VALUE obj);
11
+ static VALUE method_xor(VALUE self, VALUE obj);
12
+ static VALUE method_gt(VALUE self, VALUE obj);
13
+ static VALUE method_lt(VALUE self, VALUE obj);
9
14
  static VALUE method_to_a(VALUE self);
10
15
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
16
  static void func_multiply(void *v1, void *v2, void *r);
14
17
  static void func_divide(void *v1, void *v2, void *r);
15
18
  static void func_add(void *v1, void *v2, void *r);
16
19
  static void func_subtract(void *v1, void *v2, void *r);
20
+ static void func_and(void *v1, void *v2, void *r);
21
+ static void func_or(void *v1, void *v2, void *r);
22
+ static void func_xor(void *v1, void *v2, void *r);
23
+ static void func_gt(void *v1, void *v2, void *r);
24
+ static void func_lt(void *v1, void *v2, void *r);
@@ -40,17 +40,17 @@ typedef union i4v_t
40
40
  /*
41
41
  * Types for LongArray
42
42
  */
43
- typedef long int __attribute__ ((vector_size (16))) l2v;
43
+ typedef long long int __attribute__ ((vector_size (16))) l2v;
44
44
  typedef union l2v_t
45
45
  {
46
46
  l2v v;
47
- long int f[2];
47
+ long long int f[2];
48
48
  } l2v_t;
49
49
 
50
50
  typedef struct vector_t
51
51
  {
52
52
  void *data;
53
- unsigned long len;
53
+ unsigned long long int len;
54
54
  } vector_t;
55
55
 
56
56
  typedef void (b_operation)(void *v1, void *v2, void *r);
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tina Wuest
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-12 00:00:00.000000000 Z
11
+ date: 2022-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1.2'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1.2'
27
27
  description: Access to SIMD (Single Instruction Multiple Data) instructions in Ruby
28
28
  email: tina@wuest.me
29
29
  executables: []
@@ -48,7 +48,7 @@ files:
48
48
  homepage: https://gitlab.com/wuest/simd-ruby
49
49
  licenses: []
50
50
  metadata: {}
51
- post_install_message:
51
+ post_install_message:
52
52
  rdoc_options: []
53
53
  require_paths:
54
54
  - lib
@@ -63,9 +63,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
63
63
  - !ruby/object:Gem::Version
64
64
  version: '0'
65
65
  requirements: []
66
- rubyforge_project:
67
- rubygems_version: 2.4.4
68
- signing_key:
66
+ rubygems_version: 3.3.7
67
+ signing_key:
69
68
  specification_version: 4
70
69
  summary: SIMD instructions in ruby
71
70
  test_files: []