simd 0.4.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c15d248c872e4369c45e4151faf81e9761ea1587
4
- data.tar.gz: cdfc5170454be15be1bc75ccc5eba08aa96c884d
2
+ SHA256:
3
+ metadata.gz: d43585672169c7a727e7502dea99cb56e8103c2e6a8effe4f32521ee4cae6e5d
4
+ data.tar.gz: c404674a578002bdf5933baee75e54680717c562b1e8c839a0661e04dfee27a6
5
5
  SHA512:
6
- metadata.gz: 38df7b30a113b325bba7be834fcbe24c44738706495579af5a9e4b508e4db4375e2f36b16a97612eab4dd8da379c450d062c6e0bceb6e8ddf308a952ecf6419c
7
- data.tar.gz: 342680d7f4f6690c8b3338de32ef192aaadb2952461e62e86a848650414b09d793d71e70c5b38909e0c882f062e346995f86f40d3dbf1de692cbe3eb0cfb0c73
6
+ metadata.gz: 86c6b9e43f219190f4bd2a34d0ad1c309eacb56068dbdb0d44f46894b1ceee19bc68c77d4f682d42b335039250730c9637a1d19c5075861c9731fd13a48d71d0
7
+ data.tar.gz: a71afed09b9ead560531c9ec2a9ffb0260843398255963b549abe44db0b45dc036fb604a945c9caff7f5d54dd51626f78ed4f2877803a9a5d2bc455e68c5c332
data/ext/simd/extconf.rb CHANGED
@@ -1,6 +1,12 @@
1
1
  # Makes Makefiles for Ruby extensions.
2
2
  require 'mkmf'
3
3
 
4
+ cpu = RbConfig::CONFIG['arch'].downcase
5
+ if cpu.include?('arm')
6
+ ver = cpu.gsub(/[^\d]*(\d+).*/, '\\1').to_i
7
+ $CFLAGS << ' -mfpu=neon' if ver >= 6
8
+ end
9
+
4
10
  extension_name = 'simd'
5
11
  dir_config(extension_name)
6
12
  create_makefile(extension_name)
@@ -1,5 +1,7 @@
1
1
  #include "simd_common.h"
2
2
 
3
+ #define XMM_BYTES 16 /* Width of the xmm1,2... registers */
4
+
3
5
  /* Internal: Allocate memory for the vector container. */
4
6
  VALUE allocate(VALUE klass)
5
7
  {
@@ -36,10 +38,9 @@ VALUE method_length(VALUE self)
36
38
  }
37
39
 
38
40
  /* Internal: Allocate memory for the data array. */
39
- void *internal_allocate_vector_array(unsigned long count, size_t size)
41
+ void *internal_allocate_vector_array(unsigned long long int count)
40
42
  {
41
- unsigned int modulo = 16 / size;
42
- void *vector = malloc(((count + (count % modulo)) / modulo + 1) * size);
43
+ void *vector = malloc((count + 1) * XMM_BYTES);
43
44
  if(vector == NULL)
44
45
  {
45
46
  rb_raise(rb_eNoMemError, "Unable to allocate memory");
@@ -50,7 +51,7 @@ void *internal_allocate_vector_array(unsigned long count, size_t size)
50
51
 
51
52
  /* Internal: Determine if two arrays can be acted upon, by being of equal
52
53
  * lengths or with the operand's length being a multiple of the data array's. */
53
- int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo)
54
+ int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo)
54
55
  {
55
56
  if((v1 % modulo) != (v2 % modulo))
56
57
  {
@@ -74,3 +75,80 @@ int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modu
74
75
  /* Never reached */
75
76
  return(-1);
76
77
  }
78
+
79
+ /* Internal: Given another object, perform an action specified via a function
80
+ * pointer against both.
81
+ *
82
+ * Since arithmetic is purposefully performed against void pointers, disable
83
+ * warnings regarding this for the current function. */
84
+ #pragma GCC diagnostic push
85
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
86
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func)
87
+ {
88
+ unsigned long long int length, i, j;
89
+ int align;
90
+ vector_t *v1, *v2, *rv;
91
+ void *data;
92
+ VALUE result_obj = allocate(klass);
93
+
94
+ Data_Get_Struct(self, vector_t, v1);
95
+ Data_Get_Struct(obj, vector_t, v2);
96
+ Data_Get_Struct(result_obj, vector_t, rv);
97
+ rv->data = internal_allocate_vector_array(v1->len);
98
+
99
+ align = internal_align_vectors(v1->len, v2->len, (XMM_BYTES / size));
100
+
101
+ length = ((v1->len + (XMM_BYTES / size - 1)) / (XMM_BYTES / size));
102
+ rv->len = v1->len;
103
+
104
+ switch(align)
105
+ {
106
+ case 0: /* Same size arrays */
107
+ for(i = 0; i < length; i++)
108
+ {
109
+ func((v1->data + XMM_BYTES * i), (v2->data + XMM_BYTES * i), (rv->data + XMM_BYTES * i));
110
+ }
111
+ break;
112
+ case 1: /* Operand fills exactly one SSE register (XMM_BYTES / size elements) */
113
+ for(i = 0; i < length; i++)
114
+ {
115
+ func((v1->data + XMM_BYTES * i), v2->data, (rv->data + XMM_BYTES * i));
116
+ }
117
+ break;
118
+ default: /* Self is a multiple of operand's length long */
119
+ for(j = 0; j < v2->len; j++)
120
+ {
121
+ data = v2->data + XMM_BYTES * j;
122
+ for(i = j; i < length; i+=v2->len)
123
+ {
124
+ func((v1->data + XMM_BYTES * i), data, (rv->data + XMM_BYTES * i));
125
+ }
126
+ }
127
+ }
128
+ internal_sanitize_unaligned_final_vector(rv, size);
129
+
130
+ return(result_obj);
131
+ }
132
+ #pragma GCC diagnostic pop
133
+
134
+ /* Internal: Make sure that no null bytes exist beyond the boundary of
135
+ * unaligned vectors. This function should be called after any operation that
136
+ * results in the mutation or creation of a vector array.
137
+ *
138
+ * Since arithmetic is purposefully performed against void pointers, disable
139
+ * warnings regarding this for the current function. */
140
+ #pragma GCC diagnostic push
141
+ #pragma GCC diagnostic ignored "-Wpointer-arith"
142
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size)
143
+ {
144
+ unsigned long long int i;
145
+
146
+ if((rv->len * size) % XMM_BYTES)
147
+ {
148
+ for(i = 1; i <= XMM_BYTES - ((rv->len * size) % XMM_BYTES); i+=size)
149
+ {
150
+ *(unsigned char *)(rv->data + (rv->len * size + i)) = 1;
151
+ }
152
+ }
153
+ }
154
+ #pragma GCC diagnostic pop
@@ -8,5 +8,7 @@ void deallocate(vector_t *vector);
8
8
 
9
9
  VALUE method_length(VALUE self);
10
10
 
11
- void *internal_allocate_vector_array(unsigned long count, size_t size);
12
- int internal_align_vectors(unsigned long v1, unsigned long v2, unsigned int modulo);
11
+ void *internal_allocate_vector_array(unsigned long long int count);
12
+ int internal_align_vectors(unsigned long long int v1, unsigned long long int v2, unsigned int modulo);
13
+ VALUE internal_apply_operation(VALUE self, VALUE obj, size_t size, VALUE klass, b_operation func);
14
+ void internal_sanitize_unaligned_final_vector(vector_t *rv, size_t size);
@@ -1,4 +1,5 @@
1
1
  #include "simd_floatarray.h"
2
+ #include "simd_longarray.h"
2
3
 
3
4
  VALUE SIMD_FloatArray = Qnil;
4
5
 
@@ -12,6 +13,13 @@ void Init_SIMD_FloatArray(VALUE parent)
12
13
  rb_define_method(SIMD_FloatArray, "/", method_divide, 1);
13
14
  rb_define_method(SIMD_FloatArray, "+", method_add, 1);
14
15
  rb_define_method(SIMD_FloatArray, "-", method_subtract, 1);
16
+ rb_define_method(SIMD_FloatArray, "&", method_and, 1);
17
+ rb_define_method(SIMD_FloatArray, "|", method_or, 1);
18
+ rb_define_method(SIMD_FloatArray, "^", method_xor, 1);
19
+ rb_define_method(SIMD_FloatArray, "gt", method_gt, 1);
20
+ rb_define_method(SIMD_FloatArray, "lt", method_lt, 1);
21
+ rb_define_method(SIMD_FloatArray, ">", method_gt, 1);
22
+ rb_define_method(SIMD_FloatArray, "<", method_lt, 1);
15
23
  rb_define_method(SIMD_FloatArray, "length", method_length, 0);
16
24
  rb_define_method(SIMD_FloatArray, "to_a", method_to_a, 0);
17
25
  }
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
30
  {
23
31
  vector_t *vector;
24
32
  d2v_t *data;
25
- unsigned long n,m,i;
33
+ unsigned long long int n,i;
26
34
 
27
35
  Check_Type(rb_array, T_ARRAY);
28
36
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
42
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
43
  }
36
44
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(d2v_t));
45
+ vector->data = internal_allocate_vector_array(vector->len);
38
46
 
39
47
  data = (d2v_t *)vector->data;
40
48
  for(i = 0; i < vector->len; i++)
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
50
  data[i/2].f[i%2] = NUM2DBL(rb_ary_entry(rb_array, i));
43
51
  }
44
52
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 2);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/2].f[1] = 1.0;
50
- }
53
+ internal_sanitize_unaligned_final_vector(vector, sizeof(double));
51
54
 
52
55
  return(self);
53
56
  }
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
59
  * another FloatArray object, returning a new FloatArray. */
57
60
  static VALUE method_multiply(VALUE self, VALUE obj)
58
61
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
62
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_multiply));
60
63
  }
61
64
 
62
65
  /* Public: Divide values contained in the data array by those contained in
63
66
  * another FloatArray object, returning a new FloatArray. */
64
67
  static VALUE method_divide(VALUE self, VALUE obj)
65
68
  {
66
- return(internal_apply_operation(self, obj, func_divide));
69
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_divide));
67
70
  }
68
71
 
69
72
  /* Public: add values contained in the data array with those contained in
70
73
  * another FloatArray object, returning a new FloatArray. */
71
74
  static VALUE method_add(VALUE self, VALUE obj)
72
75
  {
73
- return(internal_apply_operation(self, obj, func_add));
76
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_add));
74
77
  }
75
78
 
76
79
  /* Public: Subtract values contained in another FloatArray object from those
77
80
  * contained in the current data array object, returning a new FloatArray. */
78
81
  static VALUE method_subtract(VALUE self, VALUE obj)
79
82
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
83
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_subtract));
84
+ }
85
+
86
+ /* Public: and values contained in the data array with those contained in
87
+ * another FloatArray object, returning a new FloatArray. */
88
+ static VALUE method_and(VALUE self, VALUE obj)
89
+ {
90
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_and));
91
+ }
92
+
93
+ /* Public: or values contained in the data array with those contained in
94
+ * another FloatArray object, returning a new FloatArray. */
95
+ static VALUE method_or(VALUE self, VALUE obj)
96
+ {
97
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_or));
98
+ }
99
+
100
+ /* Public: xor values contained in the data array with those contained in
101
+ * another FloatArray object, returning a new FloatArray. */
102
+ static VALUE method_xor(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_FloatArray, func_xor));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another FloatArray object, return a new LongArray with each element being
109
+ * -1 if the data array's value is greater, and 0 otherwise. */
110
+ static VALUE method_gt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_gt));
113
+ }
114
+
115
+ /* Public: Compare values contained in the data array with those contained in
116
+ * another FloatArray object, return a new LongArray with each element being
117
+ * -1 if the data array's value is less, and 0 otherwise. */
118
+ static VALUE method_lt(VALUE self, VALUE obj)
119
+ {
120
+ return(internal_apply_operation(self, obj, sizeof(double), SIMD_LongArray, func_lt));
81
121
  }
82
122
 
83
123
  /* Public: Return a Ruby Array containing the doubles within the data array. */
84
124
  static VALUE method_to_a(VALUE self)
85
125
  {
86
- unsigned long i;
126
+ unsigned long long int i;
87
127
  vector_t *vector;
88
128
  d2v_t *data;
89
129
  VALUE rb_array = rb_ary_new();
@@ -98,61 +138,6 @@ static VALUE method_to_a(VALUE self)
98
138
  return(rb_array);
99
139
  }
100
140
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- d2v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_FloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(d2v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 2);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 1) / 2;
120
-
121
- d1 = (d2v_t *)v1->data;
122
- d2 = (d2v_t *)v2->data;
123
- r = (d2v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 2 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 2))
149
- {
150
- r[size].f[1] = 1;
151
- }
152
-
153
- return(result_obj);
154
- }
155
-
156
141
  /* Function: Multiply two vectors. */
157
142
  static void func_multiply(void *v1, void *v2, void *r)
158
143
  {
@@ -176,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
176
161
  {
177
162
  *(d2v *)r = *(d2v *)v1 - *(d2v *)v2;
178
163
  }
164
+
165
+ /* Function: Perform a binary AND on two vectors. */
166
+ static void func_and(void *v1, void *v2, void *r)
167
+ {
168
+ *(l2v *)r = *(l2v *)v1 & *(l2v *)v2;
169
+ }
170
+
171
+ /* Function: Perform a binary OR on two vectors. */
172
+ static void func_or(void *v1, void *v2, void *r)
173
+ {
174
+ *(l2v *)r = *(l2v *)v1 | *(l2v *)v2;
175
+ }
176
+
177
+ /* Function: Perform a binary XOR on two vectors. */
178
+ static void func_xor(void *v1, void *v2, void *r)
179
+ {
180
+ *(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
181
+ }
182
+
183
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
184
+ static void func_gt(void *v1, void *v2, void *r)
185
+ {
186
+ *(l2v *)r = (*(d2v *)v1 > *(d2v *)v2);
187
+ }
188
+
189
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
190
+ static void func_lt(void *v1, void *v2, void *r)
191
+ {
192
+ *(l2v *)r = (*(d2v *)v1 < *(d2v *)v2);
193
+ }
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
6
6
  static VALUE method_divide(VALUE self, VALUE obj);
7
7
  static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
+ static VALUE method_and(VALUE self, VALUE obj);
10
+ static VALUE method_or(VALUE self, VALUE obj);
11
+ static VALUE method_xor(VALUE self, VALUE obj);
12
+ static VALUE method_gt(VALUE self, VALUE obj);
13
+ static VALUE method_lt(VALUE self, VALUE obj);
9
14
  static VALUE method_to_a(VALUE self);
10
15
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
16
  static void func_multiply(void *v1, void *v2, void *r);
14
17
  static void func_divide(void *v1, void *v2, void *r);
15
18
  static void func_add(void *v1, void *v2, void *r);
16
19
  static void func_subtract(void *v1, void *v2, void *r);
20
+ static void func_and(void *v1, void *v2, void *r);
21
+ static void func_or(void *v1, void *v2, void *r);
22
+ static void func_xor(void *v1, void *v2, void *r);
23
+ static void func_gt(void *v1, void *v2, void *r);
24
+ static void func_lt(void *v1, void *v2, void *r);
@@ -15,6 +15,10 @@ void Init_SIMD_IntArray(VALUE parent)
15
15
  rb_define_method(SIMD_IntArray, "&", method_and, 1);
16
16
  rb_define_method(SIMD_IntArray, "|", method_or, 1);
17
17
  rb_define_method(SIMD_IntArray, "^", method_xor, 1);
18
+ rb_define_method(SIMD_IntArray, "gt", method_gt, 1);
19
+ rb_define_method(SIMD_IntArray, "lt", method_lt, 1);
20
+ rb_define_method(SIMD_IntArray, ">", method_gt, 1);
21
+ rb_define_method(SIMD_IntArray, "<", method_lt, 1);
18
22
  rb_define_method(SIMD_IntArray, "length", method_length, 0);
19
23
  rb_define_method(SIMD_IntArray, "to_a", method_to_a, 0);
20
24
  }
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
29
  {
26
30
  vector_t *vector;
27
31
  i4v_t *data;
28
- unsigned long n,m,i;
32
+ unsigned long long int n,i;
29
33
 
30
34
  Check_Type(rb_array, T_ARRAY);
31
35
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
41
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
38
42
  }
39
43
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(i4v_t));
44
+ vector->data = internal_allocate_vector_array(vector->len);
41
45
 
42
46
  data = (i4v_t *)vector->data;
43
47
  for(i = 0; i < vector->len; i++)
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
49
  data[i/4].f[i%4] = NUM2INT(rb_ary_entry(rb_array, i));
46
50
  }
47
51
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 4);
50
- for(i = n % 4; i > 0; i--)
51
- {
52
- data[m/4].f[i] = 1.0;
53
- }
52
+ internal_sanitize_unaligned_final_vector(vector, sizeof(int));
54
53
 
55
54
  return(self);
56
55
  }
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
58
  * another IntArray object, returning a new IntArray. */
60
59
  static VALUE method_multiply(VALUE self, VALUE obj)
61
60
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
61
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_multiply));
63
62
  }
64
63
 
65
64
  /* Public: Divide values contained in the data array by those contained in
66
65
  * another IntArray object, returning a new IntArray. */
67
66
  static VALUE method_divide(VALUE self, VALUE obj)
68
67
  {
69
- return(internal_apply_operation(self, obj, func_divide));
68
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_divide));
70
69
  }
71
70
 
72
71
  /* Public: add values contained in the data array with those contained in
73
72
  * another IntArray object, returning a new IntArray. */
74
73
  static VALUE method_add(VALUE self, VALUE obj)
75
74
  {
76
- return(internal_apply_operation(self, obj, func_add));
75
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_add));
77
76
  }
78
77
 
79
78
  /* Public: and values contained in the data array with those contained in
80
79
  * another IntArray object, returning a new IntArray. */
81
80
  static VALUE method_and(VALUE self, VALUE obj)
82
81
  {
83
- return(internal_apply_operation(self, obj, func_and));
82
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_and));
84
83
  }
85
84
 
86
85
  /* Public: or values contained in the data array with those contained in
87
86
  * another IntArray object, returning a new IntArray. */
88
87
  static VALUE method_or(VALUE self, VALUE obj)
89
88
  {
90
- return(internal_apply_operation(self, obj, func_or));
89
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_or));
91
90
  }
92
91
 
93
92
  /* Public: xor values contained in the data array with those contained in
94
93
  * another IntArray object, returning a new IntArray. */
95
94
  static VALUE method_xor(VALUE self, VALUE obj)
96
95
  {
97
- return(internal_apply_operation(self, obj, func_xor));
96
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_xor));
97
+ }
98
+
99
+ /* Public: Compare values contained in the data array with those contained in
100
+ * another IntArray object, return a new IntArray with each element being -1
101
+ * if the data array's value is greater, and 0 otherwise. */
102
+ static VALUE method_gt(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_gt));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another IntArray object, return a new IntArray with each element being -1 if
109
+ * the data array's value is less, and 0 otherwise. */
110
+ static VALUE method_lt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_lt));
98
113
  }
99
114
 
100
115
  /* Public: Subtract values contained in another IntArray object from those
101
116
  * contained in the current data array object, returning a new IntArray. */
102
117
  static VALUE method_subtract(VALUE self, VALUE obj)
103
118
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
119
+ return(internal_apply_operation(self, obj, sizeof(int), SIMD_IntArray, func_subtract));
105
120
  }
106
121
 
107
122
  /* Public: Return a Ruby Array containing the integers within the data array. */
108
123
  static VALUE method_to_a(VALUE self)
109
124
  {
110
- unsigned long i;
125
+ unsigned long long int i;
111
126
  vector_t *vector;
112
127
  i4v_t *data;
113
128
  VALUE rb_array = rb_ary_new();
@@ -122,64 +137,6 @@ static VALUE method_to_a(VALUE self)
122
137
  return(rb_array);
123
138
  }
124
139
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- i4v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_IntArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(i4v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 4);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 3) / 4;
144
-
145
- d1 = (i4v_t *)v1->data;
146
- d2 = (i4v_t *)v2->data;
147
- r = (i4v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 4))
173
- {
174
- for(i = 3; i > rv->len + (rv->len % 4); i--)
175
- {
176
- r[size].f[i] = 1;
177
- }
178
- }
179
-
180
- return(result_obj);
181
- }
182
-
183
140
  /* Function: Multiply two vectors. */
184
141
  static void func_multiply(void *v1, void *v2, void *r)
185
142
  {
@@ -221,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
221
178
  {
222
179
  *(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
223
180
  }
181
+
182
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
183
+ static void func_gt(void *v1, void *v2, void *r)
184
+ {
185
+ *(i4v *)r = (*(i4v *)v1 > *(i4v *)v2);
186
+ }
187
+
188
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
189
+ static void func_lt(void *v1, void *v2, void *r)
190
+ {
191
+ *(i4v *)r = (*(i4v *)v1 < *(i4v *)v2);
192
+ }
@@ -1,18 +1,20 @@
1
1
  #include "ruby.h"
2
2
  #include "simd_common.h"
3
3
 
4
+ extern VALUE SIMD_IntArray;
5
+
4
6
  static VALUE method_initialize(VALUE self, VALUE rb_array);
5
7
  static VALUE method_multiply(VALUE self, VALUE obj);
6
8
  static VALUE method_divide(VALUE self, VALUE obj);
7
9
  static VALUE method_add(VALUE self, VALUE obj);
10
+ static VALUE method_subtract(VALUE self, VALUE obj);
8
11
  static VALUE method_and(VALUE self, VALUE obj);
9
12
  static VALUE method_or(VALUE self, VALUE obj);
10
13
  static VALUE method_xor(VALUE self, VALUE obj);
11
- static VALUE method_subtract(VALUE self, VALUE obj);
14
+ static VALUE method_gt(VALUE self, VALUE obj);
15
+ static VALUE method_lt(VALUE self, VALUE obj);
12
16
  static VALUE method_to_a(VALUE self);
13
17
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
18
  static void func_multiply(void *v1, void *v2, void *r);
17
19
  static void func_divide(void *v1, void *v2, void *r);
18
20
  static void func_add(void *v1, void *v2, void *r);
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
20
22
  static void func_and(void *v1, void *v2, void *r);
21
23
  static void func_or(void *v1, void *v2, void *r);
22
24
  static void func_xor(void *v1, void *v2, void *r);
25
+ static void func_gt(void *v1, void *v2, void *r);
26
+ static void func_lt(void *v1, void *v2, void *r);
@@ -15,6 +15,10 @@ void Init_SIMD_LongArray(VALUE parent)
15
15
  rb_define_method(SIMD_LongArray, "&", method_and, 1);
16
16
  rb_define_method(SIMD_LongArray, "|", method_or, 1);
17
17
  rb_define_method(SIMD_LongArray, "^", method_xor, 1);
18
+ rb_define_method(SIMD_LongArray, "gt", method_gt, 1);
19
+ rb_define_method(SIMD_LongArray, "lt", method_lt, 1);
20
+ rb_define_method(SIMD_LongArray, ">", method_gt, 1);
21
+ rb_define_method(SIMD_LongArray, "<", method_lt, 1);
18
22
  rb_define_method(SIMD_LongArray, "length", method_length, 0);
19
23
  rb_define_method(SIMD_LongArray, "to_a", method_to_a, 0);
20
24
  }
@@ -25,7 +29,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
25
29
  {
26
30
  vector_t *vector;
27
31
  l2v_t *data;
28
- unsigned long n,m,i;
32
+ unsigned long long int n,i;
29
33
 
30
34
  Check_Type(rb_array, T_ARRAY);
31
35
  Data_Get_Struct(self, vector_t, vector);
@@ -37,7 +41,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
37
41
  rb_raise(rb_eArgError, "Vectors must be at least 2 long");
38
42
  }
39
43
 
40
- vector->data = internal_allocate_vector_array(vector->len, sizeof(l2v_t));
44
+ vector->data = internal_allocate_vector_array(vector->len);
41
45
 
42
46
  data = (l2v_t *)vector->data;
43
47
  for(i = 0; i < vector->len; i++)
@@ -45,12 +49,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
45
49
  data[i/2].f[i%2] = NUM2LONG(rb_ary_entry(rb_array, i));
46
50
  }
47
51
 
48
- /* If the array is an odd number of elements, set the final element to 1 */
49
- m = n + (n % 2);
50
- for(i = n % 2; i > 0; i--)
51
- {
52
- data[m/2].f[i] = 1;
53
- }
52
+ internal_sanitize_unaligned_final_vector(vector, sizeof(long long int));
54
53
 
55
54
  return(self);
56
55
  }
@@ -59,55 +58,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
59
58
  * another LongArray object, returning a new LongArray. */
60
59
  static VALUE method_multiply(VALUE self, VALUE obj)
61
60
  {
62
- return(internal_apply_operation(self, obj, func_multiply));
61
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_multiply));
63
62
  }
64
63
 
65
64
  /* Public: Divide values contained in the data array by those contained in
66
65
  * another LongArray object, returning a new LongArray. */
67
66
  static VALUE method_divide(VALUE self, VALUE obj)
68
67
  {
69
- return(internal_apply_operation(self, obj, func_divide));
68
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_divide));
70
69
  }
71
70
 
72
71
  /* Public: add values contained in the data array with those contained in
73
72
  * another LongArray object, returning a new LongArray. */
74
73
  static VALUE method_add(VALUE self, VALUE obj)
75
74
  {
76
- return(internal_apply_operation(self, obj, func_add));
75
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_add));
77
76
  }
78
77
 
79
78
  /* Public: and values contained in the data array with those contained in
80
79
  * another LongArray object, returning a new LongArray. */
81
80
  static VALUE method_and(VALUE self, VALUE obj)
82
81
  {
83
- return(internal_apply_operation(self, obj, func_and));
82
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_and));
84
83
  }
85
84
 
86
85
  /* Public: or values contained in the data array with those contained in
87
86
  * another LongArray object, returning a new LongArray. */
88
87
  static VALUE method_or(VALUE self, VALUE obj)
89
88
  {
90
- return(internal_apply_operation(self, obj, func_or));
89
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_or));
91
90
  }
92
91
 
93
92
  /* Public: xor values contained in the data array with those contained in
94
93
  * another LongArray object, returning a new LongArray. */
95
94
  static VALUE method_xor(VALUE self, VALUE obj)
96
95
  {
97
- return(internal_apply_operation(self, obj, func_xor));
96
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_xor));
97
+ }
98
+
99
+ /* Public: Compare values contained in the data array with those contained in
100
+ * another LongArray object, return a new LongArray with each element being -1
101
+ * if the data array's value is greater, and 0 otherwise. */
102
+ static VALUE method_gt(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_gt));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another LongArray object, return a new LongArray with each element being -1 if
109
+ * the data array's value is less, and 0 otherwise. */
110
+ static VALUE method_lt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_lt));
98
113
  }
99
114
 
100
115
  /* Public: Subtract values contained in another LongArray object from those
101
116
  * contained in the current data array object, returning a new LongArray. */
102
117
  static VALUE method_subtract(VALUE self, VALUE obj)
103
118
  {
104
- return(internal_apply_operation(self, obj, func_subtract));
119
+ return(internal_apply_operation(self, obj, sizeof(long long int), SIMD_LongArray, func_subtract));
105
120
  }
106
121
 
107
122
  /* Public: Return a Ruby Array containing the long integers within the data array. */
108
123
  static VALUE method_to_a(VALUE self)
109
124
  {
110
- unsigned long i;
125
+ unsigned long long int i;
111
126
  vector_t *vector;
112
127
  l2v_t *data;
113
128
  VALUE rb_array = rb_ary_new();
@@ -122,61 +137,6 @@ static VALUE method_to_a(VALUE self)
122
137
  return(rb_array);
123
138
  }
124
139
 
125
- /* Internal: Given another FloatArray object, perform an action specified via a
126
- * function pointer against both. */
127
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
128
- {
129
- unsigned long size, i;
130
- int align;
131
- vector_t *v1, *v2, *rv;
132
- l2v_t *d1, *d2, *r;
133
- VALUE result_obj = allocate(SIMD_LongArray);
134
-
135
- Data_Get_Struct(self, vector_t, v1);
136
- Data_Get_Struct(obj, vector_t, v2);
137
- Data_Get_Struct(result_obj, vector_t, rv);
138
- rv->data = internal_allocate_vector_array(v1->len, sizeof(l2v_t));
139
-
140
- align = internal_align_vectors(v1->len, v2->len, 2);
141
-
142
- /* Ensure that size will be the result of ceil(len / 4.0) */
143
- size = (v1->len + 1) / 2;
144
-
145
- d1 = (l2v_t *)v1->data;
146
- d2 = (l2v_t *)v2->data;
147
- r = (l2v_t *)rv->data;
148
-
149
- rv->len = v1->len;
150
-
151
- switch(align)
152
- {
153
- case 0: /* Same size arrays */
154
- for(i = 0; i < size; i++)
155
- {
156
- func(&d1[i].v, &d2[i].v, &r[i].v);
157
- }
158
- break;
159
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
160
- for(i = 0; i < size; i++)
161
- {
162
- func(&d1[i].v, &d2[0].v, &r[i].v);
163
- }
164
- break;
165
- default: /* Self is a multiple of operand's length long */
166
- for(i = 0; i < size; i++)
167
- {
168
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
169
- }
170
- }
171
-
172
- if(rv->len != rv->len + (rv->len % 2))
173
- {
174
- r[size].f[1] = 1;
175
- }
176
-
177
- return(result_obj);
178
- }
179
-
180
140
  /* Function: Multiply two vectors. */
181
141
  static void func_multiply(void *v1, void *v2, void *r)
182
142
  {
@@ -218,3 +178,15 @@ static void func_xor(void *v1, void *v2, void *r)
218
178
  {
219
179
  *(l2v *)r = *(l2v *)v1 ^ *(l2v *)v2;
220
180
  }
181
+
182
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
183
+ static void func_gt(void *v1, void *v2, void *r)
184
+ {
185
+ *(l2v *)r = (*(l2v *)v1 > *(l2v *)v2);
186
+ }
187
+
188
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
189
+ static void func_lt(void *v1, void *v2, void *r)
190
+ {
191
+ *(l2v *)r = (*(l2v *)v1 < *(l2v *)v2);
192
+ }
@@ -1,18 +1,20 @@
1
1
  #include "ruby.h"
2
2
  #include "simd_common.h"
3
3
 
4
+ extern VALUE SIMD_LongArray;
5
+
4
6
  static VALUE method_initialize(VALUE self, VALUE rb_array);
5
7
  static VALUE method_multiply(VALUE self, VALUE obj);
6
8
  static VALUE method_divide(VALUE self, VALUE obj);
7
9
  static VALUE method_add(VALUE self, VALUE obj);
10
+ static VALUE method_subtract(VALUE self, VALUE obj);
8
11
  static VALUE method_and(VALUE self, VALUE obj);
9
12
  static VALUE method_or(VALUE self, VALUE obj);
10
13
  static VALUE method_xor(VALUE self, VALUE obj);
11
- static VALUE method_subtract(VALUE self, VALUE obj);
14
+ static VALUE method_gt(VALUE self, VALUE obj);
15
+ static VALUE method_lt(VALUE self, VALUE obj);
12
16
  static VALUE method_to_a(VALUE self);
13
17
 
14
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
15
-
16
18
  static void func_multiply(void *v1, void *v2, void *r);
17
19
  static void func_divide(void *v1, void *v2, void *r);
18
20
  static void func_add(void *v1, void *v2, void *r);
@@ -20,3 +22,5 @@ static void func_subtract(void *v1, void *v2, void *r);
20
22
  static void func_and(void *v1, void *v2, void *r);
21
23
  static void func_or(void *v1, void *v2, void *r);
22
24
  static void func_xor(void *v1, void *v2, void *r);
25
+ static void func_gt(void *v1, void *v2, void *r);
26
+ static void func_lt(void *v1, void *v2, void *r);
@@ -1,4 +1,5 @@
1
1
  #include "simd_smallfloatarray.h"
2
+ #include "simd_intarray.h"
2
3
 
3
4
  VALUE SIMD_SmallFloatArray = Qnil;
4
5
 
@@ -12,6 +13,13 @@ void Init_SIMD_SmallFloatArray(VALUE parent)
12
13
  rb_define_method(SIMD_SmallFloatArray, "/", method_divide, 1);
13
14
  rb_define_method(SIMD_SmallFloatArray, "+", method_add, 1);
14
15
  rb_define_method(SIMD_SmallFloatArray, "-", method_subtract, 1);
16
+ rb_define_method(SIMD_SmallFloatArray, "&", method_and, 1);
17
+ rb_define_method(SIMD_SmallFloatArray, "|", method_or, 1);
18
+ rb_define_method(SIMD_SmallFloatArray, "^", method_xor, 1);
19
+ rb_define_method(SIMD_SmallFloatArray, "gt", method_gt, 1);
20
+ rb_define_method(SIMD_SmallFloatArray, "lt", method_lt, 1);
21
+ rb_define_method(SIMD_SmallFloatArray, ">", method_gt, 1);
22
+ rb_define_method(SIMD_SmallFloatArray, "<", method_lt, 1);
15
23
  rb_define_method(SIMD_SmallFloatArray, "length", method_length, 0);
16
24
  rb_define_method(SIMD_SmallFloatArray, "to_a", method_to_a, 0);
17
25
  }
@@ -22,7 +30,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
22
30
  {
23
31
  vector_t *vector;
24
32
  f4v_t *data;
25
- unsigned long n,m,i;
33
+ unsigned long long int n,i;
26
34
 
27
35
  Check_Type(rb_array, T_ARRAY);
28
36
  Data_Get_Struct(self, vector_t, vector);
@@ -34,7 +42,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
34
42
  rb_raise(rb_eArgError, "Vectors must be at least 4 long");
35
43
  }
36
44
 
37
- vector->data = internal_allocate_vector_array(vector->len, sizeof(f4v_t));
45
+ vector->data = internal_allocate_vector_array(vector->len);
38
46
 
39
47
  data = (f4v_t *)vector->data;
40
48
  for(i = 0; i < vector->len; i++)
@@ -42,12 +50,7 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
42
50
  data[i/4].f[i%4] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
43
51
  }
44
52
 
45
- /* If the array is an odd number of elements, set the final element to 1 */
46
- m = n + (n % 4);
47
- for(i = n % 4; i > 0; i--)
48
- {
49
- data[m/4].f[i] = 1.0;
50
- }
53
+ internal_sanitize_unaligned_final_vector(vector, sizeof(float));
51
54
 
52
55
  return(self);
53
56
  }
@@ -56,34 +59,71 @@ static VALUE method_initialize(VALUE self, VALUE rb_array)
56
59
  * another SmallFloatArray object, returning a new SmallFloatArray. */
57
60
  static VALUE method_multiply(VALUE self, VALUE obj)
58
61
  {
59
- return(internal_apply_operation(self, obj, func_multiply));
62
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_multiply));
60
63
  }
61
64
 
62
65
  /* Public: Divide values contained in the data array by those contained in
63
66
  * another SmallFloatArray object, returning a new SmallFloatArray. */
64
67
  static VALUE method_divide(VALUE self, VALUE obj)
65
68
  {
66
- return(internal_apply_operation(self, obj, func_divide));
69
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_divide));
67
70
  }
68
71
 
69
72
  /* Public: add values contained in the data array with those contained in
70
73
  * another SmallFloatArray object, returning a new SmallFloatArray. */
71
74
  static VALUE method_add(VALUE self, VALUE obj)
72
75
  {
73
- return(internal_apply_operation(self, obj, func_add));
76
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_add));
74
77
  }
75
78
 
76
79
  /* Public: Subtract values contained in another SmallFloatArray object from those
77
80
  * contained in the current data array object, returning a new SmallFloatArray. */
78
81
  static VALUE method_subtract(VALUE self, VALUE obj)
79
82
  {
80
- return(internal_apply_operation(self, obj, func_subtract));
83
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_subtract));
84
+ }
85
+
86
+ /* Public: and values contained in the data array with those contained in
87
+ * another SmallFloatArray object, returning a new SmallFloatArray. */
88
+ static VALUE method_and(VALUE self, VALUE obj)
89
+ {
90
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_and));
91
+ }
92
+
93
+ /* Public: or values contained in the data array with those contained in
94
+ * another SmallFloatArray object, returning a new SmallFloatArray. */
95
+ static VALUE method_or(VALUE self, VALUE obj)
96
+ {
97
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_or));
98
+ }
99
+
100
+ /* Public: xor values contained in the data array with those contained in
101
+ * another SmallFloatArray object, returning a new SmallFloatArray. */
102
+ static VALUE method_xor(VALUE self, VALUE obj)
103
+ {
104
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_SmallFloatArray, func_xor));
105
+ }
106
+
107
+ /* Public: Compare values contained in the data array with those contained in
108
+ * another SmallFloatArray object, return a new IntArray with each element being
109
+ * -1 if the data array's value is greater, and 0 otherwise. */
110
+ static VALUE method_gt(VALUE self, VALUE obj)
111
+ {
112
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_gt));
113
+ }
114
+
115
+ /* Public: Compare values contained in the data array with those contained in
116
+ * another SmallFloatArray object, return a new IntArray with each element being
117
+ * -1 if the data array's value is less, and 0 otherwise. */
118
+ static VALUE method_lt(VALUE self, VALUE obj)
119
+ {
120
+ return(internal_apply_operation(self, obj, sizeof(float), SIMD_IntArray, func_lt));
81
121
  }
82
122
 
83
123
  /* Public: Return a Ruby Array containing the doubles within the data array. */
84
124
  static VALUE method_to_a(VALUE self)
85
125
  {
86
- unsigned long i;
126
+ unsigned long long int i;
87
127
  vector_t *vector;
88
128
  f4v_t *data;
89
129
  VALUE rb_array = rb_ary_new();
@@ -98,64 +138,6 @@ static VALUE method_to_a(VALUE self)
98
138
  return(rb_array);
99
139
  }
100
140
 
101
- /* Internal: Given another FloatArray object, perform an action specified via a
102
- * function pointer against both. */
103
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func)
104
- {
105
- unsigned long size, i;
106
- int align;
107
- vector_t *v1, *v2, *rv;
108
- f4v_t *d1, *d2, *r;
109
- VALUE result_obj = allocate(SIMD_SmallFloatArray);
110
-
111
- Data_Get_Struct(self, vector_t, v1);
112
- Data_Get_Struct(obj, vector_t, v2);
113
- Data_Get_Struct(result_obj, vector_t, rv);
114
- rv->data = internal_allocate_vector_array(v1->len, sizeof(f4v_t));
115
-
116
- align = internal_align_vectors(v1->len, v2->len, 4);
117
-
118
- /* Ensure that size will be the result of ceil(len / 4.0) */
119
- size = (v1->len + 3) / 4;
120
-
121
- d1 = (f4v_t *)v1->data;
122
- d2 = (f4v_t *)v2->data;
123
- r = (f4v_t *)rv->data;
124
-
125
- rv->len = v1->len;
126
-
127
- switch(align)
128
- {
129
- case 0: /* Same size arrays */
130
- for(i = 0; i < size; i++)
131
- {
132
- func(&d1[i].v, &d2[i].v, &r[i].v);
133
- }
134
- break;
135
- case 1: /* Operand is exactly 4 long (size of 1 sse register) */
136
- for(i = 0; i < size; i++)
137
- {
138
- func(&d1[i].v, &d2[0].v, &r[i].v);
139
- }
140
- break;
141
- default: /* Self is a multiple of operand's length long */
142
- for(i = 0; i < size; i++)
143
- {
144
- func(&d1[i].v, &d2[i % v2->len].v, &r[i].v);
145
- }
146
- }
147
-
148
- if(rv->len != rv->len + (rv->len % 4))
149
- {
150
- for(i = 3; i > rv->len + (rv->len % 4); i--)
151
- {
152
- r[size].f[i] = 1;
153
- }
154
- }
155
-
156
- return(result_obj);
157
- }
158
-
159
141
  /* Function: Multiply two vectors. */
160
142
  static void func_multiply(void *v1, void *v2, void *r)
161
143
  {
@@ -179,3 +161,33 @@ static void func_subtract(void *v1, void *v2, void *r)
179
161
  {
180
162
  *(f4v *)r = *(f4v *)v1 - *(f4v *)v2;
181
163
  }
164
+
165
+ /* Function: Perform a binary AND on two vectors. */
166
+ static void func_and(void *v1, void *v2, void *r)
167
+ {
168
+ *(i4v *)r = *(i4v *)v1 & *(i4v *)v2;
169
+ }
170
+
171
+ /* Function: Perform a binary OR on two vectors. */
172
+ static void func_or(void *v1, void *v2, void *r)
173
+ {
174
+ *(i4v *)r = *(i4v *)v1 | *(i4v *)v2;
175
+ }
176
+
177
+ /* Function: Perform a binary XOR on two vectors. */
178
+ static void func_xor(void *v1, void *v2, void *r)
179
+ {
180
+ *(i4v *)r = *(i4v *)v1 ^ *(i4v *)v2;
181
+ }
182
+
183
+ /* Function: Compare vectors, return -1 if v1 is greater than v2, 0 otherwise */
184
+ static void func_gt(void *v1, void *v2, void *r)
185
+ {
186
+ *(i4v *)r = (*(f4v *)v1 > *(f4v *)v2);
187
+ }
188
+
189
+ /* Function: Compare vectors, return -1 if v1 is less than v2, 0 otherwise */
190
+ static void func_lt(void *v1, void *v2, void *r)
191
+ {
192
+ *(i4v *)r = (*(f4v *)v1 < *(f4v *)v2);
193
+ }
@@ -6,11 +6,19 @@ static VALUE method_multiply(VALUE self, VALUE obj);
6
6
  static VALUE method_divide(VALUE self, VALUE obj);
7
7
  static VALUE method_add(VALUE self, VALUE obj);
8
8
  static VALUE method_subtract(VALUE self, VALUE obj);
9
+ static VALUE method_and(VALUE self, VALUE obj);
10
+ static VALUE method_or(VALUE self, VALUE obj);
11
+ static VALUE method_xor(VALUE self, VALUE obj);
12
+ static VALUE method_gt(VALUE self, VALUE obj);
13
+ static VALUE method_lt(VALUE self, VALUE obj);
9
14
  static VALUE method_to_a(VALUE self);
10
15
 
11
- static VALUE internal_apply_operation(VALUE self, VALUE obj, b_operation func);
12
-
13
16
  static void func_multiply(void *v1, void *v2, void *r);
14
17
  static void func_divide(void *v1, void *v2, void *r);
15
18
  static void func_add(void *v1, void *v2, void *r);
16
19
  static void func_subtract(void *v1, void *v2, void *r);
20
+ static void func_and(void *v1, void *v2, void *r);
21
+ static void func_or(void *v1, void *v2, void *r);
22
+ static void func_xor(void *v1, void *v2, void *r);
23
+ static void func_gt(void *v1, void *v2, void *r);
24
+ static void func_lt(void *v1, void *v2, void *r);
@@ -40,17 +40,17 @@ typedef union i4v_t
40
40
  /*
41
41
  * Types for LongArray
42
42
  */
43
- typedef long int __attribute__ ((vector_size (16))) l2v;
43
+ typedef long long int __attribute__ ((vector_size (16))) l2v;
44
44
  typedef union l2v_t
45
45
  {
46
46
  l2v v;
47
- long int f[2];
47
+ long long int f[2];
48
48
  } l2v_t;
49
49
 
50
50
  typedef struct vector_t
51
51
  {
52
52
  void *data;
53
- unsigned long len;
53
+ unsigned long long int len;
54
54
  } vector_t;
55
55
 
56
56
  typedef void (b_operation)(void *v1, void *v2, void *r);
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tina Wuest
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-12 00:00:00.000000000 Z
11
+ date: 2022-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1.2'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1.2'
27
27
  description: Access to SIMD (Single Instruction Multiple Data) instructions in Ruby
28
28
  email: tina@wuest.me
29
29
  executables: []
@@ -48,7 +48,7 @@ files:
48
48
  homepage: https://gitlab.com/wuest/simd-ruby
49
49
  licenses: []
50
50
  metadata: {}
51
- post_install_message:
51
+ post_install_message:
52
52
  rdoc_options: []
53
53
  require_paths:
54
54
  - lib
@@ -63,9 +63,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
63
63
  - !ruby/object:Gem::Version
64
64
  version: '0'
65
65
  requirements: []
66
- rubyforge_project:
67
- rubygems_version: 2.4.4
68
- signing_key:
66
+ rubygems_version: 3.3.7
67
+ signing_key:
69
68
  specification_version: 4
70
69
  summary: SIMD instructions in ruby
71
70
  test_files: []