barracuda 1.0 → 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +25 -5
- data/benchmarks/normalize.rb +44 -0
- data/benchmarks/sort.rb +30 -0
- data/benchmarks/to_float.rb +2 -2
- data/ext/barracuda.c +339 -114
- data/test/test_barracuda.rb +114 -14
- metadata +4 -2
data/README.md
CHANGED
@@ -15,9 +15,6 @@ Barracuda aims to abstract both CUDA and OpenCL, however for now only OpenCL
|
|
15
15
|
on OSX 10.6 is supported. Patches to extend this support would be joyously
|
16
16
|
accepted!
|
17
17
|
|
18
|
-
Also note that Barracuda currently only supports data types, namely ints and
|
19
|
-
floats only. This should also be expanded.
|
20
|
-
|
21
18
|
INSTALLING
|
22
19
|
----------
|
23
20
|
|
@@ -87,7 +84,7 @@ to run). Barracuda automatically selects the size of the largest buffer as
|
|
87
84
|
the work group size, but in some cases this may be too small or too large. To
|
88
85
|
manually specify the work group size, call the kernel with an options hash:
|
89
86
|
|
90
|
-
program.my_kernel_method(..., :
|
87
|
+
program.my_kernel_method(..., :times => 512)
|
91
88
|
|
92
89
|
Note that the work group size must be a power of 2. Barracuda will increase
|
93
90
|
the work group size to the next power of 2 if it needs to. This means your
|
@@ -95,6 +92,29 @@ OpenCL program might run more iterations of your kernel method than you
|
|
95
92
|
request. Because we can't rely on the work group size, we pass in the total
|
96
93
|
data size to ensure we do not exceed the bounds of our data.
|
97
94
|
|
95
|
+
CONVERTING TYPES
|
96
|
+
----------------
|
97
|
+
|
98
|
+
OpenCL has a variety of native types. Most of them are supported, however some
|
99
|
+
are not. Because Ruby only has the concept of Float and Fixnum (integer), you
|
100
|
+
may need to tell Barracuda the type of your input if you're trying to pass in
|
101
|
+
a char, short or double (or possibly have some signedness restrictions). To
|
102
|
+
do this, simply call `.to_type(:my_type)` on the input where `:my_type` is
|
103
|
+
a key in the `Barracuda::TYPES` hash:
|
104
|
+
|
105
|
+
>> Barracuda::TYPES.keys
|
106
|
+
=> [:bool, :char, :uchar, :short, :ushort, :int, :uint, :long,
|
107
|
+
:ulong, :float, :half, :double, :size_t, :ptrdiff_t,
|
108
|
+
:intptr_t, :uintptr_t]
|
109
|
+
|
110
|
+
For example, to pass in a short, do:
|
111
|
+
|
112
|
+
program.my_kernel(2.to_type(:short))
|
113
|
+
|
114
|
+
This can also be applied to an Array of shorts:
|
115
|
+
|
116
|
+
program.my_kernel([1, 2, 3].to_type(:short))
|
117
|
+
|
98
118
|
CLASS DETAILS
|
99
119
|
-------------
|
100
120
|
|
@@ -110,7 +130,7 @@ Represents an OpenCL program
|
|
110
130
|
- args should be the arguments defined in the kernel method.
|
111
131
|
- supported argument types are Float and Fixnum objects only.
|
112
132
|
- if the last arg is a Hash, it should be an options hash with keys:
|
113
|
-
- :
|
133
|
+
- :times => FIXNUM (the number of iterations to run)
|
114
134
|
|
115
135
|
**Barracuda::Buffer**:
|
116
136
|
|
data/benchmarks/normalize.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../ext')
|
2
|
+
|
3
|
+
require 'barracuda'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
include Barracuda
|
7
|
+
|
8
|
+
def dist(*vec)
|
9
|
+
vec[0] * vec[0] + vec[1] * vec[1] + vec[2] * vec[2]
|
10
|
+
end
|
11
|
+
|
12
|
+
def normalize(*vec)
|
13
|
+
d = dist(*vec)
|
14
|
+
vec.map {|c| c / d }
|
15
|
+
end
|
16
|
+
|
17
|
+
def norm_all(arr)
|
18
|
+
out = []
|
19
|
+
0.step(arr.size - 1, 4) do |i|
|
20
|
+
vec = normalize(arr[i], arr[i + 1], arr[i + 2])
|
21
|
+
out.push(*vec, 0.0)
|
22
|
+
end
|
23
|
+
out
|
24
|
+
end
|
25
|
+
|
26
|
+
srand
|
27
|
+
prog = Program.new <<-'eof'
|
28
|
+
__kernel norm(__global float4 *out, __global float4 *in, int total) {
|
29
|
+
int i = get_global_id(0);
|
30
|
+
if (i < total) out[i] = normalize(in[i]);
|
31
|
+
}
|
32
|
+
eof
|
33
|
+
|
34
|
+
num_vecs = 100000
|
35
|
+
arr = []
|
36
|
+
num_vecs.times { arr.push(rand, rand, rand, 0.0) }
|
37
|
+
output = OutputBuffer.new(:float, arr.size)
|
38
|
+
|
39
|
+
|
40
|
+
Benchmark.bmbm do |x|
|
41
|
+
x.report("cpu") { norm_all(arr) }
|
42
|
+
x.report("gpu") { prog.norm(output, arr, num_vecs) }
|
43
|
+
end
|
44
|
+
|
data/benchmarks/sort.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../ext')
|
2
|
+
|
3
|
+
require 'barracuda'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
include Barracuda
|
7
|
+
|
8
|
+
prog = Program.new <<-'eof'
|
9
|
+
__kernel sort(__global int *out, __global int *in, int total) {
|
10
|
+
int i, final_index = 0, extra = 0;
|
11
|
+
int id = get_global_id(0);
|
12
|
+
if (id >= total) return;
|
13
|
+
int my_value = in[id];
|
14
|
+
for (i = 0; i < total; i++) {
|
15
|
+
if (in[i] < my_value) final_index++;
|
16
|
+
if (in[i] == my_value && i < id) extra++;
|
17
|
+
}
|
18
|
+
out[final_index+extra] = my_value;
|
19
|
+
}
|
20
|
+
eof
|
21
|
+
|
22
|
+
max = 1000
|
23
|
+
arr = (1..max).map { (rand * max).to_i }
|
24
|
+
output = OutputBuffer.new(:int, arr.size)
|
25
|
+
|
26
|
+
Benchmark.bm do |x|
|
27
|
+
x.report("cpu") { arr.sort }
|
28
|
+
x.report("gpu") { prog.sort(output, arr, arr.size) }
|
29
|
+
end
|
30
|
+
|
data/benchmarks/to_float.rb
CHANGED
@@ -18,7 +18,7 @@ output = OutputBuffer.new(:float, arr.size)
|
|
18
18
|
|
19
19
|
TIMES = 1
|
20
20
|
Benchmark.bmbm do |x|
|
21
|
-
x.report("
|
22
|
-
x.report("
|
21
|
+
x.report("regular") { TIMES.times { arr.map {|x| (x.to_f + 0.5) / 3.8 + 2.0 } } }
|
22
|
+
x.report("opencl") { TIMES.times { prog.sum(output, input, arr.size); output.clear } }
|
23
23
|
end
|
24
24
|
|
data/ext/barracuda.c
CHANGED
@@ -7,8 +7,31 @@ static VALUE rb_cOutputBuffer;
|
|
7
7
|
static VALUE rb_cProgram;
|
8
8
|
static VALUE rb_eProgramSyntaxError;
|
9
9
|
static VALUE rb_eOpenCLError;
|
10
|
-
|
11
|
-
static
|
10
|
+
static VALUE rb_cType;
|
11
|
+
static VALUE rb_hTypes;
|
12
|
+
|
13
|
+
static ID id_times;
|
14
|
+
static ID id_to_sym;
|
15
|
+
static ID id_data_type;
|
16
|
+
static ID id_object;
|
17
|
+
|
18
|
+
static ID id_type_bool;
|
19
|
+
static ID id_type_char;
|
20
|
+
static ID id_type_uchar;
|
21
|
+
static ID id_type_short;
|
22
|
+
static ID id_type_ushort;
|
23
|
+
static ID id_type_int;
|
24
|
+
static ID id_type_uint;
|
25
|
+
static ID id_type_long;
|
26
|
+
static ID id_type_ulong;
|
27
|
+
static ID id_type_float;
|
28
|
+
static ID id_type_half;
|
29
|
+
static ID id_type_double;
|
30
|
+
static ID id_type_size_t;
|
31
|
+
static ID id_type_ptrdiff_t;
|
32
|
+
static ID id_type_intptr_t;
|
33
|
+
static ID id_type_uintptr_t;
|
34
|
+
/*static ID id_type_void;*/
|
12
35
|
|
13
36
|
static VALUE program_compile(VALUE self, VALUE source);
|
14
37
|
static VALUE buffer_data_set(VALUE self, VALUE new_value);
|
@@ -17,9 +40,7 @@ static cl_device_id device_id = NULL;
|
|
17
40
|
static cl_context context = NULL;
|
18
41
|
static int err;
|
19
42
|
|
20
|
-
#define
|
21
|
-
#define BUFFER_TYPE_INT 0x0002
|
22
|
-
#define BUFFER_TYPE_CHAR 0x0003
|
43
|
+
#define VERSION_STRING "1.1"
|
23
44
|
|
24
45
|
struct program {
|
25
46
|
cl_program program;
|
@@ -31,13 +52,78 @@ struct kernel {
|
|
31
52
|
|
32
53
|
struct buffer {
|
33
54
|
VALUE arr;
|
34
|
-
|
55
|
+
ID type;
|
35
56
|
size_t num_items;
|
36
57
|
size_t member_size;
|
37
58
|
void *cachebuf;
|
38
59
|
cl_mem data;
|
39
60
|
};
|
40
61
|
|
62
|
+
static VALUE
|
63
|
+
data_type_set(VALUE self, VALUE value)
|
64
|
+
{
|
65
|
+
if (TYPE(value) != T_SYMBOL) {
|
66
|
+
value = rb_str_intern(rb_String(value));
|
67
|
+
}
|
68
|
+
if (rb_hash_aref(rb_hTypes, value) == Qnil) {
|
69
|
+
rb_raise(rb_eArgError, "invalid data type %s",
|
70
|
+
RSTRING_PTR(rb_inspect(value)));
|
71
|
+
}
|
72
|
+
|
73
|
+
rb_ivar_set(self, id_data_type, value);
|
74
|
+
return self;
|
75
|
+
}
|
76
|
+
|
77
|
+
static VALUE
|
78
|
+
data_type_get(VALUE self, ID type)
|
79
|
+
{
|
80
|
+
VALUE value = rb_ivar_get(self, id_data_type);
|
81
|
+
if (NIL_P(value)) {
|
82
|
+
value = ID2SYM(type);
|
83
|
+
data_type_set(self, value);
|
84
|
+
}
|
85
|
+
return value;
|
86
|
+
}
|
87
|
+
|
88
|
+
static VALUE
|
89
|
+
object_data_type_get(VALUE self)
|
90
|
+
{
|
91
|
+
return rb_ivar_get(self, id_data_type);
|
92
|
+
}
|
93
|
+
|
94
|
+
static VALUE
|
95
|
+
fixnum_data_type_get(VALUE self)
|
96
|
+
{
|
97
|
+
return ID2SYM(id_type_int);
|
98
|
+
}
|
99
|
+
|
100
|
+
static VALUE
|
101
|
+
bignum_data_type_get(VALUE self)
|
102
|
+
{
|
103
|
+
return data_type_get(self, id_type_long);
|
104
|
+
}
|
105
|
+
|
106
|
+
static VALUE
|
107
|
+
float_data_type_get(VALUE self)
|
108
|
+
{
|
109
|
+
return data_type_get(self, id_type_float);
|
110
|
+
}
|
111
|
+
|
112
|
+
static VALUE
|
113
|
+
array_data_type_get(VALUE self)
|
114
|
+
{
|
115
|
+
VALUE value = rb_ivar_get(self, id_data_type);
|
116
|
+
if (RTEST(value)) return value;
|
117
|
+
|
118
|
+
if (RARRAY_LEN(self) > 0) {
|
119
|
+
VALUE value = rb_funcall(RARRAY_PTR(self)[0], id_data_type, 0);
|
120
|
+
if (RTEST(value)) return value;
|
121
|
+
}
|
122
|
+
|
123
|
+
rb_raise(rb_eRuntimeError, "unknown buffer data in array %s",
|
124
|
+
RSTRING_PTR(rb_inspect(self)));
|
125
|
+
}
|
126
|
+
|
41
127
|
#define GET_PROGRAM() \
|
42
128
|
struct program *program; \
|
43
129
|
Data_Get_Struct(self, struct program, program);
|
@@ -46,28 +132,143 @@ struct buffer {
|
|
46
132
|
struct buffer *buffer; \
|
47
133
|
Data_Get_Struct(self, struct buffer, buffer);
|
48
134
|
|
135
|
+
#define TYPE_SET(type, size) \
|
136
|
+
id_type_##type = rb_intern(#type); \
|
137
|
+
rb_hash_aset(rb_hTypes, ID2SYM(id_type_##type), INT2FIX(sizeof(size)));
|
138
|
+
|
139
|
+
#define TYPE_TO_NATIVE(type_name, cast_type, CONVERT_FUNC) \
|
140
|
+
if (id_type_##type_name == data_type) { \
|
141
|
+
*((cast_type*)native_value) = (cast_type)CONVERT_FUNC(value); \
|
142
|
+
return; \
|
143
|
+
}
|
144
|
+
|
145
|
+
#define TYPE_TO_RUBY(type_name, cast_type, CONVERT_FUNC) \
|
146
|
+
if (id_type_##type_name == data_type) { \
|
147
|
+
return CONVERT_FUNC(*((cast_type*)native_value)); \
|
148
|
+
}
|
149
|
+
|
49
150
|
static void
|
50
|
-
|
151
|
+
types_hash_init()
|
51
152
|
{
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
153
|
+
TYPE_SET(bool, char);
|
154
|
+
TYPE_SET(char, cl_char);
|
155
|
+
TYPE_SET(uchar, cl_uchar);
|
156
|
+
TYPE_SET(short, cl_short);
|
157
|
+
TYPE_SET(ushort, cl_ushort);
|
158
|
+
TYPE_SET(int, cl_int);
|
159
|
+
TYPE_SET(uint, cl_uint);
|
160
|
+
TYPE_SET(long, cl_long);
|
161
|
+
TYPE_SET(ulong, cl_ulong);
|
162
|
+
TYPE_SET(float, cl_float);
|
163
|
+
TYPE_SET(half, cl_half);
|
164
|
+
TYPE_SET(double, cl_double);
|
165
|
+
TYPE_SET(size_t, size_t);
|
166
|
+
TYPE_SET(ptrdiff_t, ptrdiff_t);
|
167
|
+
TYPE_SET(intptr_t, intptr_t);
|
168
|
+
TYPE_SET(uintptr_t, uintptr_t);
|
169
|
+
OBJ_FREEZE(rb_hTypes);
|
170
|
+
}
|
58
171
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
172
|
+
static void
|
173
|
+
type_to_native(VALUE value, ID data_type, void *native_value)
|
174
|
+
{
|
175
|
+
if (id_type_char == data_type || id_type_uchar == data_type) {
|
176
|
+
if (TYPE(value) == T_FIXNUM) {
|
177
|
+
value = rb_funcall(value, rb_intern("chr"), 0);
|
63
178
|
}
|
179
|
+
*((cl_char *)native_value) = RSTRING_PTR(value)[0];
|
180
|
+
return;
|
181
|
+
}
|
182
|
+
if (id_type_float == data_type || id_type_double == data_type) {
|
183
|
+
*((cl_float *)native_value) = TYPE(value) == T_FIXNUM ?
|
184
|
+
(cl_float)FIX2INT(value) : RFLOAT_VALUE(value);
|
185
|
+
return;
|
186
|
+
}
|
187
|
+
if (id_type_half == data_type) {
|
188
|
+
*((cl_half *)native_value) = TYPE(value) == T_FIXNUM ?
|
189
|
+
(cl_half)FIX2INT(value) : RFLOAT_VALUE(value);
|
190
|
+
return;
|
64
191
|
}
|
192
|
+
|
193
|
+
TYPE_TO_NATIVE(bool, char, FIX2INT);
|
194
|
+
TYPE_TO_NATIVE(short, cl_short, FIX2INT);
|
195
|
+
TYPE_TO_NATIVE(ushort, cl_ushort, NUM2UINT);
|
196
|
+
TYPE_TO_NATIVE(int, cl_int, FIX2INT);
|
197
|
+
TYPE_TO_NATIVE(uint, cl_uint, NUM2UINT);
|
198
|
+
TYPE_TO_NATIVE(long, cl_long, NUM2LONG);
|
199
|
+
TYPE_TO_NATIVE(ulong, cl_ulong, NUM2ULONG);
|
200
|
+
TYPE_TO_NATIVE(double, cl_double, NUM2DBL);
|
201
|
+
TYPE_TO_NATIVE(size_t, size_t, NUM2UINT);
|
202
|
+
TYPE_TO_NATIVE(ptrdiff_t, ptrdiff_t, NUM2UINT);
|
203
|
+
TYPE_TO_NATIVE(intptr_t, intptr_t, NUM2UINT);
|
204
|
+
TYPE_TO_NATIVE(uintptr_t, uintptr_t, NUM2UINT);
|
205
|
+
}
|
206
|
+
|
207
|
+
static VALUE
|
208
|
+
type_to_ruby(void *native_value, ID data_type)
|
209
|
+
{
|
210
|
+
TYPE_TO_RUBY(bool, char, INT2FIX);
|
211
|
+
TYPE_TO_RUBY(char, cl_char, INT2FIX);
|
212
|
+
TYPE_TO_RUBY(uchar, cl_uchar, UINT2NUM);
|
213
|
+
TYPE_TO_RUBY(short, cl_short, INT2FIX);
|
214
|
+
TYPE_TO_RUBY(ushort, cl_ushort, UINT2NUM);
|
215
|
+
TYPE_TO_RUBY(int, cl_int, INT2FIX);
|
216
|
+
TYPE_TO_RUBY(uint, cl_uint, UINT2NUM);
|
217
|
+
TYPE_TO_RUBY(long, cl_long, LONG2NUM);
|
218
|
+
TYPE_TO_RUBY(ulong, cl_ulong, ULONG2NUM);
|
219
|
+
TYPE_TO_RUBY(float, cl_float, rb_float_new);
|
220
|
+
TYPE_TO_RUBY(half, cl_half, rb_float_new);
|
221
|
+
TYPE_TO_RUBY(double, cl_double, DBL2NUM);
|
222
|
+
TYPE_TO_RUBY(size_t, size_t, UINT2NUM);
|
223
|
+
TYPE_TO_RUBY(ptrdiff_t, ptrdiff_t, UINT2NUM);
|
224
|
+
TYPE_TO_RUBY(intptr_t, intptr_t, UINT2NUM);
|
225
|
+
TYPE_TO_RUBY(uintptr_t, uintptr_t, UINT2NUM);
|
226
|
+
return Qnil;
|
227
|
+
}
|
228
|
+
|
229
|
+
static VALUE
|
230
|
+
type_initialize(VALUE self, VALUE object)
|
231
|
+
{
|
232
|
+
rb_ivar_set(self, id_object, object);
|
233
|
+
return self;
|
234
|
+
}
|
235
|
+
|
236
|
+
static VALUE
|
237
|
+
type_method_missing(VALUE self, VALUE type)
|
238
|
+
{
|
239
|
+
data_type_set(self, type);
|
240
|
+
return self;
|
241
|
+
}
|
242
|
+
|
243
|
+
static VALUE
|
244
|
+
type_object(VALUE self)
|
245
|
+
{
|
246
|
+
return rb_ivar_get(self, id_object);
|
247
|
+
}
|
248
|
+
|
249
|
+
static VALUE
|
250
|
+
object_to_type(VALUE self, VALUE type)
|
251
|
+
{
|
252
|
+
rb_ivar_set(self, id_data_type, type);
|
253
|
+
return self;
|
254
|
+
}
|
255
|
+
|
256
|
+
static VALUE
|
257
|
+
fixnum_to_type(VALUE self, VALUE type)
|
258
|
+
{
|
259
|
+
VALUE out = rb_funcall(rb_cType, rb_intern("new"), 1, self);
|
260
|
+
return type_method_missing(out, type);
|
261
|
+
}
|
262
|
+
|
263
|
+
static VALUE
|
264
|
+
type_new(VALUE klass, VALUE type)
|
265
|
+
{
|
266
|
+
return rb_funcall(rb_cType, rb_intern("new"), 1, type);
|
65
267
|
}
|
66
268
|
|
67
269
|
static void
|
68
270
|
free_buffer(struct buffer *buffer)
|
69
271
|
{
|
70
|
-
fflush(stdout);
|
71
272
|
clReleaseMemObject(buffer->data);
|
72
273
|
rb_gc_mark(buffer->arr);
|
73
274
|
ruby_xfree(buffer->cachebuf);
|
@@ -88,26 +289,15 @@ static void
|
|
88
289
|
buffer_update_cache_info(struct buffer *buffer)
|
89
290
|
{
|
90
291
|
buffer->num_items = RARRAY_LEN(buffer->arr);
|
91
|
-
|
92
|
-
|
93
|
-
case T_FIXNUM:
|
94
|
-
buffer->type = BUFFER_TYPE_INT;
|
95
|
-
buffer->member_size = sizeof(int);
|
96
|
-
break;
|
97
|
-
case T_FLOAT:
|
98
|
-
buffer->type = BUFFER_TYPE_FLOAT;
|
99
|
-
buffer->member_size = sizeof(float);
|
100
|
-
break;
|
101
|
-
default:
|
102
|
-
rb_raise(rb_eRuntimeError, "invalid buffer data %s",
|
103
|
-
RSTRING_PTR(rb_inspect(buffer->arr)));
|
104
|
-
}
|
292
|
+
buffer->type = SYM2ID(rb_funcall(buffer->arr, id_data_type, 0));
|
293
|
+
buffer->member_size = FIX2INT(rb_hash_aref(rb_hTypes, ID2SYM(buffer->type)));
|
105
294
|
}
|
106
295
|
|
107
296
|
static VALUE
|
108
297
|
buffer_write(VALUE self)
|
109
298
|
{
|
110
|
-
unsigned int i;
|
299
|
+
unsigned int i, index;
|
300
|
+
unsigned long data_ptr[16]; // data buffer
|
111
301
|
|
112
302
|
GET_BUFFER();
|
113
303
|
|
@@ -118,22 +308,11 @@ buffer_write(VALUE self)
|
|
118
308
|
}
|
119
309
|
buffer->cachebuf = malloc(buffer->num_items * buffer->member_size);
|
120
310
|
|
121
|
-
for (i = 0; i < RARRAY_LEN(buffer->arr); i
|
311
|
+
for (i = 0, index = 0; i < RARRAY_LEN(buffer->arr); i++, index += buffer->member_size) {
|
122
312
|
VALUE item = RARRAY_PTR(buffer->arr)[i];
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
((int *)buffer->cachebuf)[i] = value;
|
127
|
-
break;
|
128
|
-
}
|
129
|
-
case BUFFER_TYPE_FLOAT: {
|
130
|
-
float value = RFLOAT_VALUE(item);
|
131
|
-
((float *)buffer->cachebuf)[i] = value;
|
132
|
-
break;
|
133
|
-
}
|
134
|
-
default:
|
135
|
-
((uint32_t *)buffer->cachebuf)[i] = 0;
|
136
|
-
}
|
313
|
+
|
314
|
+
type_to_native(item, buffer->type, (void *)data_ptr);
|
315
|
+
memcpy(((int8_t*)buffer->cachebuf) + index, (void *)data_ptr, buffer->member_size);
|
137
316
|
}
|
138
317
|
|
139
318
|
return self;
|
@@ -142,24 +321,16 @@ buffer_write(VALUE self)
|
|
142
321
|
static VALUE
|
143
322
|
buffer_read(VALUE self)
|
144
323
|
{
|
145
|
-
unsigned int i;
|
324
|
+
unsigned int i, index;
|
146
325
|
|
147
326
|
GET_BUFFER();
|
148
327
|
|
149
328
|
rb_gc_mark(buffer->arr);
|
150
329
|
buffer->arr = rb_ary_new2(buffer->num_items);
|
151
330
|
|
152
|
-
for (i = 0; i < buffer->num_items; i
|
153
|
-
|
154
|
-
|
155
|
-
rb_ary_push(buffer->arr, INT2FIX(((int *)buffer->cachebuf)[i]));
|
156
|
-
break;
|
157
|
-
case BUFFER_TYPE_FLOAT:
|
158
|
-
rb_ary_push(buffer->arr, rb_float_new(((float *)buffer->cachebuf)[i]));
|
159
|
-
break;
|
160
|
-
default:
|
161
|
-
rb_ary_push(buffer->arr, Qnil);
|
162
|
-
}
|
331
|
+
for (i = 0, index = 0; i < buffer->num_items; i++, index += buffer->member_size) {
|
332
|
+
VALUE value = type_to_ruby(((int8_t*)buffer->cachebuf) + index, buffer->type);
|
333
|
+
rb_ary_push(buffer->arr, value);
|
163
334
|
}
|
164
335
|
|
165
336
|
return self;
|
@@ -224,25 +395,21 @@ buffer_initialize(int argc, VALUE *argv, VALUE self)
|
|
224
395
|
static VALUE
|
225
396
|
obuffer_initialize(VALUE self, VALUE type, VALUE size)
|
226
397
|
{
|
398
|
+
VALUE type_sym, member_size;
|
227
399
|
GET_BUFFER();
|
228
400
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
else if (strcmp(RSTRING_PTR(type), "int") == 0) {
|
235
|
-
buffer->type = BUFFER_TYPE_INT;
|
236
|
-
buffer->member_size = sizeof(int);
|
237
|
-
}
|
238
|
-
else {
|
239
|
-
rb_raise(rb_eArgError, "type can only be :float or :int");
|
401
|
+
type_sym = rb_funcall(type, id_to_sym, 0);
|
402
|
+
member_size = rb_hash_aref(rb_hTypes, type_sym);
|
403
|
+
if (NIL_P(member_size)) {
|
404
|
+
rb_raise(rb_eArgError, "type can only be one of %s",
|
405
|
+
RSTRING_PTR(rb_inspect(rb_funcall(rb_hTypes, rb_intern("keys"), 0))));
|
240
406
|
}
|
241
|
-
|
242
407
|
if (TYPE(size) != T_FIXNUM) {
|
243
408
|
rb_raise(rb_eArgError, "expecting buffer size as argument 2");
|
244
409
|
}
|
245
410
|
|
411
|
+
buffer->type = SYM2ID(type_sym);
|
412
|
+
buffer->member_size = FIX2INT(member_size);
|
246
413
|
buffer->num_items = FIX2UINT(size);
|
247
414
|
buffer->cachebuf = malloc(buffer->num_items * buffer->member_size);
|
248
415
|
buffer->data = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
@@ -355,62 +522,79 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
|
|
355
522
|
|
356
523
|
commands = clCreateCommandQueue(context, device_id, 0, &err);
|
357
524
|
if (!commands) {
|
525
|
+
clReleaseKernel(kernel);
|
358
526
|
rb_raise(rb_eOpenCLError, "could not execute kernel method '%s'", RSTRING_PTR(argv[0]));
|
359
527
|
}
|
360
528
|
|
361
529
|
for (i = 1; i < argc; i++) {
|
362
|
-
|
363
|
-
|
364
|
-
|
530
|
+
VALUE item = argv[i];
|
531
|
+
err = !CL_SUCCESS;
|
532
|
+
|
533
|
+
if (i == argc - 1 && TYPE(item) == T_HASH) {
|
534
|
+
VALUE worker_size = rb_hash_aref(item, ID2SYM(id_times));
|
365
535
|
if (RTEST(worker_size) && TYPE(worker_size) == T_FIXNUM) {
|
366
536
|
global = FIX2UINT(worker_size);
|
367
537
|
}
|
368
538
|
else {
|
369
539
|
CLEAN();
|
370
|
-
rb_raise(rb_eArgError, "opts hash must be {:
|
371
|
-
RSTRING_PTR(rb_inspect(
|
540
|
+
rb_raise(rb_eArgError, "opts hash must be {:times => INT_VALUE}, got %s",
|
541
|
+
RSTRING_PTR(rb_inspect(item)));
|
372
542
|
}
|
373
543
|
break;
|
374
544
|
}
|
375
545
|
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
546
|
+
if (TYPE(item) == T_ARRAY) {
|
547
|
+
/* create buffer from arg */
|
548
|
+
VALUE buf = buffer_s_allocate(rb_cBuffer);
|
549
|
+
item = buffer_initialize(1, &item, buf);
|
550
|
+
}
|
551
|
+
|
552
|
+
if (CLASS_OF(item) == rb_cOutputBuffer) {
|
553
|
+
struct buffer *buffer;
|
554
|
+
Data_Get_Struct(item, struct buffer, buffer);
|
555
|
+
err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
|
556
|
+
if (buffer->num_items > global) {
|
557
|
+
global = buffer->num_items;
|
381
558
|
}
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
559
|
+
}
|
560
|
+
else if (CLASS_OF(item) == rb_cBuffer) {
|
561
|
+
struct buffer *buffer;
|
562
|
+
Data_Get_Struct(item, struct buffer, buffer);
|
563
|
+
|
564
|
+
buffer_write(item);
|
565
|
+
clEnqueueWriteBuffer(commands, buffer->data, CL_TRUE, 0,
|
566
|
+
buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
|
567
|
+
err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
|
568
|
+
if (buffer->num_items > global) {
|
569
|
+
global = buffer->num_items;
|
386
570
|
}
|
387
|
-
|
388
|
-
|
389
|
-
|
571
|
+
}
|
572
|
+
else {
|
573
|
+
unsigned long data_ptr[16]; // a buffer of data
|
574
|
+
size_t data_size_t;
|
575
|
+
VALUE data_type, data_size;
|
576
|
+
|
577
|
+
if (CLASS_OF(item) == rb_cType) {
|
578
|
+
data_type = rb_funcall(type_object(item), id_data_type, 0);
|
579
|
+
}
|
580
|
+
else {
|
581
|
+
data_type = rb_funcall(item, id_data_type, 0);
|
390
582
|
}
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
struct buffer *buffer;
|
402
|
-
Data_Get_Struct(argv[i], struct buffer, buffer);
|
403
|
-
|
404
|
-
buffer_write(argv[i]);
|
405
|
-
clEnqueueWriteBuffer(commands, buffer->data, CL_TRUE, 0,
|
406
|
-
buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
|
407
|
-
err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
|
408
|
-
}
|
409
|
-
break;
|
583
|
+
data_size = rb_hash_aref(rb_hTypes, data_type);
|
584
|
+
if (NIL_P(data_size)) {
|
585
|
+
CLEAN();
|
586
|
+
rb_raise(rb_eRuntimeError, "invalid data type for %s",
|
587
|
+
RSTRING_PTR(rb_inspect(item)));
|
588
|
+
}
|
589
|
+
|
590
|
+
data_size_t = FIX2UINT(data_size);
|
591
|
+
type_to_native(item, SYM2ID(data_type), (void *)data_ptr);
|
592
|
+
err = clSetKernelArg(kernel, i - 1, data_size_t, data_ptr);
|
410
593
|
}
|
594
|
+
|
411
595
|
if (err != CL_SUCCESS) {
|
412
596
|
CLEAN();
|
413
|
-
rb_raise(rb_eArgError, "invalid kernel method parameter: %s", RSTRING_PTR(rb_inspect(
|
597
|
+
rb_raise(rb_eArgError, "invalid kernel method parameter: %s", RSTRING_PTR(rb_inspect(item)));
|
414
598
|
}
|
415
599
|
}
|
416
600
|
|
@@ -430,13 +614,14 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
|
|
430
614
|
clFinish(commands);
|
431
615
|
|
432
616
|
for (i = 1; i < argc; i++) {
|
433
|
-
|
617
|
+
VALUE item = argv[i];
|
618
|
+
if (CLASS_OF(item) == rb_cOutputBuffer) {
|
434
619
|
struct buffer *buffer;
|
435
|
-
Data_Get_Struct(
|
620
|
+
Data_Get_Struct(item, struct buffer, buffer);
|
436
621
|
err = clEnqueueReadBuffer(commands, buffer->data, CL_TRUE, 0,
|
437
622
|
buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
|
438
623
|
ERROR("failed to read output buffer");
|
439
|
-
buffer_read(
|
624
|
+
buffer_read(item);
|
440
625
|
}
|
441
626
|
}
|
442
627
|
|
@@ -444,12 +629,39 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
|
|
444
629
|
return Qnil;
|
445
630
|
}
|
446
631
|
|
632
|
+
static void
|
633
|
+
init_opencl()
|
634
|
+
{
|
635
|
+
if (device_id == NULL) {
|
636
|
+
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
|
637
|
+
if (err != CL_SUCCESS) {
|
638
|
+
rb_raise(rb_eOpenCLError, "failed to create a device group");
|
639
|
+
}
|
640
|
+
}
|
641
|
+
|
642
|
+
if (context == NULL) {
|
643
|
+
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
|
644
|
+
if (!context) {
|
645
|
+
rb_raise(rb_eOpenCLError, "failed to create a program context");
|
646
|
+
}
|
647
|
+
}
|
648
|
+
}
|
649
|
+
|
447
650
|
void
|
448
651
|
Init_barracuda()
|
449
652
|
{
|
450
|
-
|
653
|
+
id_times = rb_intern("times");
|
654
|
+
id_to_sym = rb_intern("to_sym");
|
655
|
+
id_data_type = rb_intern("data_type");
|
656
|
+
id_object = rb_intern("object");
|
657
|
+
|
658
|
+
rb_hTypes = rb_hash_new();
|
659
|
+
rb_define_method(rb_mKernel, "Type", type_new, 1);
|
660
|
+
types_hash_init();
|
451
661
|
|
452
662
|
rb_mBarracuda = rb_define_module("Barracuda");
|
663
|
+
rb_define_const(rb_mBarracuda, "VERSION", rb_str_new2(VERSION_STRING));
|
664
|
+
rb_define_const(rb_mBarracuda, "TYPES", rb_hTypes);
|
453
665
|
|
454
666
|
rb_eProgramSyntaxError = rb_define_class_under(rb_mBarracuda, "SyntaxError", rb_eSyntaxError);
|
455
667
|
rb_eOpenCLError = rb_define_class_under(rb_mBarracuda, "OpenCLError", rb_eStandardError);
|
@@ -476,6 +688,19 @@ Init_barracuda()
|
|
476
688
|
rb_undef_method(rb_cOutputBuffer, "write");
|
477
689
|
rb_undef_method(rb_cOutputBuffer, "size_changed");
|
478
690
|
rb_undef_method(rb_cOutputBuffer, "data=");
|
479
|
-
|
691
|
+
|
692
|
+
rb_cType = rb_define_class_under(rb_mBarracuda, "Type", rb_cObject);
|
693
|
+
rb_define_method(rb_cType, "initialize", type_initialize, 1);
|
694
|
+
rb_define_method(rb_cType, "method_missing", type_method_missing, 1);
|
695
|
+
rb_define_method(rb_cType, "object", type_object, 0);
|
696
|
+
|
697
|
+
rb_define_method(rb_cObject, "to_type", object_to_type, 1);
|
698
|
+
rb_define_method(rb_cFixnum, "to_type", fixnum_to_type, 1);
|
699
|
+
rb_define_method(rb_cObject, "data_type", object_data_type_get, 0);
|
700
|
+
rb_define_method(rb_cArray, "data_type", array_data_type_get, 0);
|
701
|
+
rb_define_method(rb_cFixnum, "data_type", fixnum_data_type_get, 0);
|
702
|
+
rb_define_method(rb_cBignum, "data_type", bignum_data_type_get, 0);
|
703
|
+
rb_define_method(rb_cFloat, "data_type", float_data_type_get, 0);
|
704
|
+
|
480
705
|
init_opencl();
|
481
706
|
}
|
data/test/test_barracuda.rb
CHANGED
@@ -5,6 +5,55 @@ require "barracuda"
|
|
5
5
|
|
6
6
|
include Barracuda
|
7
7
|
|
8
|
+
class TestDataTypes < Test::Unit::TestCase
|
9
|
+
def test_default_fixnum_type
|
10
|
+
assert_equal :int, 2.data_type
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_default_bignum_type
|
14
|
+
assert_equal :long, (2**64).data_type
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_default_float_type
|
18
|
+
assert_equal :float, 2.5.data_type
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_default_array_type
|
22
|
+
assert_equal :int, [2].data_type
|
23
|
+
assert_equal :float, [2.5, 2.6].data_type
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_set_data_type_fixnum
|
27
|
+
assert_equal :char, 2.to_type(:char).data_type
|
28
|
+
assert_equal :int, 2.data_type
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_set_data_type
|
32
|
+
[2**64, 2.5, [2]].each do |v|
|
33
|
+
assert_equal :char, v.to_type(:char).data_type
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_set_invalid_data_type
|
38
|
+
assert_raise(ArgumentError) { 1.to_type(:unknown) }
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_invalid_array_data_type
|
42
|
+
assert_raise(RuntimeError) { [Object.new].data_type }
|
43
|
+
assert_raise(RuntimeError) { ['x'].data_type }
|
44
|
+
assert_raise(RuntimeError) { [].data_type }
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_object_data_type
|
48
|
+
assert_nil Object.new.data_type
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_type_class
|
52
|
+
assert_equal :long, Type.new(1).long.data_type
|
53
|
+
assert_equal :uchar, Type(1).uchar.data_type
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
8
57
|
class TestBuffer < Test::Unit::TestCase
|
9
58
|
def test_buffer_create_no_data
|
10
59
|
assert_raise(ArgumentError) { Buffer.new }
|
@@ -55,18 +104,14 @@ class TestBuffer < Test::Unit::TestCase
|
|
55
104
|
end
|
56
105
|
|
57
106
|
class TestOutputBuffer < Test::Unit::TestCase
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def test_create_int_output_buffer
|
64
|
-
b = OutputBuffer.new(:float, 5)
|
65
|
-
assert_equal 5, b.size
|
107
|
+
def test_create_output_buffer_valid_types
|
108
|
+
TYPES.keys.each do |type|
|
109
|
+
assert_nothing_raised { OutputBuffer.new(type.to_s, 5) }
|
110
|
+
end
|
66
111
|
end
|
67
112
|
|
68
113
|
def test_create_output_buffer_with_invalid_type
|
69
|
-
assert_raise(ArgumentError) { OutputBuffer.new(:
|
114
|
+
assert_raise(ArgumentError) { OutputBuffer.new(:CHAR, 5) }
|
70
115
|
end
|
71
116
|
|
72
117
|
def test_create_output_buffer_with_invalid_size
|
@@ -98,6 +143,46 @@ class TestProgram < Test::Unit::TestCase
|
|
98
143
|
assert_raise(NoMethodError) { p.not_x_y_z }
|
99
144
|
end
|
100
145
|
|
146
|
+
def test_program_implicit_array_buffer
|
147
|
+
p = Program.new <<-'eof'
|
148
|
+
__kernel copy(__global int *out, __global int *in, int total) {
|
149
|
+
int i = get_global_id(0);
|
150
|
+
if (i < total) out[i] = in[i] + 1;
|
151
|
+
}
|
152
|
+
eof
|
153
|
+
|
154
|
+
out = OutputBuffer.new(:int, 3)
|
155
|
+
p.copy(out, [1, 2, 3], 3)
|
156
|
+
assert_equal [2, 3, 4], out.data
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_program_types
|
160
|
+
arr = (1..5).to_a
|
161
|
+
outarr = arr.map {|x| x + 1 }
|
162
|
+
p = Program.new
|
163
|
+
|
164
|
+
TYPES.keys.each do |type|
|
165
|
+
# FIXME These types are currently broken (unimplemented in opencl?)
|
166
|
+
next if type == :bool
|
167
|
+
next if type == :double
|
168
|
+
next if type == :size_t
|
169
|
+
next if type == :ptrdiff_t
|
170
|
+
next if type == :intptr_t
|
171
|
+
next if type == :uintptr_t
|
172
|
+
|
173
|
+
p.compile <<-eof
|
174
|
+
__kernel run(__global #{type} *out, __global #{type} *in, int total) {
|
175
|
+
int id = get_global_id(0);
|
176
|
+
if (id < total) out[id] = in[id] + 1;
|
177
|
+
}
|
178
|
+
eof
|
179
|
+
|
180
|
+
out = OutputBuffer.new(type, arr.size)
|
181
|
+
p.run(out, arr.to_type(type), arr.size)
|
182
|
+
assert_equal({type => outarr}, {type => out.data})
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
101
186
|
def test_program_int_input_buffer
|
102
187
|
p = Program.new <<-'eof'
|
103
188
|
__kernel run(__global int* out, __global int* in, int total) {
|
@@ -128,7 +213,7 @@ class TestProgram < Test::Unit::TestCase
|
|
128
213
|
assert_equal arr.map {|x| x.to_f + 0.5 }, out.data
|
129
214
|
end
|
130
215
|
|
131
|
-
def
|
216
|
+
def test_program_set_times
|
132
217
|
p = Program.new <<-'eof'
|
133
218
|
__kernel sum(__global int* out, __global int* in, int total) {
|
134
219
|
int id = get_global_id(0);
|
@@ -140,7 +225,7 @@ class TestProgram < Test::Unit::TestCase
|
|
140
225
|
sum = arr.inject(0) {|acc, el| acc + el }
|
141
226
|
_in = Buffer.new(arr)
|
142
227
|
out = OutputBuffer.new(:int, 1)
|
143
|
-
p.sum(out, _in, arr.size, :
|
228
|
+
p.sum(out, _in, arr.size, :times => arr.size)
|
144
229
|
assert_equal sum, out.data[0]
|
145
230
|
end
|
146
231
|
|
@@ -160,10 +245,10 @@ class TestProgram < Test::Unit::TestCase
|
|
160
245
|
assert_equal sum, out.data[0]
|
161
246
|
end
|
162
247
|
|
163
|
-
def
|
248
|
+
def test_program_invalid_times
|
164
249
|
p = Program.new("__kernel sum(int x) { }")
|
165
|
-
assert_raise(ArgumentError) { p.sum(:
|
166
|
-
assert_raise(ArgumentError) { p.sum(:
|
250
|
+
assert_raise(ArgumentError) { p.sum(:times => "hello") }
|
251
|
+
assert_raise(ArgumentError) { p.sum(:time => 1) }
|
167
252
|
end
|
168
253
|
|
169
254
|
def test_program_invalid_args
|
@@ -171,4 +256,19 @@ class TestProgram < Test::Unit::TestCase
|
|
171
256
|
assert_raise(ArgumentError) { p.sum(1, 2) }
|
172
257
|
assert_raise(ArgumentError) { p.sum(1, OutputBuffer.new(:int, 1), 3) }
|
173
258
|
end
|
259
|
+
|
260
|
+
def test_program_vectors
|
261
|
+
p = Program.new <<-'eof'
|
262
|
+
__kernel copy_to_out(__global float4 *out, __global float4 *vec) {
|
263
|
+
out[0].x = vec[0].x + 0.5;
|
264
|
+
out[0].y = vec[0].y + 0.5;
|
265
|
+
out[0].z = vec[0].z + 0.5;
|
266
|
+
out[0].w = vec[0].w + 0.5;
|
267
|
+
}
|
268
|
+
eof
|
269
|
+
|
270
|
+
out = OutputBuffer.new(:float, 4)
|
271
|
+
p.copy_to_out(out, [2.5, 2.5, 2.5, 2.5])
|
272
|
+
assert_equal [3, 3, 3, 3], out.data
|
273
|
+
end
|
174
274
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: barracuda
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "1.
|
4
|
+
version: "1.1"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Loren Segal
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-02 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -24,6 +24,8 @@ extra_rdoc_files: []
|
|
24
24
|
files:
|
25
25
|
- ext/barracuda.c
|
26
26
|
- ext/extconf.rb
|
27
|
+
- benchmarks/normalize.rb
|
28
|
+
- benchmarks/sort.rb
|
27
29
|
- benchmarks/to_float.rb
|
28
30
|
- test/test_barracuda.rb
|
29
31
|
- LICENSE
|