barracuda 1.0 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +25 -5
- data/benchmarks/normalize.rb +44 -0
- data/benchmarks/sort.rb +30 -0
- data/benchmarks/to_float.rb +2 -2
- data/ext/barracuda.c +339 -114
- data/test/test_barracuda.rb +114 -14
- metadata +4 -2
data/README.md
CHANGED
@@ -15,9 +15,6 @@ Barracuda aims to abstract both CUDA and OpenCL, however for now only OpenCL
|
|
15
15
|
on OSX 10.6 is supported. Patches to extend this support would be joyously
|
16
16
|
accepted!
|
17
17
|
|
18
|
-
Also note that Barracuda currently only supports data types, namely ints and
|
19
|
-
floats only. This should also be expanded.
|
20
|
-
|
21
18
|
INSTALLING
|
22
19
|
----------
|
23
20
|
|
@@ -87,7 +84,7 @@ to run). Barracuda automatically selects the size of the largest buffer as
|
|
87
84
|
the work group size, but in some cases this may be too small or too large. To
|
88
85
|
manually specify the work group size, call the kernel with an options hash:
|
89
86
|
|
90
|
-
program.my_kernel_method(..., :
|
87
|
+
program.my_kernel_method(..., :times => 512)
|
91
88
|
|
92
89
|
Note that the work group size must be a power of 2. Barracuda will increase
|
93
90
|
the work group size to the next power of 2 if it needs to. This means your
|
@@ -95,6 +92,29 @@ OpenCL program might run more iterations of your kernel method than you
|
|
95
92
|
request. Because we can't rely on the work group size, we pass in the total
|
96
93
|
data size to ensure we do not exceed the bounds of our data.
|
97
94
|
|
95
|
+
CONVERTING TYPES
|
96
|
+
----------------
|
97
|
+
|
98
|
+
OpenCL has a variety of native types. Most of them are supported; however, some
|
99
|
+
are not. Because Ruby only has the concept of Float and Fixnum (integer), you
|
100
|
+
may need to tell Barracuda the type of your input if you're trying to pass in
|
101
|
+
a char, short or double (or if you have signedness restrictions). To
|
102
|
+
do this, simply call `.to_type(:my_type)` on the input where `:my_type` is
|
103
|
+
a key in the `Barracuda::TYPES` hash:
|
104
|
+
|
105
|
+
>> Barracuda::TYPES.keys
|
106
|
+
=> [:bool, :char, :uchar, :short, :ushort, :int, :uint, :long,
|
107
|
+
:ulong, :float, :half, :double, :size_t, :ptrdiff_t,
|
108
|
+
:intptr_t, :uintptr_t]
|
109
|
+
|
110
|
+
For example, to pass in a short, do:
|
111
|
+
|
112
|
+
program.my_kernel(2.to_type(:short))
|
113
|
+
|
114
|
+
This can also be applied to an Array of shorts:
|
115
|
+
|
116
|
+
program.my_kernel([1, 2, 3].to_type(:short))
|
117
|
+
|
98
118
|
CLASS DETAILS
|
99
119
|
-------------
|
100
120
|
|
@@ -110,7 +130,7 @@ Represents an OpenCL program
|
|
110
130
|
- args should be the arguments defined in the kernel method.
|
111
131
|
- supported argument types are Float and Fixnum objects only.
|
112
132
|
- if the last arg is a Hash, it should be an options hash with keys:
|
113
|
-
- :
|
133
|
+
- :times => FIXNUM (the number of iterations to run)
|
114
134
|
|
115
135
|
**Barracuda::Buffer**:
|
116
136
|
|
@@ -0,0 +1,44 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../ext')
|
2
|
+
|
3
|
+
require 'barracuda'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
include Barracuda
|
7
|
+
|
8
|
+
def dist(*vec)
|
9
|
+
vec[0] * vec[0] + vec[1] * vec[1] + vec[2] * vec[2]
|
10
|
+
end
|
11
|
+
|
12
|
+
def normalize(*vec)
|
13
|
+
d = dist(*vec)
|
14
|
+
vec.map {|c| c / d }
|
15
|
+
end
|
16
|
+
|
17
|
+
def norm_all(arr)
|
18
|
+
out = []
|
19
|
+
0.step(arr.size - 1, 4) do |i|
|
20
|
+
vec = normalize(arr[i], arr[i + 1], arr[i + 2])
|
21
|
+
out.push(*vec, 0.0)
|
22
|
+
end
|
23
|
+
out
|
24
|
+
end
|
25
|
+
|
26
|
+
srand
|
27
|
+
prog = Program.new <<-'eof'
|
28
|
+
__kernel norm(__global float4 *out, __global float4 *in, int total) {
|
29
|
+
int i = get_global_id(0);
|
30
|
+
if (i < total) out[i] = normalize(in[i]);
|
31
|
+
}
|
32
|
+
eof
|
33
|
+
|
34
|
+
num_vecs = 100000
|
35
|
+
arr = []
|
36
|
+
num_vecs.times { arr.push(rand, rand, rand, 0.0) }
|
37
|
+
output = OutputBuffer.new(:float, arr.size)
|
38
|
+
|
39
|
+
|
40
|
+
Benchmark.bmbm do |x|
|
41
|
+
x.report("cpu") { norm_all(arr) }
|
42
|
+
x.report("gpu") { prog.norm(output, arr, num_vecs) }
|
43
|
+
end
|
44
|
+
|
data/benchmarks/sort.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../ext')
|
2
|
+
|
3
|
+
require 'barracuda'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
include Barracuda
|
7
|
+
|
8
|
+
prog = Program.new <<-'eof'
|
9
|
+
__kernel sort(__global int *out, __global int *in, int total) {
|
10
|
+
int i, final_index = 0, extra = 0;
|
11
|
+
int id = get_global_id(0);
|
12
|
+
if (id >= total) return;
|
13
|
+
int my_value = in[id];
|
14
|
+
for (i = 0; i < total; i++) {
|
15
|
+
if (in[i] < my_value) final_index++;
|
16
|
+
if (in[i] == my_value && i < id) extra++;
|
17
|
+
}
|
18
|
+
out[final_index+extra] = my_value;
|
19
|
+
}
|
20
|
+
eof
|
21
|
+
|
22
|
+
max = 1000
|
23
|
+
arr = (1..max).map { (rand * max).to_i }
|
24
|
+
output = OutputBuffer.new(:int, arr.size)
|
25
|
+
|
26
|
+
Benchmark.bm do |x|
|
27
|
+
x.report("cpu") { arr.sort }
|
28
|
+
x.report("gpu") { prog.sort(output, arr, arr.size) }
|
29
|
+
end
|
30
|
+
|
data/benchmarks/to_float.rb
CHANGED
@@ -18,7 +18,7 @@ output = OutputBuffer.new(:float, arr.size)
|
|
18
18
|
|
19
19
|
TIMES = 1
|
20
20
|
Benchmark.bmbm do |x|
|
21
|
-
x.report("
|
22
|
-
x.report("
|
21
|
+
x.report("regular") { TIMES.times { arr.map {|x| (x.to_f + 0.5) / 3.8 + 2.0 } } }
|
22
|
+
x.report("opencl") { TIMES.times { prog.sum(output, input, arr.size); output.clear } }
|
23
23
|
end
|
24
24
|
|
data/ext/barracuda.c
CHANGED
@@ -7,8 +7,31 @@ static VALUE rb_cOutputBuffer;
|
|
7
7
|
static VALUE rb_cProgram;
|
8
8
|
static VALUE rb_eProgramSyntaxError;
|
9
9
|
static VALUE rb_eOpenCLError;
|
10
|
-
|
11
|
-
static
|
10
|
+
static VALUE rb_cType;
|
11
|
+
static VALUE rb_hTypes;
|
12
|
+
|
13
|
+
static ID id_times;
|
14
|
+
static ID id_to_sym;
|
15
|
+
static ID id_data_type;
|
16
|
+
static ID id_object;
|
17
|
+
|
18
|
+
static ID id_type_bool;
|
19
|
+
static ID id_type_char;
|
20
|
+
static ID id_type_uchar;
|
21
|
+
static ID id_type_short;
|
22
|
+
static ID id_type_ushort;
|
23
|
+
static ID id_type_int;
|
24
|
+
static ID id_type_uint;
|
25
|
+
static ID id_type_long;
|
26
|
+
static ID id_type_ulong;
|
27
|
+
static ID id_type_float;
|
28
|
+
static ID id_type_half;
|
29
|
+
static ID id_type_double;
|
30
|
+
static ID id_type_size_t;
|
31
|
+
static ID id_type_ptrdiff_t;
|
32
|
+
static ID id_type_intptr_t;
|
33
|
+
static ID id_type_uintptr_t;
|
34
|
+
/*static ID id_type_void;*/
|
12
35
|
|
13
36
|
static VALUE program_compile(VALUE self, VALUE source);
|
14
37
|
static VALUE buffer_data_set(VALUE self, VALUE new_value);
|
@@ -17,9 +40,7 @@ static cl_device_id device_id = NULL;
|
|
17
40
|
static cl_context context = NULL;
|
18
41
|
static int err;
|
19
42
|
|
20
|
-
#define
|
21
|
-
#define BUFFER_TYPE_INT 0x0002
|
22
|
-
#define BUFFER_TYPE_CHAR 0x0003
|
43
|
+
#define VERSION_STRING "1.1"
|
23
44
|
|
24
45
|
struct program {
|
25
46
|
cl_program program;
|
@@ -31,13 +52,78 @@ struct kernel {
|
|
31
52
|
|
32
53
|
struct buffer {
|
33
54
|
VALUE arr;
|
34
|
-
|
55
|
+
ID type;
|
35
56
|
size_t num_items;
|
36
57
|
size_t member_size;
|
37
58
|
void *cachebuf;
|
38
59
|
cl_mem data;
|
39
60
|
};
|
40
61
|
|
62
|
+
static VALUE
|
63
|
+
data_type_set(VALUE self, VALUE value)
|
64
|
+
{
|
65
|
+
if (TYPE(value) != T_SYMBOL) {
|
66
|
+
value = rb_str_intern(rb_String(value));
|
67
|
+
}
|
68
|
+
if (rb_hash_aref(rb_hTypes, value) == Qnil) {
|
69
|
+
rb_raise(rb_eArgError, "invalid data type %s",
|
70
|
+
RSTRING_PTR(rb_inspect(value)));
|
71
|
+
}
|
72
|
+
|
73
|
+
rb_ivar_set(self, id_data_type, value);
|
74
|
+
return self;
|
75
|
+
}
|
76
|
+
|
77
|
+
static VALUE
|
78
|
+
data_type_get(VALUE self, ID type)
|
79
|
+
{
|
80
|
+
VALUE value = rb_ivar_get(self, id_data_type);
|
81
|
+
if (NIL_P(value)) {
|
82
|
+
value = ID2SYM(type);
|
83
|
+
data_type_set(self, value);
|
84
|
+
}
|
85
|
+
return value;
|
86
|
+
}
|
87
|
+
|
88
|
+
static VALUE
|
89
|
+
object_data_type_get(VALUE self)
|
90
|
+
{
|
91
|
+
return rb_ivar_get(self, id_data_type);
|
92
|
+
}
|
93
|
+
|
94
|
+
static VALUE
|
95
|
+
fixnum_data_type_get(VALUE self)
|
96
|
+
{
|
97
|
+
return ID2SYM(id_type_int);
|
98
|
+
}
|
99
|
+
|
100
|
+
static VALUE
|
101
|
+
bignum_data_type_get(VALUE self)
|
102
|
+
{
|
103
|
+
return data_type_get(self, id_type_long);
|
104
|
+
}
|
105
|
+
|
106
|
+
static VALUE
|
107
|
+
float_data_type_get(VALUE self)
|
108
|
+
{
|
109
|
+
return data_type_get(self, id_type_float);
|
110
|
+
}
|
111
|
+
|
112
|
+
static VALUE
|
113
|
+
array_data_type_get(VALUE self)
|
114
|
+
{
|
115
|
+
VALUE value = rb_ivar_get(self, id_data_type);
|
116
|
+
if (RTEST(value)) return value;
|
117
|
+
|
118
|
+
if (RARRAY_LEN(self) > 0) {
|
119
|
+
VALUE value = rb_funcall(RARRAY_PTR(self)[0], id_data_type, 0);
|
120
|
+
if (RTEST(value)) return value;
|
121
|
+
}
|
122
|
+
|
123
|
+
rb_raise(rb_eRuntimeError, "unknown buffer data in array %s",
|
124
|
+
RSTRING_PTR(rb_inspect(self)));
|
125
|
+
}
|
126
|
+
|
41
127
|
#define GET_PROGRAM() \
|
42
128
|
struct program *program; \
|
43
129
|
Data_Get_Struct(self, struct program, program);
|
@@ -46,28 +132,143 @@ struct buffer {
|
|
46
132
|
struct buffer *buffer; \
|
47
133
|
Data_Get_Struct(self, struct buffer, buffer);
|
48
134
|
|
135
|
+
#define TYPE_SET(type, size) \
|
136
|
+
id_type_##type = rb_intern(#type); \
|
137
|
+
rb_hash_aset(rb_hTypes, ID2SYM(id_type_##type), INT2FIX(sizeof(size)));
|
138
|
+
|
139
|
+
#define TYPE_TO_NATIVE(type_name, cast_type, CONVERT_FUNC) \
|
140
|
+
if (id_type_##type_name == data_type) { \
|
141
|
+
*((cast_type*)native_value) = (cast_type)CONVERT_FUNC(value); \
|
142
|
+
return; \
|
143
|
+
}
|
144
|
+
|
145
|
+
#define TYPE_TO_RUBY(type_name, cast_type, CONVERT_FUNC) \
|
146
|
+
if (id_type_##type_name == data_type) { \
|
147
|
+
return CONVERT_FUNC(*((cast_type*)native_value)); \
|
148
|
+
}
|
149
|
+
|
49
150
|
static void
|
50
|
-
|
151
|
+
types_hash_init()
|
51
152
|
{
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
153
|
+
TYPE_SET(bool, char);
|
154
|
+
TYPE_SET(char, cl_char);
|
155
|
+
TYPE_SET(uchar, cl_uchar);
|
156
|
+
TYPE_SET(short, cl_short);
|
157
|
+
TYPE_SET(ushort, cl_ushort);
|
158
|
+
TYPE_SET(int, cl_int);
|
159
|
+
TYPE_SET(uint, cl_uint);
|
160
|
+
TYPE_SET(long, cl_long);
|
161
|
+
TYPE_SET(ulong, cl_ulong);
|
162
|
+
TYPE_SET(float, cl_float);
|
163
|
+
TYPE_SET(half, cl_half);
|
164
|
+
TYPE_SET(double, cl_double);
|
165
|
+
TYPE_SET(size_t, size_t);
|
166
|
+
TYPE_SET(ptrdiff_t, ptrdiff_t);
|
167
|
+
TYPE_SET(intptr_t, intptr_t);
|
168
|
+
TYPE_SET(uintptr_t, uintptr_t);
|
169
|
+
OBJ_FREEZE(rb_hTypes);
|
170
|
+
}
|
58
171
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
172
|
+
static void
|
173
|
+
type_to_native(VALUE value, ID data_type, void *native_value)
|
174
|
+
{
|
175
|
+
if (id_type_char == data_type || id_type_uchar == data_type) {
|
176
|
+
if (TYPE(value) == T_FIXNUM) {
|
177
|
+
value = rb_funcall(value, rb_intern("chr"), 0);
|
63
178
|
}
|
179
|
+
*((cl_char *)native_value) = RSTRING_PTR(value)[0];
|
180
|
+
return;
|
181
|
+
}
|
182
|
+
if (id_type_float == data_type || id_type_double == data_type) {
|
183
|
+
*((cl_float *)native_value) = TYPE(value) == T_FIXNUM ?
|
184
|
+
(cl_float)FIX2INT(value) : RFLOAT_VALUE(value);
|
185
|
+
return;
|
186
|
+
}
|
187
|
+
if (id_type_half == data_type) {
|
188
|
+
*((cl_half *)native_value) = TYPE(value) == T_FIXNUM ?
|
189
|
+
(cl_half)FIX2INT(value) : RFLOAT_VALUE(value);
|
190
|
+
return;
|
64
191
|
}
|
192
|
+
|
193
|
+
TYPE_TO_NATIVE(bool, char, FIX2INT);
|
194
|
+
TYPE_TO_NATIVE(short, cl_short, FIX2INT);
|
195
|
+
TYPE_TO_NATIVE(ushort, cl_ushort, NUM2UINT);
|
196
|
+
TYPE_TO_NATIVE(int, cl_int, FIX2INT);
|
197
|
+
TYPE_TO_NATIVE(uint, cl_uint, NUM2UINT);
|
198
|
+
TYPE_TO_NATIVE(long, cl_long, NUM2LONG);
|
199
|
+
TYPE_TO_NATIVE(ulong, cl_ulong, NUM2ULONG);
|
200
|
+
TYPE_TO_NATIVE(double, cl_double, NUM2DBL);
|
201
|
+
TYPE_TO_NATIVE(size_t, size_t, NUM2UINT);
|
202
|
+
TYPE_TO_NATIVE(ptrdiff_t, ptrdiff_t, NUM2UINT);
|
203
|
+
TYPE_TO_NATIVE(intptr_t, intptr_t, NUM2UINT);
|
204
|
+
TYPE_TO_NATIVE(uintptr_t, uintptr_t, NUM2UINT);
|
205
|
+
}
|
206
|
+
|
207
|
+
static VALUE
|
208
|
+
type_to_ruby(void *native_value, ID data_type)
|
209
|
+
{
|
210
|
+
TYPE_TO_RUBY(bool, char, INT2FIX);
|
211
|
+
TYPE_TO_RUBY(char, cl_char, INT2FIX);
|
212
|
+
TYPE_TO_RUBY(uchar, cl_uchar, UINT2NUM);
|
213
|
+
TYPE_TO_RUBY(short, cl_short, INT2FIX);
|
214
|
+
TYPE_TO_RUBY(ushort, cl_ushort, UINT2NUM);
|
215
|
+
TYPE_TO_RUBY(int, cl_int, INT2FIX);
|
216
|
+
TYPE_TO_RUBY(uint, cl_uint, UINT2NUM);
|
217
|
+
TYPE_TO_RUBY(long, cl_long, LONG2NUM);
|
218
|
+
TYPE_TO_RUBY(ulong, cl_ulong, ULONG2NUM);
|
219
|
+
TYPE_TO_RUBY(float, cl_float, rb_float_new);
|
220
|
+
TYPE_TO_RUBY(half, cl_half, rb_float_new);
|
221
|
+
TYPE_TO_RUBY(double, cl_double, DBL2NUM);
|
222
|
+
TYPE_TO_RUBY(size_t, size_t, UINT2NUM);
|
223
|
+
TYPE_TO_RUBY(ptrdiff_t, ptrdiff_t, UINT2NUM);
|
224
|
+
TYPE_TO_RUBY(intptr_t, intptr_t, UINT2NUM);
|
225
|
+
TYPE_TO_RUBY(uintptr_t, uintptr_t, UINT2NUM);
|
226
|
+
return Qnil;
|
227
|
+
}
|
228
|
+
|
229
|
+
static VALUE
|
230
|
+
type_initialize(VALUE self, VALUE object)
|
231
|
+
{
|
232
|
+
rb_ivar_set(self, id_object, object);
|
233
|
+
return self;
|
234
|
+
}
|
235
|
+
|
236
|
+
static VALUE
|
237
|
+
type_method_missing(VALUE self, VALUE type)
|
238
|
+
{
|
239
|
+
data_type_set(self, type);
|
240
|
+
return self;
|
241
|
+
}
|
242
|
+
|
243
|
+
static VALUE
|
244
|
+
type_object(VALUE self)
|
245
|
+
{
|
246
|
+
return rb_ivar_get(self, id_object);
|
247
|
+
}
|
248
|
+
|
249
|
+
static VALUE
|
250
|
+
object_to_type(VALUE self, VALUE type)
|
251
|
+
{
|
252
|
+
rb_ivar_set(self, id_data_type, type);
|
253
|
+
return self;
|
254
|
+
}
|
255
|
+
|
256
|
+
static VALUE
|
257
|
+
fixnum_to_type(VALUE self, VALUE type)
|
258
|
+
{
|
259
|
+
VALUE out = rb_funcall(rb_cType, rb_intern("new"), 1, self);
|
260
|
+
return type_method_missing(out, type);
|
261
|
+
}
|
262
|
+
|
263
|
+
static VALUE
|
264
|
+
type_new(VALUE klass, VALUE type)
|
265
|
+
{
|
266
|
+
return rb_funcall(rb_cType, rb_intern("new"), 1, type);
|
65
267
|
}
|
66
268
|
|
67
269
|
static void
|
68
270
|
free_buffer(struct buffer *buffer)
|
69
271
|
{
|
70
|
-
fflush(stdout);
|
71
272
|
clReleaseMemObject(buffer->data);
|
72
273
|
rb_gc_mark(buffer->arr);
|
73
274
|
ruby_xfree(buffer->cachebuf);
|
@@ -88,26 +289,15 @@ static void
|
|
88
289
|
buffer_update_cache_info(struct buffer *buffer)
|
89
290
|
{
|
90
291
|
buffer->num_items = RARRAY_LEN(buffer->arr);
|
91
|
-
|
92
|
-
|
93
|
-
case T_FIXNUM:
|
94
|
-
buffer->type = BUFFER_TYPE_INT;
|
95
|
-
buffer->member_size = sizeof(int);
|
96
|
-
break;
|
97
|
-
case T_FLOAT:
|
98
|
-
buffer->type = BUFFER_TYPE_FLOAT;
|
99
|
-
buffer->member_size = sizeof(float);
|
100
|
-
break;
|
101
|
-
default:
|
102
|
-
rb_raise(rb_eRuntimeError, "invalid buffer data %s",
|
103
|
-
RSTRING_PTR(rb_inspect(buffer->arr)));
|
104
|
-
}
|
292
|
+
buffer->type = SYM2ID(rb_funcall(buffer->arr, id_data_type, 0));
|
293
|
+
buffer->member_size = FIX2INT(rb_hash_aref(rb_hTypes, ID2SYM(buffer->type)));
|
105
294
|
}
|
106
295
|
|
107
296
|
static VALUE
|
108
297
|
buffer_write(VALUE self)
|
109
298
|
{
|
110
|
-
unsigned int i;
|
299
|
+
unsigned int i, index;
|
300
|
+
unsigned long data_ptr[16]; // data buffer
|
111
301
|
|
112
302
|
GET_BUFFER();
|
113
303
|
|
@@ -118,22 +308,11 @@ buffer_write(VALUE self)
|
|
118
308
|
}
|
119
309
|
buffer->cachebuf = malloc(buffer->num_items * buffer->member_size);
|
120
310
|
|
121
|
-
for (i = 0; i < RARRAY_LEN(buffer->arr); i
|
311
|
+
for (i = 0, index = 0; i < RARRAY_LEN(buffer->arr); i++, index += buffer->member_size) {
|
122
312
|
VALUE item = RARRAY_PTR(buffer->arr)[i];
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
((int *)buffer->cachebuf)[i] = value;
|
127
|
-
break;
|
128
|
-
}
|
129
|
-
case BUFFER_TYPE_FLOAT: {
|
130
|
-
float value = RFLOAT_VALUE(item);
|
131
|
-
((float *)buffer->cachebuf)[i] = value;
|
132
|
-
break;
|
133
|
-
}
|
134
|
-
default:
|
135
|
-
((uint32_t *)buffer->cachebuf)[i] = 0;
|
136
|
-
}
|
313
|
+
|
314
|
+
type_to_native(item, buffer->type, (void *)data_ptr);
|
315
|
+
memcpy(((int8_t*)buffer->cachebuf) + index, (void *)data_ptr, buffer->member_size);
|
137
316
|
}
|
138
317
|
|
139
318
|
return self;
|
@@ -142,24 +321,16 @@ buffer_write(VALUE self)
|
|
142
321
|
static VALUE
|
143
322
|
buffer_read(VALUE self)
|
144
323
|
{
|
145
|
-
unsigned int i;
|
324
|
+
unsigned int i, index;
|
146
325
|
|
147
326
|
GET_BUFFER();
|
148
327
|
|
149
328
|
rb_gc_mark(buffer->arr);
|
150
329
|
buffer->arr = rb_ary_new2(buffer->num_items);
|
151
330
|
|
152
|
-
for (i = 0; i < buffer->num_items; i
|
153
|
-
|
154
|
-
|
155
|
-
rb_ary_push(buffer->arr, INT2FIX(((int *)buffer->cachebuf)[i]));
|
156
|
-
break;
|
157
|
-
case BUFFER_TYPE_FLOAT:
|
158
|
-
rb_ary_push(buffer->arr, rb_float_new(((float *)buffer->cachebuf)[i]));
|
159
|
-
break;
|
160
|
-
default:
|
161
|
-
rb_ary_push(buffer->arr, Qnil);
|
162
|
-
}
|
331
|
+
for (i = 0, index = 0; i < buffer->num_items; i++, index += buffer->member_size) {
|
332
|
+
VALUE value = type_to_ruby(((int8_t*)buffer->cachebuf) + index, buffer->type);
|
333
|
+
rb_ary_push(buffer->arr, value);
|
163
334
|
}
|
164
335
|
|
165
336
|
return self;
|
@@ -224,25 +395,21 @@ buffer_initialize(int argc, VALUE *argv, VALUE self)
|
|
224
395
|
static VALUE
|
225
396
|
obuffer_initialize(VALUE self, VALUE type, VALUE size)
|
226
397
|
{
|
398
|
+
VALUE type_sym, member_size;
|
227
399
|
GET_BUFFER();
|
228
400
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
else if (strcmp(RSTRING_PTR(type), "int") == 0) {
|
235
|
-
buffer->type = BUFFER_TYPE_INT;
|
236
|
-
buffer->member_size = sizeof(int);
|
237
|
-
}
|
238
|
-
else {
|
239
|
-
rb_raise(rb_eArgError, "type can only be :float or :int");
|
401
|
+
type_sym = rb_funcall(type, id_to_sym, 0);
|
402
|
+
member_size = rb_hash_aref(rb_hTypes, type_sym);
|
403
|
+
if (NIL_P(member_size)) {
|
404
|
+
rb_raise(rb_eArgError, "type can only be one of %s",
|
405
|
+
RSTRING_PTR(rb_inspect(rb_funcall(rb_hTypes, rb_intern("keys"), 0))));
|
240
406
|
}
|
241
|
-
|
242
407
|
if (TYPE(size) != T_FIXNUM) {
|
243
408
|
rb_raise(rb_eArgError, "expecting buffer size as argument 2");
|
244
409
|
}
|
245
410
|
|
411
|
+
buffer->type = SYM2ID(type_sym);
|
412
|
+
buffer->member_size = FIX2INT(member_size);
|
246
413
|
buffer->num_items = FIX2UINT(size);
|
247
414
|
buffer->cachebuf = malloc(buffer->num_items * buffer->member_size);
|
248
415
|
buffer->data = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
@@ -355,62 +522,79 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
|
|
355
522
|
|
356
523
|
commands = clCreateCommandQueue(context, device_id, 0, &err);
|
357
524
|
if (!commands) {
|
525
|
+
clReleaseKernel(kernel);
|
358
526
|
rb_raise(rb_eOpenCLError, "could not execute kernel method '%s'", RSTRING_PTR(argv[0]));
|
359
527
|
}
|
360
528
|
|
361
529
|
for (i = 1; i < argc; i++) {
|
362
|
-
|
363
|
-
|
364
|
-
|
530
|
+
VALUE item = argv[i];
|
531
|
+
err = !CL_SUCCESS;
|
532
|
+
|
533
|
+
if (i == argc - 1 && TYPE(item) == T_HASH) {
|
534
|
+
VALUE worker_size = rb_hash_aref(item, ID2SYM(id_times));
|
365
535
|
if (RTEST(worker_size) && TYPE(worker_size) == T_FIXNUM) {
|
366
536
|
global = FIX2UINT(worker_size);
|
367
537
|
}
|
368
538
|
else {
|
369
539
|
CLEAN();
|
370
|
-
rb_raise(rb_eArgError, "opts hash must be {:
|
371
|
-
RSTRING_PTR(rb_inspect(
|
540
|
+
rb_raise(rb_eArgError, "opts hash must be {:times => INT_VALUE}, got %s",
|
541
|
+
RSTRING_PTR(rb_inspect(item)));
|
372
542
|
}
|
373
543
|
break;
|
374
544
|
}
|
375
545
|
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
546
|
+
if (TYPE(item) == T_ARRAY) {
|
547
|
+
/* create buffer from arg */
|
548
|
+
VALUE buf = buffer_s_allocate(rb_cBuffer);
|
549
|
+
item = buffer_initialize(1, &item, buf);
|
550
|
+
}
|
551
|
+
|
552
|
+
if (CLASS_OF(item) == rb_cOutputBuffer) {
|
553
|
+
struct buffer *buffer;
|
554
|
+
Data_Get_Struct(item, struct buffer, buffer);
|
555
|
+
err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
|
556
|
+
if (buffer->num_items > global) {
|
557
|
+
global = buffer->num_items;
|
381
558
|
}
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
559
|
+
}
|
560
|
+
else if (CLASS_OF(item) == rb_cBuffer) {
|
561
|
+
struct buffer *buffer;
|
562
|
+
Data_Get_Struct(item, struct buffer, buffer);
|
563
|
+
|
564
|
+
buffer_write(item);
|
565
|
+
clEnqueueWriteBuffer(commands, buffer->data, CL_TRUE, 0,
|
566
|
+
buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
|
567
|
+
err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
|
568
|
+
if (buffer->num_items > global) {
|
569
|
+
global = buffer->num_items;
|
386
570
|
}
|
387
|
-
|
388
|
-
|
389
|
-
|
571
|
+
}
|
572
|
+
else {
|
573
|
+
unsigned long data_ptr[16]; // a buffer of data
|
574
|
+
size_t data_size_t;
|
575
|
+
VALUE data_type, data_size;
|
576
|
+
|
577
|
+
if (CLASS_OF(item) == rb_cType) {
|
578
|
+
data_type = rb_funcall(type_object(item), id_data_type, 0);
|
579
|
+
}
|
580
|
+
else {
|
581
|
+
data_type = rb_funcall(item, id_data_type, 0);
|
390
582
|
}
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
struct buffer *buffer;
|
402
|
-
Data_Get_Struct(argv[i], struct buffer, buffer);
|
403
|
-
|
404
|
-
buffer_write(argv[i]);
|
405
|
-
clEnqueueWriteBuffer(commands, buffer->data, CL_TRUE, 0,
|
406
|
-
buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
|
407
|
-
err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
|
408
|
-
}
|
409
|
-
break;
|
583
|
+
data_size = rb_hash_aref(rb_hTypes, data_type);
|
584
|
+
if (NIL_P(data_size)) {
|
585
|
+
CLEAN();
|
586
|
+
rb_raise(rb_eRuntimeError, "invalid data type for %s",
|
587
|
+
RSTRING_PTR(rb_inspect(item)));
|
588
|
+
}
|
589
|
+
|
590
|
+
data_size_t = FIX2UINT(data_size);
|
591
|
+
type_to_native(item, SYM2ID(data_type), (void *)data_ptr);
|
592
|
+
err = clSetKernelArg(kernel, i - 1, data_size_t, data_ptr);
|
410
593
|
}
|
594
|
+
|
411
595
|
if (err != CL_SUCCESS) {
|
412
596
|
CLEAN();
|
413
|
-
rb_raise(rb_eArgError, "invalid kernel method parameter: %s", RSTRING_PTR(rb_inspect(
|
597
|
+
rb_raise(rb_eArgError, "invalid kernel method parameter: %s", RSTRING_PTR(rb_inspect(item)));
|
414
598
|
}
|
415
599
|
}
|
416
600
|
|
@@ -430,13 +614,14 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
|
|
430
614
|
clFinish(commands);
|
431
615
|
|
432
616
|
for (i = 1; i < argc; i++) {
|
433
|
-
|
617
|
+
VALUE item = argv[i];
|
618
|
+
if (CLASS_OF(item) == rb_cOutputBuffer) {
|
434
619
|
struct buffer *buffer;
|
435
|
-
Data_Get_Struct(
|
620
|
+
Data_Get_Struct(item, struct buffer, buffer);
|
436
621
|
err = clEnqueueReadBuffer(commands, buffer->data, CL_TRUE, 0,
|
437
622
|
buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
|
438
623
|
ERROR("failed to read output buffer");
|
439
|
-
buffer_read(
|
624
|
+
buffer_read(item);
|
440
625
|
}
|
441
626
|
}
|
442
627
|
|
@@ -444,12 +629,39 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
|
|
444
629
|
return Qnil;
|
445
630
|
}
|
446
631
|
|
632
|
+
static void
|
633
|
+
init_opencl()
|
634
|
+
{
|
635
|
+
if (device_id == NULL) {
|
636
|
+
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
|
637
|
+
if (err != CL_SUCCESS) {
|
638
|
+
rb_raise(rb_eOpenCLError, "failed to create a device group");
|
639
|
+
}
|
640
|
+
}
|
641
|
+
|
642
|
+
if (context == NULL) {
|
643
|
+
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
|
644
|
+
if (!context) {
|
645
|
+
rb_raise(rb_eOpenCLError, "failed to create a program context");
|
646
|
+
}
|
647
|
+
}
|
648
|
+
}
|
649
|
+
|
447
650
|
void
|
448
651
|
Init_barracuda()
|
449
652
|
{
|
450
|
-
|
653
|
+
id_times = rb_intern("times");
|
654
|
+
id_to_sym = rb_intern("to_sym");
|
655
|
+
id_data_type = rb_intern("data_type");
|
656
|
+
id_object = rb_intern("object");
|
657
|
+
|
658
|
+
rb_hTypes = rb_hash_new();
|
659
|
+
rb_define_method(rb_mKernel, "Type", type_new, 1);
|
660
|
+
types_hash_init();
|
451
661
|
|
452
662
|
rb_mBarracuda = rb_define_module("Barracuda");
|
663
|
+
rb_define_const(rb_mBarracuda, "VERSION", rb_str_new2(VERSION_STRING));
|
664
|
+
rb_define_const(rb_mBarracuda, "TYPES", rb_hTypes);
|
453
665
|
|
454
666
|
rb_eProgramSyntaxError = rb_define_class_under(rb_mBarracuda, "SyntaxError", rb_eSyntaxError);
|
455
667
|
rb_eOpenCLError = rb_define_class_under(rb_mBarracuda, "OpenCLError", rb_eStandardError);
|
@@ -476,6 +688,19 @@ Init_barracuda()
|
|
476
688
|
rb_undef_method(rb_cOutputBuffer, "write");
|
477
689
|
rb_undef_method(rb_cOutputBuffer, "size_changed");
|
478
690
|
rb_undef_method(rb_cOutputBuffer, "data=");
|
479
|
-
|
691
|
+
|
692
|
+
rb_cType = rb_define_class_under(rb_mBarracuda, "Type", rb_cObject);
|
693
|
+
rb_define_method(rb_cType, "initialize", type_initialize, 1);
|
694
|
+
rb_define_method(rb_cType, "method_missing", type_method_missing, 1);
|
695
|
+
rb_define_method(rb_cType, "object", type_object, 0);
|
696
|
+
|
697
|
+
rb_define_method(rb_cObject, "to_type", object_to_type, 1);
|
698
|
+
rb_define_method(rb_cFixnum, "to_type", fixnum_to_type, 1);
|
699
|
+
rb_define_method(rb_cObject, "data_type", object_data_type_get, 0);
|
700
|
+
rb_define_method(rb_cArray, "data_type", array_data_type_get, 0);
|
701
|
+
rb_define_method(rb_cFixnum, "data_type", fixnum_data_type_get, 0);
|
702
|
+
rb_define_method(rb_cBignum, "data_type", bignum_data_type_get, 0);
|
703
|
+
rb_define_method(rb_cFloat, "data_type", float_data_type_get, 0);
|
704
|
+
|
480
705
|
init_opencl();
|
481
706
|
}
|
data/test/test_barracuda.rb
CHANGED
@@ -5,6 +5,55 @@ require "barracuda"
|
|
5
5
|
|
6
6
|
include Barracuda
|
7
7
|
|
8
|
+
class TestDataTypes < Test::Unit::TestCase
|
9
|
+
def test_default_fixnum_type
|
10
|
+
assert_equal :int, 2.data_type
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_default_bignum_type
|
14
|
+
assert_equal :long, (2**64).data_type
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_default_float_type
|
18
|
+
assert_equal :float, 2.5.data_type
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_default_array_type
|
22
|
+
assert_equal :int, [2].data_type
|
23
|
+
assert_equal :float, [2.5, 2.6].data_type
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_set_data_type_fixnum
|
27
|
+
assert_equal :char, 2.to_type(:char).data_type
|
28
|
+
assert_equal :int, 2.data_type
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_set_data_type
|
32
|
+
[2**64, 2.5, [2]].each do |v|
|
33
|
+
assert_equal :char, v.to_type(:char).data_type
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_set_invalid_data_type
|
38
|
+
assert_raise(ArgumentError) { 1.to_type(:unknown) }
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_invalid_array_data_type
|
42
|
+
assert_raise(RuntimeError) { [Object.new].data_type }
|
43
|
+
assert_raise(RuntimeError) { ['x'].data_type }
|
44
|
+
assert_raise(RuntimeError) { [].data_type }
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_object_data_type
|
48
|
+
assert_nil Object.new.data_type
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_type_class
|
52
|
+
assert_equal :long, Type.new(1).long.data_type
|
53
|
+
assert_equal :uchar, Type(1).uchar.data_type
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
8
57
|
class TestBuffer < Test::Unit::TestCase
|
9
58
|
def test_buffer_create_no_data
|
10
59
|
assert_raise(ArgumentError) { Buffer.new }
|
@@ -55,18 +104,14 @@ class TestBuffer < Test::Unit::TestCase
|
|
55
104
|
end
|
56
105
|
|
57
106
|
class TestOutputBuffer < Test::Unit::TestCase
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def test_create_int_output_buffer
|
64
|
-
b = OutputBuffer.new(:float, 5)
|
65
|
-
assert_equal 5, b.size
|
107
|
+
def test_create_output_buffer_valid_types
|
108
|
+
TYPES.keys.each do |type|
|
109
|
+
assert_nothing_raised { OutputBuffer.new(type.to_s, 5) }
|
110
|
+
end
|
66
111
|
end
|
67
112
|
|
68
113
|
def test_create_output_buffer_with_invalid_type
|
69
|
-
assert_raise(ArgumentError) { OutputBuffer.new(:
|
114
|
+
assert_raise(ArgumentError) { OutputBuffer.new(:CHAR, 5) }
|
70
115
|
end
|
71
116
|
|
72
117
|
def test_create_output_buffer_with_invalid_size
|
@@ -98,6 +143,46 @@ class TestProgram < Test::Unit::TestCase
|
|
98
143
|
assert_raise(NoMethodError) { p.not_x_y_z }
|
99
144
|
end
|
100
145
|
|
146
|
+
def test_program_implicit_array_buffer
|
147
|
+
p = Program.new <<-'eof'
|
148
|
+
__kernel copy(__global int *out, __global int *in, int total) {
|
149
|
+
int i = get_global_id(0);
|
150
|
+
if (i < total) out[i] = in[i] + 1;
|
151
|
+
}
|
152
|
+
eof
|
153
|
+
|
154
|
+
out = OutputBuffer.new(:int, 3)
|
155
|
+
p.copy(out, [1, 2, 3], 3)
|
156
|
+
assert_equal [2, 3, 4], out.data
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_program_types
|
160
|
+
arr = (1..5).to_a
|
161
|
+
outarr = arr.map {|x| x + 1 }
|
162
|
+
p = Program.new
|
163
|
+
|
164
|
+
TYPES.keys.each do |type|
|
165
|
+
# FIXME These types are currently broken (unimplemented in opencl?)
|
166
|
+
next if type == :bool
|
167
|
+
next if type == :double
|
168
|
+
next if type == :size_t
|
169
|
+
next if type == :ptrdiff_t
|
170
|
+
next if type == :intptr_t
|
171
|
+
next if type == :uintptr_t
|
172
|
+
|
173
|
+
p.compile <<-eof
|
174
|
+
__kernel run(__global #{type} *out, __global #{type} *in, int total) {
|
175
|
+
int id = get_global_id(0);
|
176
|
+
if (id < total) out[id] = in[id] + 1;
|
177
|
+
}
|
178
|
+
eof
|
179
|
+
|
180
|
+
out = OutputBuffer.new(type, arr.size)
|
181
|
+
p.run(out, arr.to_type(type), arr.size)
|
182
|
+
assert_equal({type => outarr}, {type => out.data})
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
101
186
|
def test_program_int_input_buffer
|
102
187
|
p = Program.new <<-'eof'
|
103
188
|
__kernel run(__global int* out, __global int* in, int total) {
|
@@ -128,7 +213,7 @@ class TestProgram < Test::Unit::TestCase
|
|
128
213
|
assert_equal arr.map {|x| x.to_f + 0.5 }, out.data
|
129
214
|
end
|
130
215
|
|
131
|
-
def
|
216
|
+
def test_program_set_times
|
132
217
|
p = Program.new <<-'eof'
|
133
218
|
__kernel sum(__global int* out, __global int* in, int total) {
|
134
219
|
int id = get_global_id(0);
|
@@ -140,7 +225,7 @@ class TestProgram < Test::Unit::TestCase
|
|
140
225
|
sum = arr.inject(0) {|acc, el| acc + el }
|
141
226
|
_in = Buffer.new(arr)
|
142
227
|
out = OutputBuffer.new(:int, 1)
|
143
|
-
p.sum(out, _in, arr.size, :
|
228
|
+
p.sum(out, _in, arr.size, :times => arr.size)
|
144
229
|
assert_equal sum, out.data[0]
|
145
230
|
end
|
146
231
|
|
@@ -160,10 +245,10 @@ class TestProgram < Test::Unit::TestCase
|
|
160
245
|
assert_equal sum, out.data[0]
|
161
246
|
end
|
162
247
|
|
163
|
-
def
|
248
|
+
def test_program_invalid_times
|
164
249
|
p = Program.new("__kernel sum(int x) { }")
|
165
|
-
assert_raise(ArgumentError) { p.sum(:
|
166
|
-
assert_raise(ArgumentError) { p.sum(:
|
250
|
+
assert_raise(ArgumentError) { p.sum(:times => "hello") }
|
251
|
+
assert_raise(ArgumentError) { p.sum(:time => 1) }
|
167
252
|
end
|
168
253
|
|
169
254
|
def test_program_invalid_args
|
@@ -171,4 +256,19 @@ class TestProgram < Test::Unit::TestCase
|
|
171
256
|
assert_raise(ArgumentError) { p.sum(1, 2) }
|
172
257
|
assert_raise(ArgumentError) { p.sum(1, OutputBuffer.new(:int, 1), 3) }
|
173
258
|
end
|
259
|
+
|
260
|
+
def test_program_vectors
|
261
|
+
p = Program.new <<-'eof'
|
262
|
+
__kernel copy_to_out(__global float4 *out, __global float4 *vec) {
|
263
|
+
out[0].x = vec[0].x + 0.5;
|
264
|
+
out[0].y = vec[0].y + 0.5;
|
265
|
+
out[0].z = vec[0].z + 0.5;
|
266
|
+
out[0].w = vec[0].w + 0.5;
|
267
|
+
}
|
268
|
+
eof
|
269
|
+
|
270
|
+
out = OutputBuffer.new(:float, 4)
|
271
|
+
p.copy_to_out(out, [2.5, 2.5, 2.5, 2.5])
|
272
|
+
assert_equal [3, 3, 3, 3], out.data
|
273
|
+
end
|
174
274
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: barracuda
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "1.
|
4
|
+
version: "1.1"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Loren Segal
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-02 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -24,6 +24,8 @@ extra_rdoc_files: []
|
|
24
24
|
files:
|
25
25
|
- ext/barracuda.c
|
26
26
|
- ext/extconf.rb
|
27
|
+
- benchmarks/normalize.rb
|
28
|
+
- benchmarks/sort.rb
|
27
29
|
- benchmarks/to_float.rb
|
28
30
|
- test/test_barracuda.rb
|
29
31
|
- LICENSE
|