barracuda 1.0 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -15,9 +15,6 @@ Barracuda aims to abstract both CUDA and OpenCL, however for now only OpenCL
15
15
  on OSX 10.6 is supported. Patches to extend this support would be joyously
16
16
  accepted!
17
17
 
18
- Also note that Barracuda currently only supports data types, namely ints and
19
- floats only. This should also be expanded.
20
-
21
18
  INSTALLING
22
19
  ----------
23
20
 
@@ -87,7 +84,7 @@ to run). Barracuda automatically selects the size of the largest buffer as
87
84
  the work group size, but in some cases this may be too small or too large. To
88
85
  manually specify the work group size, call the kernel with an options hash:
89
86
 
90
- program.my_kernel_method(..., :worker_size => 512)
87
+ program.my_kernel_method(..., :times => 512)
91
88
 
92
89
  Note that the work group size must be a power of 2. Barracuda will increase
93
90
  the work group size to the next power of 2 if it needs to. This means your
@@ -95,6 +92,29 @@ OpenCL program might run more iterations of your kernel method than you
95
92
  request. Because we can't rely on the work group size, we pass in the total
96
93
  data size to ensure we do not exceed the bounds of our data.
97
94
 
95
+ CONVERTING TYPES
96
+ ----------------
97
+
98
+ OpenCL has a variety of native types. Most of them are supported; however, some
99
+ are not. Because Ruby only has the concept of Float and Fixnum (integer), you
100
+ may need to tell Barracuda the type of your input if you're trying to pass in
101
+ a char, short or double (or possibly have some signedness restrictions). To
102
+ do this, simply call `.to_type(:my_type)` on the input where `:my_type` is
103
+ a key in the `Barracuda::TYPES` hash:
104
+
105
+ >> Barracuda::TYPES.keys
106
+ => [:bool, :char, :uchar, :short, :ushort, :int, :uint, :long,
107
+ :ulong, :float, :half, :double, :size_t, :ptrdiff_t,
108
+ :intptr_t, :uintptr_t]
109
+
110
+ For example, to pass in a short, do:
111
+
112
+ program.my_kernel(2.to_type(:short))
113
+
114
+ This can also be applied to an Array of shorts:
115
+
116
+ program.my_kernel([1, 2, 3].to_type(:short))
117
+
98
118
  CLASS DETAILS
99
119
  -------------
100
120
 
@@ -110,7 +130,7 @@ Represents an OpenCL program
110
130
  - args should be the arguments defined in the kernel method.
111
131
  - supported argument types are Float and Fixnum objects only.
112
132
  - if the last arg is a Hash, it should be an options hash with keys:
113
- - :worker_size => FIXNUM (the number of iterations to run)
133
+ - :times => FIXNUM (the number of iterations to run)
114
134
 
115
135
  **Barracuda::Buffer**:
116
136
 
@@ -0,0 +1,44 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../ext')
2
+
3
+ require 'barracuda'
4
+ require 'benchmark'
5
+
6
+ include Barracuda
7
+
8
+ def dist(*vec)
9
+ vec[0] * vec[0] + vec[1] * vec[1] + vec[2] * vec[2]
10
+ end
11
+
12
+ def normalize(*vec)
13
+ d = dist(*vec)
14
+ vec.map {|c| c / d }
15
+ end
16
+
17
+ def norm_all(arr)
18
+ out = []
19
+ 0.step(arr.size - 1, 4) do |i|
20
+ vec = normalize(arr[i], arr[i + 1], arr[i + 2])
21
+ out.push(*vec, 0.0)
22
+ end
23
+ out
24
+ end
25
+
26
+ srand
27
+ prog = Program.new <<-'eof'
28
+ __kernel norm(__global float4 *out, __global float4 *in, int total) {
29
+ int i = get_global_id(0);
30
+ if (i < total) out[i] = normalize(in[i]);
31
+ }
32
+ eof
33
+
34
+ num_vecs = 100000
35
+ arr = []
36
+ num_vecs.times { arr.push(rand, rand, rand, 0.0) }
37
+ output = OutputBuffer.new(:float, arr.size)
38
+
39
+
40
+ Benchmark.bmbm do |x|
41
+ x.report("cpu") { norm_all(arr) }
42
+ x.report("gpu") { prog.norm(output, arr, num_vecs) }
43
+ end
44
+
@@ -0,0 +1,30 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../ext')
2
+
3
+ require 'barracuda'
4
+ require 'benchmark'
5
+
6
+ include Barracuda
7
+
8
+ prog = Program.new <<-'eof'
9
+ __kernel sort(__global int *out, __global int *in, int total) {
10
+ int i, final_index = 0, extra = 0;
11
+ int id = get_global_id(0);
12
+ if (id >= total) return;
13
+ int my_value = in[id];
14
+ for (i = 0; i < total; i++) {
15
+ if (in[i] < my_value) final_index++;
16
+ if (in[i] == my_value && i < id) extra++;
17
+ }
18
+ out[final_index+extra] = my_value;
19
+ }
20
+ eof
21
+
22
+ max = 1000
23
+ arr = (1..max).map { (rand * max).to_i }
24
+ output = OutputBuffer.new(:int, arr.size)
25
+
26
+ Benchmark.bm do |x|
27
+ x.report("cpu") { arr.sort }
28
+ x.report("gpu") { prog.sort(output, arr, arr.size) }
29
+ end
30
+
@@ -18,7 +18,7 @@ output = OutputBuffer.new(:float, arr.size)
18
18
 
19
19
  TIMES = 1
20
20
  Benchmark.bmbm do |x|
21
- x.report("cpu") { TIMES.times { arr.map {|x| (x.to_f + 0.5) / 3.8 + 2.0 } } }
22
- x.report("gpu") { TIMES.times { prog.sum(output, input, arr.size); output.clear } }
21
+ x.report("regular") { TIMES.times { arr.map {|x| (x.to_f + 0.5) / 3.8 + 2.0 } } }
22
+ x.report("opencl") { TIMES.times { prog.sum(output, input, arr.size); output.clear } }
23
23
  end
24
24
 
@@ -7,8 +7,31 @@ static VALUE rb_cOutputBuffer;
7
7
  static VALUE rb_cProgram;
8
8
  static VALUE rb_eProgramSyntaxError;
9
9
  static VALUE rb_eOpenCLError;
10
-
11
- static ID ba_worker_size;
10
+ static VALUE rb_cType;
11
+ static VALUE rb_hTypes;
12
+
13
+ static ID id_times;
14
+ static ID id_to_sym;
15
+ static ID id_data_type;
16
+ static ID id_object;
17
+
18
+ static ID id_type_bool;
19
+ static ID id_type_char;
20
+ static ID id_type_uchar;
21
+ static ID id_type_short;
22
+ static ID id_type_ushort;
23
+ static ID id_type_int;
24
+ static ID id_type_uint;
25
+ static ID id_type_long;
26
+ static ID id_type_ulong;
27
+ static ID id_type_float;
28
+ static ID id_type_half;
29
+ static ID id_type_double;
30
+ static ID id_type_size_t;
31
+ static ID id_type_ptrdiff_t;
32
+ static ID id_type_intptr_t;
33
+ static ID id_type_uintptr_t;
34
+ /*static ID id_type_void;*/
12
35
 
13
36
  static VALUE program_compile(VALUE self, VALUE source);
14
37
  static VALUE buffer_data_set(VALUE self, VALUE new_value);
@@ -17,9 +40,7 @@ static cl_device_id device_id = NULL;
17
40
  static cl_context context = NULL;
18
41
  static int err;
19
42
 
20
- #define BUFFER_TYPE_FLOAT 0x0001
21
- #define BUFFER_TYPE_INT 0x0002
22
- #define BUFFER_TYPE_CHAR 0x0003
43
+ #define VERSION_STRING "1.1"
23
44
 
24
45
  struct program {
25
46
  cl_program program;
@@ -31,13 +52,78 @@ struct kernel {
31
52
 
32
53
  struct buffer {
33
54
  VALUE arr;
34
- unsigned int type;
55
+ ID type;
35
56
  size_t num_items;
36
57
  size_t member_size;
37
58
  void *cachebuf;
38
59
  cl_mem data;
39
60
  };
40
61
 
62
+ static VALUE
63
+ data_type_set(VALUE self, VALUE value)
64
+ {
65
+ if (TYPE(value) != T_SYMBOL) {
66
+ value = rb_str_intern(rb_String(value));
67
+ }
68
+ if (rb_hash_aref(rb_hTypes, value) == Qnil) {
69
+ rb_raise(rb_eArgError, "invalid data type %s",
70
+ RSTRING_PTR(rb_inspect(value)));
71
+ }
72
+
73
+ rb_ivar_set(self, id_data_type, value);
74
+ return self;
75
+ }
76
+
77
+ static VALUE
78
+ data_type_get(VALUE self, ID type)
79
+ {
80
+ VALUE value = rb_ivar_get(self, id_data_type);
81
+ if (NIL_P(value)) {
82
+ value = ID2SYM(type);
83
+ data_type_set(self, value);
84
+ }
85
+ return value;
86
+ }
87
+
88
+ static VALUE
89
+ object_data_type_get(VALUE self)
90
+ {
91
+ return rb_ivar_get(self, id_data_type);
92
+ }
93
+
94
+ static VALUE
95
+ fixnum_data_type_get(VALUE self)
96
+ {
97
+ return ID2SYM(id_type_int);
98
+ }
99
+
100
+ static VALUE
101
+ bignum_data_type_get(VALUE self)
102
+ {
103
+ return data_type_get(self, id_type_long);
104
+ }
105
+
106
+ static VALUE
107
+ float_data_type_get(VALUE self)
108
+ {
109
+ return data_type_get(self, id_type_float);
110
+ }
111
+
112
+ static VALUE
113
+ array_data_type_get(VALUE self)
114
+ {
115
+ VALUE value = rb_ivar_get(self, id_data_type);
116
+ if (RTEST(value)) return value;
117
+
118
+ if (RARRAY_LEN(self) > 0) {
119
+ VALUE value = rb_funcall(RARRAY_PTR(self)[0], id_data_type, 0);
120
+ if (RTEST(value)) return value;
121
+ }
122
+
123
+ rb_raise(rb_eRuntimeError, "unknown buffer data in array %s",
124
+ RSTRING_PTR(rb_inspect(self)));
125
+ }
126
+
41
127
  #define GET_PROGRAM() \
42
128
  struct program *program; \
43
129
  Data_Get_Struct(self, struct program, program);
@@ -46,28 +132,143 @@ struct buffer {
46
132
  struct buffer *buffer; \
47
133
  Data_Get_Struct(self, struct buffer, buffer);
48
134
 
135
+ #define TYPE_SET(type, size) \
136
+ id_type_##type = rb_intern(#type); \
137
+ rb_hash_aset(rb_hTypes, ID2SYM(id_type_##type), INT2FIX(sizeof(size)));
138
+
139
+ #define TYPE_TO_NATIVE(type_name, cast_type, CONVERT_FUNC) \
140
+ if (id_type_##type_name == data_type) { \
141
+ *((cast_type*)native_value) = (cast_type)CONVERT_FUNC(value); \
142
+ return; \
143
+ }
144
+
145
+ #define TYPE_TO_RUBY(type_name, cast_type, CONVERT_FUNC) \
146
+ if (id_type_##type_name == data_type) { \
147
+ return CONVERT_FUNC(*((cast_type*)native_value)); \
148
+ }
149
+
49
150
  static void
50
- init_opencl()
151
+ types_hash_init()
51
152
  {
52
- if (device_id == NULL) {
53
- err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
54
- if (err != CL_SUCCESS) {
55
- rb_raise(rb_eOpenCLError, "failed to create a device group");
56
- }
57
- }
153
+ TYPE_SET(bool, char);
154
+ TYPE_SET(char, cl_char);
155
+ TYPE_SET(uchar, cl_uchar);
156
+ TYPE_SET(short, cl_short);
157
+ TYPE_SET(ushort, cl_ushort);
158
+ TYPE_SET(int, cl_int);
159
+ TYPE_SET(uint, cl_uint);
160
+ TYPE_SET(long, cl_long);
161
+ TYPE_SET(ulong, cl_ulong);
162
+ TYPE_SET(float, cl_float);
163
+ TYPE_SET(half, cl_half);
164
+ TYPE_SET(double, cl_double);
165
+ TYPE_SET(size_t, size_t);
166
+ TYPE_SET(ptrdiff_t, ptrdiff_t);
167
+ TYPE_SET(intptr_t, intptr_t);
168
+ TYPE_SET(uintptr_t, uintptr_t);
169
+ OBJ_FREEZE(rb_hTypes);
170
+ }
58
171
 
59
- if (context == NULL) {
60
- context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
61
- if (!context) {
62
- rb_raise(rb_eOpenCLError, "failed to create a program context");
172
+ static void
173
+ type_to_native(VALUE value, ID data_type, void *native_value)
174
+ {
175
+ if (id_type_char == data_type || id_type_uchar == data_type) {
176
+ if (TYPE(value) == T_FIXNUM) {
177
+ value = rb_funcall(value, rb_intern("chr"), 0);
63
178
  }
179
+ *((cl_char *)native_value) = RSTRING_PTR(value)[0];
180
+ return;
181
+ }
182
+ if (id_type_float == data_type || id_type_double == data_type) {
183
+ *((cl_float *)native_value) = TYPE(value) == T_FIXNUM ?
184
+ (cl_float)FIX2INT(value) : RFLOAT_VALUE(value);
185
+ return;
186
+ }
187
+ if (id_type_half == data_type) {
188
+ *((cl_half *)native_value) = TYPE(value) == T_FIXNUM ?
189
+ (cl_half)FIX2INT(value) : RFLOAT_VALUE(value);
190
+ return;
64
191
  }
192
+
193
+ TYPE_TO_NATIVE(bool, char, FIX2INT);
194
+ TYPE_TO_NATIVE(short, cl_short, FIX2INT);
195
+ TYPE_TO_NATIVE(ushort, cl_ushort, NUM2UINT);
196
+ TYPE_TO_NATIVE(int, cl_int, FIX2INT);
197
+ TYPE_TO_NATIVE(uint, cl_uint, NUM2UINT);
198
+ TYPE_TO_NATIVE(long, cl_long, NUM2LONG);
199
+ TYPE_TO_NATIVE(ulong, cl_ulong, NUM2ULONG);
200
+ TYPE_TO_NATIVE(double, cl_double, NUM2DBL);
201
+ TYPE_TO_NATIVE(size_t, size_t, NUM2UINT);
202
+ TYPE_TO_NATIVE(ptrdiff_t, ptrdiff_t, NUM2UINT);
203
+ TYPE_TO_NATIVE(intptr_t, intptr_t, NUM2UINT);
204
+ TYPE_TO_NATIVE(uintptr_t, uintptr_t, NUM2UINT);
205
+ }
206
+
207
+ static VALUE
208
+ type_to_ruby(void *native_value, ID data_type)
209
+ {
210
+ TYPE_TO_RUBY(bool, char, INT2FIX);
211
+ TYPE_TO_RUBY(char, cl_char, INT2FIX);
212
+ TYPE_TO_RUBY(uchar, cl_uchar, UINT2NUM);
213
+ TYPE_TO_RUBY(short, cl_short, INT2FIX);
214
+ TYPE_TO_RUBY(ushort, cl_ushort, UINT2NUM);
215
+ TYPE_TO_RUBY(int, cl_int, INT2FIX);
216
+ TYPE_TO_RUBY(uint, cl_uint, UINT2NUM);
217
+ TYPE_TO_RUBY(long, cl_long, LONG2NUM);
218
+ TYPE_TO_RUBY(ulong, cl_ulong, ULONG2NUM);
219
+ TYPE_TO_RUBY(float, cl_float, rb_float_new);
220
+ TYPE_TO_RUBY(half, cl_half, rb_float_new);
221
+ TYPE_TO_RUBY(double, cl_double, DBL2NUM);
222
+ TYPE_TO_RUBY(size_t, size_t, UINT2NUM);
223
+ TYPE_TO_RUBY(ptrdiff_t, ptrdiff_t, UINT2NUM);
224
+ TYPE_TO_RUBY(intptr_t, intptr_t, UINT2NUM);
225
+ TYPE_TO_RUBY(uintptr_t, uintptr_t, UINT2NUM);
226
+ return Qnil;
227
+ }
228
+
229
+ static VALUE
230
+ type_initialize(VALUE self, VALUE object)
231
+ {
232
+ rb_ivar_set(self, id_object, object);
233
+ return self;
234
+ }
235
+
236
+ static VALUE
237
+ type_method_missing(VALUE self, VALUE type)
238
+ {
239
+ data_type_set(self, type);
240
+ return self;
241
+ }
242
+
243
+ static VALUE
244
+ type_object(VALUE self)
245
+ {
246
+ return rb_ivar_get(self, id_object);
247
+ }
248
+
249
+ static VALUE
250
+ object_to_type(VALUE self, VALUE type)
251
+ {
252
+ rb_ivar_set(self, id_data_type, type);
253
+ return self;
254
+ }
255
+
256
+ static VALUE
257
+ fixnum_to_type(VALUE self, VALUE type)
258
+ {
259
+ VALUE out = rb_funcall(rb_cType, rb_intern("new"), 1, self);
260
+ return type_method_missing(out, type);
261
+ }
262
+
263
+ static VALUE
264
+ type_new(VALUE klass, VALUE type)
265
+ {
266
+ return rb_funcall(rb_cType, rb_intern("new"), 1, type);
65
267
  }
66
268
 
67
269
  static void
68
270
  free_buffer(struct buffer *buffer)
69
271
  {
70
- fflush(stdout);
71
272
  clReleaseMemObject(buffer->data);
72
273
  rb_gc_mark(buffer->arr);
73
274
  ruby_xfree(buffer->cachebuf);
@@ -88,26 +289,15 @@ static void
88
289
  buffer_update_cache_info(struct buffer *buffer)
89
290
  {
90
291
  buffer->num_items = RARRAY_LEN(buffer->arr);
91
-
92
- switch (TYPE(RARRAY_PTR(buffer->arr)[0])) {
93
- case T_FIXNUM:
94
- buffer->type = BUFFER_TYPE_INT;
95
- buffer->member_size = sizeof(int);
96
- break;
97
- case T_FLOAT:
98
- buffer->type = BUFFER_TYPE_FLOAT;
99
- buffer->member_size = sizeof(float);
100
- break;
101
- default:
102
- rb_raise(rb_eRuntimeError, "invalid buffer data %s",
103
- RSTRING_PTR(rb_inspect(buffer->arr)));
104
- }
292
+ buffer->type = SYM2ID(rb_funcall(buffer->arr, id_data_type, 0));
293
+ buffer->member_size = FIX2INT(rb_hash_aref(rb_hTypes, ID2SYM(buffer->type)));
105
294
  }
106
295
 
107
296
  static VALUE
108
297
  buffer_write(VALUE self)
109
298
  {
110
- unsigned int i;
299
+ unsigned int i, index;
300
+ unsigned long data_ptr[16]; // data buffer
111
301
 
112
302
  GET_BUFFER();
113
303
 
@@ -118,22 +308,11 @@ buffer_write(VALUE self)
118
308
  }
119
309
  buffer->cachebuf = malloc(buffer->num_items * buffer->member_size);
120
310
 
121
- for (i = 0; i < RARRAY_LEN(buffer->arr); i++) {
311
+ for (i = 0, index = 0; i < RARRAY_LEN(buffer->arr); i++, index += buffer->member_size) {
122
312
  VALUE item = RARRAY_PTR(buffer->arr)[i];
123
- switch (buffer->type) {
124
- case BUFFER_TYPE_INT: {
125
- int value = FIX2INT(item);
126
- ((int *)buffer->cachebuf)[i] = value;
127
- break;
128
- }
129
- case BUFFER_TYPE_FLOAT: {
130
- float value = RFLOAT_VALUE(item);
131
- ((float *)buffer->cachebuf)[i] = value;
132
- break;
133
- }
134
- default:
135
- ((uint32_t *)buffer->cachebuf)[i] = 0;
136
- }
313
+
314
+ type_to_native(item, buffer->type, (void *)data_ptr);
315
+ memcpy(((int8_t*)buffer->cachebuf) + index, (void *)data_ptr, buffer->member_size);
137
316
  }
138
317
 
139
318
  return self;
@@ -142,24 +321,16 @@ buffer_write(VALUE self)
142
321
  static VALUE
143
322
  buffer_read(VALUE self)
144
323
  {
145
- unsigned int i;
324
+ unsigned int i, index;
146
325
 
147
326
  GET_BUFFER();
148
327
 
149
328
  rb_gc_mark(buffer->arr);
150
329
  buffer->arr = rb_ary_new2(buffer->num_items);
151
330
 
152
- for (i = 0; i < buffer->num_items; i++) {
153
- switch (buffer->type) {
154
- case BUFFER_TYPE_INT:
155
- rb_ary_push(buffer->arr, INT2FIX(((int *)buffer->cachebuf)[i]));
156
- break;
157
- case BUFFER_TYPE_FLOAT:
158
- rb_ary_push(buffer->arr, rb_float_new(((float *)buffer->cachebuf)[i]));
159
- break;
160
- default:
161
- rb_ary_push(buffer->arr, Qnil);
162
- }
331
+ for (i = 0, index = 0; i < buffer->num_items; i++, index += buffer->member_size) {
332
+ VALUE value = type_to_ruby(((int8_t*)buffer->cachebuf) + index, buffer->type);
333
+ rb_ary_push(buffer->arr, value);
163
334
  }
164
335
 
165
336
  return self;
@@ -224,25 +395,21 @@ buffer_initialize(int argc, VALUE *argv, VALUE self)
224
395
  static VALUE
225
396
  obuffer_initialize(VALUE self, VALUE type, VALUE size)
226
397
  {
398
+ VALUE type_sym, member_size;
227
399
  GET_BUFFER();
228
400
 
229
- StringValue(type);
230
- if (strcmp(RSTRING_PTR(type), "float") == 0) {
231
- buffer->type = BUFFER_TYPE_FLOAT;
232
- buffer->member_size = sizeof(float);
233
- }
234
- else if (strcmp(RSTRING_PTR(type), "int") == 0) {
235
- buffer->type = BUFFER_TYPE_INT;
236
- buffer->member_size = sizeof(int);
237
- }
238
- else {
239
- rb_raise(rb_eArgError, "type can only be :float or :int");
401
+ type_sym = rb_funcall(type, id_to_sym, 0);
402
+ member_size = rb_hash_aref(rb_hTypes, type_sym);
403
+ if (NIL_P(member_size)) {
404
+ rb_raise(rb_eArgError, "type can only be one of %s",
405
+ RSTRING_PTR(rb_inspect(rb_funcall(rb_hTypes, rb_intern("keys"), 0))));
240
406
  }
241
-
242
407
  if (TYPE(size) != T_FIXNUM) {
243
408
  rb_raise(rb_eArgError, "expecting buffer size as argument 2");
244
409
  }
245
410
 
411
+ buffer->type = SYM2ID(type_sym);
412
+ buffer->member_size = FIX2INT(member_size);
246
413
  buffer->num_items = FIX2UINT(size);
247
414
  buffer->cachebuf = malloc(buffer->num_items * buffer->member_size);
248
415
  buffer->data = clCreateBuffer(context, CL_MEM_READ_WRITE,
@@ -355,62 +522,79 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
355
522
 
356
523
  commands = clCreateCommandQueue(context, device_id, 0, &err);
357
524
  if (!commands) {
525
+ clReleaseKernel(kernel);
358
526
  rb_raise(rb_eOpenCLError, "could not execute kernel method '%s'", RSTRING_PTR(argv[0]));
359
527
  }
360
528
 
361
529
  for (i = 1; i < argc; i++) {
362
- err = 0;
363
- if (i == argc - 1 && TYPE(argv[i]) == T_HASH) {
364
- VALUE worker_size = rb_hash_aref(argv[i], ID2SYM(ba_worker_size));
530
+ VALUE item = argv[i];
531
+ err = !CL_SUCCESS;
532
+
533
+ if (i == argc - 1 && TYPE(item) == T_HASH) {
534
+ VALUE worker_size = rb_hash_aref(item, ID2SYM(id_times));
365
535
  if (RTEST(worker_size) && TYPE(worker_size) == T_FIXNUM) {
366
536
  global = FIX2UINT(worker_size);
367
537
  }
368
538
  else {
369
539
  CLEAN();
370
- rb_raise(rb_eArgError, "opts hash must be {:worker_size => INT_VALUE}, got %s",
371
- RSTRING_PTR(rb_inspect(argv[i])));
540
+ rb_raise(rb_eArgError, "opts hash must be {:times => INT_VALUE}, got %s",
541
+ RSTRING_PTR(rb_inspect(item)));
372
542
  }
373
543
  break;
374
544
  }
375
545
 
376
- switch(TYPE(argv[i])) {
377
- case T_FIXNUM: {
378
- int value = FIX2INT(argv[i]);
379
- err = clSetKernelArg(kernel, i - 1, sizeof(int), &value);
380
- break;
546
+ if (TYPE(item) == T_ARRAY) {
547
+ /* create buffer from arg */
548
+ VALUE buf = buffer_s_allocate(rb_cBuffer);
549
+ item = buffer_initialize(1, &item, buf);
550
+ }
551
+
552
+ if (CLASS_OF(item) == rb_cOutputBuffer) {
553
+ struct buffer *buffer;
554
+ Data_Get_Struct(item, struct buffer, buffer);
555
+ err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
556
+ if (buffer->num_items > global) {
557
+ global = buffer->num_items;
381
558
  }
382
- case T_FLOAT: {
383
- float value = RFLOAT_VALUE(argv[i]);
384
- err = clSetKernelArg(kernel, i - 1, sizeof(float), &value);
385
- break;
559
+ }
560
+ else if (CLASS_OF(item) == rb_cBuffer) {
561
+ struct buffer *buffer;
562
+ Data_Get_Struct(item, struct buffer, buffer);
563
+
564
+ buffer_write(item);
565
+ clEnqueueWriteBuffer(commands, buffer->data, CL_TRUE, 0,
566
+ buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
567
+ err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
568
+ if (buffer->num_items > global) {
569
+ global = buffer->num_items;
386
570
  }
387
- case T_ARRAY: {
388
- /* TODO */
389
- /* fall-through */
571
+ }
572
+ else {
573
+ unsigned long data_ptr[16]; // a buffer of data
574
+ size_t data_size_t;
575
+ VALUE data_type, data_size;
576
+
577
+ if (CLASS_OF(item) == rb_cType) {
578
+ data_type = rb_funcall(type_object(item), id_data_type, 0);
579
+ }
580
+ else {
581
+ data_type = rb_funcall(item, id_data_type, 0);
390
582
  }
391
- default:
392
- if (CLASS_OF(argv[i]) == rb_cOutputBuffer) {
393
- struct buffer *buffer;
394
- Data_Get_Struct(argv[i], struct buffer, buffer);
395
- err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
396
- if (buffer->num_items > global) {
397
- global = buffer->num_items;
398
- }
399
- }
400
- else if (CLASS_OF(argv[i]) == rb_cBuffer) {
401
- struct buffer *buffer;
402
- Data_Get_Struct(argv[i], struct buffer, buffer);
403
-
404
- buffer_write(argv[i]);
405
- clEnqueueWriteBuffer(commands, buffer->data, CL_TRUE, 0,
406
- buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
407
- err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
408
- }
409
- break;
583
+ data_size = rb_hash_aref(rb_hTypes, data_type);
584
+ if (NIL_P(data_size)) {
585
+ CLEAN();
586
+ rb_raise(rb_eRuntimeError, "invalid data type for %s",
587
+ RSTRING_PTR(rb_inspect(item)));
588
+ }
589
+
590
+ data_size_t = FIX2UINT(data_size);
591
+ type_to_native(item, SYM2ID(data_type), (void *)data_ptr);
592
+ err = clSetKernelArg(kernel, i - 1, data_size_t, data_ptr);
410
593
  }
594
+
411
595
  if (err != CL_SUCCESS) {
412
596
  CLEAN();
413
- rb_raise(rb_eArgError, "invalid kernel method parameter: %s", RSTRING_PTR(rb_inspect(argv[i])));
597
+ rb_raise(rb_eArgError, "invalid kernel method parameter: %s", RSTRING_PTR(rb_inspect(item)));
414
598
  }
415
599
  }
416
600
 
@@ -430,13 +614,14 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
430
614
  clFinish(commands);
431
615
 
432
616
  for (i = 1; i < argc; i++) {
433
- if (CLASS_OF(argv[i]) == rb_cOutputBuffer) {
617
+ VALUE item = argv[i];
618
+ if (CLASS_OF(item) == rb_cOutputBuffer) {
434
619
  struct buffer *buffer;
435
- Data_Get_Struct(argv[i], struct buffer, buffer);
620
+ Data_Get_Struct(item, struct buffer, buffer);
436
621
  err = clEnqueueReadBuffer(commands, buffer->data, CL_TRUE, 0,
437
622
  buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
438
623
  ERROR("failed to read output buffer");
439
- buffer_read(argv[i]);
624
+ buffer_read(item);
440
625
  }
441
626
  }
442
627
 
@@ -444,12 +629,39 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
444
629
  return Qnil;
445
630
  }
446
631
 
632
+ static void
633
+ init_opencl()
634
+ {
635
+ if (device_id == NULL) {
636
+ err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
637
+ if (err != CL_SUCCESS) {
638
+ rb_raise(rb_eOpenCLError, "failed to create a device group");
639
+ }
640
+ }
641
+
642
+ if (context == NULL) {
643
+ context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
644
+ if (!context) {
645
+ rb_raise(rb_eOpenCLError, "failed to create a program context");
646
+ }
647
+ }
648
+ }
649
+
447
650
  void
448
651
  Init_barracuda()
449
652
  {
450
- ba_worker_size = rb_intern("worker_size");
653
+ id_times = rb_intern("times");
654
+ id_to_sym = rb_intern("to_sym");
655
+ id_data_type = rb_intern("data_type");
656
+ id_object = rb_intern("object");
657
+
658
+ rb_hTypes = rb_hash_new();
659
+ rb_define_method(rb_mKernel, "Type", type_new, 1);
660
+ types_hash_init();
451
661
 
452
662
  rb_mBarracuda = rb_define_module("Barracuda");
663
+ rb_define_const(rb_mBarracuda, "VERSION", rb_str_new2(VERSION_STRING));
664
+ rb_define_const(rb_mBarracuda, "TYPES", rb_hTypes);
453
665
 
454
666
  rb_eProgramSyntaxError = rb_define_class_under(rb_mBarracuda, "SyntaxError", rb_eSyntaxError);
455
667
  rb_eOpenCLError = rb_define_class_under(rb_mBarracuda, "OpenCLError", rb_eStandardError);
@@ -476,6 +688,19 @@ Init_barracuda()
476
688
  rb_undef_method(rb_cOutputBuffer, "write");
477
689
  rb_undef_method(rb_cOutputBuffer, "size_changed");
478
690
  rb_undef_method(rb_cOutputBuffer, "data=");
479
-
691
+
692
+ rb_cType = rb_define_class_under(rb_mBarracuda, "Type", rb_cObject);
693
+ rb_define_method(rb_cType, "initialize", type_initialize, 1);
694
+ rb_define_method(rb_cType, "method_missing", type_method_missing, 1);
695
+ rb_define_method(rb_cType, "object", type_object, 0);
696
+
697
+ rb_define_method(rb_cObject, "to_type", object_to_type, 1);
698
+ rb_define_method(rb_cFixnum, "to_type", fixnum_to_type, 1);
699
+ rb_define_method(rb_cObject, "data_type", object_data_type_get, 0);
700
+ rb_define_method(rb_cArray, "data_type", array_data_type_get, 0);
701
+ rb_define_method(rb_cFixnum, "data_type", fixnum_data_type_get, 0);
702
+ rb_define_method(rb_cBignum, "data_type", bignum_data_type_get, 0);
703
+ rb_define_method(rb_cFloat, "data_type", float_data_type_get, 0);
704
+
480
705
  init_opencl();
481
706
  }
@@ -5,6 +5,55 @@ require "barracuda"
5
5
 
6
6
  include Barracuda
7
7
 
8
+ class TestDataTypes < Test::Unit::TestCase
9
+ def test_default_fixnum_type
10
+ assert_equal :int, 2.data_type
11
+ end
12
+
13
+ def test_default_bignum_type
14
+ assert_equal :long, (2**64).data_type
15
+ end
16
+
17
+ def test_default_float_type
18
+ assert_equal :float, 2.5.data_type
19
+ end
20
+
21
+ def test_default_array_type
22
+ assert_equal :int, [2].data_type
23
+ assert_equal :float, [2.5, 2.6].data_type
24
+ end
25
+
26
+ def test_set_data_type_fixnum
27
+ assert_equal :char, 2.to_type(:char).data_type
28
+ assert_equal :int, 2.data_type
29
+ end
30
+
31
+ def test_set_data_type
32
+ [2**64, 2.5, [2]].each do |v|
33
+ assert_equal :char, v.to_type(:char).data_type
34
+ end
35
+ end
36
+
37
+ def test_set_invalid_data_type
38
+ assert_raise(ArgumentError) { 1.to_type(:unknown) }
39
+ end
40
+
41
+ def test_invalid_array_data_type
42
+ assert_raise(RuntimeError) { [Object.new].data_type }
43
+ assert_raise(RuntimeError) { ['x'].data_type }
44
+ assert_raise(RuntimeError) { [].data_type }
45
+ end
46
+
47
+ def test_object_data_type
48
+ assert_nil Object.new.data_type
49
+ end
50
+
51
+ def test_type_class
52
+ assert_equal :long, Type.new(1).long.data_type
53
+ assert_equal :uchar, Type(1).uchar.data_type
54
+ end
55
+ end
56
+
8
57
  class TestBuffer < Test::Unit::TestCase
9
58
  def test_buffer_create_no_data
10
59
  assert_raise(ArgumentError) { Buffer.new }
@@ -55,18 +104,14 @@ class TestBuffer < Test::Unit::TestCase
55
104
  end
56
105
 
57
106
  class TestOutputBuffer < Test::Unit::TestCase
58
- def test_create_int_output_buffer
59
- b = OutputBuffer.new(:int, 5)
60
- assert_equal 5, b.size
61
- end
62
-
63
- def test_create_int_output_buffer
64
- b = OutputBuffer.new(:float, 5)
65
- assert_equal 5, b.size
107
+ def test_create_output_buffer_valid_types
108
+ TYPES.keys.each do |type|
109
+ assert_nothing_raised { OutputBuffer.new(type.to_s, 5) }
110
+ end
66
111
  end
67
112
 
68
113
  def test_create_output_buffer_with_invalid_type
69
- assert_raise(ArgumentError) { OutputBuffer.new(:char, 5) }
114
+ assert_raise(ArgumentError) { OutputBuffer.new(:CHAR, 5) }
70
115
  end
71
116
 
72
117
  def test_create_output_buffer_with_invalid_size
@@ -98,6 +143,46 @@ class TestProgram < Test::Unit::TestCase
98
143
  assert_raise(NoMethodError) { p.not_x_y_z }
99
144
  end
100
145
 
146
+ def test_program_implicit_array_buffer
147
+ p = Program.new <<-'eof'
148
+ __kernel copy(__global int *out, __global int *in, int total) {
149
+ int i = get_global_id(0);
150
+ if (i < total) out[i] = in[i] + 1;
151
+ }
152
+ eof
153
+
154
+ out = OutputBuffer.new(:int, 3)
155
+ p.copy(out, [1, 2, 3], 3)
156
+ assert_equal [2, 3, 4], out.data
157
+ end
158
+
159
+ def test_program_types
160
+ arr = (1..5).to_a
161
+ outarr = arr.map {|x| x + 1 }
162
+ p = Program.new
163
+
164
+ TYPES.keys.each do |type|
165
+ # FIXME These types are currently broken (unimplemented in opencl?)
166
+ next if type == :bool
167
+ next if type == :double
168
+ next if type == :size_t
169
+ next if type == :ptrdiff_t
170
+ next if type == :intptr_t
171
+ next if type == :uintptr_t
172
+
173
+ p.compile <<-eof
174
+ __kernel run(__global #{type} *out, __global #{type} *in, int total) {
175
+ int id = get_global_id(0);
176
+ if (id < total) out[id] = in[id] + 1;
177
+ }
178
+ eof
179
+
180
+ out = OutputBuffer.new(type, arr.size)
181
+ p.run(out, arr.to_type(type), arr.size)
182
+ assert_equal({type => outarr}, {type => out.data})
183
+ end
184
+ end
185
+
101
186
  def test_program_int_input_buffer
102
187
  p = Program.new <<-'eof'
103
188
  __kernel run(__global int* out, __global int* in, int total) {
@@ -128,7 +213,7 @@ class TestProgram < Test::Unit::TestCase
128
213
  assert_equal arr.map {|x| x.to_f + 0.5 }, out.data
129
214
  end
130
215
 
131
- def test_program_set_worker_size
216
+ def test_program_set_times
132
217
  p = Program.new <<-'eof'
133
218
  __kernel sum(__global int* out, __global int* in, int total) {
134
219
  int id = get_global_id(0);
@@ -140,7 +225,7 @@ class TestProgram < Test::Unit::TestCase
140
225
  sum = arr.inject(0) {|acc, el| acc + el }
141
226
  _in = Buffer.new(arr)
142
227
  out = OutputBuffer.new(:int, 1)
143
- p.sum(out, _in, arr.size, :worker_size => arr.size)
228
+ p.sum(out, _in, arr.size, :times => arr.size)
144
229
  assert_equal sum, out.data[0]
145
230
  end
146
231
 
@@ -160,10 +245,10 @@ class TestProgram < Test::Unit::TestCase
160
245
  assert_equal sum, out.data[0]
161
246
  end
162
247
 
163
- def test_program_invalid_worker_size
248
+ def test_program_invalid_times
164
249
  p = Program.new("__kernel sum(int x) { }")
165
- assert_raise(ArgumentError) { p.sum(:worker_size => "hello") }
166
- assert_raise(ArgumentError) { p.sum(:worker => 1) }
250
+ assert_raise(ArgumentError) { p.sum(:times => "hello") }
251
+ assert_raise(ArgumentError) { p.sum(:time => 1) }
167
252
  end
168
253
 
169
254
  def test_program_invalid_args
@@ -171,4 +256,19 @@ class TestProgram < Test::Unit::TestCase
171
256
  assert_raise(ArgumentError) { p.sum(1, 2) }
172
257
  assert_raise(ArgumentError) { p.sum(1, OutputBuffer.new(:int, 1), 3) }
173
258
  end
259
+
260
+ def test_program_vectors
261
+ p = Program.new <<-'eof'
262
+ __kernel copy_to_out(__global float4 *out, __global float4 *vec) {
263
+ out[0].x = vec[0].x + 0.5;
264
+ out[0].y = vec[0].y + 0.5;
265
+ out[0].z = vec[0].z + 0.5;
266
+ out[0].w = vec[0].w + 0.5;
267
+ }
268
+ eof
269
+
270
+ out = OutputBuffer.new(:float, 4)
271
+ p.copy_to_out(out, [2.5, 2.5, 2.5, 2.5])
272
+ assert_equal [3, 3, 3, 3], out.data
273
+ end
174
274
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: barracuda
3
3
  version: !ruby/object:Gem::Version
4
- version: "1.0"
4
+ version: "1.1"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Loren Segal
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-30 00:00:00 -04:00
12
+ date: 2009-09-02 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -24,6 +24,8 @@ extra_rdoc_files: []
24
24
  files:
25
25
  - ext/barracuda.c
26
26
  - ext/extconf.rb
27
+ - benchmarks/normalize.rb
28
+ - benchmarks/sort.rb
27
29
  - benchmarks/to_float.rb
28
30
  - test/test_barracuda.rb
29
31
  - LICENSE