barracuda 1.1 → 1.2

This diff shows the changes between two publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -58,16 +58,13 @@ EXAMPLE
58
58
  Consider the following example to sum a bunch of integers:
59
59
 
60
60
  program = Program.new <<-'eof'
61
- __kernel sum(__global int *out, __global int *in, int total) {
62
- int id = get_global_id(0);
63
- if (id < total) atom_add(&out[0], in[id]);
61
+ __kernel sum(__global int *in, __global int *out) {
62
+ atom_add(out, in[get_global_id(0)]);
64
63
  }
65
64
  eof
66
65
 
67
- arr = (1..65536).to_a
68
- input = Buffer.new(arr)
69
66
  output = OutputBuffer.new(:int, 1)
70
- program.sum(output, input, arr.size)
67
+ program.sum((1..65536).to_a, output)
71
68
 
72
69
  puts "The sum is: " + output.data[0].to_s
73
70
 
@@ -86,12 +83,6 @@ manually specify the work group size, call the kernel with an options hash:
86
83
 
87
84
  program.my_kernel_method(..., :times => 512)
88
85
 
89
- Note that the work group size must be a power of 2. Barracuda will increase
90
- the work group size to the next power of 2 if it needs to. This means your
91
- OpenCL program might run more iterations of your kernel method than you
92
- request. Because we can't rely on the work group size, we pass in the total
93
- data size to ensure we do not exceed the bounds of our data.
94
-
95
86
  CONVERTING TYPES
96
87
  ----------------
97
88
 
@@ -31,12 +31,11 @@ prog = Program.new <<-'eof'
31
31
  }
32
32
  eof
33
33
 
34
- num_vecs = 100000
34
+ num_vecs = 1000000
35
35
  arr = []
36
36
  num_vecs.times { arr.push(rand, rand, rand, 0.0) }
37
37
  output = OutputBuffer.new(:float, arr.size)
38
38
 
39
-
40
39
  Benchmark.bmbm do |x|
41
40
  x.report("cpu") { norm_all(arr) }
42
41
  x.report("gpu") { prog.norm(output, arr, num_vecs) }
@@ -9,7 +9,6 @@ prog = Program.new <<-'eof'
9
9
  __kernel sort(__global int *out, __global int *in, int total) {
10
10
  int i, final_index = 0, extra = 0;
11
11
  int id = get_global_id(0);
12
- if (id >= total) return;
13
12
  int my_value = in[id];
14
13
  for (i = 0; i < total; i++) {
15
14
  if (in[i] < my_value) final_index++;
@@ -19,7 +18,7 @@ prog = Program.new <<-'eof'
19
18
  }
20
19
  eof
21
20
 
22
- max = 1000
21
+ max = 10000
23
22
  arr = (1..max).map { (rand * max).to_i }
24
23
  output = OutputBuffer.new(:int, arr.size)
25
24
 
@@ -6,9 +6,9 @@ require 'benchmark'
6
6
  include Barracuda
7
7
 
8
8
  prog = Program.new <<-'eof'
9
- __kernel sum(__global float *out, __global int *in, int total) {
9
+ __kernel sum(__global float *out, __global int *in) {
10
10
  int i = get_global_id(0);
11
- if (i < total) out[i] = ((float)in[i] + 0.5) / 3.8 + 2.0;
11
+ out[i] = ((float)in[i] + 0.5) / 3.8 + 2.0;
12
12
  }
13
13
  eof
14
14
 
@@ -16,9 +16,8 @@ arr = (1..3333333).to_a
16
16
  input = Buffer.new(arr)
17
17
  output = OutputBuffer.new(:float, arr.size)
18
18
 
19
- TIMES = 1
20
19
  Benchmark.bmbm do |x|
21
- x.report("regular") { TIMES.times { arr.map {|x| (x.to_f + 0.5) / 3.8 + 2.0 } } }
22
- x.report("opencl") { TIMES.times { prog.sum(output, input, arr.size); output.clear } }
20
+ x.report("regular") { arr.map {|x| (x.to_f + 0.5) / 3.8 + 2.0 } }
21
+ x.report("opencl") { prog.sum(output, input) }
23
22
  end
24
23
 
@@ -1,4 +1,5 @@
1
1
  #include <ruby.h>
2
+ #include <math.h>
2
3
  #include <OpenCL/OpenCL.h>
3
4
 
4
5
  static VALUE rb_mBarracuda;
@@ -38,18 +39,15 @@ static VALUE buffer_data_set(VALUE self, VALUE new_value);
38
39
 
39
40
  static cl_device_id device_id = NULL;
40
41
  static cl_context context = NULL;
42
+ static size_t max_work_group_size = 65535;
41
43
  static int err;
42
44
 
43
- #define VERSION_STRING "1.1"
45
+ #define VERSION_STRING "1.2"
44
46
 
45
47
  struct program {
46
48
  cl_program program;
47
49
  };
48
50
 
49
- struct kernel {
50
- cl_kernel kernel;
51
- };
52
-
53
51
  struct buffer {
54
52
  VALUE arr;
55
53
  ID type;
@@ -376,8 +374,6 @@ buffer_data_set(VALUE self, VALUE new_value)
376
374
  static VALUE
377
375
  buffer_initialize(int argc, VALUE *argv, VALUE self)
378
376
  {
379
- GET_BUFFER();
380
-
381
377
  if (argc == 0) {
382
378
  rb_raise(rb_eArgError, "no buffer data given");
383
379
  }
@@ -495,21 +491,13 @@ program_compile(VALUE self, VALUE source)
495
491
  return Qtrue;
496
492
  }
497
493
 
498
- #define CLEAN() program_clean(kernel, commands);
499
- #define ERROR(msg) if (err != CL_SUCCESS) { CLEAN(); rb_raise(rb_eOpenCLError, msg); }
500
-
501
- static void
502
- program_clean(cl_kernel kernel, cl_command_queue commands)
503
- {
504
- clReleaseKernel(kernel);
505
- clReleaseCommandQueue(commands);
506
- }
494
+ #define CLEAN() { clReleaseKernel(kernel); clReleaseCommandQueue(commands); }
507
495
 
508
496
  static VALUE
509
497
  program_method_missing(int argc, VALUE *argv, VALUE self)
510
498
  {
511
499
  int i;
512
- size_t local = 0, global = 0;
500
+ size_t global[3] = {1, 1, 1}, local;
513
501
  cl_kernel kernel;
514
502
  cl_command_queue commands;
515
503
  GET_PROGRAM();
@@ -533,7 +521,7 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
533
521
  if (i == argc - 1 && TYPE(item) == T_HASH) {
534
522
  VALUE worker_size = rb_hash_aref(item, ID2SYM(id_times));
535
523
  if (RTEST(worker_size) && TYPE(worker_size) == T_FIXNUM) {
536
- global = FIX2UINT(worker_size);
524
+ global[0] = FIX2UINT(worker_size);
537
525
  }
538
526
  else {
539
527
  CLEAN();
@@ -553,8 +541,8 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
553
541
  struct buffer *buffer;
554
542
  Data_Get_Struct(item, struct buffer, buffer);
555
543
  err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
556
- if (buffer->num_items > global) {
557
- global = buffer->num_items;
544
+ if (buffer->num_items > global[0]) {
545
+ global[0] = buffer->num_items;
558
546
  }
559
547
  }
560
548
  else if (CLASS_OF(item) == rb_cBuffer) {
@@ -565,8 +553,8 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
565
553
  clEnqueueWriteBuffer(commands, buffer->data, CL_TRUE, 0,
566
554
  buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
567
555
  err = clSetKernelArg(kernel, i - 1, sizeof(cl_mem), &buffer->data);
568
- if (buffer->num_items > global) {
569
- global = buffer->num_items;
556
+ if (buffer->num_items > global[0]) {
557
+ global[0] = buffer->num_items;
570
558
  }
571
559
  }
572
560
  else {
@@ -599,17 +587,16 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
599
587
  }
600
588
 
601
589
  err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &local, NULL);
602
- ERROR("failed to retrieve kernel work group info");
603
-
604
- { /* global work size must be power of 2, greater than 3 and not smaller than local */
605
- size_t size = 4;
606
- while (size < global) size *= 2;
607
- global = size;
608
- if (global < local) global = local;
590
+ err = clEnqueueNDRangeKernel(commands, kernel, 3, NULL, global, NULL, 0, NULL, NULL);
591
+ if (err != CL_SUCCESS) {
592
+ CLEAN();
593
+ if (err == CL_INVALID_KERNEL_ARGS) {
594
+ rb_raise(rb_eArgError, "invalid arguments");
595
+ }
596
+ else {
597
+ rb_raise(rb_eOpenCLError, "failed to execute kernel method %d", err);
598
+ }
609
599
  }
610
-
611
- clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
612
- if (err) { CLEAN(); rb_raise(rb_eOpenCLError, "failed to execute kernel method"); }
613
600
 
614
601
  clFinish(commands);
615
602
 
@@ -620,7 +607,10 @@ program_method_missing(int argc, VALUE *argv, VALUE self)
620
607
  Data_Get_Struct(item, struct buffer, buffer);
621
608
  err = clEnqueueReadBuffer(commands, buffer->data, CL_TRUE, 0,
622
609
  buffer->num_items * buffer->member_size, buffer->cachebuf, 0, NULL, NULL);
623
- ERROR("failed to read output buffer");
610
+ if (err != CL_SUCCESS) {
611
+ CLEAN();
612
+ rb_raise(rb_eOpenCLError, "failed to read output buffer");
613
+ }
624
614
  buffer_read(item);
625
615
  }
626
616
  }
@@ -645,6 +635,10 @@ init_opencl()
645
635
  rb_raise(rb_eOpenCLError, "failed to create a program context");
646
636
  }
647
637
  }
638
+
639
+ clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE,
640
+ sizeof(size_t), &max_work_group_size, NULL);
641
+ max_work_group_size = 4096;
648
642
  }
649
643
 
650
644
  void
@@ -135,7 +135,7 @@ class TestProgram < Test::Unit::TestCase
135
135
 
136
136
  def test_kernel_run
137
137
  p = Program.new("__kernel x_y_z(int x) { }")
138
- assert_nothing_raised { p.x_y_z }
138
+ assert_raise(ArgumentError) { p.x_y_z }
139
139
  end
140
140
 
141
141
  def test_kernel_missing
@@ -217,7 +217,7 @@ class TestProgram < Test::Unit::TestCase
217
217
  p = Program.new <<-'eof'
218
218
  __kernel sum(__global int* out, __global int* in, int total) {
219
219
  int id = get_global_id(0);
220
- if (id < total) atom_add(&out[0], in[id]);
220
+ if (id < total) atom_add(out, in[id]);
221
221
  }
222
222
  eof
223
223
 
@@ -233,7 +233,7 @@ class TestProgram < Test::Unit::TestCase
233
233
  p = Program.new <<-'eof'
234
234
  __kernel sum(__global int* out, __global int* in, int total) {
235
235
  int id = get_global_id(0);
236
- if (id < total) atom_add(&out[0], in[id]);
236
+ if (id < total) atom_add(out, in[id]);
237
237
  }
238
238
  eof
239
239
 
@@ -271,4 +271,21 @@ class TestProgram < Test::Unit::TestCase
271
271
  p.copy_to_out(out, [2.5, 2.5, 2.5, 2.5])
272
272
  assert_equal [3, 3, 3, 3], out.data
273
273
  end
274
+
275
+ def test_program_no_total
276
+ p = Program.new <<-'eof'
277
+ __kernel copy(__global int *out, __global int *in) {
278
+ int i = get_global_id(0);
279
+ out[i] = in[i] + 1;
280
+ }
281
+ eof
282
+
283
+ out = OutputBuffer.new(:int, 3)
284
+ p.copy(out, (1..3).to_a)
285
+ assert_equal (2..4).to_a, out.data
286
+
287
+ out = OutputBuffer.new(:int, 50446)
288
+ p.copy(out, (1..50446).to_a)
289
+ assert_equal (2..50447).to_a, out.data
290
+ end
274
291
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: barracuda
3
3
  version: !ruby/object:Gem::Version
4
- version: "1.1"
4
+ version: "1.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Loren Segal
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-02 00:00:00 -04:00
12
+ date: 2009-09-03 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies: []
15
15