BOAST 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/BOAST.gemspec +2 -31
  3. data/README.md +240 -0
  4. data/lib/BOAST/{OpenMP.rb → Language/OpenMP.rb} +1 -0
  5. data/lib/BOAST/{Variable.rb → Language/Variable.rb} +2 -1
  6. data/lib/BOAST/Runtime/CKernel.rb +94 -0
  7. data/lib/BOAST/Runtime/CRuntime.rb +32 -0
  8. data/lib/BOAST/Runtime/CUDARuntime.rb +158 -0
  9. data/lib/BOAST/Runtime/CompiledRuntime.rb +398 -0
  10. data/lib/BOAST/Runtime/Compilers.rb +205 -0
  11. data/lib/BOAST/Runtime/Config.rb +94 -0
  12. data/lib/BOAST/Runtime/FFIRuntime.rb +104 -0
  13. data/lib/BOAST/Runtime/FORTRANRuntime.rb +45 -0
  14. data/lib/BOAST/Runtime/MPPARuntime.rb +464 -0
  15. data/lib/BOAST/Runtime/NonRegression.rb +157 -0
  16. data/lib/BOAST/Runtime/OpenCLRuntime.rb +181 -0
  17. data/lib/BOAST/Runtime/Probe.rb +136 -0
  18. data/lib/BOAST.rb +37 -26
  19. metadata +40 -28
  20. data/lib/BOAST/CKernel.rb +0 -1236
  21. /data/lib/BOAST/{Algorithm.rb → Language/Algorithm.rb} +0 -0
  22. /data/lib/BOAST/{Arithmetic.rb → Language/Arithmetic.rb} +0 -0
  23. /data/lib/BOAST/{BOAST_OpenCL.rb → Language/BOAST_OpenCL.rb} +0 -0
  24. /data/lib/BOAST/{Case.rb → Language/Case.rb} +0 -0
  25. /data/lib/BOAST/{ControlStructure.rb → Language/ControlStructure.rb} +0 -0
  26. /data/lib/BOAST/{DataTypes.rb → Language/DataTypes.rb} +0 -0
  27. /data/lib/BOAST/{Expression.rb → Language/Expression.rb} +0 -0
  28. /data/lib/BOAST/{For.rb → Language/For.rb} +0 -0
  29. /data/lib/BOAST/{FuncCall.rb → Language/FuncCall.rb} +0 -0
  30. /data/lib/BOAST/{Functors.rb → Language/Functors.rb} +0 -0
  31. /data/lib/BOAST/{If.rb → Language/If.rb} +0 -0
  32. /data/lib/BOAST/{Index.rb → Language/Index.rb} +0 -0
  33. /data/lib/BOAST/{Inspectable.rb → Language/Inspectable.rb} +0 -0
  34. /data/lib/BOAST/{Operators.rb → Language/Operators.rb} +0 -0
  35. /data/lib/BOAST/{Optimization.rb → Language/Optimization.rb} +0 -0
  36. /data/lib/BOAST/{Parens.rb → Language/Parens.rb} +0 -0
  37. /data/lib/BOAST/{Pragma.rb → Language/Pragma.rb} +0 -0
  38. /data/lib/BOAST/{Print.rb → Language/Print.rb} +0 -0
  39. /data/lib/BOAST/{Procedure.rb → Language/Procedure.rb} +0 -0
  40. /data/lib/BOAST/{Slice.rb → Language/Slice.rb} +0 -0
  41. /data/lib/BOAST/{State.rb → Language/State.rb} +0 -0
  42. /data/lib/BOAST/{Transitions.rb → Language/Transitions.rb} +0 -0
  43. /data/lib/BOAST/{While.rb → Language/While.rb} +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78df529b137f3cd4300f47711bc4d620c098a338
4
- data.tar.gz: a0f73edc8256e5ac857a61418f57eecfd87521ee
3
+ metadata.gz: f5eb4ec74e14155954a1ecc7c3004e79088160c4
4
+ data.tar.gz: a4226683ccc72753105a61c2af6a9ddb0ce69bc3
5
5
  SHA512:
6
- metadata.gz: 5bc73c3b16e34b758c819bed3a5f85703093dd32aaa03e48d20783c55f7bab49203fe2c82c48e621fa3f1b1d2ea7873c0e141cd8ad8d4910d07f1ae60037a1be
7
- data.tar.gz: d98656618d9436cbbf75ae1661c70bfc2635b5e38c742e551a9310153afff97c7cf97588ed56c6fadb4e2ffe5fe8efe45e18c3ed781d7810fe643bc6d5b51072
6
+ metadata.gz: 7f1ac4576270947a645a2b8878f771d0c2dde7974d963b6c57a29eda1dc7a48be24003dc6df38f05e6f17fa9689ebf820634c0007495fecbe4c75bfddec8f5f0
7
+ data.tar.gz: fc98fd3ce8322e5c995167f043170ef42b61b49b14409d1c66d7cdea0a019e19cbef085012f8341f4add6b254b28e6d660094ffcf65676acdc3316b0bcc7051c
data/BOAST.gemspec CHANGED
@@ -1,41 +1,12 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'BOAST'
3
- s.version = "1.0.3"
3
+ s.version = "1.0.4"
4
4
  s.author = "Brice Videau"
5
5
  s.email = "brice.videau@imag.fr"
6
6
  s.homepage = "https://github.com/Nanosim-LIG/boast"
7
7
  s.summary = "BOAST is a computing kernel metaprogramming tool."
8
8
  s.description = "BOAST aims at providing a framework to metaprogram, benchmark and validate computing kernels"
9
- s.files = %w( BOAST.gemspec
10
- LICENSE
11
- lib/BOAST.rb
12
- lib/BOAST/Algorithm.rb
13
- lib/BOAST/CKernel.rb
14
- lib/BOAST/BOAST_OpenCL.rb
15
- lib/BOAST/Transitions.rb
16
- lib/BOAST/Parens.rb
17
- lib/BOAST/Operators.rb
18
- lib/BOAST/DataTypes.rb
19
- lib/BOAST/Arithmetic.rb
20
- lib/BOAST/Expression.rb
21
- lib/BOAST/Index.rb
22
- lib/BOAST/Variable.rb
23
- lib/BOAST/Procedure.rb
24
- lib/BOAST/If.rb
25
- lib/BOAST/For.rb
26
- lib/BOAST/Case.rb
27
- lib/BOAST/While.rb
28
- lib/BOAST/FuncCall.rb
29
- lib/BOAST/Pragma.rb
30
- lib/BOAST/Inspectable.rb
31
- lib/BOAST/Functors.rb
32
- lib/BOAST/ControlStructure.rb
33
- lib/BOAST/Print.rb
34
- lib/BOAST/State.rb
35
- lib/BOAST/Optimization.rb
36
- lib/BOAST/OpenMP.rb
37
- lib/BOAST/Slice.rb
38
- )
9
+ s.files = Dir['BOAST.gemspec', 'LICENSE', 'README.md', 'lib/**/*']
39
10
  s.has_rdoc = true
40
11
  s.license = 'BSD'
41
12
  s.required_ruby_version = '>= 1.9.3'
data/README.md ADDED
@@ -0,0 +1,240 @@
1
+ BOAST
2
+ =====
3
+
4
+ This section will present some simple examples to familiarize the user
5
+ with BOAST. More samples can be found in the git repository.
6
+
7
+ Installation
8
+ ------------
9
+
10
+ BOAST is ruby based, so ruby needs to be installed on the machine.
11
+ Installation of boast can be done using the ruby built-in package
12
+ manager: *gem*. See following Listing for reference.
13
+
14
+ ```bash
15
+ sudo apt-get install ruby ruby-dev
16
+ gem install --user-install BOAST
17
+ ```
18
+
19
+ Variable and Procedure Declaration
20
+ ----------------------------------
21
+
22
+ The following samples are presented using *irb*, the interactive Ruby interpreter.
23
+ It can be launched using the *irb* command in a terminal. Following
24
+ Listing shows the declaration of two variables of different kinds.
25
+
26
+ irb(main):001:0> require 'BOAST'
27
+ => true
28
+ irb(main):002:0> a = BOAST::Int "a"
29
+ => a
30
+ irb(main):003:0> b = BOAST::Real "b"
31
+ => b
32
+ irb(main):004:0> BOAST::decl a, b
33
+ integer(kind=4) :: a
34
+ real(kind=8) :: b
35
+ => [a, b]
36
+
37
+ Following Listing shows the declaration of a procedure using the two previous
38
+ variables as parameters. For clarity irb echoes have been suppressed.
39
+
40
+ 005:0> p = BOAST::Procedure( "test_proc", [a,b] )
41
+ 006:0> BOAST::opn p
42
+ SUBROUTINE test_proc(a, b)
43
+ integer, parameter :: wp=kind(1.0d0)
44
+ integer(kind=4) :: a
45
+ real(kind=8) :: b
46
+ 007:0> BOAST::close p
47
+ END SUBROUTINE test_proc
48
+
49
+ Switching Language
50
+ ------------------
51
+
52
+ Following Listing shows how to switch BOAST to C. Available languages are
53
+ *FORTRAN*, *C*, *CUDA* and *CL*.
54
+
55
+ 008:0> BOAST::lang = BOAST::C
56
+ 009:0> BOAST::opn p
57
+ void test_proc(int32_t a, double b){
58
+ 010:0> BOAST::close p
59
+ }
60
+
61
+ Defining a Complete Procedure
62
+ -----------------------------
63
+
64
+ Following Listing shows how to define a procedure and the associated code. Note
65
+ that here each parameter of the procedure has been given a direction:
66
+ one, *a*, is an input parameter while the other, *b*, is an output parameter.
67
+
68
+ 011:0> BOAST::lang = BOAST::FORTRAN
69
+ 012:0> a = BOAST::Real( "a", :dir => :in)
70
+ 013:0> b = BOAST::Real( "b", :dir => :out)
71
+ 014:0> p = BOAST::Procedure( "plus_two", [a,b] ) {
72
+ 015:1* BOAST::pr b === a + 2
73
+ 016:1> }
74
+ 017:0> BOAST::pr p
75
+ SUBROUTINE plus_two(a, b)
76
+ integer, parameter :: wp=kind(1.0d0)
77
+ real(kind=8), intent(in) :: a
78
+ real(kind=8), intent(out) :: b
79
+ b = a + 2
80
+ END SUBROUTINE plus_two
81
+ 018:0> BOAST::lang = BOAST::C
82
+ 019:0> BOAST::pr p
83
+ void plus_two(const double a, double * b){
84
+ (*b) = a + 2;
85
+ }
86
+
87
+ Creating, Building and Running a Computing Kernel
88
+ -------------------------------------------------
89
+
90
+ Following Listing shows how to create a Computing kernel (*CKernel*) and build
91
+ it. Once a computing kernel is instantiated the output of BOAST will be
92
+ redirected to the computing kernel source code. Line 4 sets the entry point of
93
+ the computing kernel to the procedure we just defined. By default compilation
94
+ commands are not shown unless an error occurs. This behavior can be changed by
95
+ switching to verbose mode.
96
+
97
+ When running the kernel all the arguments have to be specified. Running
98
+ a kernel returns a hash table containing information about the procedure
99
+ execution. In this simple case two pieces of information are returned: first the
100
+ value of the output parameter *b* and second the time the kernel
101
+ execution took.
102
+
103
+ 020:0> BOAST::lang = BOAST::FORTRAN
104
+ 021:0> k = BOAST::CKernel::new
105
+ 022:0> BOAST::pr p
106
+ 023:0> k.procedure = p
107
+ 024:0> puts k
108
+ SUBROUTINE plus_two(a, b)
109
+ integer, parameter :: wp=kind(1.0d0)
110
+ real(kind=8), intent(in) :: a
111
+ real(kind=8), intent(out) :: b
112
+ b = a + 2
113
+ END SUBROUTINE plus_two
114
+ 025:0> k.build
115
+ 026:0> BOAST::verbose = true
116
+ 027:0> k.build
117
+ gcc -O2 -Wall -fPIC -I/usr/lib/x86_64-linux-gnu/ruby/2.1.0 -I/usr/include/ruby-2.1.0 -I/usr/include/ruby-2.1.0/x86_64-linux-gnu -I/usr/include/x86_64-linux-gnu/ruby-2.1.0 -I/var/lib/gems/2.1.0/gems/narray-0.6.1.1 -DHAVE_NARRAY_H -c -o /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/Mod_plus_two20150309_4611_5a129k.c
118
+ gfortran -O2 -Wall -fPIC -c -o /tmp/plus_two20150309-4611-5a129k.o /tmp/plus_two20150309-4611-5a129k.f90
119
+ gcc -shared -o /tmp/Mod_plus_two20150309_4611_5a129k.so /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/plus_two20150309-4611-5a129k.o -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic -L/usr/lib -lruby-2.1 -lrt
120
+ 028:0> r = k.run(5,0)
121
+ 029:0> puts r
122
+ {:reference_return=>{:b=>7.0}, :duration=>5.84e-07}
123
+
124
+ Using Arrays in Procedures
125
+ --------------------------
126
+
127
+ Most computing kernels don't work on scalar values but rather on arrays
128
+ of data. Following Listing shows how to use arrays in computing
129
+ kernels. In this case we place ourselves in BOAST namespace to reduce
130
+ the syntax overhead. Variables *a* and *b* are one-dimensional arrays of
131
+ size *n*. Arrays in BOAST start at index 1 unless specified otherwise.
132
+ For instance `Dim(0,n-1)` would have created a dimension starting at 0.
133
+ Array bounds can also be negative and several dimensions can be
134
+ specified to obtain multi-dimensional arrays. For self contained
135
+ procedures/kernels one can use the shortcut written on line 13 to create
136
+ a CKernel object. As we are not specifying build options the build
137
+ command can also be omitted and will be automatically called when
138
+ running the kernel the first time. Lines 17 to 19 are used to check the
139
+ result of the kernel.
140
+
141
+ 001:0> require 'BOAST'
142
+ 002:0> require 'narray'
143
+ 003:0> include BOAST
144
+ 004:0> n = Int( "n", :dir => :in )
145
+ 005:0> a = Real( "a", :dir => :in, :dim => [Dim(n)] )
146
+ 006:0> b = Real( "b", :dir => :out, :dim => [Dim(n)] )
147
+ 007:0> p = Procedure( "plus_two", [n, a, b] ) {
148
+ 008:1* decl i = Int( "i" )
149
+ 009:1> pr For( i, 1, n ) {
150
+ 010:2* pr b[i] === a[i] + 2.0
151
+ 011:2> }
152
+ 012:1> }
153
+ 013:0> k = p.ckernel
154
+ 014:0> input = NArray.float(1024).random
155
+ 015:0> output = NArray.float(1024)
156
+ 016:0> k.run(input.length, input, output)
157
+ 017:0> (output - input).each { |val|
158
+ 018:1* raise "Error!" if (val-2).abs > 1e-15
159
+ 019:1> }
160
+ 020:0> stats = k.run(input.length, input, output)
161
+ 021:0> puts "Success, duration: #{stats[:duration]} s"
162
+ Success, duration: 3.79e-06 s
163
+
164
+ The Canonical Case: Vector Addition
165
+ -----------------------------------
166
+
167
+ Following Listing shows the addition of two vectors into a third one. Here BOAST
168
+ is configured to have arrays starting at 0 and to use single precision reals by
169
+ default (Lines 5 and 6). The kernel declaration is encapsulated inside a method
170
+ to avoid cluttering the global namespace. On line 15, the expression `c[i] === a[i] + b[i]`
171
+ is stored inside a variable *expr* for later use. Lines 16 to 23 show
172
+ that the kernel differs depending on the target language, in CUDA and OpenCL
173
+ each thread will process one element.
174
+
175
+ ```ruby
176
+ require 'narray'
177
+ require 'BOAST'
178
+ include BOAST
179
+
180
+ set_array_start(0)
181
+ set_default_real_size(4)
182
+
183
+ def vector_add
184
+ n = Int("n",:dir => :in)
185
+ a = Real("a",:dir => :in, :dim => [ Dim(n)] )
186
+ b = Real("b",:dir => :in, :dim => [ Dim(n)] )
187
+ c = Real("c",:dir => :out, :dim => [ Dim(n)] )
188
+ p = Procedure("vector_add", [n,a,b,c]) {
189
+ decl i = Int("i")
190
+ expr = c[i] === a[i] + b[i]
191
+ if (get_lang == CL or get_lang == CUDA) then
192
+ pr i === get_global_id(0)
193
+ pr expr
194
+ else
195
+ pr For(i,0,n-1) {
196
+ pr expr
197
+ }
198
+ end
199
+ }
200
+ return p.ckernel
201
+ end
202
+ ```
203
+
204
+ Following Listing shows a way to check the validity of the previous kernel
205
+ over the available range of languages. The options that are passed to run are
206
+ only relevant for GPU languages and are thus ignored in FORTRAN and C
207
+ (Line 16). Success is only printed if results are validated, else an exception
208
+ is raised (Lines 17 to 20).
209
+
210
+ ```ruby
211
+ n = 1024*1024
212
+ a = NArray.sfloat(n).random
213
+ b = NArray.sfloat(n).random
214
+ c = NArray.sfloat(n)
215
+
216
+ epsilon = 10e-15
217
+
218
+ c_ref = a + b
219
+
220
+ [:FORTRAN, :C, :CL, :CUDA].each { |l|
221
+ set_lang( BOAST.const_get(l) )
222
+ puts "#{l}:"
223
+ k = vector_add
224
+ puts k.print
225
+ c.random!
226
+ k.run(n, a, b, c, :global_work_size => [n,1,1], :local_work_size => [32,1,1])
227
+ diff = (c_ref - c).abs
228
+ diff.each { |elem|
229
+ raise "Warning: residue too big: #{elem}" if elem > epsilon
230
+ }
231
+ }
232
+ puts "Success!"
233
+ ```
234
+
235
+ Acknowledgment
236
+ --------------
237
+
238
+ The research leading to these results has received funding from the
239
+ European Community's Seventh Framework Programme [FP7/2007-2013] under
240
+ grant agreement n° 288777 and 610402.
@@ -70,6 +70,7 @@ EOF
70
70
  register_clause(:flush_list, :option_list)
71
71
  register_clause(:threadprivate_list, :option_list)
72
72
  register_clause(:if, :simple)
73
+ register_clause(:final, :simple)
73
74
  register_clause(:num_threads, :simple)
74
75
  register_clause(:default, :simple)
75
76
  register_clause(:collapse, :simple)
@@ -12,7 +12,8 @@ module BOAST
12
12
  def initialize(v1=nil,v2=nil)
13
13
  if v1 then
14
14
  if v2 then
15
- @size = Expression::new(Substraction, v2, v1) + 1
15
+ #@size = Expression::new(Substraction, v2, v1) + 1
16
+ @size = v2-v1+1
16
17
  else
17
18
  @size = v1
18
19
  end
@@ -0,0 +1,94 @@
1
+ require 'stringio'
2
+ require 'rake'
3
+ require 'tempfile'
4
+ require 'rbconfig'
5
+ require 'systemu'
6
+ require 'yaml'
7
+ require 'pathname'
8
+ require 'os'
9
+
10
+ module BOAST
11
+
12
+ class CKernel
13
+ include Compilers
14
+ include Rake::DSL
15
+ include Inspectable
16
+ include PrivateStateAccessor
17
+ include TypeTransition
18
+
19
+ attr_accessor :code
20
+ attr_accessor :procedure
21
+ attr_accessor :lang
22
+ attr_accessor :binary
23
+ attr_accessor :kernels
24
+ attr_accessor :cost_function
25
+
26
+ def initialize(options={})
27
+ if options[:code] then
28
+ @code = options[:code]
29
+ elsif get_chain_code
30
+ @code = get_output
31
+ @code.seek(0,SEEK_END)
32
+ else
33
+ @code = StringIO::new
34
+ end
35
+ set_output(@code)
36
+ if options[:kernels] then
37
+ @kernels = options[:kernels]
38
+ else
39
+ @kernels = []
40
+ end
41
+ if options[:lang] then
42
+ @lang = options[:lang]
43
+ else
44
+ @lang = get_lang
45
+ end
46
+ if options[:architecture] then
47
+ @architecture = options[:architecture]
48
+ else
49
+ @architecture = get_architecture
50
+ end
51
+ @probes = [TimerProbe, PAPIProbe]
52
+
53
+ case @lang
54
+ when CL
55
+ extend OpenCLRuntime
56
+ when CUDA
57
+ extend CUDARuntime
58
+ when FORTRAN
59
+ extend FORTRANRuntime
60
+ extend FFIRuntime if ffi?
61
+ else
62
+ if @architecture == MPPA then
63
+ extend MPPARuntime
64
+ else
65
+ extend CRuntime
66
+ extend FFIRuntime if ffi?
67
+ end
68
+ end
69
+ end
70
+
71
+ def print
72
+ @code.rewind
73
+ puts @code.read
74
+ end
75
+
76
+ def to_s
77
+ @code.rewind
78
+ return code.read
79
+ end
80
+
81
+ def method_missing(meth, *args, &block)
82
+ if meth.to_s == "run" then
83
+ build
84
+ run(*args,&block)
85
+ else
86
+ super
87
+ end
88
+ end
89
+
90
+ def cost(*args)
91
+ @cost_function.call(*args)
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,32 @@
1
+ module BOAST
2
+
3
+ module CRuntime
4
+ include CompiledRuntime
5
+
6
+ def fill_library_header
7
+ get_output.puts "#include <inttypes.h>"
8
+ end
9
+
10
+ def fill_library_source
11
+ fill_library_header
12
+ @code.rewind
13
+ get_output.write @code.read
14
+ end
15
+
16
+ def create_procedure_call_parameters
17
+ params = []
18
+ @procedure.parameters.each { |param|
19
+ if param.dimension then
20
+ params.push( param.name )
21
+ elsif param.direction == :out or param.direction == :inout then
22
+ params.push( "&"+param.name )
23
+ else
24
+ params.push( param.name )
25
+ end
26
+ }
27
+ return params
28
+ end
29
+
30
+ end
31
+
32
+ end
@@ -0,0 +1,158 @@
1
+ module BOAST
2
+
3
+ module CUDARuntime
4
+ include CRuntime
5
+
6
+ alias fill_library_source_old fill_library_source
7
+ alias fill_library_header_old fill_library_header
8
+ alias fill_module_header_old fill_module_header
9
+ alias get_params_value_old get_params_value
10
+ alias fill_decl_module_params_old fill_decl_module_params
11
+ alias create_procedure_call_parameters_old create_procedure_call_parameters
12
+
13
+ def fill_module_header
14
+ fill_module_header_old
15
+ get_output.puts "#include <cuda_runtime.h>"
16
+ end
17
+
18
+ def fill_library_header
19
+ fill_library_header_old
20
+ get_output.puts "#include <cuda.h>"
21
+ end
22
+
23
+ def fill_library_source
24
+ fill_library_source_old
25
+ get_output.write <<EOF
26
+ extern "C" {
27
+ #{@procedure.boast_header_s(CUDA)}{
28
+ dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
29
+ dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
30
+ cudaEvent_t __start, __stop;
31
+ float __time;
32
+ cudaEventCreate(&__start);
33
+ cudaEventCreate(&__stop);
34
+ cudaEventRecord(__start, 0);
35
+ #{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
36
+ cudaEventRecord(__stop, 0);
37
+ cudaEventSynchronize(__stop);
38
+ cudaEventElapsedTime(&__time, __start, __stop);
39
+ return (unsigned long long int)((double)__time*(double)1e6);
40
+ }
41
+ }
42
+ EOF
43
+ end
44
+
45
+ def copy_array_param_from_ruby( param, ruby_param )
46
+ rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
47
+ (rb_ptr === ruby_param).pr
48
+ get_output.print <<EOF
49
+ if ( IsNArray(_boast_rb_ptr) ) {
50
+ struct NARRAY *_boast_n_ary;
51
+ size_t _boast_array_size;
52
+ Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
53
+ _boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
54
+ cudaMalloc( (void **) &#{param}, _boast_array_size);
55
+ cudaMemcpy(#{param}, (void *) _boast_n_ary->ptr, _boast_array_size, cudaMemcpyHostToDevice);
56
+ } else {
57
+ rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
58
+ }
59
+ EOF
60
+ end
61
+
62
+ def fill_decl_module_params
63
+ fill_decl_module_params_old
64
+ get_output.print <<EOF
65
+ size_t _boast_block_size[3] = {1,1,1};
66
+ size_t _boast_block_number[3] = {1,1,1};
67
+ EOF
68
+ end
69
+
70
+ def get_params_value
71
+ get_params_value_old
72
+ get_output.print <<EOF
73
+ if( _boast_rb_opts != Qnil ) {
74
+ VALUE _boast_rb_array_data = Qnil;
75
+ int _boast_i;
76
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_size")));
77
+ if( _boast_rb_ptr != Qnil ) {
78
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
79
+ rb_raise(rb_eArgError, "Cuda option block_size should be an array");
80
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
81
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
82
+ if( _boast_rb_array_data != Qnil )
83
+ _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
84
+ }
85
+ } else {
86
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("local_work_size")));
87
+ if( _boast_rb_ptr != Qnil ) {
88
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
89
+ rb_raise(rb_eArgError, "Cuda option local_work_size should be an array");
90
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
91
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
92
+ if( _boast_rb_array_data != Qnil )
93
+ _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
94
+ }
95
+ }
96
+ }
97
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_number")));
98
+ if( _boast_rb_ptr != Qnil ) {
99
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
100
+ rb_raise(rb_eArgError, "Cuda option block_number should be an array");
101
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
102
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
103
+ if( _boast_rb_array_data != Qnil )
104
+ _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
105
+ }
106
+ } else {
107
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("global_work_size")));
108
+ if( _boast_rb_ptr != Qnil ) {
109
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
110
+ rb_raise(rb_eArgError, "Cuda option global_work_size should be an array");
111
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
112
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
113
+ if( _boast_rb_array_data != Qnil )
114
+ _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data ) / _boast_block_size[_boast_i];
115
+ }
116
+ }
117
+ }
118
+ }
119
+ EOF
120
+ end
121
+
122
+ def create_procedure_call_parameters
123
+ return create_procedure_call_parameters_old + ["_boast_block_number", "_boast_block_size"]
124
+ end
125
+
126
+ def create_procedure_call
127
+ get_output.print " #{TimerProbe::RESULT} = "
128
+ get_output.print " #{method_name}_wrapper( "
129
+ get_output.print create_procedure_call_parameters.join(", ")
130
+ get_output.puts " );"
131
+ end
132
+
133
+ def copy_array_param_to_ruby(param, ruby_param)
134
+ rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
135
+ (rb_ptr === ruby_param).pr
136
+ get_output.print <<EOF
137
+ if ( IsNArray(_boast_rb_ptr) ) {
138
+ EOF
139
+ if param.direction == :out or param.direction == :inout then
140
+ get_output.print <<EOF
141
+ struct NARRAY *_boast_n_ary;
142
+ size_t _boast_array_size;
143
+ Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
144
+ _boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
145
+ cudaMemcpy((void *) _boast_n_ary->ptr, #{param}, _boast_array_size, cudaMemcpyDeviceToHost);
146
+ EOF
147
+ end
148
+ get_output.print <<EOF
149
+ cudaFree( (void *) #{param});
150
+ } else {
151
+ rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
152
+ }
153
+ EOF
154
+ end
155
+
156
+ end
157
+
158
+ end