BOAST 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/BOAST.gemspec +2 -31
  3. data/README.md +240 -0
  4. data/lib/BOAST/{OpenMP.rb → Language/OpenMP.rb} +1 -0
  5. data/lib/BOAST/{Variable.rb → Language/Variable.rb} +2 -1
  6. data/lib/BOAST/Runtime/CKernel.rb +94 -0
  7. data/lib/BOAST/Runtime/CRuntime.rb +32 -0
  8. data/lib/BOAST/Runtime/CUDARuntime.rb +158 -0
  9. data/lib/BOAST/Runtime/CompiledRuntime.rb +398 -0
  10. data/lib/BOAST/Runtime/Compilers.rb +205 -0
  11. data/lib/BOAST/Runtime/Config.rb +94 -0
  12. data/lib/BOAST/Runtime/FFIRuntime.rb +104 -0
  13. data/lib/BOAST/Runtime/FORTRANRuntime.rb +45 -0
  14. data/lib/BOAST/Runtime/MPPARuntime.rb +464 -0
  15. data/lib/BOAST/Runtime/NonRegression.rb +157 -0
  16. data/lib/BOAST/Runtime/OpenCLRuntime.rb +181 -0
  17. data/lib/BOAST/Runtime/Probe.rb +136 -0
  18. data/lib/BOAST.rb +37 -26
  19. metadata +40 -28
  20. data/lib/BOAST/CKernel.rb +0 -1236
  21. /data/lib/BOAST/{Algorithm.rb → Language/Algorithm.rb} +0 -0
  22. /data/lib/BOAST/{Arithmetic.rb → Language/Arithmetic.rb} +0 -0
  23. /data/lib/BOAST/{BOAST_OpenCL.rb → Language/BOAST_OpenCL.rb} +0 -0
  24. /data/lib/BOAST/{Case.rb → Language/Case.rb} +0 -0
  25. /data/lib/BOAST/{ControlStructure.rb → Language/ControlStructure.rb} +0 -0
  26. /data/lib/BOAST/{DataTypes.rb → Language/DataTypes.rb} +0 -0
  27. /data/lib/BOAST/{Expression.rb → Language/Expression.rb} +0 -0
  28. /data/lib/BOAST/{For.rb → Language/For.rb} +0 -0
  29. /data/lib/BOAST/{FuncCall.rb → Language/FuncCall.rb} +0 -0
  30. /data/lib/BOAST/{Functors.rb → Language/Functors.rb} +0 -0
  31. /data/lib/BOAST/{If.rb → Language/If.rb} +0 -0
  32. /data/lib/BOAST/{Index.rb → Language/Index.rb} +0 -0
  33. /data/lib/BOAST/{Inspectable.rb → Language/Inspectable.rb} +0 -0
  34. /data/lib/BOAST/{Operators.rb → Language/Operators.rb} +0 -0
  35. /data/lib/BOAST/{Optimization.rb → Language/Optimization.rb} +0 -0
  36. /data/lib/BOAST/{Parens.rb → Language/Parens.rb} +0 -0
  37. /data/lib/BOAST/{Pragma.rb → Language/Pragma.rb} +0 -0
  38. /data/lib/BOAST/{Print.rb → Language/Print.rb} +0 -0
  39. /data/lib/BOAST/{Procedure.rb → Language/Procedure.rb} +0 -0
  40. /data/lib/BOAST/{Slice.rb → Language/Slice.rb} +0 -0
  41. /data/lib/BOAST/{State.rb → Language/State.rb} +0 -0
  42. /data/lib/BOAST/{Transitions.rb → Language/Transitions.rb} +0 -0
  43. /data/lib/BOAST/{While.rb → Language/While.rb} +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78df529b137f3cd4300f47711bc4d620c098a338
4
- data.tar.gz: a0f73edc8256e5ac857a61418f57eecfd87521ee
3
+ metadata.gz: f5eb4ec74e14155954a1ecc7c3004e79088160c4
4
+ data.tar.gz: a4226683ccc72753105a61c2af6a9ddb0ce69bc3
5
5
  SHA512:
6
- metadata.gz: 5bc73c3b16e34b758c819bed3a5f85703093dd32aaa03e48d20783c55f7bab49203fe2c82c48e621fa3f1b1d2ea7873c0e141cd8ad8d4910d07f1ae60037a1be
7
- data.tar.gz: d98656618d9436cbbf75ae1661c70bfc2635b5e38c742e551a9310153afff97c7cf97588ed56c6fadb4e2ffe5fe8efe45e18c3ed781d7810fe643bc6d5b51072
6
+ metadata.gz: 7f1ac4576270947a645a2b8878f771d0c2dde7974d963b6c57a29eda1dc7a48be24003dc6df38f05e6f17fa9689ebf820634c0007495fecbe4c75bfddec8f5f0
7
+ data.tar.gz: fc98fd3ce8322e5c995167f043170ef42b61b49b14409d1c66d7cdea0a019e19cbef085012f8341f4add6b254b28e6d660094ffcf65676acdc3316b0bcc7051c
data/BOAST.gemspec CHANGED
@@ -1,41 +1,12 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'BOAST'
3
- s.version = "1.0.3"
3
+ s.version = "1.0.4"
4
4
  s.author = "Brice Videau"
5
5
  s.email = "brice.videau@imag.fr"
6
6
  s.homepage = "https://github.com/Nanosim-LIG/boast"
7
7
  s.summary = "BOAST is a computing kernel metaprogramming tool."
8
8
  s.description = "BOAST aims at providing a framework to metaprogram, benchmark and validate computing kernels"
9
- s.files = %w( BOAST.gemspec
10
- LICENSE
11
- lib/BOAST.rb
12
- lib/BOAST/Algorithm.rb
13
- lib/BOAST/CKernel.rb
14
- lib/BOAST/BOAST_OpenCL.rb
15
- lib/BOAST/Transitions.rb
16
- lib/BOAST/Parens.rb
17
- lib/BOAST/Operators.rb
18
- lib/BOAST/DataTypes.rb
19
- lib/BOAST/Arithmetic.rb
20
- lib/BOAST/Expression.rb
21
- lib/BOAST/Index.rb
22
- lib/BOAST/Variable.rb
23
- lib/BOAST/Procedure.rb
24
- lib/BOAST/If.rb
25
- lib/BOAST/For.rb
26
- lib/BOAST/Case.rb
27
- lib/BOAST/While.rb
28
- lib/BOAST/FuncCall.rb
29
- lib/BOAST/Pragma.rb
30
- lib/BOAST/Inspectable.rb
31
- lib/BOAST/Functors.rb
32
- lib/BOAST/ControlStructure.rb
33
- lib/BOAST/Print.rb
34
- lib/BOAST/State.rb
35
- lib/BOAST/Optimization.rb
36
- lib/BOAST/OpenMP.rb
37
- lib/BOAST/Slice.rb
38
- )
9
+ s.files = Dir['BOAST.gemspec', 'LICENSE', 'README.md', 'lib/**/*']
39
10
  s.has_rdoc = true
40
11
  s.license = 'BSD'
41
12
  s.required_ruby_version = '>= 1.9.3'
data/README.md ADDED
@@ -0,0 +1,240 @@
1
+ BOAST
2
+ =====
3
+
4
+ This section will present some simple examples to familiarize the user
5
+ with BOAST. More samples can be found in the git repository.
6
+
7
+ Installation
8
+ ------------
9
+
10
+ BOAST is ruby based, so ruby needs to be installed on the machine.
11
+ Installation of boast can be done using the ruby built-in package
12
+ manager: *gem*. See following Listing for reference.
13
+
14
+ ```bash
15
+ sudo apt-get install ruby ruby-dev
16
+ gem install --user-install BOAST
17
+ ```
18
+
19
+ Variable and Procedure Declaration
20
+ ----------------------------------
21
+
22
+ The following samples are presented using *irb* ruby interactive interpreter.
23
+ It can be launched using the *irb* command in a terminal. Following
24
+ Listing shows the declaration of two variables of different kinds.
25
+
26
+ irb(main):001:0> require 'BOAST'
27
+ => true
28
+ irb(main):002:0> a = BOAST::Int "a"
29
+ => a
30
+ irb(main):003:0> b = BOAST::Real "b"
31
+ => b
32
+ irb(main):004:0> BOAST::decl a, b
33
+ integer(kind=4) :: a
34
+ real(kind=8) :: b
35
+ => [a, b]
36
+
37
+ Following Listing shows the declaration of a procedure using the two previous
38
+ variables as parameters. For clarity irb echoes have been suppressed.
39
+
40
+ 005:0> p = BOAST::Procedure( "test_proc", [a,b] )
41
+ 006:0> BOAST::opn p
42
+ SUBROUTINE test_proc(a, b)
43
+ integer, parameter :: wp=kind(1.0d0)
44
+ integer(kind=4) :: a
45
+ real(kind=8) :: b
46
+ 007:0> BOAST::close p
47
+ END SUBROUTINE test_proc
48
+
49
+ Switching Language
50
+ ------------------
51
+
52
+ Following Listing shows how to switch BOAST to C. Available languages are
53
+ *FORTRAN*, *C*, *CUDA* and *CL*.
54
+
55
+ 008:0> BOAST::lang = BOAST::C
56
+ 009:0> BOAST::opn p
57
+ void test_proc(int32_t a, double b){
58
+ 010:0> BOAST::close p
59
+ }
60
+
61
+ Defining a Complete Procedure
62
+ -----------------------------
63
+
64
+ Following Listing shows how to define a procedure and the associated code. Note
65
+ that here each parameter of the procedure has been given a direction:
66
+ one, *a*, is an input parameter while the other, *b*, is an output parameter.
67
+
68
+ 011:0> BOAST::lang = BOAST::FORTRAN
69
+ 012:0> a = BOAST::Real( "a", :dir => :in)
70
+ 013:0> b = BOAST::Real( "b", :dir => :out)
71
+ 014:0> p = BOAST::Procedure( "plus_two", [a,b] ) {
72
+ 015:1* BOAST::pr b === a + 2
73
+ 016:1> }
74
+ 017:0> BOAST::pr p
75
+ SUBROUTINE plus_two(a, b)
76
+ integer, parameter :: wp=kind(1.0d0)
77
+ real(kind=8), intent(in) :: a
78
+ real(kind=8), intent(out) :: b
79
+ b = a + 2
80
+ END SUBROUTINE plus_two
81
+ 018:0> BOAST::lang = BOAST::C
82
+ 019:0> BOAST::pr p
83
+ void plus_two(const double a, double * b){
84
+ (*b) = a + 2;
85
+ }
86
+
87
+ Creating, Building and Running a Computing Kernel
88
+ -------------------------------------------------
89
+
90
+ Following Listing shows how to create a Computing kernel (*CKernel*) and build
91
+ it. Once a computing kernel is instantiated the output of BOAST will be
92
+ redirected to the computing kernel source code. Line 4 sets the entry point of
93
+ the computing kernel to the procedure we just defined. By default compilation
94
+ commands are not shown unless an error occurs. This behavior can be changed by
95
+ switching to verbose mode.
96
+
97
+ When running the kernel all the arguments have to be specified. Running
98
+ a kernel returns a hash table containing information about the procedure
99
+ execution. In this simple case two pieces of information are returned: first the
100
+ value of the output parameter *b* and second the time the kernel
101
+ execution took.
102
+
103
+ 020:0> BOAST::lang = BOAST::FORTRAN
104
+ 021:0> k = BOAST::CKernel::new
105
+ 022:0> BOAST::pr p
106
+ 023:0> k.procedure = p
107
+ 024:0> puts k
108
+ SUBROUTINE plus_two(a, b)
109
+ integer, parameter :: wp=kind(1.0d0)
110
+ real(kind=8), intent(in) :: a
111
+ real(kind=8), intent(out) :: b
112
+ b = a + 2
113
+ END SUBROUTINE plus_two
114
+ 025:0> k.build
115
+ 026:0> BOAST::verbose = true
116
+ 027:0> k.build
117
+ gcc -O2 -Wall -fPIC -I/usr/lib/x86_64-linux-gnu/ruby/2.1.0 -I/usr/include/ruby-2.1.0 -I/usr/include/ruby-2.1.0/x86_64-linux-gnu -I/usr/include/x86_64-linux-gnu/ruby-2.1.0 -I/var/lib/gems/2.1.0/gems/narray-0.6.1.1 -DHAVE_NARRAY_H -c -o /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/Mod_plus_two20150309_4611_5a129k.c
118
+ gfortran -O2 -Wall -fPIC -c -o /tmp/plus_two20150309-4611-5a129k.o /tmp/plus_two20150309-4611-5a129k.f90
119
+ gcc -shared -o /tmp/Mod_plus_two20150309_4611_5a129k.so /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/plus_two20150309-4611-5a129k.o -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic -L/usr/lib -lruby-2.1 -lrt
120
+ 028:0> r = k.run(5,0)
121
+ 029:0> puts r
122
+ {:reference_return=>{:b=>7.0}, :duration=>5.84e-07}
123
+
124
+ Using Arrays in Procedures
125
+ --------------------------
126
+
127
+ Most computing kernels don't work on scalar values but rather on arrays
128
+ of data. Following Listing shows how to use arrays in computing
129
+ kernels. In this case we place ourselves in BOAST namespace to reduce
130
+ the syntax overhead. Variables *a* and *b* are one-dimensional arrays of
131
+ size *n*. Arrays in BOAST start at index 1 unless specified otherwise.
132
+ For instance `Dim(0,n-1)` would have created a dimension starting at 0.
133
+ Array bounds can also be negative and several dimensions can be
134
+ specified to obtain multi-dimensional arrays. For self-contained
135
+ procedures/kernels one can use the shortcut written on line 13 to create
136
+ a CKernel object. As we are not specifying build options the build
137
+ command can also be omitted and will be automatically called when
138
+ running the kernel the first time. Lines 17 to 19 are used to check the
139
+ result of the kernel.
140
+
141
+ 001:0> require 'BOAST'
142
+ 002:0> require 'narray'
143
+ 003:0> include BOAST
144
+ 004:0> n = Int( "n", :dir => :in )
145
+ 005:0> a = Real( "a", :dir => :in, :dim => [Dim(n)] )
146
+ 006:0> b = Real( "b", :dir => :out, :dim => [Dim(n)] )
147
+ 007:0> p = Procedure( "plus_two", [n, a, b] ) {
148
+ 008:1* decl i = Int( "i" )
149
+ 009:1> pr For( i, 1, n ) {
150
+ 010:2* pr b[i] === a[i] + 2.0
151
+ 011:2> }
152
+ 012:1> }
153
+ 013:0> k = p.ckernel
154
+ 014:0> input = NArray.float(1024).random
155
+ 015:0> output = NArray.float(1024)
156
+ 016:0> k.run(input.length, input, output)
157
+ 017:0> (output - input).each { |val|
158
+ 018:1* raise "Error!" if (val-2).abs > 1e-15
159
+ 019:1> }
160
+ 020:0> stats = k.run(input.length, input, output)
161
+ 021:0> puts "Success, duration: #{stats[:duration]} s"
162
+ Success, duration: 3.79e-06 s
163
+
164
+ The Canonical Case: Vector Addition
165
+ -----------------------------------
166
+
167
+ Following Listing shows the addition of two vectors in a third one. Here BOAST
168
+ is configured to have arrays starting at 0 and to use single precision reals by
169
+ default (Lines 5 and 6). The kernel declaration is encapsulated inside a method
170
+ to avoid cluttering the global namespace. Line 15 the expression `c[i] === a[i]+ b[i]`
171
+ is stored inside a variable *expr* for later use. Lines 16 to 23 show
172
+ that the kernel differs depending on the target language, in CUDA and OpenCL
173
+ each thread will process one element.
174
+
175
+ ```ruby
176
+ require 'narray'
177
+ require 'BOAST'
178
+ include BOAST
179
+
180
+ set_array_start(0)
181
+ set_default_real_size(4)
182
+
183
+ def vector_add
184
+ n = Int("n",:dir => :in)
185
+ a = Real("a",:dir => :in, :dim => [ Dim(n)] )
186
+ b = Real("b",:dir => :in, :dim => [ Dim(n)] )
187
+ c = Real("c",:dir => :out, :dim => [ Dim(n)] )
188
+ p = Procedure("vector_add", [n,a,b,c]) {
189
+ decl i = Int("i")
190
+ expr = c[i] === a[i] + b[i]
191
+ if (get_lang == CL or get_lang == CUDA) then
192
+ pr i === get_global_id(0)
193
+ pr expr
194
+ else
195
+ pr For(i,0,n-1) {
196
+ pr expr
197
+ }
198
+ end
199
+ }
200
+ return p.ckernel
201
+ end
202
+ ```
203
+
204
+ Following Listing shows a way to check the validity of the previous kernel
205
+ over the available range of languages. The options that are passed to run are
206
+ only relevant for GPU languages and are thus ignored in FORTRAN and C
207
+ (Line 16). Success is only printed if results are validated, else an exception
208
+ is raised (Lines 17 to 20).
209
+
210
+ ```ruby
211
+ n = 1024*1024
212
+ a = NArray.sfloat(n).random
213
+ b = NArray.sfloat(n).random
214
+ c = NArray.sfloat(n)
215
+
216
+ epsilon = 10e-15
217
+
218
+ c_ref = a + b
219
+
220
+ [:FORTRAN, :C, :CL, :CUDA].each { |l|
221
+ set_lang( BOAST.const_get(l) )
222
+ puts "#{l}:"
223
+ k = vector_add
224
+ puts k.print
225
+ c.random!
226
+ k.run(n, a, b, c, :global_work_size => [n,1,1], :local_work_size => [32,1,1])
227
+ diff = (c_ref - c).abs
228
+ diff.each { |elem|
229
+ raise "Warning: residue too big: #{elem}" if elem > epsilon
230
+ }
231
+ }
232
+ puts "Success!"
233
+ ```
234
+
235
+ Acknowledgment
236
+ --------------
237
+
238
+ The research leading to these results has received funding from the
239
+ European Community's Seventh Framework Programme [FP7/2007-2013] under
240
+ grant agreement n° 288777 and 610402.
@@ -70,6 +70,7 @@ EOF
70
70
  register_clause(:flush_list, :option_list)
71
71
  register_clause(:threadprivate_list, :option_list)
72
72
  register_clause(:if, :simple)
73
+ register_clause(:final, :simple)
73
74
  register_clause(:num_threads, :simple)
74
75
  register_clause(:default, :simple)
75
76
  register_clause(:collapse, :simple)
@@ -12,7 +12,8 @@ module BOAST
12
12
  def initialize(v1=nil,v2=nil)
13
13
  if v1 then
14
14
  if v2 then
15
- @size = Expression::new(Substraction, v2, v1) + 1
15
+ #@size = Expression::new(Substraction, v2, v1) + 1
16
+ @size = v2-v1+1
16
17
  else
17
18
  @size = v1
18
19
  end
@@ -0,0 +1,94 @@
1
+ require 'stringio'
2
+ require 'rake'
3
+ require 'tempfile'
4
+ require 'rbconfig'
5
+ require 'systemu'
6
+ require 'yaml'
7
+ require 'pathname'
8
+ require 'os'
9
+
10
+ module BOAST
11
+
12
# A computing kernel: a source-code buffer (+code+) plus the entry-point
# +procedure+, the target +lang+, and optional +binary+, +kernels+ and
# +cost_function+ attributes. On construction the instance extends itself
# with the runtime module that matches its language/architecture.
+ class CKernel
13
+ include Compilers
14
+ include Rake::DSL
15
+ include Inspectable
16
+ include PrivateStateAccessor
17
+ include TypeTransition
18
+
19
+ attr_accessor :code
20
+ attr_accessor :procedure
21
+ attr_accessor :lang
22
+ attr_accessor :binary
23
+ attr_accessor :kernels
24
+ attr_accessor :cost_function
25
+
26
# Builds a kernel from an options hash. Recognized keys:
#   :code         - IO-like buffer used as the kernel source; otherwise the
#                   current output is reused (positioned at its end) when
#                   get_chain_code is truthy, else a fresh StringIO is used.
#   :kernels      - stored in @kernels (defaults to an empty array).
#   :lang         - target language (defaults to get_lang).
#   :architecture - target architecture (defaults to get_architecture).
# The chosen buffer becomes the current output via set_output, so later
# output goes into this kernel's source.
+ def initialize(options={})
27
+ if options[:code] then
28
+ @code = options[:code]
29
+ elsif get_chain_code
30
+ @code = get_output
31
+ @code.seek(0,SEEK_END)
32
+ else
33
+ @code = StringIO::new
34
+ end
35
+ set_output(@code)
36
+ if options[:kernels] then
37
+ @kernels = options[:kernels]
38
+ else
39
+ @kernels = []
40
+ end
41
+ if options[:lang] then
42
+ @lang = options[:lang]
43
+ else
44
+ @lang = get_lang
45
+ end
46
+ if options[:architecture] then
47
+ @architecture = options[:architecture]
48
+ else
49
+ @architecture = get_architecture
50
+ end
51
+ @probes = [TimerProbe, PAPIProbe]
52
+
53
# Select the runtime by extending this single instance (not the class):
# CL -> OpenCLRuntime, CUDA -> CUDARuntime, FORTRAN -> FORTRANRuntime,
# anything else -> MPPARuntime on the MPPA architecture, CRuntime otherwise.
# FFIRuntime is layered on top of the FORTRAN and C runtimes when ffi? is true.
+ case @lang
54
+ when CL
55
+ extend OpenCLRuntime
56
+ when CUDA
57
+ extend CUDARuntime
58
+ when FORTRAN
59
+ extend FORTRANRuntime
60
+ extend FFIRuntime if ffi?
61
+ else
62
+ if @architecture == MPPA then
63
+ extend MPPARuntime
64
+ else
65
+ extend CRuntime
66
+ extend FFIRuntime if ffi?
67
+ end
68
+ end
69
+ end
70
+
71
# Writes the whole kernel source to standard output (rewinds the buffer first).
# Returns whatever puts returns, not the source text; use to_s for the text.
+ def print
72
+ @code.rewind
73
+ puts @code.read
74
+ end
75
+
76
# Returns the whole kernel source as a String (rewinds the buffer first).
+ def to_s
77
+ @code.rewind
78
+ return code.read
79
+ end
80
+
81
# Lazy build hook: a call to +run+ that reaches method_missing triggers
# +build+ and is then retried with the same arguments and block. Any other
# unknown method is passed to super.
# NOTE(review): presumably +build+ defines +run+ on this object; if it does
# not, the retry lands here again and recurses without bound - confirm.
# NOTE(review): no respond_to_missing? is visible in this class, so
# respond_to?(:run) likely reports false before the first build.
+ def method_missing(meth, *args, &block)
82
+ if meth.to_s == "run" then
83
+ build
84
+ run(*args,&block)
85
+ else
86
+ super
87
+ end
88
+ end
89
+
90
# Evaluates the user-supplied cost function with the given arguments.
# NOTE(review): @cost_function is never initialized in this class, so this
# raises NoMethodError on nil unless cost_function= was called first.
+ def cost(*args)
91
+ @cost_function.call(*args)
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,32 @@
1
+ module BOAST
2
+
3
# Runtime mixin for kernels written in C. Provides the C-specific pieces
# (library header/source emission, call-argument formatting) on top of
# CompiledRuntime.
+ module CRuntime
4
+ include CompiledRuntime
5
+
6
# Emits the include line needed by the generated C library source.
+ def fill_library_header
7
+ get_output.puts "#include <inttypes.h>"
8
+ end
9
+
10
# Emits the full library source: the header followed by the kernel code
# buffer, which is rewound and copied verbatim to the current output.
+ def fill_library_source
11
+ fill_library_header
12
+ @code.rewind
13
+ get_output.write @code.read
14
+ end
15
+
16
# Returns the list of C argument expressions (strings) used to call the
# procedure: parameters with a dimension are passed by name, scalar
# :out/:inout parameters are passed by address ("&name"), and every other
# parameter is passed by name.
+ def create_procedure_call_parameters
17
+ params = []
18
+ @procedure.parameters.each { |param|
19
+ if param.dimension then
20
+ params.push( param.name )
21
+ elsif param.direction == :out or param.direction == :inout then
22
+ params.push( "&"+param.name )
23
+ else
24
+ params.push( param.name )
25
+ end
26
+ }
27
+ return params
28
+ end
29
+
30
+ end
31
+
32
+ end
@@ -0,0 +1,158 @@
1
+ module BOAST
2
+
3
# Runtime mixin for kernels written in CUDA. Builds on CRuntime and overrides
# the code-generation hooks so that the generated Ruby extension allocates
# device memory, launches the kernel through a timed host-side wrapper, and
# copies results back.
+ module CUDARuntime
4
+ include CRuntime
5
+
6
# Keep the previous implementations reachable under *_old names so that the
# overrides below can call them.
# NOTE(review): fill_module_header, get_params_value and
# fill_decl_module_params are not defined in CRuntime as shown; presumably
# they come from CompiledRuntime - confirm.
+ alias fill_library_source_old fill_library_source
7
+ alias fill_library_header_old fill_library_header
8
+ alias fill_module_header_old fill_module_header
9
+ alias get_params_value_old get_params_value
10
+ alias fill_decl_module_params_old fill_decl_module_params
11
+ alias create_procedure_call_parameters_old create_procedure_call_parameters
12
+
13
# Emits the previous module header plus the CUDA runtime include.
+ def fill_module_header
14
+ fill_module_header_old
15
+ get_output.puts "#include <cuda_runtime.h>"
16
+ end
17
+
18
# Emits the previous library header plus the CUDA driver include.
+ def fill_library_header
19
+ fill_library_header_old
20
+ get_output.puts "#include <cuda.h>"
21
+ end
22
+
23
# Emits the kernel source followed by an extern "C" host wrapper. The wrapper
# launches the kernel with the requested grid/block dimensions, brackets the
# launch with two CUDA events, and returns the measured time scaled by 1e6 as
# an unsigned long long.
# NOTE(review): cudaEventElapsedTime reports milliseconds per the CUDA
# documentation, so the returned value is presumably nanoseconds - confirm.
# NOTE(review): block_size/block_number are presumably declared by
# boast_header_s(CUDA), which is not visible here.
# NOTE(review): the generated code never checks CUDA return codes and never
# destroys the two events it creates.
+ def fill_library_source
24
+ fill_library_source_old
25
+ get_output.write <<EOF
26
+ extern "C" {
27
+ #{@procedure.boast_header_s(CUDA)}{
28
+ dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
29
+ dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
30
+ cudaEvent_t __start, __stop;
31
+ float __time;
32
+ cudaEventCreate(&__start);
33
+ cudaEventCreate(&__stop);
34
+ cudaEventRecord(__start, 0);
35
+ #{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
36
+ cudaEventRecord(__stop, 0);
37
+ cudaEventSynchronize(__stop);
38
+ cudaEventElapsedTime(&__time, __start, __stop);
39
+ return (unsigned long long int)((double)__time*(double)1e6);
40
+ }
41
+ }
42
+ EOF
43
+ end
44
+
45
# Emits C code that moves an array argument from Ruby to the device: the
# Ruby value must be an NArray; its byte size is total * element size, a
# device buffer of that size is allocated into +param+, and the host data is
# copied into it. Any other Ruby value raises ArgumentError in the generated
# code.
# NOTE(review): the results of cudaMalloc/cudaMemcpy are not checked.
+ def copy_array_param_from_ruby( param, ruby_param )
46
+ rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
47
+ (rb_ptr === ruby_param).pr
48
+ get_output.print <<EOF
49
+ if ( IsNArray(_boast_rb_ptr) ) {
50
+ struct NARRAY *_boast_n_ary;
51
+ size_t _boast_array_size;
52
+ Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
53
+ _boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
54
+ cudaMalloc( (void **) &#{param}, _boast_array_size);
55
+ cudaMemcpy(#{param}, (void *) _boast_n_ary->ptr, _boast_array_size, cudaMemcpyHostToDevice);
56
+ } else {
57
+ rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
58
+ }
59
+ EOF
60
+ end
61
+
62
# Emits the previous parameter declarations plus the launch-geometry arrays,
# both defaulting to {1,1,1}.
+ def fill_decl_module_params
63
+ fill_decl_module_params_old
64
+ get_output.print <<EOF
65
+ size_t _boast_block_size[3] = {1,1,1};
66
+ size_t _boast_block_number[3] = {1,1,1};
67
+ EOF
68
+ end
69
+
70
# Emits the previous argument-reading code plus C code that reads the launch
# geometry from the options hash:
#   block size   <- :block_size, else :local_work_size
#   block number <- :block_number, else :global_work_size / block size
# Each option must be a Ruby array; entries that are nil keep the default.
# NOTE(review): the :global_work_size division is an integer division, so a
# size that is not a multiple of the block size is rounded down, and a block
# size of 0 would divide by zero - confirm this is intended.
+ def get_params_value
71
+ get_params_value_old
72
+ get_output.print <<EOF
73
+ if( _boast_rb_opts != Qnil ) {
74
+ VALUE _boast_rb_array_data = Qnil;
75
+ int _boast_i;
76
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_size")));
77
+ if( _boast_rb_ptr != Qnil ) {
78
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
79
+ rb_raise(rb_eArgError, "Cuda option block_size should be an array");
80
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
81
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
82
+ if( _boast_rb_array_data != Qnil )
83
+ _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
84
+ }
85
+ } else {
86
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("local_work_size")));
87
+ if( _boast_rb_ptr != Qnil ) {
88
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
89
+ rb_raise(rb_eArgError, "Cuda option local_work_size should be an array");
90
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
91
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
92
+ if( _boast_rb_array_data != Qnil )
93
+ _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
94
+ }
95
+ }
96
+ }
97
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_number")));
98
+ if( _boast_rb_ptr != Qnil ) {
99
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
100
+ rb_raise(rb_eArgError, "Cuda option block_number should be an array");
101
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
102
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
103
+ if( _boast_rb_array_data != Qnil )
104
+ _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
105
+ }
106
+ } else {
107
+ _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("global_work_size")));
108
+ if( _boast_rb_ptr != Qnil ) {
109
+ if (TYPE(_boast_rb_ptr) != T_ARRAY)
110
+ rb_raise(rb_eArgError, "Cuda option global_work_size should be an array");
111
+ for(_boast_i=0; _boast_i<3; _boast_i++) {
112
+ _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
113
+ if( _boast_rb_array_data != Qnil )
114
+ _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data ) / _boast_block_size[_boast_i];
115
+ }
116
+ }
117
+ }
118
+ }
119
+ EOF
120
+ end
121
+
122
# Returns the previous call arguments with the two launch-geometry arrays
# appended, matching the extra parameters of the host wrapper.
+ def create_procedure_call_parameters
123
+ return create_procedure_call_parameters_old + ["_boast_block_number", "_boast_block_size"]
124
+ end
125
+
126
# Emits the call to the host wrapper and stores its return value in the
# variable named by TimerProbe::RESULT.
+ def create_procedure_call
127
+ get_output.print " #{TimerProbe::RESULT} = "
128
+ get_output.print " #{method_name}_wrapper( "
129
+ get_output.print create_procedure_call_parameters.join(", ")
130
+ get_output.puts " );"
131
+ end
132
+
133
# Emits C code that finishes an array argument after the kernel ran: for
# :out/:inout parameters the device buffer is copied back into the NArray;
# in every case the device buffer is then freed. A Ruby value that is not an
# NArray raises ArgumentError in the generated code.
+ def copy_array_param_to_ruby(param, ruby_param)
134
+ rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
135
+ (rb_ptr === ruby_param).pr
136
+ get_output.print <<EOF
137
+ if ( IsNArray(_boast_rb_ptr) ) {
138
+ EOF
139
+ if param.direction == :out or param.direction == :inout then
140
+ get_output.print <<EOF
141
+ struct NARRAY *_boast_n_ary;
142
+ size_t _boast_array_size;
143
+ Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
144
+ _boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
145
+ cudaMemcpy((void *) _boast_n_ary->ptr, #{param}, _boast_array_size, cudaMemcpyDeviceToHost);
146
+ EOF
147
+ end
148
+ get_output.print <<EOF
149
+ cudaFree( (void *) #{param});
150
+ } else {
151
+ rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
152
+ }
153
+ EOF
154
+ end
155
+
156
+ end
157
+
158
+ end