BOAST 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BOAST.gemspec +2 -31
- data/README.md +240 -0
- data/lib/BOAST/{OpenMP.rb → Language/OpenMP.rb} +1 -0
- data/lib/BOAST/{Variable.rb → Language/Variable.rb} +2 -1
- data/lib/BOAST/Runtime/CKernel.rb +94 -0
- data/lib/BOAST/Runtime/CRuntime.rb +32 -0
- data/lib/BOAST/Runtime/CUDARuntime.rb +158 -0
- data/lib/BOAST/Runtime/CompiledRuntime.rb +398 -0
- data/lib/BOAST/Runtime/Compilers.rb +205 -0
- data/lib/BOAST/Runtime/Config.rb +94 -0
- data/lib/BOAST/Runtime/FFIRuntime.rb +104 -0
- data/lib/BOAST/Runtime/FORTRANRuntime.rb +45 -0
- data/lib/BOAST/Runtime/MPPARuntime.rb +464 -0
- data/lib/BOAST/Runtime/NonRegression.rb +157 -0
- data/lib/BOAST/Runtime/OpenCLRuntime.rb +181 -0
- data/lib/BOAST/Runtime/Probe.rb +136 -0
- data/lib/BOAST.rb +37 -26
- metadata +40 -28
- data/lib/BOAST/CKernel.rb +0 -1236
- /data/lib/BOAST/{Algorithm.rb → Language/Algorithm.rb} +0 -0
- /data/lib/BOAST/{Arithmetic.rb → Language/Arithmetic.rb} +0 -0
- /data/lib/BOAST/{BOAST_OpenCL.rb → Language/BOAST_OpenCL.rb} +0 -0
- /data/lib/BOAST/{Case.rb → Language/Case.rb} +0 -0
- /data/lib/BOAST/{ControlStructure.rb → Language/ControlStructure.rb} +0 -0
- /data/lib/BOAST/{DataTypes.rb → Language/DataTypes.rb} +0 -0
- /data/lib/BOAST/{Expression.rb → Language/Expression.rb} +0 -0
- /data/lib/BOAST/{For.rb → Language/For.rb} +0 -0
- /data/lib/BOAST/{FuncCall.rb → Language/FuncCall.rb} +0 -0
- /data/lib/BOAST/{Functors.rb → Language/Functors.rb} +0 -0
- /data/lib/BOAST/{If.rb → Language/If.rb} +0 -0
- /data/lib/BOAST/{Index.rb → Language/Index.rb} +0 -0
- /data/lib/BOAST/{Inspectable.rb → Language/Inspectable.rb} +0 -0
- /data/lib/BOAST/{Operators.rb → Language/Operators.rb} +0 -0
- /data/lib/BOAST/{Optimization.rb → Language/Optimization.rb} +0 -0
- /data/lib/BOAST/{Parens.rb → Language/Parens.rb} +0 -0
- /data/lib/BOAST/{Pragma.rb → Language/Pragma.rb} +0 -0
- /data/lib/BOAST/{Print.rb → Language/Print.rb} +0 -0
- /data/lib/BOAST/{Procedure.rb → Language/Procedure.rb} +0 -0
- /data/lib/BOAST/{Slice.rb → Language/Slice.rb} +0 -0
- /data/lib/BOAST/{State.rb → Language/State.rb} +0 -0
- /data/lib/BOAST/{Transitions.rb → Language/Transitions.rb} +0 -0
- /data/lib/BOAST/{While.rb → Language/While.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5eb4ec74e14155954a1ecc7c3004e79088160c4
|
4
|
+
data.tar.gz: a4226683ccc72753105a61c2af6a9ddb0ce69bc3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f1ac4576270947a645a2b8878f771d0c2dde7974d963b6c57a29eda1dc7a48be24003dc6df38f05e6f17fa9689ebf820634c0007495fecbe4c75bfddec8f5f0
|
7
|
+
data.tar.gz: fc98fd3ce8322e5c995167f043170ef42b61b49b14409d1c66d7cdea0a019e19cbef085012f8341f4add6b254b28e6d660094ffcf65676acdc3316b0bcc7051c
|
data/BOAST.gemspec
CHANGED
@@ -1,41 +1,12 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'BOAST'
|
3
|
-
s.version = "1.0.
|
3
|
+
s.version = "1.0.4"
|
4
4
|
s.author = "Brice Videau"
|
5
5
|
s.email = "brice.videau@imag.fr"
|
6
6
|
s.homepage = "https://github.com/Nanosim-LIG/boast"
|
7
7
|
s.summary = "BOAST is a computing kernel metaprogramming tool."
|
8
8
|
s.description = "BOAST aims at providing a framework to metaprogram, benchmark and validate computing kernels"
|
9
|
-
s.files =
|
10
|
-
LICENSE
|
11
|
-
lib/BOAST.rb
|
12
|
-
lib/BOAST/Algorithm.rb
|
13
|
-
lib/BOAST/CKernel.rb
|
14
|
-
lib/BOAST/BOAST_OpenCL.rb
|
15
|
-
lib/BOAST/Transitions.rb
|
16
|
-
lib/BOAST/Parens.rb
|
17
|
-
lib/BOAST/Operators.rb
|
18
|
-
lib/BOAST/DataTypes.rb
|
19
|
-
lib/BOAST/Arithmetic.rb
|
20
|
-
lib/BOAST/Expression.rb
|
21
|
-
lib/BOAST/Index.rb
|
22
|
-
lib/BOAST/Variable.rb
|
23
|
-
lib/BOAST/Procedure.rb
|
24
|
-
lib/BOAST/If.rb
|
25
|
-
lib/BOAST/For.rb
|
26
|
-
lib/BOAST/Case.rb
|
27
|
-
lib/BOAST/While.rb
|
28
|
-
lib/BOAST/FuncCall.rb
|
29
|
-
lib/BOAST/Pragma.rb
|
30
|
-
lib/BOAST/Inspectable.rb
|
31
|
-
lib/BOAST/Functors.rb
|
32
|
-
lib/BOAST/ControlStructure.rb
|
33
|
-
lib/BOAST/Print.rb
|
34
|
-
lib/BOAST/State.rb
|
35
|
-
lib/BOAST/Optimization.rb
|
36
|
-
lib/BOAST/OpenMP.rb
|
37
|
-
lib/BOAST/Slice.rb
|
38
|
-
)
|
9
|
+
s.files = Dir['BOAST.gemspec', 'LICENSE', 'README.md', 'lib/**/*']
|
39
10
|
s.has_rdoc = true
|
40
11
|
s.license = 'BSD'
|
41
12
|
s.required_ruby_version = '>= 1.9.3'
|
data/README.md
ADDED
@@ -0,0 +1,240 @@
|
|
1
|
+
BOAST
|
2
|
+
=====
|
3
|
+
|
4
|
+
This section will present some simple examples to familiarize the user
|
5
|
+
with BOAST. More samples can be found in the git repository.
|
6
|
+
|
7
|
+
Installation
|
8
|
+
------------
|
9
|
+
|
10
|
+
BOAST is ruby based, so ruby needs to be installed on the machine.
|
11
|
+
Installation of boast can be done using the ruby built-in package
|
12
|
+
manager: *gem*. See following Listing for reference.
|
13
|
+
|
14
|
+
```bash
|
15
|
+
sudo apt-get install ruby ruby-dev
|
16
|
+
gem install --user-install BOAST
|
17
|
+
```
|
18
|
+
|
19
|
+
Variable and Procedure Declaration
|
20
|
+
----------------------------------
|
21
|
+
|
22
|
+
The following samples are presented using *irb* ruby interactive interpreter.
|
23
|
+
It can be launched using the *irb* command in a terminal. Following
|
24
|
+
Listing shows the declaration of two variables of different kinds.
|
25
|
+
|
26
|
+
irb(main):001:0> require 'BOAST'
|
27
|
+
=> true
|
28
|
+
irb(main):002:0> a = BOAST::Int "a"
|
29
|
+
=> a
|
30
|
+
irb(main):003:0> b = BOAST::Real "b"
|
31
|
+
=> b
|
32
|
+
irb(main):004:0> BOAST::decl a, b
|
33
|
+
integer(kind=4) :: a
|
34
|
+
real(kind=8) :: b
|
35
|
+
=> [a, b]
|
36
|
+
|
37
|
+
Following Listing shows the declaration of a procedure using the two previous
|
38
|
+
variables as parameters. For clarity irb echoes have been suppressed.
|
39
|
+
|
40
|
+
005:0> p = BOAST::Procedure( "test_proc", [a,b] )
|
41
|
+
006:0> BOAST::opn p
|
42
|
+
SUBROUTINE test_proc(a, b)
|
43
|
+
integer, parameter :: wp=kind(1.0d0)
|
44
|
+
integer(kind=4) :: a
|
45
|
+
real(kind=8) :: b
|
46
|
+
007:0> BOAST::close p
|
47
|
+
END SUBROUTINE test_proc
|
48
|
+
|
49
|
+
Switching Language
|
50
|
+
------------------
|
51
|
+
|
52
|
+
Following Listing shows how to switch BOAST to C. Available languages are
|
53
|
+
*FORTRAN*, *C*, *CUDA* and *CL*.
|
54
|
+
|
55
|
+
008:0> BOAST::lang = BOAST::C
|
56
|
+
009:0> BOAST::opn p
|
57
|
+
void test_proc(int32_t a, double b){
|
58
|
+
010:0> BOAST::close p
|
59
|
+
}
|
60
|
+
|
61
|
+
Defining a Complete Procedure
|
62
|
+
-----------------------------
|
63
|
+
|
64
|
+
Following Listing shows how to define a procedure and the associated code. Note
|
65
|
+
that here each parameter of the procedure has been assigned a direction:
|
66
|
+
one, *a*, is an input parameter while the other, *b*, is an output parameter.
|
67
|
+
|
68
|
+
011:0> BOAST::lang = BOAST::FORTRAN
|
69
|
+
012:0> a = BOAST::Real( "a", :dir => :in)
|
70
|
+
013:0> b = BOAST::Real( "b", :dir => :out)
|
71
|
+
014:0> p = BOAST::Procedure( "plus_two", [a,b] ) {
|
72
|
+
015:1* BOAST::pr b === a + 2
|
73
|
+
016:1> }
|
74
|
+
017:0> BOAST::pr p
|
75
|
+
SUBROUTINE plus_two(a, b)
|
76
|
+
integer, parameter :: wp=kind(1.0d0)
|
77
|
+
real(kind=8), intent(in) :: a
|
78
|
+
real(kind=8), intent(out) :: b
|
79
|
+
b = a + 2
|
80
|
+
END SUBROUTINE plus_two
|
81
|
+
018:0> BOAST::lang = BOAST::C
|
82
|
+
019:0> BOAST::pr p
|
83
|
+
void plus_two(const double a, double * b){
|
84
|
+
(*b) = a + 2;
|
85
|
+
}
|
86
|
+
|
87
|
+
Creating, Building and Running a Computing Kernel
|
88
|
+
-------------------------------------------------
|
89
|
+
|
90
|
+
Following Listing shows how to create a Computing kernel (*CKernel*) and build
|
91
|
+
it. Once a computing kernel is instantiated the output of BOAST will be
|
92
|
+
redirected to the computing kernel source code. Line 4 sets the entry point of
|
93
|
+
the computing kernel to the procedure we just defined. By default compilation
|
94
|
+
commands are not shown unless an error occurs. This behavior can be changed by
|
95
|
+
switching to verbose mode.
|
96
|
+
|
97
|
+
When running the kernel all the arguments have to be specified. Running
|
98
|
+
a kernel returns a hash table containing information about the procedure
|
99
|
+
execution. In this simple case two pieces of information are returned: first the
|
100
|
+
value of the output parameter *b* and second the time the kernel
|
101
|
+
execution took.
|
102
|
+
|
103
|
+
020:0> BOAST::lang = BOAST::FORTRAN
|
104
|
+
021:0> k = BOAST::CKernel::new
|
105
|
+
022:0> BOAST::pr p
|
106
|
+
023:0> k.procedure = p
|
107
|
+
024:0> puts k
|
108
|
+
SUBROUTINE plus_two(a, b)
|
109
|
+
integer, parameter :: wp=kind(1.0d0)
|
110
|
+
real(kind=8), intent(in) :: a
|
111
|
+
real(kind=8), intent(out) :: b
|
112
|
+
b = a + 2
|
113
|
+
END SUBROUTINE plus_two
|
114
|
+
025:0> k.build
|
115
|
+
026:0> BOAST::verbose = true
|
116
|
+
027:0> k.build
|
117
|
+
gcc -O2 -Wall -fPIC -I/usr/lib/x86_64-linux-gnu/ruby/2.1.0 -I/usr/include/ruby-2.1.0 -I/usr/include/ruby-2.1.0/x86_64-linux-gnu -I/usr/include/x86_64-linux-gnu/ruby-2.1.0 -I/var/lib/gems/2.1.0/gems/narray-0.6.1.1 -DHAVE_NARRAY_H -c -o /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/Mod_plus_two20150309_4611_5a129k.c
|
118
|
+
gfortran -O2 -Wall -fPIC -c -o /tmp/plus_two20150309-4611-5a129k.o /tmp/plus_two20150309-4611-5a129k.f90
|
119
|
+
gcc -shared -o /tmp/Mod_plus_two20150309_4611_5a129k.so /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/plus_two20150309-4611-5a129k.o -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic -L/usr/lib -lruby-2.1 -lrt
|
120
|
+
028:0> r = k.run(5,0)
|
121
|
+
029:0> puts r
|
122
|
+
{:reference_return=>{:b=>7.0}, :duration=>5.84e-07}
|
123
|
+
|
124
|
+
Using Arrays in Procedures
|
125
|
+
--------------------------
|
126
|
+
|
127
|
+
Most computing kernels don't work on scalar values but rather on arrays
|
128
|
+
of data. Following Listing shows how to use arrays in computing
|
129
|
+
kernels. In this case we place ourselves in BOAST namespace to reduce
|
130
|
+
the syntax overhead. Variables *a* and *b* are one-dimensional arrays of
|
131
|
+
size *n*. Arrays in BOAST start at index 1 unless specified otherwise.
|
132
|
+
For instance `Dim(0,n-1)` would have created a dimension starting at 0.
|
133
|
+
Array bounds can also be negative and several dimensions can be
|
134
|
+
specified to obtain multi-dimensional arrays. For self-contained
|
135
|
+
procedures/kernels one can use the shortcut written on line 13 to create
|
136
|
+
a CKernel object. As we are not specifying build options the build
|
137
|
+
command can also be omitted and will be automatically called when
|
138
|
+
running the kernel the first time. Lines 17 to 19 are used to check the
|
139
|
+
result of the kernel.
|
140
|
+
|
141
|
+
001:0> require 'BOAST'
|
142
|
+
002:0> require 'narray'
|
143
|
+
003:0> include BOAST
|
144
|
+
004:0> n = Int( "n", :dir => :in )
|
145
|
+
005:0> a = Real( "a", :dir => :in, :dim => [Dim(n)] )
|
146
|
+
006:0> b = Real( "b", :dir => :out, :dim => [Dim(n)] )
|
147
|
+
007:0> p = Procedure( "plus_two", [n, a, b] ) {
|
148
|
+
008:1* decl i = Int( "i" )
|
149
|
+
009:1> pr For( i, 1, n ) {
|
150
|
+
010:2* pr b[i] === a[i] + 2.0
|
151
|
+
011:2> }
|
152
|
+
012:1> }
|
153
|
+
013:0> k = p.ckernel
|
154
|
+
014:0> input = NArray.float(1024).random
|
155
|
+
015:0> output = NArray.float(1024)
|
156
|
+
016:0> k.run(input.length, input, output)
|
157
|
+
017:0> (output - input).each { |val|
|
158
|
+
018:1* raise "Error!" if (val-2).abs > 1e-15
|
159
|
+
019:1> }
|
160
|
+
020:0> stats = k.run(input.length, input, output)
|
161
|
+
021:0> puts "Success, duration: #{stats[:duration]} s"
|
162
|
+
Success, duration: 3.79e-06 s
|
163
|
+
|
164
|
+
The Canonical Case: Vector Addition
|
165
|
+
-----------------------------------
|
166
|
+
|
167
|
+
Following Listing shows the addition of two vectors into a third one. Here BOAST
|
168
|
+
is configured to have arrays starting at 0 and to use single precision reals by
|
169
|
+
default (Lines 5 and 6). The kernel declaration is encapsulated inside a method
|
170
|
+
to avoid cluttering the global namespace. On line 15 the expression `c[i] === a[i] + b[i]`
|
171
|
+
is stored inside a variable *expr* for later use. Lines 16 to 23 show
|
172
|
+
that the kernel differs depending on the target language, in CUDA and OpenCL
|
173
|
+
each thread will process one element.
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
require 'narray'
|
177
|
+
require 'BOAST'
|
178
|
+
include BOAST
|
179
|
+
|
180
|
+
set_array_start(0)
|
181
|
+
set_default_real_size(4)
|
182
|
+
|
183
|
+
def vector_add
|
184
|
+
n = Int("n",:dir => :in)
|
185
|
+
a = Real("a",:dir => :in, :dim => [ Dim(n)] )
|
186
|
+
b = Real("b",:dir => :in, :dim => [ Dim(n)] )
|
187
|
+
c = Real("c",:dir => :out, :dim => [ Dim(n)] )
|
188
|
+
p = Procedure("vector_add", [n,a,b,c]) {
|
189
|
+
decl i = Int("i")
|
190
|
+
expr = c[i] === a[i] + b[i]
|
191
|
+
if (get_lang == CL or get_lang == CUDA) then
|
192
|
+
pr i === get_global_id(0)
|
193
|
+
pr expr
|
194
|
+
else
|
195
|
+
pr For(i,0,n-1) {
|
196
|
+
pr expr
|
197
|
+
}
|
198
|
+
end
|
199
|
+
}
|
200
|
+
return p.ckernel
|
201
|
+
end
|
202
|
+
```
|
203
|
+
|
204
|
+
Following Listing shows a way to check the validity of the previous kernel
|
205
|
+
over the available range of languages. The options that are passed to run are
|
206
|
+
only relevant for GPU languages and are thus ignored in FORTRAN and C
|
207
|
+
(Line 16). Success is only printed if results are validated, else an exception
|
208
|
+
is raised (Lines 17 to 20).
|
209
|
+
|
210
|
+
```ruby
|
211
|
+
n = 1024*1024
|
212
|
+
a = NArray.sfloat(n).random
|
213
|
+
b = NArray.sfloat(n).random
|
214
|
+
c = NArray.sfloat(n)
|
215
|
+
|
216
|
+
epsilon = 10e-15
|
217
|
+
|
218
|
+
c_ref = a + b
|
219
|
+
|
220
|
+
[:FORTRAN, :C, :CL, :CUDA].each { |l|
|
221
|
+
set_lang( BOAST.const_get(l) )
|
222
|
+
puts "#{l}:"
|
223
|
+
k = vector_add
|
224
|
+
puts k.print
|
225
|
+
c.random!
|
226
|
+
k.run(n, a, b, c, :global_work_size => [n,1,1], :local_work_size => [32,1,1])
|
227
|
+
diff = (c_ref - c).abs
|
228
|
+
diff.each { |elem|
|
229
|
+
raise "Warning: residue too big: #{elem}" if elem > epsilon
|
230
|
+
}
|
231
|
+
}
|
232
|
+
puts "Success!"
|
233
|
+
```
|
234
|
+
|
235
|
+
Acknowledgment
|
236
|
+
--------------
|
237
|
+
|
238
|
+
The research leading to these results has received funding from the
|
239
|
+
European Community's Seventh Framework Programme [FP7/2007-2013] under
|
240
|
+
grant agreement n° 288777 and 610402.
|
@@ -70,6 +70,7 @@ EOF
|
|
70
70
|
register_clause(:flush_list, :option_list)
|
71
71
|
register_clause(:threadprivate_list, :option_list)
|
72
72
|
register_clause(:if, :simple)
|
73
|
+
register_clause(:final, :simple)
|
73
74
|
register_clause(:num_threads, :simple)
|
74
75
|
register_clause(:default, :simple)
|
75
76
|
register_clause(:collapse, :simple)
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'rake'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'rbconfig'
|
5
|
+
require 'systemu'
|
6
|
+
require 'yaml'
|
7
|
+
require 'pathname'
|
8
|
+
require 'os'
|
9
|
+
|
10
|
+
module BOAST
|
11
|
+
|
12
|
+
class CKernel
|
13
|
+
include Compilers
|
14
|
+
include Rake::DSL
|
15
|
+
include Inspectable
|
16
|
+
include PrivateStateAccessor
|
17
|
+
include TypeTransition
|
18
|
+
|
19
|
+
attr_accessor :code
|
20
|
+
attr_accessor :procedure
|
21
|
+
attr_accessor :lang
|
22
|
+
attr_accessor :binary
|
23
|
+
attr_accessor :kernels
|
24
|
+
attr_accessor :cost_function
|
25
|
+
|
26
|
+
def initialize(options={})
|
27
|
+
if options[:code] then
|
28
|
+
@code = options[:code]
|
29
|
+
elsif get_chain_code
|
30
|
+
@code = get_output
|
31
|
+
@code.seek(0,SEEK_END)
|
32
|
+
else
|
33
|
+
@code = StringIO::new
|
34
|
+
end
|
35
|
+
set_output(@code)
|
36
|
+
if options[:kernels] then
|
37
|
+
@kernels = options[:kernels]
|
38
|
+
else
|
39
|
+
@kernels = []
|
40
|
+
end
|
41
|
+
if options[:lang] then
|
42
|
+
@lang = options[:lang]
|
43
|
+
else
|
44
|
+
@lang = get_lang
|
45
|
+
end
|
46
|
+
if options[:architecture] then
|
47
|
+
@architecture = options[:architecture]
|
48
|
+
else
|
49
|
+
@architecture = get_architecture
|
50
|
+
end
|
51
|
+
@probes = [TimerProbe, PAPIProbe]
|
52
|
+
|
53
|
+
case @lang
|
54
|
+
when CL
|
55
|
+
extend OpenCLRuntime
|
56
|
+
when CUDA
|
57
|
+
extend CUDARuntime
|
58
|
+
when FORTRAN
|
59
|
+
extend FORTRANRuntime
|
60
|
+
extend FFIRuntime if ffi?
|
61
|
+
else
|
62
|
+
if @architecture == MPPA then
|
63
|
+
extend MPPARuntime
|
64
|
+
else
|
65
|
+
extend CRuntime
|
66
|
+
extend FFIRuntime if ffi?
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def print
|
72
|
+
@code.rewind
|
73
|
+
puts @code.read
|
74
|
+
end
|
75
|
+
|
76
|
+
def to_s
|
77
|
+
@code.rewind
|
78
|
+
return code.read
|
79
|
+
end
|
80
|
+
|
81
|
+
def method_missing(meth, *args, &block)
|
82
|
+
if meth.to_s == "run" then
|
83
|
+
build
|
84
|
+
run(*args,&block)
|
85
|
+
else
|
86
|
+
super
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def cost(*args)
|
91
|
+
@cost_function.call(*args)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module BOAST
|
2
|
+
|
3
|
+
module CRuntime
|
4
|
+
include CompiledRuntime
|
5
|
+
|
6
|
+
def fill_library_header
|
7
|
+
get_output.puts "#include <inttypes.h>"
|
8
|
+
end
|
9
|
+
|
10
|
+
def fill_library_source
|
11
|
+
fill_library_header
|
12
|
+
@code.rewind
|
13
|
+
get_output.write @code.read
|
14
|
+
end
|
15
|
+
|
16
|
+
def create_procedure_call_parameters
|
17
|
+
params = []
|
18
|
+
@procedure.parameters.each { |param|
|
19
|
+
if param.dimension then
|
20
|
+
params.push( param.name )
|
21
|
+
elsif param.direction == :out or param.direction == :inout then
|
22
|
+
params.push( "&"+param.name )
|
23
|
+
else
|
24
|
+
params.push( param.name )
|
25
|
+
end
|
26
|
+
}
|
27
|
+
return params
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
module BOAST
|
2
|
+
|
3
|
+
module CUDARuntime
|
4
|
+
include CRuntime
|
5
|
+
|
6
|
+
alias fill_library_source_old fill_library_source
|
7
|
+
alias fill_library_header_old fill_library_header
|
8
|
+
alias fill_module_header_old fill_module_header
|
9
|
+
alias get_params_value_old get_params_value
|
10
|
+
alias fill_decl_module_params_old fill_decl_module_params
|
11
|
+
alias create_procedure_call_parameters_old create_procedure_call_parameters
|
12
|
+
|
13
|
+
def fill_module_header
|
14
|
+
fill_module_header_old
|
15
|
+
get_output.puts "#include <cuda_runtime.h>"
|
16
|
+
end
|
17
|
+
|
18
|
+
def fill_library_header
|
19
|
+
fill_library_header_old
|
20
|
+
get_output.puts "#include <cuda.h>"
|
21
|
+
end
|
22
|
+
|
23
|
+
def fill_library_source
|
24
|
+
fill_library_source_old
|
25
|
+
get_output.write <<EOF
|
26
|
+
extern "C" {
|
27
|
+
#{@procedure.boast_header_s(CUDA)}{
|
28
|
+
dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
|
29
|
+
dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
|
30
|
+
cudaEvent_t __start, __stop;
|
31
|
+
float __time;
|
32
|
+
cudaEventCreate(&__start);
|
33
|
+
cudaEventCreate(&__stop);
|
34
|
+
cudaEventRecord(__start, 0);
|
35
|
+
#{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
|
36
|
+
cudaEventRecord(__stop, 0);
|
37
|
+
cudaEventSynchronize(__stop);
|
38
|
+
cudaEventElapsedTime(&__time, __start, __stop);
|
39
|
+
return (unsigned long long int)((double)__time*(double)1e6);
|
40
|
+
}
|
41
|
+
}
|
42
|
+
EOF
|
43
|
+
end
|
44
|
+
|
45
|
+
def copy_array_param_from_ruby( param, ruby_param )
|
46
|
+
rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
|
47
|
+
(rb_ptr === ruby_param).pr
|
48
|
+
get_output.print <<EOF
|
49
|
+
if ( IsNArray(_boast_rb_ptr) ) {
|
50
|
+
struct NARRAY *_boast_n_ary;
|
51
|
+
size_t _boast_array_size;
|
52
|
+
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
|
53
|
+
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
|
54
|
+
cudaMalloc( (void **) &#{param}, _boast_array_size);
|
55
|
+
cudaMemcpy(#{param}, (void *) _boast_n_ary->ptr, _boast_array_size, cudaMemcpyHostToDevice);
|
56
|
+
} else {
|
57
|
+
rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
|
58
|
+
}
|
59
|
+
EOF
|
60
|
+
end
|
61
|
+
|
62
|
+
def fill_decl_module_params
|
63
|
+
fill_decl_module_params_old
|
64
|
+
get_output.print <<EOF
|
65
|
+
size_t _boast_block_size[3] = {1,1,1};
|
66
|
+
size_t _boast_block_number[3] = {1,1,1};
|
67
|
+
EOF
|
68
|
+
end
|
69
|
+
|
70
|
+
def get_params_value
|
71
|
+
get_params_value_old
|
72
|
+
get_output.print <<EOF
|
73
|
+
if( _boast_rb_opts != Qnil ) {
|
74
|
+
VALUE _boast_rb_array_data = Qnil;
|
75
|
+
int _boast_i;
|
76
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_size")));
|
77
|
+
if( _boast_rb_ptr != Qnil ) {
|
78
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
79
|
+
rb_raise(rb_eArgError, "Cuda option block_size should be an array");
|
80
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
81
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
82
|
+
if( _boast_rb_array_data != Qnil )
|
83
|
+
_boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
|
84
|
+
}
|
85
|
+
} else {
|
86
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("local_work_size")));
|
87
|
+
if( _boast_rb_ptr != Qnil ) {
|
88
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
89
|
+
rb_raise(rb_eArgError, "Cuda option local_work_size should be an array");
|
90
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
91
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
92
|
+
if( _boast_rb_array_data != Qnil )
|
93
|
+
_boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
|
94
|
+
}
|
95
|
+
}
|
96
|
+
}
|
97
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_number")));
|
98
|
+
if( _boast_rb_ptr != Qnil ) {
|
99
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
100
|
+
rb_raise(rb_eArgError, "Cuda option block_number should be an array");
|
101
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
102
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
103
|
+
if( _boast_rb_array_data != Qnil )
|
104
|
+
_boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
|
105
|
+
}
|
106
|
+
} else {
|
107
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("global_work_size")));
|
108
|
+
if( _boast_rb_ptr != Qnil ) {
|
109
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
110
|
+
rb_raise(rb_eArgError, "Cuda option global_work_size should be an array");
|
111
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
112
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
113
|
+
if( _boast_rb_array_data != Qnil )
|
114
|
+
_boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data ) / _boast_block_size[_boast_i];
|
115
|
+
}
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
EOF
|
120
|
+
end
|
121
|
+
|
122
|
+
def create_procedure_call_parameters
|
123
|
+
return create_procedure_call_parameters_old + ["_boast_block_number", "_boast_block_size"]
|
124
|
+
end
|
125
|
+
|
126
|
+
def create_procedure_call
|
127
|
+
get_output.print " #{TimerProbe::RESULT} = "
|
128
|
+
get_output.print " #{method_name}_wrapper( "
|
129
|
+
get_output.print create_procedure_call_parameters.join(", ")
|
130
|
+
get_output.puts " );"
|
131
|
+
end
|
132
|
+
|
133
|
+
def copy_array_param_to_ruby(param, ruby_param)
|
134
|
+
rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
|
135
|
+
(rb_ptr === ruby_param).pr
|
136
|
+
get_output.print <<EOF
|
137
|
+
if ( IsNArray(_boast_rb_ptr) ) {
|
138
|
+
EOF
|
139
|
+
if param.direction == :out or param.direction == :inout then
|
140
|
+
get_output.print <<EOF
|
141
|
+
struct NARRAY *_boast_n_ary;
|
142
|
+
size_t _boast_array_size;
|
143
|
+
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
|
144
|
+
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
|
145
|
+
cudaMemcpy((void *) _boast_n_ary->ptr, #{param}, _boast_array_size, cudaMemcpyDeviceToHost);
|
146
|
+
EOF
|
147
|
+
end
|
148
|
+
get_output.print <<EOF
|
149
|
+
cudaFree( (void *) #{param});
|
150
|
+
} else {
|
151
|
+
rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
|
152
|
+
}
|
153
|
+
EOF
|
154
|
+
end
|
155
|
+
|
156
|
+
end
|
157
|
+
|
158
|
+
end
|