BOAST 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BOAST.gemspec +2 -31
- data/README.md +240 -0
- data/lib/BOAST/{OpenMP.rb → Language/OpenMP.rb} +1 -0
- data/lib/BOAST/{Variable.rb → Language/Variable.rb} +2 -1
- data/lib/BOAST/Runtime/CKernel.rb +94 -0
- data/lib/BOAST/Runtime/CRuntime.rb +32 -0
- data/lib/BOAST/Runtime/CUDARuntime.rb +158 -0
- data/lib/BOAST/Runtime/CompiledRuntime.rb +398 -0
- data/lib/BOAST/Runtime/Compilers.rb +205 -0
- data/lib/BOAST/Runtime/Config.rb +94 -0
- data/lib/BOAST/Runtime/FFIRuntime.rb +104 -0
- data/lib/BOAST/Runtime/FORTRANRuntime.rb +45 -0
- data/lib/BOAST/Runtime/MPPARuntime.rb +464 -0
- data/lib/BOAST/Runtime/NonRegression.rb +157 -0
- data/lib/BOAST/Runtime/OpenCLRuntime.rb +181 -0
- data/lib/BOAST/Runtime/Probe.rb +136 -0
- data/lib/BOAST.rb +37 -26
- metadata +40 -28
- data/lib/BOAST/CKernel.rb +0 -1236
- /data/lib/BOAST/{Algorithm.rb → Language/Algorithm.rb} +0 -0
- /data/lib/BOAST/{Arithmetic.rb → Language/Arithmetic.rb} +0 -0
- /data/lib/BOAST/{BOAST_OpenCL.rb → Language/BOAST_OpenCL.rb} +0 -0
- /data/lib/BOAST/{Case.rb → Language/Case.rb} +0 -0
- /data/lib/BOAST/{ControlStructure.rb → Language/ControlStructure.rb} +0 -0
- /data/lib/BOAST/{DataTypes.rb → Language/DataTypes.rb} +0 -0
- /data/lib/BOAST/{Expression.rb → Language/Expression.rb} +0 -0
- /data/lib/BOAST/{For.rb → Language/For.rb} +0 -0
- /data/lib/BOAST/{FuncCall.rb → Language/FuncCall.rb} +0 -0
- /data/lib/BOAST/{Functors.rb → Language/Functors.rb} +0 -0
- /data/lib/BOAST/{If.rb → Language/If.rb} +0 -0
- /data/lib/BOAST/{Index.rb → Language/Index.rb} +0 -0
- /data/lib/BOAST/{Inspectable.rb → Language/Inspectable.rb} +0 -0
- /data/lib/BOAST/{Operators.rb → Language/Operators.rb} +0 -0
- /data/lib/BOAST/{Optimization.rb → Language/Optimization.rb} +0 -0
- /data/lib/BOAST/{Parens.rb → Language/Parens.rb} +0 -0
- /data/lib/BOAST/{Pragma.rb → Language/Pragma.rb} +0 -0
- /data/lib/BOAST/{Print.rb → Language/Print.rb} +0 -0
- /data/lib/BOAST/{Procedure.rb → Language/Procedure.rb} +0 -0
- /data/lib/BOAST/{Slice.rb → Language/Slice.rb} +0 -0
- /data/lib/BOAST/{State.rb → Language/State.rb} +0 -0
- /data/lib/BOAST/{Transitions.rb → Language/Transitions.rb} +0 -0
- /data/lib/BOAST/{While.rb → Language/While.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5eb4ec74e14155954a1ecc7c3004e79088160c4
|
4
|
+
data.tar.gz: a4226683ccc72753105a61c2af6a9ddb0ce69bc3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f1ac4576270947a645a2b8878f771d0c2dde7974d963b6c57a29eda1dc7a48be24003dc6df38f05e6f17fa9689ebf820634c0007495fecbe4c75bfddec8f5f0
|
7
|
+
data.tar.gz: fc98fd3ce8322e5c995167f043170ef42b61b49b14409d1c66d7cdea0a019e19cbef085012f8341f4add6b254b28e6d660094ffcf65676acdc3316b0bcc7051c
|
data/BOAST.gemspec
CHANGED
@@ -1,41 +1,12 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'BOAST'
|
3
|
-
s.version = "1.0.
|
3
|
+
s.version = "1.0.4"
|
4
4
|
s.author = "Brice Videau"
|
5
5
|
s.email = "brice.videau@imag.fr"
|
6
6
|
s.homepage = "https://github.com/Nanosim-LIG/boast"
|
7
7
|
s.summary = "BOAST is a computing kernel metaprogramming tool."
|
8
8
|
s.description = "BOAST aims at providing a framework to metaprogram, benchmark and validate computing kernels"
|
9
|
-
s.files =
|
10
|
-
LICENSE
|
11
|
-
lib/BOAST.rb
|
12
|
-
lib/BOAST/Algorithm.rb
|
13
|
-
lib/BOAST/CKernel.rb
|
14
|
-
lib/BOAST/BOAST_OpenCL.rb
|
15
|
-
lib/BOAST/Transitions.rb
|
16
|
-
lib/BOAST/Parens.rb
|
17
|
-
lib/BOAST/Operators.rb
|
18
|
-
lib/BOAST/DataTypes.rb
|
19
|
-
lib/BOAST/Arithmetic.rb
|
20
|
-
lib/BOAST/Expression.rb
|
21
|
-
lib/BOAST/Index.rb
|
22
|
-
lib/BOAST/Variable.rb
|
23
|
-
lib/BOAST/Procedure.rb
|
24
|
-
lib/BOAST/If.rb
|
25
|
-
lib/BOAST/For.rb
|
26
|
-
lib/BOAST/Case.rb
|
27
|
-
lib/BOAST/While.rb
|
28
|
-
lib/BOAST/FuncCall.rb
|
29
|
-
lib/BOAST/Pragma.rb
|
30
|
-
lib/BOAST/Inspectable.rb
|
31
|
-
lib/BOAST/Functors.rb
|
32
|
-
lib/BOAST/ControlStructure.rb
|
33
|
-
lib/BOAST/Print.rb
|
34
|
-
lib/BOAST/State.rb
|
35
|
-
lib/BOAST/Optimization.rb
|
36
|
-
lib/BOAST/OpenMP.rb
|
37
|
-
lib/BOAST/Slice.rb
|
38
|
-
)
|
9
|
+
s.files = Dir['BOAST.gemspec', 'LICENSE', 'README.md', 'lib/**/*']
|
39
10
|
s.has_rdoc = true
|
40
11
|
s.license = 'BSD'
|
41
12
|
s.required_ruby_version = '>= 1.9.3'
|
data/README.md
ADDED
@@ -0,0 +1,240 @@
|
|
1
|
+
BOAST
|
2
|
+
=====
|
3
|
+
|
4
|
+
This section will present some simple examples to familiarize the user
|
5
|
+
with BOAST. More samples can be found in the git repository.
|
6
|
+
|
7
|
+
Installation
|
8
|
+
------------
|
9
|
+
|
10
|
+
BOAST is ruby based, so ruby needs to be installed on the machine.
|
11
|
+
Installation of boast can be done using the ruby built-in package
|
12
|
+
manager: *gem*. See following Listing for reference.
|
13
|
+
|
14
|
+
```bash
|
15
|
+
sudo apt-get install ruby ruby-dev
|
16
|
+
gem install --user-install BOAST
|
17
|
+
```
|
18
|
+
|
19
|
+
Variable and Procedure Declaration
|
20
|
+
----------------------------------
|
21
|
+
|
22
|
+
The following samples are presented using *irb*, the interactive Ruby interpreter.
|
23
|
+
It can be launched using the *irb* command in a terminal. Following
|
24
|
+
Listing shows the declaration of two variables of different kind.
|
25
|
+
|
26
|
+
irb(main):001:0> require 'BOAST'
|
27
|
+
=> true
|
28
|
+
irb(main):002:0> a = BOAST::Int "a"
|
29
|
+
=> a
|
30
|
+
irb(main):003:0> b = BOAST::Real "b"
|
31
|
+
=> b
|
32
|
+
irb(main):004:0> BOAST::decl a, b
|
33
|
+
integer(kind=4) :: a
|
34
|
+
real(kind=8) :: b
|
35
|
+
=> [a, b]
|
36
|
+
|
37
|
+
Following Listing shows the declaration of a procedure using the two previous
|
38
|
+
variables as parameters. For clarity irb echoes have been suppressed.
|
39
|
+
|
40
|
+
005:0> p = BOAST::Procedure( "test_proc", [a,b] )
|
41
|
+
006:0> BOAST::opn p
|
42
|
+
SUBROUTINE test_proc(a, b)
|
43
|
+
integer, parameter :: wp=kind(1.0d0)
|
44
|
+
integer(kind=4) :: a
|
45
|
+
real(kind=8) :: b
|
46
|
+
007:0> BOAST::close p
|
47
|
+
END SUBROUTINE test_proc
|
48
|
+
|
49
|
+
Switching Language
|
50
|
+
------------------
|
51
|
+
|
52
|
+
Following Listing shows how to switch BOAST to C. Available languages are
|
53
|
+
*FORTRAN*, *C*, *CUDA* and *CL*.
|
54
|
+
|
55
|
+
008:0> BOAST::lang = BOAST::C
|
56
|
+
009:0> BOAST::opn p
|
57
|
+
void test_proc(int32_t a, double b){
|
58
|
+
010:0> BOAST::close p
|
59
|
+
}
|
60
|
+
|
61
|
+
Defining a Complete Procedure
|
62
|
+
-----------------------------
|
63
|
+
|
64
|
+
Following Listing shows how to define a procedure and the associated code. Note
|
65
|
+
that here each parameter of the procedure has been given a direction:
|
66
|
+
one, *a*, is an input parameter while the other, *b*, is an output parameter.
|
67
|
+
|
68
|
+
011:0> BOAST::lang = BOAST::FORTRAN
|
69
|
+
012:0> a = BOAST::Real( "a", :dir => :in)
|
70
|
+
013:0> b = BOAST::Real( "b", :dir => :out)
|
71
|
+
014:0> p = BOAST::Procedure( "plus_two", [a,b] ) {
|
72
|
+
015:1* BOAST::pr b === a + 2
|
73
|
+
016:1> }
|
74
|
+
017:0> BOAST::pr p
|
75
|
+
SUBROUTINE plus_two(a, b)
|
76
|
+
integer, parameter :: wp=kind(1.0d0)
|
77
|
+
real(kind=8), intent(in) :: a
|
78
|
+
real(kind=8), intent(out) :: b
|
79
|
+
b = a + 2
|
80
|
+
END SUBROUTINE plus_two
|
81
|
+
018:0> BOAST::lang = BOAST::C
|
82
|
+
019:0> BOAST::pr p
|
83
|
+
void plus_two(const double a, double * b){
|
84
|
+
(*b) = a + 2;
|
85
|
+
}
|
86
|
+
|
87
|
+
Creating, Building and Running a Computing Kernel
|
88
|
+
-------------------------------------------------
|
89
|
+
|
90
|
+
Following Listing shows how to create a Computing kernel (*CKernel*) and build
|
91
|
+
it. Once a computing kernel is instantiated the output of BOAST will be
|
92
|
+
redirected to the computing kernel source code. Line 4 sets the entry point of
|
93
|
+
the computing kernel to the procedure we just defined. By default compilation
|
94
|
+
commands are not shown unless an error occurs. This behavior can be changed by
|
95
|
+
switching to verbose mode.
|
96
|
+
|
97
|
+
When running the kernel all the arguments have to be specified. Running
|
98
|
+
a kernel returns a hash table containing information about the procedure
|
99
|
+
execution. In this simple case two pieces of information are returned: first the
|
100
|
+
value of the output parameter *b* and second the time the kernel
|
101
|
+
execution took.
|
102
|
+
|
103
|
+
020:0> BOAST::lang = BOAST::FORTRAN
|
104
|
+
021:0> k = BOAST::CKernel::new
|
105
|
+
022:0> BOAST::pr p
|
106
|
+
023:0> k.procedure = p
|
107
|
+
024:0> puts k
|
108
|
+
SUBROUTINE plus_two(a, b)
|
109
|
+
integer, parameter :: wp=kind(1.0d0)
|
110
|
+
real(kind=8), intent(in) :: a
|
111
|
+
real(kind=8), intent(out) :: b
|
112
|
+
b = a + 2
|
113
|
+
END SUBROUTINE plus_two
|
114
|
+
025:0> k.build
|
115
|
+
026:0> BOAST::verbose = true
|
116
|
+
027:0> k.build
|
117
|
+
gcc -O2 -Wall -fPIC -I/usr/lib/x86_64-linux-gnu/ruby/2.1.0 -I/usr/include/ruby-2.1.0 -I/usr/include/ruby-2.1.0/x86_64-linux-gnu -I/usr/include/x86_64-linux-gnu/ruby-2.1.0 -I/var/lib/gems/2.1.0/gems/narray-0.6.1.1 -DHAVE_NARRAY_H -c -o /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/Mod_plus_two20150309_4611_5a129k.c
|
118
|
+
gfortran -O2 -Wall -fPIC -c -o /tmp/plus_two20150309-4611-5a129k.o /tmp/plus_two20150309-4611-5a129k.f90
|
119
|
+
gcc -shared -o /tmp/Mod_plus_two20150309_4611_5a129k.so /tmp/Mod_plus_two20150309_4611_5a129k.o /tmp/plus_two20150309-4611-5a129k.o -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic -L/usr/lib -lruby-2.1 -lrt
|
120
|
+
028:0> r = k.run(5,0)
|
121
|
+
029:0> puts r
|
122
|
+
{:reference_return=>{:b=>7.0}, :duration=>5.84e-07}
|
123
|
+
|
124
|
+
Using Arrays in Procedures
|
125
|
+
--------------------------
|
126
|
+
|
127
|
+
Most computing kernels don't work on scalar values but rather on arrays
|
128
|
+
of data. Following Listing shows how to use arrays in computing
|
129
|
+
kernels. In this case we place ourselves in BOAST namespace to reduce
|
130
|
+
the syntax overhead. Variables *a* and *b* are one-dimensional arrays of
|
131
|
+
size *n*. Arrays in BOAST start at index 1 unless specified otherwise.
|
132
|
+
For instance `Dim(0,n-1)` would have created a dimension starting at 0.
|
133
|
+
Array bounds can also be negative and several dimensions can be
|
134
|
+
specified to obtain multi-dimensional arrays. For self-contained
|
135
|
+
procedures/kernels one can use the shortcut written on line 13 to create
|
136
|
+
a CKernel object. As we are not specifying build options the build
|
137
|
+
command can also be omitted and will be automatically called when
|
138
|
+
running the kernel the first time. Lines 17 to 19 are used to check the
|
139
|
+
result of the kernel.
|
140
|
+
|
141
|
+
001:0> require 'BOAST'
|
142
|
+
002:0> require 'narray'
|
143
|
+
003:0> include BOAST
|
144
|
+
004:0> n = Int( "n", :dir => :in )
|
145
|
+
005:0> a = Real( "a", :dir => :in, :dim => [Dim(n)] )
|
146
|
+
006:0> b = Real( "b", :dir => :out, :dim => [Dim(n)] )
|
147
|
+
007:0> p = Procedure( "plus_two", [n, a, b] ) {
|
148
|
+
008:1* decl i = Int( "i" )
|
149
|
+
009:1> pr For( i, 1, n ) {
|
150
|
+
010:2* pr b[i] === a[i] + 2.0
|
151
|
+
011:2> }
|
152
|
+
012:1> }
|
153
|
+
013:0> k = p.ckernel
|
154
|
+
014:0> input = NArray.float(1024).random
|
155
|
+
015:0> output = NArray.float(1024)
|
156
|
+
016:0> k.run(input.length, input, output)
|
157
|
+
017:0> (output - input).each { |val|
|
158
|
+
018:1* raise "Error!" if (val-2).abs > 1e-15
|
159
|
+
019:1> }
|
160
|
+
020:0> stats = k.run(input.length, input, output)
|
161
|
+
021:0> puts "Success, duration: #{stats[:duration]} s"
|
162
|
+
Success, duration: 3.79e-06 s
|
163
|
+
|
164
|
+
The Canonical Case: Vector Addition
|
165
|
+
-----------------------------------
|
166
|
+
|
167
|
+
The following listing shows the addition of two vectors into a third one. Here BOAST
|
168
|
+
is configured to have arrays starting at 0 and to use single precision reals by
|
169
|
+
default (Lines 5 and 6). The kernel declaration is encapsulated inside a method
|
170
|
+
to avoid cluttering the global namespace. On line 15 the expression `c[i] === a[i] + b[i]`
|
171
|
+
is stored inside a variable *expr* for later use. Lines 16 to 23 show
|
172
|
+
that the kernel differs depending on the target language, in CUDA and OpenCL
|
173
|
+
each thread will process one element.
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
require 'narray'
|
177
|
+
require 'BOAST'
|
178
|
+
include BOAST
|
179
|
+
|
180
|
+
set_array_start(0)
|
181
|
+
set_default_real_size(4)
|
182
|
+
|
183
|
+
def vector_add
|
184
|
+
n = Int("n",:dir => :in)
|
185
|
+
a = Real("a",:dir => :in, :dim => [ Dim(n)] )
|
186
|
+
b = Real("b",:dir => :in, :dim => [ Dim(n)] )
|
187
|
+
c = Real("c",:dir => :out, :dim => [ Dim(n)] )
|
188
|
+
p = Procedure("vector_add", [n,a,b,c]) {
|
189
|
+
decl i = Int("i")
|
190
|
+
expr = c[i] === a[i] + b[i]
|
191
|
+
if (get_lang == CL or get_lang == CUDA) then
|
192
|
+
pr i === get_global_id(0)
|
193
|
+
pr expr
|
194
|
+
else
|
195
|
+
pr For(i,0,n-1) {
|
196
|
+
pr expr
|
197
|
+
}
|
198
|
+
end
|
199
|
+
}
|
200
|
+
return p.ckernel
|
201
|
+
end
|
202
|
+
```
|
203
|
+
|
204
|
+
The following listing shows a way to check the validity of the previous kernel
|
205
|
+
over the available range of languages. The options that are passed to run are
|
206
|
+
only relevant for GPU languages and are thus ignored in FORTRAN and C
|
207
|
+
(Line 16). Success is only printed if results are validated, else an exception
|
208
|
+
is raised (Lines 17 to 20).
|
209
|
+
|
210
|
+
```ruby
|
211
|
+
n = 1024*1024
|
212
|
+
a = NArray.sfloat(n).random
|
213
|
+
b = NArray.sfloat(n).random
|
214
|
+
c = NArray.sfloat(n)
|
215
|
+
|
216
|
+
epsilon = 10e-15
|
217
|
+
|
218
|
+
c_ref = a + b
|
219
|
+
|
220
|
+
[:FORTRAN, :C, :CL, :CUDA].each { |l|
|
221
|
+
set_lang( BOAST.const_get(l) )
|
222
|
+
puts "#{l}:"
|
223
|
+
k = vector_add
|
224
|
+
puts k.print
|
225
|
+
c.random!
|
226
|
+
k.run(n, a, b, c, :global_work_size => [n,1,1], :local_work_size => [32,1,1])
|
227
|
+
diff = (c_ref - c).abs
|
228
|
+
diff.each { |elem|
|
229
|
+
raise "Warning: residue too big: #{elem}" if elem > epsilon
|
230
|
+
}
|
231
|
+
}
|
232
|
+
puts "Success!"
|
233
|
+
```
|
234
|
+
|
235
|
+
Acknowledgment
|
236
|
+
--------------
|
237
|
+
|
238
|
+
The research leading to these results has received funding from the
|
239
|
+
European Community's Seventh Framework Programme [FP7/2007-2013] under
|
240
|
+
grant agreement n° 288777 and 610402.
|
@@ -70,6 +70,7 @@ EOF
|
|
70
70
|
register_clause(:flush_list, :option_list)
|
71
71
|
register_clause(:threadprivate_list, :option_list)
|
72
72
|
register_clause(:if, :simple)
|
73
|
+
register_clause(:final, :simple)
|
73
74
|
register_clause(:num_threads, :simple)
|
74
75
|
register_clause(:default, :simple)
|
75
76
|
register_clause(:collapse, :simple)
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'rake'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'rbconfig'
|
5
|
+
require 'systemu'
|
6
|
+
require 'yaml'
|
7
|
+
require 'pathname'
|
8
|
+
require 'os'
|
9
|
+
|
10
|
+
module BOAST
|
11
|
+
|
12
|
+
class CKernel
|
13
|
+
include Compilers
|
14
|
+
include Rake::DSL
|
15
|
+
include Inspectable
|
16
|
+
include PrivateStateAccessor
|
17
|
+
include TypeTransition
|
18
|
+
|
19
|
+
attr_accessor :code
|
20
|
+
attr_accessor :procedure
|
21
|
+
attr_accessor :lang
|
22
|
+
attr_accessor :binary
|
23
|
+
attr_accessor :kernels
|
24
|
+
attr_accessor :cost_function
|
25
|
+
|
26
|
+
def initialize(options={})
|
27
|
+
if options[:code] then
|
28
|
+
@code = options[:code]
|
29
|
+
elsif get_chain_code
|
30
|
+
@code = get_output
|
31
|
+
@code.seek(0,SEEK_END)
|
32
|
+
else
|
33
|
+
@code = StringIO::new
|
34
|
+
end
|
35
|
+
set_output(@code)
|
36
|
+
if options[:kernels] then
|
37
|
+
@kernels = options[:kernels]
|
38
|
+
else
|
39
|
+
@kernels = []
|
40
|
+
end
|
41
|
+
if options[:lang] then
|
42
|
+
@lang = options[:lang]
|
43
|
+
else
|
44
|
+
@lang = get_lang
|
45
|
+
end
|
46
|
+
if options[:architecture] then
|
47
|
+
@architecture = options[:architecture]
|
48
|
+
else
|
49
|
+
@architecture = get_architecture
|
50
|
+
end
|
51
|
+
@probes = [TimerProbe, PAPIProbe]
|
52
|
+
|
53
|
+
case @lang
|
54
|
+
when CL
|
55
|
+
extend OpenCLRuntime
|
56
|
+
when CUDA
|
57
|
+
extend CUDARuntime
|
58
|
+
when FORTRAN
|
59
|
+
extend FORTRANRuntime
|
60
|
+
extend FFIRuntime if ffi?
|
61
|
+
else
|
62
|
+
if @architecture == MPPA then
|
63
|
+
extend MPPARuntime
|
64
|
+
else
|
65
|
+
extend CRuntime
|
66
|
+
extend FFIRuntime if ffi?
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def print
|
72
|
+
@code.rewind
|
73
|
+
puts @code.read
|
74
|
+
end
|
75
|
+
|
76
|
+
def to_s
|
77
|
+
@code.rewind
|
78
|
+
return code.read
|
79
|
+
end
|
80
|
+
|
81
|
+
def method_missing(meth, *args, &block)
|
82
|
+
if meth.to_s == "run" then
|
83
|
+
build
|
84
|
+
run(*args,&block)
|
85
|
+
else
|
86
|
+
super
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def cost(*args)
|
91
|
+
@cost_function.call(*args)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module BOAST
|
2
|
+
|
3
|
+
module CRuntime
|
4
|
+
include CompiledRuntime
|
5
|
+
|
6
|
+
def fill_library_header
|
7
|
+
get_output.puts "#include <inttypes.h>"
|
8
|
+
end
|
9
|
+
|
10
|
+
def fill_library_source
|
11
|
+
fill_library_header
|
12
|
+
@code.rewind
|
13
|
+
get_output.write @code.read
|
14
|
+
end
|
15
|
+
|
16
|
+
def create_procedure_call_parameters
|
17
|
+
params = []
|
18
|
+
@procedure.parameters.each { |param|
|
19
|
+
if param.dimension then
|
20
|
+
params.push( param.name )
|
21
|
+
elsif param.direction == :out or param.direction == :inout then
|
22
|
+
params.push( "&"+param.name )
|
23
|
+
else
|
24
|
+
params.push( param.name )
|
25
|
+
end
|
26
|
+
}
|
27
|
+
return params
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
module BOAST
|
2
|
+
|
3
|
+
module CUDARuntime
|
4
|
+
include CRuntime
|
5
|
+
|
6
|
+
alias fill_library_source_old fill_library_source
|
7
|
+
alias fill_library_header_old fill_library_header
|
8
|
+
alias fill_module_header_old fill_module_header
|
9
|
+
alias get_params_value_old get_params_value
|
10
|
+
alias fill_decl_module_params_old fill_decl_module_params
|
11
|
+
alias create_procedure_call_parameters_old create_procedure_call_parameters
|
12
|
+
|
13
|
+
def fill_module_header
|
14
|
+
fill_module_header_old
|
15
|
+
get_output.puts "#include <cuda_runtime.h>"
|
16
|
+
end
|
17
|
+
|
18
|
+
def fill_library_header
|
19
|
+
fill_library_header_old
|
20
|
+
get_output.puts "#include <cuda.h>"
|
21
|
+
end
|
22
|
+
|
23
|
+
def fill_library_source
|
24
|
+
fill_library_source_old
|
25
|
+
get_output.write <<EOF
|
26
|
+
extern "C" {
|
27
|
+
#{@procedure.boast_header_s(CUDA)}{
|
28
|
+
dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
|
29
|
+
dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
|
30
|
+
cudaEvent_t __start, __stop;
|
31
|
+
float __time;
|
32
|
+
cudaEventCreate(&__start);
|
33
|
+
cudaEventCreate(&__stop);
|
34
|
+
cudaEventRecord(__start, 0);
|
35
|
+
#{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
|
36
|
+
cudaEventRecord(__stop, 0);
|
37
|
+
cudaEventSynchronize(__stop);
|
38
|
+
cudaEventElapsedTime(&__time, __start, __stop);
|
39
|
+
return (unsigned long long int)((double)__time*(double)1e6);
|
40
|
+
}
|
41
|
+
}
|
42
|
+
EOF
|
43
|
+
end
|
44
|
+
|
45
|
+
def copy_array_param_from_ruby( param, ruby_param )
|
46
|
+
rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
|
47
|
+
(rb_ptr === ruby_param).pr
|
48
|
+
get_output.print <<EOF
|
49
|
+
if ( IsNArray(_boast_rb_ptr) ) {
|
50
|
+
struct NARRAY *_boast_n_ary;
|
51
|
+
size_t _boast_array_size;
|
52
|
+
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
|
53
|
+
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
|
54
|
+
cudaMalloc( (void **) &#{param}, _boast_array_size);
|
55
|
+
cudaMemcpy(#{param}, (void *) _boast_n_ary->ptr, _boast_array_size, cudaMemcpyHostToDevice);
|
56
|
+
} else {
|
57
|
+
rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
|
58
|
+
}
|
59
|
+
EOF
|
60
|
+
end
|
61
|
+
|
62
|
+
def fill_decl_module_params
|
63
|
+
fill_decl_module_params_old
|
64
|
+
get_output.print <<EOF
|
65
|
+
size_t _boast_block_size[3] = {1,1,1};
|
66
|
+
size_t _boast_block_number[3] = {1,1,1};
|
67
|
+
EOF
|
68
|
+
end
|
69
|
+
|
70
|
+
def get_params_value
|
71
|
+
get_params_value_old
|
72
|
+
get_output.print <<EOF
|
73
|
+
if( _boast_rb_opts != Qnil ) {
|
74
|
+
VALUE _boast_rb_array_data = Qnil;
|
75
|
+
int _boast_i;
|
76
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_size")));
|
77
|
+
if( _boast_rb_ptr != Qnil ) {
|
78
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
79
|
+
rb_raise(rb_eArgError, "Cuda option block_size should be an array");
|
80
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
81
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
82
|
+
if( _boast_rb_array_data != Qnil )
|
83
|
+
_boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
|
84
|
+
}
|
85
|
+
} else {
|
86
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("local_work_size")));
|
87
|
+
if( _boast_rb_ptr != Qnil ) {
|
88
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
89
|
+
rb_raise(rb_eArgError, "Cuda option local_work_size should be an array");
|
90
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
91
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
92
|
+
if( _boast_rb_array_data != Qnil )
|
93
|
+
_boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
|
94
|
+
}
|
95
|
+
}
|
96
|
+
}
|
97
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_number")));
|
98
|
+
if( _boast_rb_ptr != Qnil ) {
|
99
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
100
|
+
rb_raise(rb_eArgError, "Cuda option block_number should be an array");
|
101
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
102
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
103
|
+
if( _boast_rb_array_data != Qnil )
|
104
|
+
_boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
|
105
|
+
}
|
106
|
+
} else {
|
107
|
+
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("global_work_size")));
|
108
|
+
if( _boast_rb_ptr != Qnil ) {
|
109
|
+
if (TYPE(_boast_rb_ptr) != T_ARRAY)
|
110
|
+
rb_raise(rb_eArgError, "Cuda option global_work_size should be an array");
|
111
|
+
for(_boast_i=0; _boast_i<3; _boast_i++) {
|
112
|
+
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
|
113
|
+
if( _boast_rb_array_data != Qnil )
|
114
|
+
_boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data ) / _boast_block_size[_boast_i];
|
115
|
+
}
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
EOF
|
120
|
+
end
|
121
|
+
|
122
|
+
def create_procedure_call_parameters
|
123
|
+
return create_procedure_call_parameters_old + ["_boast_block_number", "_boast_block_size"]
|
124
|
+
end
|
125
|
+
|
126
|
+
def create_procedure_call
|
127
|
+
get_output.print " #{TimerProbe::RESULT} = "
|
128
|
+
get_output.print " #{method_name}_wrapper( "
|
129
|
+
get_output.print create_procedure_call_parameters.join(", ")
|
130
|
+
get_output.puts " );"
|
131
|
+
end
|
132
|
+
|
133
|
+
def copy_array_param_to_ruby(param, ruby_param)
|
134
|
+
rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
|
135
|
+
(rb_ptr === ruby_param).pr
|
136
|
+
get_output.print <<EOF
|
137
|
+
if ( IsNArray(_boast_rb_ptr) ) {
|
138
|
+
EOF
|
139
|
+
if param.direction == :out or param.direction == :inout then
|
140
|
+
get_output.print <<EOF
|
141
|
+
struct NARRAY *_boast_n_ary;
|
142
|
+
size_t _boast_array_size;
|
143
|
+
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
|
144
|
+
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
|
145
|
+
cudaMemcpy((void *) _boast_n_ary->ptr, #{param}, _boast_array_size, cudaMemcpyDeviceToHost);
|
146
|
+
EOF
|
147
|
+
end
|
148
|
+
get_output.print <<EOF
|
149
|
+
cudaFree( (void *) #{param});
|
150
|
+
} else {
|
151
|
+
rb_raise(rb_eArgError, "Wrong type of argument for %s, expecting array!", "#{param}");
|
152
|
+
}
|
153
|
+
EOF
|
154
|
+
end
|
155
|
+
|
156
|
+
end
|
157
|
+
|
158
|
+
end
|