sgc-ruby-cuda 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +2 -0
- data/COPYING +674 -0
- data/README.rdoc +106 -0
- data/Rakefile +76 -0
- data/doc/devel.rdoc +77 -0
- data/doc/features.rdoc +55 -0
- data/lib/cuda/driver/context.rb +236 -0
- data/lib/cuda/driver/cu.rb +60 -0
- data/lib/cuda/driver/device.rb +155 -0
- data/lib/cuda/driver/deviceptr.rb +69 -0
- data/lib/cuda/driver/error.rb +182 -0
- data/lib/cuda/driver/event.rb +124 -0
- data/lib/cuda/driver/ffi-cu.rb +620 -0
- data/lib/cuda/driver/function.rb +293 -0
- data/lib/cuda/driver/init.rb +45 -0
- data/lib/cuda/driver/memory.rb +134 -0
- data/lib/cuda/driver/module.rb +142 -0
- data/lib/cuda/driver/rubycu.rb +37 -0
- data/lib/cuda/driver/stream.rb +128 -0
- data/lib/cuda/driver/version.rb +42 -0
- data/lib/cuda/runtime/cuda.rb +65 -0
- data/lib/cuda/runtime/device.rb +175 -0
- data/lib/cuda/runtime/error.rb +197 -0
- data/lib/cuda/runtime/event.rb +117 -0
- data/lib/cuda/runtime/ffi-cuda.rb +588 -0
- data/lib/cuda/runtime/function.rb +161 -0
- data/lib/cuda/runtime/memory.rb +110 -0
- data/lib/cuda/runtime/rubycuda.rb +34 -0
- data/lib/cuda/runtime/stream.rb +126 -0
- data/lib/cuda/runtime/thread.rb +81 -0
- data/lib/cuda/runtime/version.rb +51 -0
- data/lib/ffi/prettystruct.rb +32 -0
- data/lib/helpers/flags.rb +82 -0
- data/lib/helpers/interface/ienum.rb +45 -0
- data/lib/helpers/klass.rb +45 -0
- data/lib/memory/buffer.rb +125 -0
- data/lib/memory/interface/ibuffer.rb +63 -0
- data/lib/memory/pointer.rb +72 -0
- data/lib/rubycu.rb +1 -0
- data/lib/rubycuda.rb +1 -0
- data/test/bad.ptx +0 -0
- data/test/memory/test_buffer.rb +93 -0
- data/test/rubycu/test_cucontext.rb +148 -0
- data/test/rubycu/test_cudevice.rb +69 -0
- data/test/rubycu/test_cudeviceptr.rb +43 -0
- data/test/rubycu/test_cuevent.rb +81 -0
- data/test/rubycu/test_cufunction.rb +165 -0
- data/test/rubycu/test_cumemory.rb +113 -0
- data/test/rubycu/test_cumodule.rb +114 -0
- data/test/rubycu/test_custream.rb +77 -0
- data/test/rubycu/test_cuversion.rb +39 -0
- data/test/rubycu/testbase.rb +107 -0
- data/test/rubycuda/test_cudadevice.rb +125 -0
- data/test/rubycuda/test_cudaerror.rb +48 -0
- data/test/rubycuda/test_cudaevent.rb +78 -0
- data/test/rubycuda/test_cudafunction.rb +106 -0
- data/test/rubycuda/test_cudamemory.rb +90 -0
- data/test/rubycuda/test_cudastream.rb +72 -0
- data/test/rubycuda/test_cudathread.rb +69 -0
- data/test/rubycuda/test_cudaversion.rb +41 -0
- data/test/rubycuda/testbase.rb +67 -0
- data/test/vadd.cu +21 -0
- data/version.rb +1 -0
- metadata +180 -0
@@ -0,0 +1,293 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'cuda/driver/ffi-cu'
|
26
|
+
require 'cuda/driver/error'
|
27
|
+
|
28
|
+
|
29
|
+
module SGC
|
30
|
+
module CU
|
31
|
+
|
32
|
+
class CUFunction

  # @deprecated Use {#launch_kernel}.
  #
  # Set the argument list of subsequent function call to _arg1_, _arg2_, *other_args.
  # The arguments are packed one after another; each one is placed at an offset
  # rounded up to a multiple of its own size, and the resulting total size is
  # registered with cuParamSetSize.
  # @param *args The list of arguments to pass to the kernel function. Nested arrays are flattened.
  #   * An Integer is passed as a C type int.
  #   * A Float is passed as a C type float.
  #   * A CUDevicePtr is passed as a device pointer.
  # @raise [TypeError] If an argument is of any other type.
  def param=(*args)
    offset = 0
    args.flatten.each do |arg|
      buf = pack_param(arg, "argument")
      # An int and a float buffer are 4 bytes each; a device pointer buffer has the size of CUDevicePtr.
      size = buf.size
      offset = align_up(offset, size)
      status = API::cuParamSetv(to_api, offset, buf, size)
      Pvt::handle_error(status, "Failed to set function parameters: offset = #{offset}, value = #{arg}.")
      offset += size
    end

    status = API::cuParamSetSize(to_api, offset)
    Pvt::handle_error(status, "Failed to set function parameter size: size = #{offset}.")
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set a float parameter to the function's argument list at _offset_ with _value_.
  # @param [Integer] offset Number of bytes to offset.
  # @param [Float] value The floating-point value to set as the function parameter.
  # @return [CUFunction] This function.
  def param_setf(offset, value)
    status = API::cuParamSetf(to_api, offset, value)
    Pvt::handle_error(status, "Failed to set function float parameter: offset = #{offset}, value = #{value}.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set an integer parameter to the function's argument list at _offset_ with _value_.
  # @param [Integer] offset Number of bytes to offset.
  # @param [Integer] value The integer value to set as the function parameter.
  # @return [CUFunction] This function.
  def param_seti(offset, value)
    status = API::cuParamSeti(to_api, offset, value)
    Pvt::handle_error(status, "Failed to set function integer parameter: offset = #{offset}, value = #{value}")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set an arbitrary data to the function's argument list at _offset_ with _ptr_ pointed _nbytes_ data.
  # @param [Integer] offset Number of bytes to offset.
  # @param [CUDevicePtr] ptr A device pointer pointing to an arbitrary data to be used as the function parameter.
  # @param [Integer] nbytes The size of the arbitrary data in bytes.
  # @return [CUFunction] This function.
  def param_setv(offset, ptr, nbytes)
    status = API::cuParamSetv(to_api, offset, ptr.to_api, nbytes)
    Pvt::handle_error(status, "Failed to set function arbitrary parameter: offset = #{offset}, size = #{nbytes}.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set the function parameter size to _nbytes_.
  # @param [Integer] nbytes The parameter size in bytes.
  # @return [CUFunction] This function.
  def param_set_size(nbytes)
    status = API::cuParamSetSize(to_api, nbytes)
    Pvt::handle_error(status, "Failed to set function parameter size: size = #{nbytes}.")
    self
  end


  # @deprecated
  #
  # Placeholder only: texture reference parameters are not supported.
  # @raise [NotImplementedError] Always.
  def param_set_texref(texunit, texref)
    raise NotImplementedError
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set the block dimensions to use for next launch.
  # @overload block_shape=(xdim)
  # @overload block_shape=(xdim, ydim)
  # @overload block_shape=(xdim, ydim, zdim)
  # @param [Integer] xdim The size of the x dimension.
  # @param [Integer] ydim The size of the y dimension. Defaults to 1.
  # @param [Integer] zdim The size of the z dimension. Defaults to 1.
  def block_shape=(*args)
    # Accept both obj.block_shape = x and obj.block_shape = x, y, z (which arrives as one array).
    xdim, ydim, zdim = args.flatten
    ydim = 1 if ydim.nil?
    zdim = 1 if zdim.nil?
    status = API::cuFuncSetBlockShape(to_api, xdim, ydim, zdim)
    Pvt::handle_error(status, "Failed to set function block shape: (x,y,z) = (#{xdim},#{ydim},#{zdim}).")
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set the dynamic shared-memory size to use for next launch.
  # @param [Integer] nbytes Number of bytes.
  def shared_size=(nbytes)
    status = API::cuFuncSetSharedSize(to_api, nbytes)
    Pvt::handle_error(status, "Failed to set function shared memory size: #{nbytes}.")
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Launch this kernel function with 1x1x1 grid of blocks to execute on the current CUDA device.
  # @return [CUFunction] This function.
  def launch
    status = API::cuLaunch(to_api)
    Pvt::handle_error(status, "Failed to launch kernel function on 1x1x1 grid of blocks.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Launch this kernel function with grid dimensions (_xdim_, _ydim_) to execute on the current CUDA device.
  # @overload launch_grid(xdim)
  # @overload launch_grid(xdim, ydim)
  # @param [Integer] xdim The x dimensional size of the grid to launch.
  # @param [Integer] ydim The y dimensional size of the grid to launch. Defaults to 1.
  # @return [CUFunction] This function.
  def launch_grid(xdim, ydim = 1)
    status = API::cuLaunchGrid(to_api, xdim, ydim)
    Pvt::handle_error(status, "Failed to launch kernel function on #{xdim}x#{ydim} grid of blocks.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Launch this kernel function with grid dimensions (_xdim_, _ydim_) on _stream_ asynchronously to execute
  # on the current CUDA device. Setting _stream_ to anything other than an instance of CUStream
  # will execute on the default stream 0.
  # @overload launch_grid_async(xdim, stream)
  # @overload launch_grid_async(xdim, ydim, stream)
  # @param [Integer] xdim The x dimensional size of the grid to launch.
  # @param [Integer] ydim The y dimensional size of the grid to launch. Defaults to 1.
  # @param [Integer, CUStream] stream The stream to launch this kernel function on.
  # @return [CUFunction] This function.
  def launch_grid_async(xdim, ydim = 1, stream)
    s = Pvt::parse_stream(stream)
    status = API::cuLaunchGridAsync(to_api, xdim, ydim, s)
    Pvt::handle_error(status, "Failed to launch kernel function asynchronously on #{xdim}x#{ydim} grid of blocks.")
    self
  end


  # @param [CUFunctionAttribute] attrib The attribute of the kernel function to query.
  # @return [Integer] The particular attribute _attrib_ of this kernel function.
  #
  # @example Get function attribute.
  #     func.attribute(:MAX_THREADS_PER_BLOCK)    #=> 512
  #     func.attribute(:SHARED_SIZE_BYTES)        #=> 44
  #     func.attribute(:NUM_REGS)                 #=> 3
  def attribute(attrib)
    out = FFI::MemoryPointer.new(:int)
    status = API::cuFuncGetAttribute(out, attrib, to_api)
    Pvt::handle_error(status, "Failed to query function attribute: attribute = #{attrib}.")
    out.read_int
  end


  # Set the preferred cache configuration (CUFunctionCache) to use for next launch.
  # @param [CUFunctionCache] conf The preferred cache configuration.
  def cache_config=(conf)
    status = API::cuFuncSetCacheConfig(to_api, conf)
    Pvt::handle_error(status, "Failed to set function cache config: config = #{conf}.")
  end


  # Launch this kernel function with full configuration parameters and function parameters
  # to execute on the current CUDA device.
  # @param [Integer] grid_xdim The x dimensional size of the grid to launch.
  # @param [Integer] grid_ydim The y dimensional size of the grid to launch.
  # @param [Integer] grid_zdim The z dimensional size of the grid to launch.
  # @param [Integer] block_xdim The x dimensional size of a block in the grid.
  # @param [Integer] block_ydim The y dimensional size of a block in the grid.
  # @param [Integer] block_zdim The z dimensional size of a block in the grid.
  # @param [Integer] shared_mem_size Number of bytes of dynamic shared memory for each thread block.
  # @param [Integer, CUStream] stream The stream to launch this kernel function on.
  #     Setting _stream_ to anything other than an instance of CUStream will execute on the default stream 0.
  # @param [Array<Integer, Float, CUDevicePtr>] params The list of parameters to pass in for the kernel function launch.
  #     * An Integer is mapped to a C type int.
  #     * A Float is mapped to a C type float.
  #     * A CUDevicePtr is passed as a device pointer.
  # @return [CUFunction] This function.
  # @raise [TypeError] If _params_ is not an Array, or contains a value of an unsupported type.
  #
  # @todo Add support for other C data types for the kernel function parameters.
  def launch_kernel(grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, stream, params)
    # Held in a local for the whole call so the argument buffers stay reachable while the driver reads them.
    kernel_params = parse_params(params)
    s = Pvt::parse_stream(stream)
    status = API::cuLaunchKernel(to_api, grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, s, kernel_params, nil)
    Pvt::handle_error(status, "Failed to launch kernel function.\n" +
        "* #{grid_xdim} x #{grid_ydim} x #{grid_zdim} grid\n" +
        "* #{block_xdim} x #{block_ydim} x #{block_zdim} blocks\n" +
        "* shared memory size = #{shared_mem_size}")
    self
  end


  # @private
  def initialize(ptr)
    @pfunc = ptr
  end
  private_class_method :new


  # @private
  def to_api
    API::read_cufunction(@pfunc)
  end

private

  # Build the table of pointers to the kernel arguments that is handed to cuLaunchKernel.
  # @param [Array] params The kernel parameters; see {#launch_kernel} for the supported types.
  # @return [FFI::MemoryPointer, nil] One pointer slot per parameter, or nil when _params_ is empty.
  # @raise [TypeError] If _params_ is not an Array, or contains a value of an unsupported type.
  def parse_params(params)
    params.is_a?(Array) or raise TypeError, "Expect _params_ an Array, but we get a #{params.class}."
    return nil if params.empty?

    buffers = params.map { |x| pack_param(x, "kernel parameter") }
    table = FFI::MemoryPointer.new(:pointer, params.size)
    buffers.each_with_index { |buf, i| table[i].write_pointer(buf) }

    # Only raw addresses are stored in the table, so Ruby holds no reference to the
    # per-argument buffers through it. Keep them reachable from the table itself so
    # they cannot be garbage-collected (and their memory released) before the launch.
    table.instance_variable_set(:@sgc_param_buffers, buffers)
    table
  end


  # Copy a single kernel argument into a newly allocated host buffer.
  # @param [Integer, Float, CUDevicePtr] value The argument to marshal.
  # @param [String] kind The noun used for _value_ in the error message.
  # @return [FFI::MemoryPointer] The buffer holding the C representation of _value_.
  # @raise [TypeError] If _value_ is of an unsupported type.
  def pack_param(value, kind)
    case value
    when Integer    # Fixnum no longer exists in current Ruby; Integer covers it.
      FFI::MemoryPointer.new(:int).write_int(value)
    when Float
      FFI::MemoryPointer.new(:float).write_float(value)
    when CUDevicePtr
      buf = FFI::MemoryPointer.new(:CUDevicePtr)
      API::write_cudeviceptr(buf, value.to_api.address)
      buf
    else
      raise TypeError, "Invalid type of #{kind} #{value.to_s}."
    end
  end


  # Round _offset_ up to the next multiple of _alignment_.
  # The bit mask is only correct when _alignment_ is a power of two.
  # @param [Integer] offset The offset in bytes.
  # @param [Integer] alignment The required alignment in bytes.
  # @return [Integer] The smallest multiple of _alignment_ that is not less than _offset_.
  def align_up(offset, alignment)
    (offset + alignment - 1) & ~(alignment - 1)
  end

end
|
291
|
+
|
292
|
+
end # module
|
293
|
+
end # module
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'cuda/driver/ffi-cu'
|
26
|
+
require 'cuda/driver/error'
|
27
|
+
|
28
|
+
|
29
|
+
module SGC
|
30
|
+
module CU
|
31
|
+
|
32
|
+
class CUInit

  class << self

    # Bring up the CUDA driver API by calling cuInit. Call this before using any other
    # CUDA driver function.
    # @param [Integer] flags Currently _flags_ must be set to zero.
    # @return [nil]
    def init(flags = 0)
      rc = API.cuInit(flags)
      Pvt.handle_error(rc, "Failed to initialize the CUDA driver API.")
      nil
    end

  end

end
|
43
|
+
|
44
|
+
end # module
|
45
|
+
end # module
|
@@ -0,0 +1,134 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'cuda/driver/ffi-cu'
|
26
|
+
require 'cuda/driver/error'
|
27
|
+
require 'cuda/driver/stream'
|
28
|
+
|
29
|
+
|
30
|
+
module SGC
|
31
|
+
module CU
|
32
|
+
|
33
|
+
module CUMemory

  # Every method below is exposed as a module function.
  module_function

  # Copy _nbytes_ of memory from _src_ptr_ to _dst_ptr_.
  # The type of memory (host or device) is inferred from the pointer value.
  # @return [nil]
  def memcpy(dst_ptr, src_ptr, nbytes)
    rc = API.cuMemcpy(dst_ptr.to_api, src_ptr.to_api, nbytes)
    Pvt.handle_error(rc, "Failed to copy memory: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ of memory from _src_ptr_ to _dst_ptr_ on _stream_ asynchronously.
  # The type of memory (host or device) is inferred from the pointer value.
  # @return [nil]
  def memcpy_async(dst_ptr, src_ptr, nbytes, stream)
    cu_stream = Pvt.parse_stream(stream)
    rc = API.cuMemcpyAsync(dst_ptr.to_api, src_ptr.to_api, nbytes, cu_stream)
    Pvt.handle_error(rc, "Failed to copy memory asynchronously: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_.
  # @return [nil]
  def memcpy_htod(dst_devptr, src_mem, nbytes)
    rc = API.cuMemcpyHtoD(dst_devptr.to_api, src_mem.ptr, nbytes)
    Pvt.handle_error(rc, "Failed to copy memory from host to device: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
  #
  # @note The _src_mem_ should be *page-locked* memory.
  # @note Marked as not implemented yet in the original documentation, although the driver call is issued here.
  # @return [nil]
  def memcpy_htod_async(dst_devptr, src_mem, nbytes, stream)
    cu_stream = Pvt.parse_stream(stream)
    rc = API.cuMemcpyHtoDAsync(dst_devptr.to_api, src_mem.ptr, nbytes, cu_stream)
    Pvt.handle_error(rc, "Failed to copy memory from host to device asynchronously: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_.
  # @return [nil]
  def memcpy_dtoh(dst_mem, src_devptr, nbytes)
    rc = API.cuMemcpyDtoH(dst_mem.ptr, src_devptr.to_api, nbytes)
    Pvt.handle_error(rc, "Failed to copy memory from device to host: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_ on _stream_ asynchronously.
  #
  # @note The _dst_mem_ should be *page-locked* memory.
  # @note Marked as not implemented yet in the original documentation, although the driver call is issued here.
  # @return [nil]
  def memcpy_dtoh_async(dst_mem, src_devptr, nbytes, stream)
    cu_stream = Pvt.parse_stream(stream)
    rc = API.cuMemcpyDtoHAsync(dst_mem.ptr, src_devptr.to_api, nbytes, cu_stream)
    Pvt.handle_error(rc, "Failed to copy memory from device to host asynchronously: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_ asynchronously.
  # No stream is taken; the copy is issued through cuMemcpyDtoD.
  # @return [nil]
  def memcpy_dtod(dst_devptr, src_devptr, nbytes)
    rc = API.cuMemcpyDtoD(dst_devptr.to_api, src_devptr.to_api, nbytes)
    Pvt.handle_error(rc, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
  #
  # @note Marked as not implemented yet in the original documentation, although the driver call is issued here.
  # @return [nil]
  def memcpy_dtod_async(dst_devptr, src_devptr, nbytes, stream)
    cu_stream = Pvt.parse_stream(stream)
    rc = API.cuMemcpyDtoDAsync(dst_devptr.to_api, src_devptr.to_api, nbytes, cu_stream)
    Pvt.handle_error(rc, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
    nil
  end


  # Query the device memory usage through cuMemGetInfo.
  # @return [Hash{ :free, :total }] A hash with the amount of free and total device memory in bytes.
  def mem_info
    free_out, total_out = Array.new(2) { FFI::MemoryPointer.new(:size_t) }
    rc = API.cuMemGetInfo(free_out, total_out)
    Pvt.handle_error(rc, "Failed to get memory information.")
    { free: API.read_size_t(free_out), total: API.read_size_t(total_out) }
  end

end
|
132
|
+
|
133
|
+
end # module
|
134
|
+
end # module
|