sgc-ruby-cuda 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +2 -0
- data/COPYING +674 -0
- data/README.rdoc +106 -0
- data/Rakefile +76 -0
- data/doc/devel.rdoc +77 -0
- data/doc/features.rdoc +55 -0
- data/lib/cuda/driver/context.rb +236 -0
- data/lib/cuda/driver/cu.rb +60 -0
- data/lib/cuda/driver/device.rb +155 -0
- data/lib/cuda/driver/deviceptr.rb +69 -0
- data/lib/cuda/driver/error.rb +182 -0
- data/lib/cuda/driver/event.rb +124 -0
- data/lib/cuda/driver/ffi-cu.rb +620 -0
- data/lib/cuda/driver/function.rb +293 -0
- data/lib/cuda/driver/init.rb +45 -0
- data/lib/cuda/driver/memory.rb +134 -0
- data/lib/cuda/driver/module.rb +142 -0
- data/lib/cuda/driver/rubycu.rb +37 -0
- data/lib/cuda/driver/stream.rb +128 -0
- data/lib/cuda/driver/version.rb +42 -0
- data/lib/cuda/runtime/cuda.rb +65 -0
- data/lib/cuda/runtime/device.rb +175 -0
- data/lib/cuda/runtime/error.rb +197 -0
- data/lib/cuda/runtime/event.rb +117 -0
- data/lib/cuda/runtime/ffi-cuda.rb +588 -0
- data/lib/cuda/runtime/function.rb +161 -0
- data/lib/cuda/runtime/memory.rb +110 -0
- data/lib/cuda/runtime/rubycuda.rb +34 -0
- data/lib/cuda/runtime/stream.rb +126 -0
- data/lib/cuda/runtime/thread.rb +81 -0
- data/lib/cuda/runtime/version.rb +51 -0
- data/lib/ffi/prettystruct.rb +32 -0
- data/lib/helpers/flags.rb +82 -0
- data/lib/helpers/interface/ienum.rb +45 -0
- data/lib/helpers/klass.rb +45 -0
- data/lib/memory/buffer.rb +125 -0
- data/lib/memory/interface/ibuffer.rb +63 -0
- data/lib/memory/pointer.rb +72 -0
- data/lib/rubycu.rb +1 -0
- data/lib/rubycuda.rb +1 -0
- data/test/bad.ptx +0 -0
- data/test/memory/test_buffer.rb +93 -0
- data/test/rubycu/test_cucontext.rb +148 -0
- data/test/rubycu/test_cudevice.rb +69 -0
- data/test/rubycu/test_cudeviceptr.rb +43 -0
- data/test/rubycu/test_cuevent.rb +81 -0
- data/test/rubycu/test_cufunction.rb +165 -0
- data/test/rubycu/test_cumemory.rb +113 -0
- data/test/rubycu/test_cumodule.rb +114 -0
- data/test/rubycu/test_custream.rb +77 -0
- data/test/rubycu/test_cuversion.rb +39 -0
- data/test/rubycu/testbase.rb +107 -0
- data/test/rubycuda/test_cudadevice.rb +125 -0
- data/test/rubycuda/test_cudaerror.rb +48 -0
- data/test/rubycuda/test_cudaevent.rb +78 -0
- data/test/rubycuda/test_cudafunction.rb +106 -0
- data/test/rubycuda/test_cudamemory.rb +90 -0
- data/test/rubycuda/test_cudastream.rb +72 -0
- data/test/rubycuda/test_cudathread.rb +69 -0
- data/test/rubycuda/test_cudaversion.rb +41 -0
- data/test/rubycuda/testbase.rb +67 -0
- data/test/vadd.cu +21 -0
- data/version.rb +1 -0
- metadata +180 -0
@@ -0,0 +1,293 @@
+#
+# Copyright (c) 2011 Chung Shin Yee
+#
+# shinyee@speedgocomputing.com
+# http://www.speedgocomputing.com
+# http://github.com/xman/sgc-ruby-cuda
+# http://rubyforge.org/projects/rubycuda
+#
+# This file is part of SGC-Ruby-CUDA.
+#
+# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
+#
+
+require 'cuda/driver/ffi-cu'
+require 'cuda/driver/error'
+
+
+module SGC
+module CU
+
+class CUFunction
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Set the argument list of subsequent function call to _arg1_, _arg2_, *other_args.
+    # @param *args The list of arguments to pass to the kernel function.
+    def param=(*args)
+        offset = 0
+        args.flatten.each do |x|
+            case x
+            when Fixnum
+                p = FFI::MemoryPointer.new(:int)
+                p.write_int(x)
+                size = 4
+            when Float
+                p = FFI::MemoryPointer.new(:float)
+                p.write_float(x)
+                size = 4
+            when CUDevicePtr
+                p = FFI::MemoryPointer.new(:CUDevicePtr)
+                API::write_cudeviceptr(p, x.to_api.address)
+                size = p.size
+            else
+                raise TypeError, "Invalid type of argument #{x.to_s}."
+            end
+            offset = align_up(offset, size)
+            status = API::cuParamSetv(self.to_api, offset, p, size)
+            Pvt::handle_error(status, "Failed to set function parameters: offset = #{offset}, value = #{x}.")
+            offset += size
+        end
+
+        status = API::cuParamSetSize(self.to_api, offset)
+        Pvt::handle_error(status, "Failed to set function parameter size: size = #{offset}.")
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Set a float parameter to the function's argument list at _offset_ with _value_.
+    # @param [Integer] offset Number of bytes to offset.
+    # @param [Float] value The floating-point value to set as the function parameter.
+    # @return [CUFunction] This function.
+    def param_setf(offset, value)
+        status = API::cuParamSetf(self.to_api, offset, value)
+        Pvt::handle_error(status, "Failed to set function float parameter: offset = #{offset}, value = #{value}.")
+        self
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Set an integer parameter to the function's argument list at _offset_ with _value_.
+    # @param [Integer] offset Number of bytes to offset.
+    # @param [Integer] value The integer value to set as the function parameter.
+    # @return [CUFunction] This function.
+    def param_seti(offset, value)
+        status = API::cuParamSeti(self.to_api, offset, value)
+        Pvt::handle_error(status, "Failed to set function integer parameter: offset = #{offset}, value = #{value}")
+        self
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Set an arbitrary data to the function's argument list at _offset_ with _ptr_ pointed _nbytes_ data.
+    # @param [Integer] offset Number of bytes to offset.
+    # @param [CUDevicePtr] ptr A device pointer pointing to an arbitrary data to be used as the function parameter.
+    # @param [Integer] nbytes The size of the arbitrary data in bytes.
+    # @return [CUFunction] This function.
+    def param_setv(offset, ptr, nbytes)
+        status = API::cuParamSetv(self.to_api, offset, ptr.to_api, nbytes)
+        Pvt::handle_error(status, "Failed to set function arbitrary parameter: offset = #{offset}, size = #{nbytes}.")
+        self
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Set the function parameter size to _nbytes_.
+    # @param [Integer] nbytes The parameter size in bytes.
+    # @return [CUFunction] This function.
+    def param_set_size(nbytes)
+        status = API::cuParamSetSize(self.to_api, nbytes)
+        Pvt::handle_error(status, "Failed to set function parameter size: size = #{nbytes}.")
+        self
+    end
+
+
+    # @deprecated
+    #
+    def param_set_texref(texunit, texref)
+        raise NotImplementedError
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Set the block dimensions to use for next launch.
+    # @overload block_shape=(xdim)
+    # @overload block_shape=(xdim, ydim)
+    # @overload block_shape=(xdim, ydim, zdim)
+    # @param [Integer] xdim The size of the x dimension.
+    # @param [Integer] ydim The size of the y dimension. Defaults to 1.
+    # @param [Integer] zdim The size of the z dimension. Defaults to 1.
+    def block_shape=(*args)
+        xdim, ydim, zdim = args.flatten
+        ydim = 1 if ydim.nil?
+        zdim = 1 if zdim.nil?
+        status = API::cuFuncSetBlockShape(self.to_api, xdim, ydim, zdim)
+        Pvt::handle_error(status, "Failed to set function block shape: (x,y,z) = (#{xdim},#{ydim},#{zdim}).")
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Set the dynamic shared-memory size to use for next launch.
+    # @param [Integer] nbytes Number of bytes.
+    def shared_size=(nbytes)
+        status = API::cuFuncSetSharedSize(self.to_api, nbytes)
+        Pvt::handle_error(status, "Failed to set function shared memory size: #{nbytes}.")
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Launch this kernel function with 1x1x1 grid of blocks to execute on the current CUDA device.
+    # @return [CUFunction] This function.
+    def launch
+        status = API::cuLaunch(self.to_api)
+        Pvt::handle_error(status, "Failed to launch kernel function on 1x1x1 grid of blocks.")
+        self
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Launch this kernel function with grid dimensions (_xdim_, _ydim_) to execute on the current CUDA device.
+    # @overload launch_grid(xdim)
+    # @overload launch_grid(xdim, ydim)
+    # @param [Integer] xdim The x dimensional size of the grid to launch.
+    # @param [Integer] ydim The y dimensional size of the grid to launch. Defaults to 1.
+    # @return [CUFunction] This function.
+    def launch_grid(xdim, ydim = 1)
+        status = API::cuLaunchGrid(self.to_api, xdim, ydim)
+        Pvt::handle_error(status, "Failed to launch kernel function on #{xdim}x#{ydim} grid of blocks.")
+        self
+    end
+
+
+    # @deprecated Use {#launch_kernel}.
+    #
+    # Launch this kernel function with grid dimensions (_xdim_, _ydim_) on _stream_ asynchronously to execute
+    # on the current CUDA device. Setting _stream_ to anything other than an instance of CUStream
+    # will execute on the default stream 0.
+    # @overload launch_grid_async(xdim, stream)
+    # @overload launch_grid_async(xdim, ydim, stream)
+    # @param [Integer] xdim The x dimensional size
+    def launch_grid_async(xdim, ydim = 1, stream)
+        s = Pvt::parse_stream(stream)
+        status = API::cuLaunchGridAsync(self.to_api, xdim, ydim, s)
+        Pvt::handle_error(status, "Failed to launch kernel function asynchronously on #{xdim}x#{ydim} grid of blocks.")
+        self
+    end
+
+
+    # @param [CUFunctionAttribute] attrib The attribute of the kernel function to query.
+    # @return [Integer] The particular attribute _attrib_ of this kernel function.
+    #
+    # @example Get function attribute.
+    #     func.attribute(:MAX_THREADS_PER_BLOCK)    #=> 512
+    #     func.attribute(:SHARED_SIZE_BYTES)        #=> 44
+    #     func.attribute(:NUM_REGS)                 #=> 3
+    def attribute(attrib)
+        p = FFI::MemoryPointer.new(:int)
+        status = API::cuFuncGetAttribute(p, attrib, self.to_api)
+        Pvt::handle_error(status, "Failed to query function attribute: attribute = #{attrib}.")
+        p.read_int
+    end
+
+
+    # Set the preferred cache configuration (CUFunctionCache) to use for next launch.
+    # @param [CUFunctionCache] conf The preferred cache configuration.
+    def cache_config=(conf)
+        status = API::cuFuncSetCacheConfig(self.to_api, conf)
+        Pvt::handle_error(status, "Failed to set function cache config: config = #{conf}.")
+    end
+
+    # Launch this kernel function with full configuration parameters and function parameters
+    # to execute on the current CUDA device.
+    # @param [Integer] grid_xdim The x dimensional size of the grid to launch.
+    # @param [Integer] grid_ydim The y dimensional size of the grid to launch.
+    # @param [Integer] grid_zdim The z dimensional size of the grid to launch.
+    # @param [Integer] block_xdim The x dimensional size of a block in the grid.
+    # @param [Integer] block_ydim The y dimensional size of a block in the grid.
+    # @param [Integer] block_zdim The z dimensional size of a block in the grid.
+    # @param [Integer] shared_mem_size Number of bytes of dynamic shared memory for each thread block.
+    # @param [Integer, CUStream] stream The stream to launch this kernel function on.
+    #     Setting _stream_ to anything other than an instance of CUStream will execute on the default stream 0.
+    # @param [Array<Fixnum, Float, CUDevicePtr>] params The list of parameters to pass in for the kernel function launch.
+    #     * A Fixnum is mapped to a C type int.
+    #     * A Float is mapped to a C type float.
+    # @return [CUFunction] This function.
+    #
+    # @todo Add support for other C data types for the kernel function parameters.
+    def launch_kernel(grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, stream, params)
+        p = parse_params(params)
+        s = Pvt::parse_stream(stream)
+        status = API::cuLaunchKernel(self.to_api, grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, s, p, nil)
+        Pvt::handle_error(status, "Failed to launch kernel function.\n" +
+            "* #{grid_xdim} x #{grid_ydim} x #{grid_zdim} grid\n" +
+            "* #{block_xdim} x #{block_ydim} x #{block_zdim} blocks\n" +
+            "* shared memory size = #{shared_mem_size}")
+        self
+    end
+
+
+    # @private
+    def initialize(ptr)
+        @pfunc = ptr
+    end
+    private_class_method :new
+
+
+    # @private
+    def to_api
+        API::read_cufunction(@pfunc)
+    end
+
+private
+
+    def parse_params(params)
+        params.is_a?(Array) or raise TypeError, "Expect _params_ an Array, but we get a #{params.class}."
+        params.size <= 0 and return nil
+
+        p = FFI::MemoryPointer.new(:pointer, params.size)
+        params.each_with_index do |x,i|
+            m = case x
+                when Fixnum
+                    FFI::MemoryPointer.new(:int).write_int(x)
+                when Float
+                    FFI::MemoryPointer.new(:float).write_float(x)
+                when CUDevicePtr
+                    ptr = FFI::MemoryPointer.new(:CUDevicePtr)
+                    API::write_cudeviceptr(ptr, x.to_api.address)
+                    ptr
+                else
+                    raise TypeError, "Invalid type of kernel parameter #{x.to_s}."
+            end
+            p[i].write_pointer(m)
+        end
+        p
+    end
+
+
+    def align_up(offset, alignment)
+        (offset + alignment - 1) & ~(alignment - 1)
+    end
+
+end
+
+end # module
+end # module
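The 293-line hunk above matches data/lib/cuda/driver/function.rb (+293) in the file list and adds the CUFunction class. For orientation, here is a minimal usage sketch of the launch_kernel call it introduces; the names func (a CUFunction obtained through the module-loading API in data/lib/cuda/driver/module.rb, which is not shown in this hunk), dptr (a CUDevicePtr to device memory) and n (an element count) are assumptions, not definitions from this file:

    # Launch a 1-D grid of 2 blocks x 128 threads with no dynamic shared memory
    # on the default stream (per the doc above, any non-CUStream value selects
    # stream 0), passing an int, a float and a device pointer as parameters.
    func.launch_kernel(2, 1, 1,           # grid dimensions  (x, y, z)
                       128, 1, 1,         # block dimensions (x, y, z)
                       0,                 # dynamic shared memory in bytes
                       0,                 # stream: non-CUStream => default stream 0
                       [n, 1.5, dptr])    # Fixnum -> int, Float -> float, CUDevicePtr

The per-parameter setters (param_seti, param_setf, block_shape=, launch_grid, and friends) are the older launch path and are all tagged @deprecated in favour of this single call.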
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2011 Chung Shin Yee
+#
+# shinyee@speedgocomputing.com
+# http://www.speedgocomputing.com
+# http://github.com/xman/sgc-ruby-cuda
+# http://rubyforge.org/projects/rubycuda
+#
+# This file is part of SGC-Ruby-CUDA.
+#
+# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
+#
+
+require 'cuda/driver/ffi-cu'
+require 'cuda/driver/error'
+
+
+module SGC
+module CU
+
+class CUInit
+
+    # Initialize the CUDA driver API. This must be called before other CUDA driver functions.
+    # @param [Integer] flags Currently _flags_ must be set to zero.
+    def self.init(flags = 0)
+        status = API::cuInit(flags)
+        Pvt::handle_error(status, "Failed to initialize the CUDA driver API.")
+        nil
+    end
+
+end
+
+end # module
+end # module
@@ -0,0 +1,134 @@
+#
+# Copyright (c) 2011 Chung Shin Yee
+#
+# shinyee@speedgocomputing.com
+# http://www.speedgocomputing.com
+# http://github.com/xman/sgc-ruby-cuda
+# http://rubyforge.org/projects/rubycuda
+#
+# This file is part of SGC-Ruby-CUDA.
+#
+# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
+#
+
+require 'cuda/driver/ffi-cu'
+require 'cuda/driver/error'
+require 'cuda/driver/stream'
+
+
+module SGC
+module CU
+
+module CUMemory
+
+    # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_.
+    # The type of memory (host or device) is inferred from the pointer value.
+    def memcpy(dst_ptr, src_ptr, nbytes)
+        status = API::cuMemcpy(dst_ptr.to_api, src_ptr.to_api, nbytes)
+        Pvt::handle_error(status, "Failed to copy memory: size = #{nbytes}")
+        nil
+    end
+    module_function :memcpy
+
+
+    # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_ on _stream_ asynchronously.
+    # The type of memory (host or device) is inferred from the pointer value.
+    def memcpy_async(dst_ptr, src_ptr, nbytes, stream)
+        s = Pvt::parse_stream(stream)
+        status = API::cuMemcpyAsync(dst_ptr.to_api, src_ptr.to_api, nbytes, s)
+        Pvt::handle_error(status, "Failed to copy memory asynchronously: size = #{nbytes}")
+        nil
+    end
+    module_function :memcpy_async
+
+
+    # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_.
+    def memcpy_htod(dst_devptr, src_mem, nbytes)
+        status = API::cuMemcpyHtoD(dst_devptr.to_api, src_mem.ptr, nbytes)
+        Pvt::handle_error(status, "Failed to copy memory from host to device: size = #{nbytes}")
+        nil
+    end
+    module_function :memcpy_htod
+
+
+    # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
+    #
+    # @note The _src_mem_ should be *page-locked* memory.
+    # @note Not implemented yet.
+    def memcpy_htod_async(dst_devptr, src_mem, nbytes, stream)
+        s = Pvt::parse_stream(stream)
+        status = API::cuMemcpyHtoDAsync(dst_devptr.to_api, src_mem.ptr, nbytes, s)
+        Pvt::handle_error(status, "Failed to copy memory from host to device asynchronously: size = #{nbytes}")
+        nil
+    end
+    module_function :memcpy_htod_async
+
+
+    # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_.
+    def memcpy_dtoh(dst_mem, src_devptr, nbytes)
+        status = API::cuMemcpyDtoH(dst_mem.ptr, src_devptr.to_api, nbytes)
+        Pvt::handle_error(status, "Failed to copy memory from device to host: size = #{nbytes}")
+        nil
+    end
+    module_function :memcpy_dtoh
+
+
+    # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_ on _stream_ asynchronously.
+    #
+    # @note The _dst_mem_ should be *page-locked* memory.
+    # @note Not implemented yet.
+    def memcpy_dtoh_async(dst_mem, src_devptr, nbytes, stream)
+        s = Pvt::parse_stream(stream)
+        status = API::cuMemcpyDtoHAsync(dst_mem.ptr, src_devptr.to_api, nbytes, s)
+        Pvt::handle_error(status, "Failed to copy memory from device to host asynchronously: size = #{nbytes}")
+        nil
+    end
+    module_function :memcpy_dtoh_async
+
+
+    # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_ asynchronously.
+    def memcpy_dtod(dst_devptr, src_devptr, nbytes)
+        status = API::cuMemcpyDtoD(dst_devptr.to_api, src_devptr.to_api, nbytes)
+        Pvt::handle_error(status, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
+        nil
+    end
+    module_function :memcpy_dtod
+
+
+    # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
+    #
+    # @note Not implemented yet.
+    def memcpy_dtod_async(dst_devptr, src_devptr, nbytes, stream)
+        s = Pvt::parse_stream(stream)
+        status = API::cuMemcpyDtoDAsync(dst_devptr.to_api, src_devptr.to_api, nbytes, s)
+        Pvt::handle_error(status, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
+        nil
+    end
+    module_function :memcpy_dtod_async
+
+
+    # @return [Hash{ :free, :total }] A hash with the amount of free and total device memory in bytes.
+    def mem_info
+        pfree = FFI::MemoryPointer.new(:size_t)
+        ptotal = FFI::MemoryPointer.new(:size_t)
+        status = API::cuMemGetInfo(pfree, ptotal)
+        Pvt::handle_error(status, "Failed to get memory information.")
+        { free: API::read_size_t(pfree), total: API::read_size_t(ptotal) }
+    end
+    module_function :mem_info
+
+end
+
+end # module
+end # module
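The 134-line hunk above matches data/lib/cuda/driver/memory.rb (+134) in the file list. A hedged sketch of a host-to-device round trip with its CUMemory module functions: the host argument only needs to respond to #ptr with an FFI pointer (the gem ships SGC::Memory::Buffer in data/lib/memory/buffer.rb for that role; the HostMem struct below is a self-contained stand-in), and devptr is assumed to be a CUDevicePtr covering at least nbytes of device memory allocated elsewhere:

    require 'ffi'
    # Assumes the driver bindings are loaded and CUInit.init has been called,
    # as in the previous sketch, and that SGC::CU is included.

    # Stand-in host buffer: memcpy_htod/memcpy_dtoh only ever call src_mem.ptr
    # and dst_mem.ptr, so any object exposing #ptr works here.
    HostMem = Struct.new(:ptr)

    n      = 16
    nbytes = n * 4                                        # n C ints
    host   = HostMem.new(FFI::MemoryPointer.new(:int, n))
    host.ptr.write_array_of_int((1..n).to_a)

    CUMemory.memcpy_htod(devptr, host, nbytes)            # host -> device
    CUMemory.memcpy_dtoh(host, devptr, nbytes)            # device -> host
    CUMemory.mem_info                                     # => { free: ..., total: ... }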