sgc-ruby-cuda 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.yardopts +2 -0
  2. data/COPYING +674 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +76 -0
  5. data/doc/devel.rdoc +77 -0
  6. data/doc/features.rdoc +55 -0
  7. data/lib/cuda/driver/context.rb +236 -0
  8. data/lib/cuda/driver/cu.rb +60 -0
  9. data/lib/cuda/driver/device.rb +155 -0
  10. data/lib/cuda/driver/deviceptr.rb +69 -0
  11. data/lib/cuda/driver/error.rb +182 -0
  12. data/lib/cuda/driver/event.rb +124 -0
  13. data/lib/cuda/driver/ffi-cu.rb +620 -0
  14. data/lib/cuda/driver/function.rb +293 -0
  15. data/lib/cuda/driver/init.rb +45 -0
  16. data/lib/cuda/driver/memory.rb +134 -0
  17. data/lib/cuda/driver/module.rb +142 -0
  18. data/lib/cuda/driver/rubycu.rb +37 -0
  19. data/lib/cuda/driver/stream.rb +128 -0
  20. data/lib/cuda/driver/version.rb +42 -0
  21. data/lib/cuda/runtime/cuda.rb +65 -0
  22. data/lib/cuda/runtime/device.rb +175 -0
  23. data/lib/cuda/runtime/error.rb +197 -0
  24. data/lib/cuda/runtime/event.rb +117 -0
  25. data/lib/cuda/runtime/ffi-cuda.rb +588 -0
  26. data/lib/cuda/runtime/function.rb +161 -0
  27. data/lib/cuda/runtime/memory.rb +110 -0
  28. data/lib/cuda/runtime/rubycuda.rb +34 -0
  29. data/lib/cuda/runtime/stream.rb +126 -0
  30. data/lib/cuda/runtime/thread.rb +81 -0
  31. data/lib/cuda/runtime/version.rb +51 -0
  32. data/lib/ffi/prettystruct.rb +32 -0
  33. data/lib/helpers/flags.rb +82 -0
  34. data/lib/helpers/interface/ienum.rb +45 -0
  35. data/lib/helpers/klass.rb +45 -0
  36. data/lib/memory/buffer.rb +125 -0
  37. data/lib/memory/interface/ibuffer.rb +63 -0
  38. data/lib/memory/pointer.rb +72 -0
  39. data/lib/rubycu.rb +1 -0
  40. data/lib/rubycuda.rb +1 -0
  41. data/test/bad.ptx +0 -0
  42. data/test/memory/test_buffer.rb +93 -0
  43. data/test/rubycu/test_cucontext.rb +148 -0
  44. data/test/rubycu/test_cudevice.rb +69 -0
  45. data/test/rubycu/test_cudeviceptr.rb +43 -0
  46. data/test/rubycu/test_cuevent.rb +81 -0
  47. data/test/rubycu/test_cufunction.rb +165 -0
  48. data/test/rubycu/test_cumemory.rb +113 -0
  49. data/test/rubycu/test_cumodule.rb +114 -0
  50. data/test/rubycu/test_custream.rb +77 -0
  51. data/test/rubycu/test_cuversion.rb +39 -0
  52. data/test/rubycu/testbase.rb +107 -0
  53. data/test/rubycuda/test_cudadevice.rb +125 -0
  54. data/test/rubycuda/test_cudaerror.rb +48 -0
  55. data/test/rubycuda/test_cudaevent.rb +78 -0
  56. data/test/rubycuda/test_cudafunction.rb +106 -0
  57. data/test/rubycuda/test_cudamemory.rb +90 -0
  58. data/test/rubycuda/test_cudastream.rb +72 -0
  59. data/test/rubycuda/test_cudathread.rb +69 -0
  60. data/test/rubycuda/test_cudaversion.rb +41 -0
  61. data/test/rubycuda/testbase.rb +67 -0
  62. data/test/vadd.cu +21 -0
  63. data/version.rb +1 -0
  64. metadata +180 -0
@@ -0,0 +1,293 @@
1
+ #
2
+ # Copyright (c) 2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/driver/ffi-cu'
26
+ require 'cuda/driver/error'
27
+
28
+
29
+ module SGC
30
+ module CU
31
+
32
class CUFunction

  # @deprecated Use {#launch_kernel}.
  #
  # Set the argument list of the subsequent function call to the given values.
  # Each value is written at the next offset aligned to its own size, and the
  # total parameter size is set once all values have been written.
  # @param *args The list of arguments to pass to the kernel function. Nested arrays are flattened.
  #     * An Integer is written as a C type int.
  #     * A Float is written as a C type float.
  #     * A CUDevicePtr is written as a device pointer.
  # @raise [TypeError] If an argument is of any other type.
  def param=(*args)
    offset = 0
    args.flatten.each do |x|
      case x
      when Integer # Fixnum no longer exists on Ruby >= 3.2; Integer covers it on every version.
        mem = FFI::MemoryPointer.new(:int)
        mem.write_int(x)
        size = 4
      when Float
        mem = FFI::MemoryPointer.new(:float)
        mem.write_float(x)
        size = 4
      when CUDevicePtr
        mem = FFI::MemoryPointer.new(:CUDevicePtr)
        API::write_cudeviceptr(mem, x.to_api.address)
        size = mem.size
      else
        raise TypeError, "Invalid type of argument #{x}."
      end
      # Each argument is aligned to its own size before it is written.
      offset = align_up(offset, size)
      status = API::cuParamSetv(self.to_api, offset, mem, size)
      Pvt::handle_error(status, "Failed to set function parameters: offset = #{offset}, value = #{x}.")
      offset += size
    end

    status = API::cuParamSetSize(self.to_api, offset)
    Pvt::handle_error(status, "Failed to set function parameter size: size = #{offset}.")
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set a float parameter to the function's argument list at _offset_ with _value_.
  # @param [Integer] offset Number of bytes to offset.
  # @param [Float] value The floating-point value to set as the function parameter.
  # @return [CUFunction] This function.
  def param_setf(offset, value)
    status = API::cuParamSetf(self.to_api, offset, value)
    Pvt::handle_error(status, "Failed to set function float parameter: offset = #{offset}, value = #{value}.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set an integer parameter to the function's argument list at _offset_ with _value_.
  # @param [Integer] offset Number of bytes to offset.
  # @param [Integer] value The integer value to set as the function parameter.
  # @return [CUFunction] This function.
  def param_seti(offset, value)
    status = API::cuParamSeti(self.to_api, offset, value)
    Pvt::handle_error(status, "Failed to set function integer parameter: offset = #{offset}, value = #{value}")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set an arbitrary data to the function's argument list at _offset_ with _ptr_ pointed _nbytes_ data.
  # @param [Integer] offset Number of bytes to offset.
  # @param [CUDevicePtr] ptr A device pointer pointing to an arbitrary data to be used as the function parameter.
  # @param [Integer] nbytes The size of the arbitrary data in bytes.
  # @return [CUFunction] This function.
  def param_setv(offset, ptr, nbytes)
    status = API::cuParamSetv(self.to_api, offset, ptr.to_api, nbytes)
    Pvt::handle_error(status, "Failed to set function arbitrary parameter: offset = #{offset}, size = #{nbytes}.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set the function parameter size to _nbytes_.
  # @param [Integer] nbytes The parameter size in bytes.
  # @return [CUFunction] This function.
  def param_set_size(nbytes)
    status = API::cuParamSetSize(self.to_api, nbytes)
    Pvt::handle_error(status, "Failed to set function parameter size: size = #{nbytes}.")
    self
  end


  # @deprecated
  #
  # Placeholder only: this method is not implemented.
  # @raise [NotImplementedError] Always.
  def param_set_texref(texunit, texref)
    raise NotImplementedError
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set the block dimensions to use for next launch.
  # @overload block_shape=(xdim)
  # @overload block_shape=(xdim, ydim)
  # @overload block_shape=(xdim, ydim, zdim)
  # @param [Integer] xdim The size of the x dimension.
  # @param [Integer] ydim The size of the y dimension. Defaults to 1.
  # @param [Integer] zdim The size of the z dimension. Defaults to 1.
  def block_shape=(*args)
    # A setter receives a single right-hand value, so the dimensions arrive
    # as one (possibly nested) array; flatten before destructuring.
    xdim, ydim, zdim = args.flatten
    ydim = 1 if ydim.nil?
    zdim = 1 if zdim.nil?
    status = API::cuFuncSetBlockShape(self.to_api, xdim, ydim, zdim)
    Pvt::handle_error(status, "Failed to set function block shape: (x,y,z) = (#{xdim},#{ydim},#{zdim}).")
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Set the dynamic shared-memory size to use for next launch.
  # @param [Integer] nbytes Number of bytes.
  def shared_size=(nbytes)
    status = API::cuFuncSetSharedSize(self.to_api, nbytes)
    Pvt::handle_error(status, "Failed to set function shared memory size: #{nbytes}.")
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Launch this kernel function with 1x1x1 grid of blocks to execute on the current CUDA device.
  # @return [CUFunction] This function.
  def launch
    status = API::cuLaunch(self.to_api)
    Pvt::handle_error(status, "Failed to launch kernel function on 1x1x1 grid of blocks.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Launch this kernel function with grid dimensions (_xdim_, _ydim_) to execute on the current CUDA device.
  # @overload launch_grid(xdim)
  # @overload launch_grid(xdim, ydim)
  # @param [Integer] xdim The x dimensional size of the grid to launch.
  # @param [Integer] ydim The y dimensional size of the grid to launch. Defaults to 1.
  # @return [CUFunction] This function.
  def launch_grid(xdim, ydim = 1)
    status = API::cuLaunchGrid(self.to_api, xdim, ydim)
    Pvt::handle_error(status, "Failed to launch kernel function on #{xdim}x#{ydim} grid of blocks.")
    self
  end


  # @deprecated Use {#launch_kernel}.
  #
  # Launch this kernel function with grid dimensions (_xdim_, _ydim_) on _stream_ asynchronously to execute
  # on the current CUDA device. Setting _stream_ to anything other than an instance of CUStream
  # will execute on the default stream 0.
  # @overload launch_grid_async(xdim, stream)
  # @overload launch_grid_async(xdim, ydim, stream)
  # @param [Integer] xdim The x dimensional size of the grid to launch.
  # @param [Integer] ydim The y dimensional size of the grid to launch. Defaults to 1.
  # @param [Integer, CUStream] stream The stream to launch this kernel function on.
  # @return [CUFunction] This function.
  def launch_grid_async(xdim, ydim = 1, stream)
    s = Pvt::parse_stream(stream)
    status = API::cuLaunchGridAsync(self.to_api, xdim, ydim, s)
    Pvt::handle_error(status, "Failed to launch kernel function asynchronously on #{xdim}x#{ydim} grid of blocks.")
    self
  end


  # @param [CUFunctionAttribute] attrib The attribute of the kernel function to query.
  # @return [Integer] The particular attribute _attrib_ of this kernel function.
  #
  # @example Get function attribute.
  #     func.attribute(:MAX_THREADS_PER_BLOCK)    #=> 512
  #     func.attribute(:SHARED_SIZE_BYTES)        #=> 44
  #     func.attribute(:NUM_REGS)                 #=> 3
  def attribute(attrib)
    out = FFI::MemoryPointer.new(:int)
    status = API::cuFuncGetAttribute(out, attrib, self.to_api)
    Pvt::handle_error(status, "Failed to query function attribute: attribute = #{attrib}.")
    out.read_int
  end


  # Set the preferred cache configuration (CUFunctionCache) to use for next launch.
  # @param [CUFunctionCache] conf The preferred cache configuration.
  def cache_config=(conf)
    status = API::cuFuncSetCacheConfig(self.to_api, conf)
    Pvt::handle_error(status, "Failed to set function cache config: config = #{conf}.")
  end

  # Launch this kernel function with full configuration parameters and function parameters
  # to execute on the current CUDA device.
  # @param [Integer] grid_xdim The x dimensional size of the grid to launch.
  # @param [Integer] grid_ydim The y dimensional size of the grid to launch.
  # @param [Integer] grid_zdim The z dimensional size of the grid to launch.
  # @param [Integer] block_xdim The x dimensional size of a block in the grid.
  # @param [Integer] block_ydim The y dimensional size of a block in the grid.
  # @param [Integer] block_zdim The z dimensional size of a block in the grid.
  # @param [Integer] shared_mem_size Number of bytes of dynamic shared memory for each thread block.
  # @param [Integer, CUStream] stream The stream to launch this kernel function on.
  #     Setting _stream_ to anything other than an instance of CUStream will execute on the default stream 0.
  # @param [Array<Integer, Float, CUDevicePtr>] params The list of parameters to pass in for the kernel function launch.
  #     * An Integer is mapped to a C type int.
  #     * A Float is mapped to a C type float.
  # @return [CUFunction] This function.
  # @raise [TypeError] If _params_ is not an Array or contains an unsupported value.
  #
  # @todo Add support for other C data types for the kernel function parameters.
  def launch_kernel(grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, stream, params)
    # kernel_params must stay referenced until the API call returns: it owns
    # the per-argument buffers whose addresses the driver reads.
    kernel_params = parse_params(params)
    s = Pvt::parse_stream(stream)
    status = API::cuLaunchKernel(self.to_api, grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, s, kernel_params, nil)
    Pvt::handle_error(status, "Failed to launch kernel function.\n" +
        "* #{grid_xdim} x #{grid_ydim} x #{grid_zdim} grid\n" +
        "* #{block_xdim} x #{block_ydim} x #{block_zdim} blocks\n" +
        "* shared memory size = #{shared_mem_size}")
    self
  end


  # @private
  # @param ptr The memory holding the function handle; it is read back by {#to_api}.
  def initialize(ptr)
    @pfunc = ptr
  end
  private_class_method :new


  # @private
  # @return The function handle read from the wrapped memory via API::read_cufunction.
  def to_api
    API::read_cufunction(@pfunc)
  end

private

  # Pack _params_ into an array of pointers, one pointer per kernel argument.
  # @param [Array<Integer, Float, CUDevicePtr>] params The kernel arguments.
  # @return [FFI::MemoryPointer, nil] The packed pointer array, or nil when _params_ is empty.
  # @raise [TypeError] If _params_ is not an Array or contains an unsupported value.
  def parse_params(params)
    raise TypeError, "Expect _params_ an Array, but we get a #{params.class}." unless params.is_a?(Array)
    return nil if params.empty?

    holders = params.map do |x|
      case x
      when Integer # Fixnum no longer exists on Ruby >= 3.2; Integer covers it on every version.
        FFI::MemoryPointer.new(:int).tap { |m| m.write_int(x) }
      when Float
        FFI::MemoryPointer.new(:float).tap { |m| m.write_float(x) }
      when CUDevicePtr
        FFI::MemoryPointer.new(:CUDevicePtr).tap { |m| API::write_cudeviceptr(m, x.to_api.address) }
      else
        raise TypeError, "Invalid type of kernel parameter #{x}."
      end
    end

    packed = FFI::MemoryPointer.new(:pointer, holders.size)
    holders.each_with_index { |m, i| packed[i].write_pointer(m) }
    # The pointer array stores raw addresses only. Keep the per-argument
    # buffers referenced for as long as the array itself is alive; otherwise
    # the GC may release them before the kernel launch reads them.
    packed.instance_variable_set(:@kernel_args, holders)
    packed
  end


  # Round _offset_ up to the next multiple of _alignment_.
  # The bit-mask form is only valid when _alignment_ is a power of two.
  # @param [Integer] offset The offset in bytes.
  # @param [Integer] alignment The alignment in bytes.
  # @return [Integer] The aligned offset.
  def align_up(offset, alignment)
    (offset + alignment - 1) & ~(alignment - 1)
  end

end
291
+
292
+ end # module
293
+ end # module
@@ -0,0 +1,45 @@
1
+ #
2
+ # Copyright (c) 2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/driver/ffi-cu'
26
+ require 'cuda/driver/error'
27
+
28
+
29
+ module SGC
30
+ module CU
31
+
32
class CUInit

  class << self

    # Initialize the CUDA driver API. This must be called before other CUDA driver functions.
    # The status returned by API::cuInit is handed to Pvt::handle_error for checking.
    # @param [Integer] flags Currently _flags_ must be set to zero.
    # @return [nil]
    def init(flags = 0)
      Pvt::handle_error(API::cuInit(flags), "Failed to initialize the CUDA driver API.")
      nil
    end

  end

end
43
+
44
+ end # module
45
+ end # module
@@ -0,0 +1,134 @@
1
+ #
2
+ # Copyright (c) 2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/driver/ffi-cu'
26
+ require 'cuda/driver/error'
27
+ require 'cuda/driver/stream'
28
+
29
+
30
+ module SGC
31
+ module CU
32
+
33
module CUMemory

  # Every method defined below is a module function: callable as
  # CUMemory.name, and available as a private method when the module is mixed in.
  module_function


  # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_.
  # The type of memory (host or device) is inferred from the pointer value.
  # @return [nil]
  def memcpy(dst_ptr, src_ptr, nbytes)
    result = API::cuMemcpy(dst_ptr.to_api, src_ptr.to_api, nbytes)
    Pvt::handle_error(result, "Failed to copy memory: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_ on _stream_ asynchronously.
  # The type of memory (host or device) is inferred from the pointer value.
  # @return [nil]
  def memcpy_async(dst_ptr, src_ptr, nbytes, stream)
    queue = Pvt::parse_stream(stream)
    result = API::cuMemcpyAsync(dst_ptr.to_api, src_ptr.to_api, nbytes, queue)
    Pvt::handle_error(result, "Failed to copy memory asynchronously: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_.
  # The host side is passed to the API as _src_mem_.ptr.
  # @return [nil]
  def memcpy_htod(dst_devptr, src_mem, nbytes)
    result = API::cuMemcpyHtoD(dst_devptr.to_api, src_mem.ptr, nbytes)
    Pvt::handle_error(result, "Failed to copy memory from host to device: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
  #
  # @note The _src_mem_ should be *page-locked* memory.
  # @note Not implemented yet.
  #     NOTE(review): the body does call API::cuMemcpyHtoDAsync; confirm whether this note is still current.
  # @return [nil]
  def memcpy_htod_async(dst_devptr, src_mem, nbytes, stream)
    queue = Pvt::parse_stream(stream)
    result = API::cuMemcpyHtoDAsync(dst_devptr.to_api, src_mem.ptr, nbytes, queue)
    Pvt::handle_error(result, "Failed to copy memory from host to device asynchronously: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_.
  # The host side is passed to the API as _dst_mem_.ptr.
  # @return [nil]
  def memcpy_dtoh(dst_mem, src_devptr, nbytes)
    result = API::cuMemcpyDtoH(dst_mem.ptr, src_devptr.to_api, nbytes)
    Pvt::handle_error(result, "Failed to copy memory from device to host: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_ on _stream_ asynchronously.
  #
  # @note The _dst_mem_ should be *page-locked* memory.
  # @note Not implemented yet.
  #     NOTE(review): the body does call API::cuMemcpyDtoHAsync; confirm whether this note is still current.
  # @return [nil]
  def memcpy_dtoh_async(dst_mem, src_devptr, nbytes, stream)
    queue = Pvt::parse_stream(stream)
    result = API::cuMemcpyDtoHAsync(dst_mem.ptr, src_devptr.to_api, nbytes, queue)
    Pvt::handle_error(result, "Failed to copy memory from device to host asynchronously: size = #{nbytes}")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_.
  # This is the variant that takes no stream; it calls API::cuMemcpyDtoD.
  # NOTE(review): the failure message below says "asynchronously" and is identical to the
  # one used by memcpy_dtod_async -- confirm whether that wording is intended here.
  # @return [nil]
  def memcpy_dtod(dst_devptr, src_devptr, nbytes)
    result = API::cuMemcpyDtoD(dst_devptr.to_api, src_devptr.to_api, nbytes)
    Pvt::handle_error(result, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
    nil
  end


  # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
  #
  # @note Not implemented yet.
  #     NOTE(review): the body does call API::cuMemcpyDtoDAsync; confirm whether this note is still current.
  # @return [nil]
  def memcpy_dtod_async(dst_devptr, src_devptr, nbytes, stream)
    queue = Pvt::parse_stream(stream)
    result = API::cuMemcpyDtoDAsync(dst_devptr.to_api, src_devptr.to_api, nbytes, queue)
    Pvt::handle_error(result, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
    nil
  end


  # @return [Hash{ :free, :total }] A hash with the amount of free and total device memory in bytes.
  def mem_info
    # One size_t output slot per reported quantity, filled in by the API call.
    slots = { free: FFI::MemoryPointer.new(:size_t), total: FFI::MemoryPointer.new(:size_t) }
    result = API::cuMemGetInfo(slots[:free], slots[:total])
    Pvt::handle_error(result, "Failed to get memory information.")
    { free: API::read_size_t(slots[:free]), total: API::read_size_t(slots[:total]) }
  end

end
132
+
133
+ end # module
134
+ end # module