sgc-ruby-cuda 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. data/.yardopts +2 -0
  2. data/COPYING +674 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +76 -0
  5. data/doc/devel.rdoc +77 -0
  6. data/doc/features.rdoc +55 -0
  7. data/lib/cuda/driver/context.rb +236 -0
  8. data/lib/cuda/driver/cu.rb +60 -0
  9. data/lib/cuda/driver/device.rb +155 -0
  10. data/lib/cuda/driver/deviceptr.rb +69 -0
  11. data/lib/cuda/driver/error.rb +182 -0
  12. data/lib/cuda/driver/event.rb +124 -0
  13. data/lib/cuda/driver/ffi-cu.rb +620 -0
  14. data/lib/cuda/driver/function.rb +293 -0
  15. data/lib/cuda/driver/init.rb +45 -0
  16. data/lib/cuda/driver/memory.rb +134 -0
  17. data/lib/cuda/driver/module.rb +142 -0
  18. data/lib/cuda/driver/rubycu.rb +37 -0
  19. data/lib/cuda/driver/stream.rb +128 -0
  20. data/lib/cuda/driver/version.rb +42 -0
  21. data/lib/cuda/runtime/cuda.rb +65 -0
  22. data/lib/cuda/runtime/device.rb +175 -0
  23. data/lib/cuda/runtime/error.rb +197 -0
  24. data/lib/cuda/runtime/event.rb +117 -0
  25. data/lib/cuda/runtime/ffi-cuda.rb +588 -0
  26. data/lib/cuda/runtime/function.rb +161 -0
  27. data/lib/cuda/runtime/memory.rb +110 -0
  28. data/lib/cuda/runtime/rubycuda.rb +34 -0
  29. data/lib/cuda/runtime/stream.rb +126 -0
  30. data/lib/cuda/runtime/thread.rb +81 -0
  31. data/lib/cuda/runtime/version.rb +51 -0
  32. data/lib/ffi/prettystruct.rb +32 -0
  33. data/lib/helpers/flags.rb +82 -0
  34. data/lib/helpers/interface/ienum.rb +45 -0
  35. data/lib/helpers/klass.rb +45 -0
  36. data/lib/memory/buffer.rb +125 -0
  37. data/lib/memory/interface/ibuffer.rb +63 -0
  38. data/lib/memory/pointer.rb +72 -0
  39. data/lib/rubycu.rb +1 -0
  40. data/lib/rubycuda.rb +1 -0
  41. data/test/bad.ptx +0 -0
  42. data/test/memory/test_buffer.rb +93 -0
  43. data/test/rubycu/test_cucontext.rb +148 -0
  44. data/test/rubycu/test_cudevice.rb +69 -0
  45. data/test/rubycu/test_cudeviceptr.rb +43 -0
  46. data/test/rubycu/test_cuevent.rb +81 -0
  47. data/test/rubycu/test_cufunction.rb +165 -0
  48. data/test/rubycu/test_cumemory.rb +113 -0
  49. data/test/rubycu/test_cumodule.rb +114 -0
  50. data/test/rubycu/test_custream.rb +77 -0
  51. data/test/rubycu/test_cuversion.rb +39 -0
  52. data/test/rubycu/testbase.rb +107 -0
  53. data/test/rubycuda/test_cudadevice.rb +125 -0
  54. data/test/rubycuda/test_cudaerror.rb +48 -0
  55. data/test/rubycuda/test_cudaevent.rb +78 -0
  56. data/test/rubycuda/test_cudafunction.rb +106 -0
  57. data/test/rubycuda/test_cudamemory.rb +90 -0
  58. data/test/rubycuda/test_cudastream.rb +72 -0
  59. data/test/rubycuda/test_cudathread.rb +69 -0
  60. data/test/rubycuda/test_cudaversion.rb +41 -0
  61. data/test/rubycuda/testbase.rb +67 -0
  62. data/test/vadd.cu +21 -0
  63. data/version.rb +1 -0
  64. metadata +180 -0
data/lib/cuda/driver/function.rb
@@ -0,0 +1,293 @@
+ #
+ # Copyright (c) 2011 Chung Shin Yee
+ #
+ # shinyee@speedgocomputing.com
+ # http://www.speedgocomputing.com
+ # http://github.com/xman/sgc-ruby-cuda
+ # http://rubyforge.org/projects/rubycuda
+ #
+ # This file is part of SGC-Ruby-CUDA.
+ #
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
+ #
+
+ require 'cuda/driver/ffi-cu'
+ require 'cuda/driver/error'
+
+
+ module SGC
+ module CU
+
+ class CUFunction
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Set the argument list of the subsequent function call to the given _args_.
+     # @param *args The list of arguments to pass to the kernel function.
+     def param=(*args)
+         offset = 0
+         args.flatten.each do |x|
+             case x
+             when Fixnum
+                 p = FFI::MemoryPointer.new(:int)
+                 p.write_int(x)
+                 size = 4
+             when Float
+                 p = FFI::MemoryPointer.new(:float)
+                 p.write_float(x)
+                 size = 4
+             when CUDevicePtr
+                 p = FFI::MemoryPointer.new(:CUDevicePtr)
+                 API::write_cudeviceptr(p, x.to_api.address)
+                 size = p.size
+             else
+                 raise TypeError, "Invalid type of argument #{x}."
+             end
+             offset = align_up(offset, size)
+             status = API::cuParamSetv(self.to_api, offset, p, size)
+             Pvt::handle_error(status, "Failed to set function parameters: offset = #{offset}, value = #{x}.")
+             offset += size
+         end
+
+         status = API::cuParamSetSize(self.to_api, offset)
+         Pvt::handle_error(status, "Failed to set function parameter size: size = #{offset}.")
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Set a floating-point parameter in the function's argument list at _offset_ to _value_.
+     # @param [Integer] offset Number of bytes to offset.
+     # @param [Float] value The floating-point value to set as the function parameter.
+     # @return [CUFunction] This function.
+     def param_setf(offset, value)
+         status = API::cuParamSetf(self.to_api, offset, value)
+         Pvt::handle_error(status, "Failed to set function float parameter: offset = #{offset}, value = #{value}.")
+         self
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Set an integer parameter in the function's argument list at _offset_ to _value_.
+     # @param [Integer] offset Number of bytes to offset.
+     # @param [Integer] value The integer value to set as the function parameter.
+     # @return [CUFunction] This function.
+     def param_seti(offset, value)
+         status = API::cuParamSeti(self.to_api, offset, value)
+         Pvt::handle_error(status, "Failed to set function integer parameter: offset = #{offset}, value = #{value}.")
+         self
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Set arbitrary data in the function's argument list at _offset_, using _nbytes_ of data pointed to by _ptr_.
+     # @param [Integer] offset Number of bytes to offset.
+     # @param [CUDevicePtr] ptr A device pointer pointing to arbitrary data to be used as the function parameter.
+     # @param [Integer] nbytes The size of the arbitrary data in bytes.
+     # @return [CUFunction] This function.
+     def param_setv(offset, ptr, nbytes)
+         status = API::cuParamSetv(self.to_api, offset, ptr.to_api, nbytes)
+         Pvt::handle_error(status, "Failed to set function arbitrary parameter: offset = #{offset}, size = #{nbytes}.")
+         self
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Set the function parameter size to _nbytes_.
+     # @param [Integer] nbytes The parameter size in bytes.
+     # @return [CUFunction] This function.
+     def param_set_size(nbytes)
+         status = API::cuParamSetSize(self.to_api, nbytes)
+         Pvt::handle_error(status, "Failed to set function parameter size: size = #{nbytes}.")
+         self
+     end
+
+
+     # @deprecated
+     #
+     def param_set_texref(texunit, texref)
+         raise NotImplementedError
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Set the block dimensions to use for the next launch.
+     # @overload block_shape=(xdim)
+     # @overload block_shape=(xdim, ydim)
+     # @overload block_shape=(xdim, ydim, zdim)
+     # @param [Integer] xdim The size of the x dimension.
+     # @param [Integer] ydim The size of the y dimension. Defaults to 1.
+     # @param [Integer] zdim The size of the z dimension. Defaults to 1.
+     def block_shape=(*args)
+         xdim, ydim, zdim = args.flatten
+         ydim = 1 if ydim.nil?
+         zdim = 1 if zdim.nil?
+         status = API::cuFuncSetBlockShape(self.to_api, xdim, ydim, zdim)
+         Pvt::handle_error(status, "Failed to set function block shape: (x,y,z) = (#{xdim},#{ydim},#{zdim}).")
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Set the dynamic shared-memory size to use for the next launch.
+     # @param [Integer] nbytes Number of bytes.
+     def shared_size=(nbytes)
+         status = API::cuFuncSetSharedSize(self.to_api, nbytes)
+         Pvt::handle_error(status, "Failed to set function shared memory size: #{nbytes}.")
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Launch this kernel function with a 1x1x1 grid of blocks to execute on the current CUDA device.
+     # @return [CUFunction] This function.
+     def launch
+         status = API::cuLaunch(self.to_api)
+         Pvt::handle_error(status, "Failed to launch kernel function on 1x1x1 grid of blocks.")
+         self
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Launch this kernel function with grid dimensions (_xdim_, _ydim_) to execute on the current CUDA device.
+     # @overload launch_grid(xdim)
+     # @overload launch_grid(xdim, ydim)
+     # @param [Integer] xdim The x dimensional size of the grid to launch.
+     # @param [Integer] ydim The y dimensional size of the grid to launch. Defaults to 1.
+     # @return [CUFunction] This function.
+     def launch_grid(xdim, ydim = 1)
+         status = API::cuLaunchGrid(self.to_api, xdim, ydim)
+         Pvt::handle_error(status, "Failed to launch kernel function on #{xdim}x#{ydim} grid of blocks.")
+         self
+     end
+
+
+     # @deprecated Use {#launch_kernel}.
+     #
+     # Launch this kernel function with grid dimensions (_xdim_, _ydim_) on _stream_ asynchronously to execute
+     # on the current CUDA device. Setting _stream_ to anything other than an instance of CUStream
+     # will execute on the default stream 0.
+     # @overload launch_grid_async(xdim, stream)
+     # @overload launch_grid_async(xdim, ydim, stream)
+     # @param [Integer] xdim The x dimensional size of the grid to launch.
+     def launch_grid_async(xdim, ydim = 1, stream)
+         s = Pvt::parse_stream(stream)
+         status = API::cuLaunchGridAsync(self.to_api, xdim, ydim, s)
+         Pvt::handle_error(status, "Failed to launch kernel function asynchronously on #{xdim}x#{ydim} grid of blocks.")
+         self
+     end
+
+
+     # @param [CUFunctionAttribute] attrib The attribute of the kernel function to query.
+     # @return [Integer] The particular attribute _attrib_ of this kernel function.
+     #
+     # @example Get function attribute.
+     #     func.attribute(:MAX_THREADS_PER_BLOCK)    #=> 512
+     #     func.attribute(:SHARED_SIZE_BYTES)        #=> 44
+     #     func.attribute(:NUM_REGS)                 #=> 3
+     def attribute(attrib)
+         p = FFI::MemoryPointer.new(:int)
+         status = API::cuFuncGetAttribute(p, attrib, self.to_api)
+         Pvt::handle_error(status, "Failed to query function attribute: attribute = #{attrib}.")
+         p.read_int
+     end
+
+
+     # Set the preferred cache configuration (CUFunctionCache) to use for the next launch.
+     # @param [CUFunctionCache] conf The preferred cache configuration.
+     def cache_config=(conf)
+         status = API::cuFuncSetCacheConfig(self.to_api, conf)
+         Pvt::handle_error(status, "Failed to set function cache config: config = #{conf}.")
+     end
+
+     # Launch this kernel function with full configuration parameters and function parameters
+     # to execute on the current CUDA device.
+     # @param [Integer] grid_xdim The x dimensional size of the grid to launch.
+     # @param [Integer] grid_ydim The y dimensional size of the grid to launch.
+     # @param [Integer] grid_zdim The z dimensional size of the grid to launch.
+     # @param [Integer] block_xdim The x dimensional size of a block in the grid.
+     # @param [Integer] block_ydim The y dimensional size of a block in the grid.
+     # @param [Integer] block_zdim The z dimensional size of a block in the grid.
+     # @param [Integer] shared_mem_size Number of bytes of dynamic shared memory for each thread block.
+     # @param [Integer, CUStream] stream The stream to launch this kernel function on.
+     #     Setting _stream_ to anything other than an instance of CUStream will execute on the default stream 0.
+     # @param [Array<Fixnum, Float, CUDevicePtr>] params The list of parameters to pass in for the kernel function launch.
+     #     * A Fixnum is mapped to a C type int.
+     #     * A Float is mapped to a C type float.
+     # @return [CUFunction] This function.
+     #
+     # @todo Add support for other C data types for the kernel function parameters.
+     def launch_kernel(grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, stream, params)
+         p = parse_params(params)
+         s = Pvt::parse_stream(stream)
+         status = API::cuLaunchKernel(self.to_api, grid_xdim, grid_ydim, grid_zdim, block_xdim, block_ydim, block_zdim, shared_mem_size, s, p, nil)
+         Pvt::handle_error(status, "Failed to launch kernel function.\n" +
+             "* #{grid_xdim} x #{grid_ydim} x #{grid_zdim} grid\n" +
+             "* #{block_xdim} x #{block_ydim} x #{block_zdim} blocks\n" +
+             "* shared memory size = #{shared_mem_size}")
+         self
+     end
+
+
+     # @private
+     def initialize(ptr)
+         @pfunc = ptr
+     end
+     private_class_method :new
+
+
+     # @private
+     def to_api
+         API::read_cufunction(@pfunc)
+     end
+
+     private
+
+     def parse_params(params)
+         params.is_a?(Array) or raise TypeError, "Expected _params_ to be an Array, but got a #{params.class}."
+         params.size <= 0 and return nil
+
+         p = FFI::MemoryPointer.new(:pointer, params.size)
+         params.each_with_index do |x, i|
+             m = case x
+                 when Fixnum
+                     FFI::MemoryPointer.new(:int).write_int(x)
+                 when Float
+                     FFI::MemoryPointer.new(:float).write_float(x)
+                 when CUDevicePtr
+                     ptr = FFI::MemoryPointer.new(:CUDevicePtr)
+                     API::write_cudeviceptr(ptr, x.to_api.address)
+                     ptr
+                 else
+                     raise TypeError, "Invalid type of kernel parameter #{x}."
+             end
+             p[i].write_pointer(m)
+         end
+         p
+     end
+
+
+     def align_up(offset, alignment)
+         (offset + alignment - 1) & ~(alignment - 1)
+     end
+
+ end
+
+ end # module
+ end # module
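
For reference, here is a minimal usage sketch of the launch_kernel API added above. It is an illustration only: func is assumed to be a CUFunction obtained elsewhere (module loading lives in data/lib/cuda/driver/module.rb, whose hunk is not reproduced here), and devptr is assumed to be a CUDevicePtr to device memory holding at least n ints; both names are hypothetical placeholders, not part of this file.

    n = 256
    func.launch_kernel(
        (n + 255) / 256, 1, 1,    # grid dimensions (x, y, z)
        256, 1, 1,                # block dimensions (x, y, z)
        0,                        # dynamic shared memory size in bytes
        0,                        # stream: anything other than a CUStream runs on the default stream 0
        [devptr, n, 1.5]          # params: CUDevicePtr -> device pointer, Fixnum -> int, Float -> float
    )

A single launch_kernel call replaces the deprecated sequence of param=, block_shape=, shared_size= and launch_grid calls shown earlier in this file.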
data/lib/cuda/driver/init.rb
@@ -0,0 +1,45 @@
+ #
+ # Copyright (c) 2011 Chung Shin Yee
+ #
+ # shinyee@speedgocomputing.com
+ # http://www.speedgocomputing.com
+ # http://github.com/xman/sgc-ruby-cuda
+ # http://rubyforge.org/projects/rubycuda
+ #
+ # This file is part of SGC-Ruby-CUDA.
+ #
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
+ #
+
+ require 'cuda/driver/ffi-cu'
+ require 'cuda/driver/error'
+
+
+ module SGC
+ module CU
+
+ class CUInit
+
+     # Initialize the CUDA driver API. This must be called before other CUDA driver functions.
+     # @param [Integer] flags Currently _flags_ must be set to zero.
+     def self.init(flags = 0)
+         status = API::cuInit(flags)
+         Pvt::handle_error(status, "Failed to initialize the CUDA driver API.")
+         nil
+     end
+
+ end
+
+ end # module
+ end # module
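
As a usage note for the hunk above, the driver API is initialized once via CUInit.init before any other driver call. A minimal sketch follows; the top-level require of 'rubycu' is an assumption based on data/lib/rubycu.rb, which is not shown in this section.

    require 'rubycu'    # assumed entry point (data/lib/rubycu.rb)
    include SGC::CU

    CUInit.init         # flags defaults to 0, the only value currently accepted
    # ... CUDevice/CUContext/CUFunction calls may follow from here.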
data/lib/cuda/driver/memory.rb
@@ -0,0 +1,134 @@
+ #
+ # Copyright (c) 2011 Chung Shin Yee
+ #
+ # shinyee@speedgocomputing.com
+ # http://www.speedgocomputing.com
+ # http://github.com/xman/sgc-ruby-cuda
+ # http://rubyforge.org/projects/rubycuda
+ #
+ # This file is part of SGC-Ruby-CUDA.
+ #
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
+ #
+
+ require 'cuda/driver/ffi-cu'
+ require 'cuda/driver/error'
+ require 'cuda/driver/stream'
+
+
+ module SGC
+ module CU
+
+ module CUMemory
+
+     # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_.
+     # The type of memory (host or device) is inferred from the pointer value.
+     def memcpy(dst_ptr, src_ptr, nbytes)
+         status = API::cuMemcpy(dst_ptr.to_api, src_ptr.to_api, nbytes)
+         Pvt::handle_error(status, "Failed to copy memory: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy
+
+
+     # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_ on _stream_ asynchronously.
+     # The type of memory (host or device) is inferred from the pointer value.
+     def memcpy_async(dst_ptr, src_ptr, nbytes, stream)
+         s = Pvt::parse_stream(stream)
+         status = API::cuMemcpyAsync(dst_ptr.to_api, src_ptr.to_api, nbytes, s)
+         Pvt::handle_error(status, "Failed to copy memory asynchronously: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy_async
+
+
+     # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_.
+     def memcpy_htod(dst_devptr, src_mem, nbytes)
+         status = API::cuMemcpyHtoD(dst_devptr.to_api, src_mem.ptr, nbytes)
+         Pvt::handle_error(status, "Failed to copy memory from host to device: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy_htod
+
+
+     # Copy _nbytes_ from the host memory at _src_mem_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
+     #
+     # @note The _src_mem_ should be *page-locked* memory.
+     # @note Not implemented yet.
+     def memcpy_htod_async(dst_devptr, src_mem, nbytes, stream)
+         s = Pvt::parse_stream(stream)
+         status = API::cuMemcpyHtoDAsync(dst_devptr.to_api, src_mem.ptr, nbytes, s)
+         Pvt::handle_error(status, "Failed to copy memory from host to device asynchronously: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy_htod_async
+
+
+     # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_.
+     def memcpy_dtoh(dst_mem, src_devptr, nbytes)
+         status = API::cuMemcpyDtoH(dst_mem.ptr, src_devptr.to_api, nbytes)
+         Pvt::handle_error(status, "Failed to copy memory from device to host: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy_dtoh
+
+
+     # Copy _nbytes_ from the device memory at _src_devptr_ to the host memory at _dst_mem_ on _stream_ asynchronously.
+     #
+     # @note The _dst_mem_ should be *page-locked* memory.
+     # @note Not implemented yet.
+     def memcpy_dtoh_async(dst_mem, src_devptr, nbytes, stream)
+         s = Pvt::parse_stream(stream)
+         status = API::cuMemcpyDtoHAsync(dst_mem.ptr, src_devptr.to_api, nbytes, s)
+         Pvt::handle_error(status, "Failed to copy memory from device to host asynchronously: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy_dtoh_async
+
+
+     # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_ asynchronously.
+     def memcpy_dtod(dst_devptr, src_devptr, nbytes)
+         status = API::cuMemcpyDtoD(dst_devptr.to_api, src_devptr.to_api, nbytes)
+         Pvt::handle_error(status, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy_dtod
+
+
+     # Copy _nbytes_ from the device memory at _src_devptr_ to the device memory at _dst_devptr_ on _stream_ asynchronously.
+     #
+     # @note Not implemented yet.
+     def memcpy_dtod_async(dst_devptr, src_devptr, nbytes, stream)
+         s = Pvt::parse_stream(stream)
+         status = API::cuMemcpyDtoDAsync(dst_devptr.to_api, src_devptr.to_api, nbytes, s)
+         Pvt::handle_error(status, "Failed to copy memory from device to device asynchronously: size = #{nbytes}.")
+         nil
+     end
+     module_function :memcpy_dtod_async
+
+
+     # @return [Hash{Symbol=>Integer}] A hash with the amount of free (:free) and total (:total) device memory in bytes.
+     def mem_info
+         pfree = FFI::MemoryPointer.new(:size_t)
+         ptotal = FFI::MemoryPointer.new(:size_t)
+         status = API::cuMemGetInfo(pfree, ptotal)
+         Pvt::handle_error(status, "Failed to get memory information.")
+         { free: API::read_size_t(pfree), total: API::read_size_t(ptotal) }
+     end
+     module_function :mem_info
+
+ end
+
+ end # module
+ end # module
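
A short sketch of the memory helpers above. CUMemory.mem_info needs nothing beyond an initialized driver and a current context; the copy calls assume a host memory object responding to #ptr (for example the buffer class under data/lib/memory/buffer.rb, not shown in this section) and a CUDevicePtr devptr to sufficient device memory, both hypothetical placeholders here.

    include SGC::CU

    info = CUMemory.mem_info                          # => { free: ..., total: ... } in bytes
    puts "#{info[:free]} of #{info[:total]} bytes free"

    nbytes = 16 * 4                                   # sixteen 32-bit values
    CUMemory.memcpy_htod(devptr, host_buf, nbytes)    # host -> device
    CUMemory.memcpy_dtoh(host_buf, devptr, nbytes)    # device -> host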