ignis 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +15 -0
- data/lib/ignis.rb +94 -0
- data/lib/nnw/platform.rb +304 -0
- data/lib/nnw/shared/event_bus.rb +240 -0
- data/lib/nnw/shared/ffi_loader.rb +63 -0
- data/lib/nnw/shared/memory_contract.rb +204 -0
- data/lib/nnw/shared/nv_array.rb +710 -0
- data/lib/nnw/shared/recovery_protocol.rb +307 -0
- data/lib/nvruby/configuration.rb +217 -0
- data/lib/nvruby/cuda/device.rb +275 -0
- data/lib/nvruby/cuda/device_props.rb +202 -0
- data/lib/nvruby/cuda/graph.rb +265 -0
- data/lib/nvruby/cuda/graph_bindings.rb +119 -0
- data/lib/nvruby/cuda/library_loader.rb +285 -0
- data/lib/nvruby/cuda/memory.rb +410 -0
- data/lib/nvruby/cuda/runtime_api.rb +804 -0
- data/lib/nvruby/cuda/stream.rb +234 -0
- data/lib/nvruby/dtype.rb +139 -0
- data/lib/nvruby/epilogues.rb +438 -0
- data/lib/nvruby/errors.rb +303 -0
- data/lib/nvruby/half.rb +97 -0
- data/lib/nvruby/jit/compiled_kernel.rb +80 -0
- data/lib/nvruby/jit/compiler.rb +231 -0
- data/lib/nvruby/jit/driver_api_bindings.rb +363 -0
- data/lib/nvruby/jit/kernel.rb +240 -0
- data/lib/nvruby/jit/kernel_module.rb +133 -0
- data/lib/nvruby/jit/kernels/activations.rb +179 -0
- data/lib/nvruby/jit/kernels/attention.rb +504 -0
- data/lib/nvruby/jit/kernels/elementwise.rb +488 -0
- data/lib/nvruby/jit/kernels/loss.rb +213 -0
- data/lib/nvruby/jit/kernels/normalization.rb +200 -0
- data/lib/nvruby/jit/kernels/optimizer.rb +193 -0
- data/lib/nvruby/jit/nvrtc_bindings.rb +282 -0
- data/lib/nvruby/linalg/cublas_bindings.rb +295 -0
- data/lib/nvruby/linalg/cublaslt_bindings.rb +342 -0
- data/lib/nvruby/linalg/epilog.rb +67 -0
- data/lib/nvruby/linalg/matmul.rb +247 -0
- data/lib/nvruby/linalg/matmul_plan.rb +229 -0
- data/lib/nvruby/linalg/optimized_matmul.rb +412 -0
- data/lib/nvruby/memory/cuda_async_memory_resource.rb +123 -0
- data/lib/nvruby/memory/cuda_memory_resource.rb +68 -0
- data/lib/nvruby/memory/device_memory_resource.rb +106 -0
- data/lib/nvruby/memory/pinned_host_memory_resource.rb +112 -0
- data/lib/nvruby/memory/pool_memory_resource.rb +242 -0
- data/lib/nvruby/memory/stats.rb +107 -0
- data/lib/nvruby/memory.rb +124 -0
- data/lib/nvruby/version.rb +5 -0
- metadata +108 -0
|
@@ -0,0 +1,804 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'fiddle'
|
|
4
|
+
require 'fiddle/import'
|
|
5
|
+
|
|
6
|
+
module Ignis
  module CUDA
    # CUDA Runtime API — Fiddle-only hot-path bindings.
    #
    # Rule: This file uses ONLY Fiddle for hot-path calls (malloc, free, memcpy,
    # stream, event, sync). FFI struct calls (cudaGetDeviceProperties) live in
    # device_props.rb (Rule 4: never mix FFI and Fiddle in the same file).
    #
    # Cross-platform: Uses Ignis::Platform to resolve cudart path on Windows/Linux.
    #
    # Two layers of singleton methods are exposed:
    #
    # * snake_case wrappers (+malloc+, +memcpy+, +stream_create+, ...) that
    #   decode out-parameters and raise CudaRuntimeError on a non-zero status;
    # * +cudaXxx+ shims that forward to the bound function and return the raw
    #   status code.
    #
    # The library is loaded lazily on the first call (see +ensure_loaded!+).
    module RuntimeAPI
      # CUDA memory copy directions
      MEMCPY_HOST_TO_HOST     = 0
      MEMCPY_HOST_TO_DEVICE   = 1
      MEMCPY_DEVICE_TO_HOST   = 2
      MEMCPY_DEVICE_TO_DEVICE = 3
      MEMCPY_DEFAULT          = 4

      # Symbolic names accepted by the cudaMemcpy/cudaMemcpyAsync shims.
      MEMCPY_KINDS = {
        host_to_host: MEMCPY_HOST_TO_HOST,
        host_to_device: MEMCPY_HOST_TO_DEVICE,
        device_to_host: MEMCPY_DEVICE_TO_HOST,
        device_to_device: MEMCPY_DEVICE_TO_DEVICE,
        default: MEMCPY_DEFAULT
      }.freeze

      # CUDA device flags
      DEVICE_SCHEDULE_AUTO          = 0
      DEVICE_SCHEDULE_SPIN          = 1
      DEVICE_SCHEDULE_YIELD         = 2
      DEVICE_SCHEDULE_BLOCKING_SYNC = 4
      DEVICE_MAP_HOST               = 8
      DEVICE_LMEM_RESIZE_TO_MAX     = 16

      # CUDA host alloc flags
      HOST_ALLOC_DEFAULT       = 0
      HOST_ALLOC_PORTABLE      = 1
      HOST_ALLOC_MAPPED        = 2
      HOST_ALLOC_WRITECOMBINED = 4

      # Resolve CUDA runtime library path at load time.
      # Uses Ignis::Platform if available, falls back to OS detection.
      CUDART_LIB = if defined?(Ignis::Platform)
                     Ignis::Platform.cudart_path
                   elsif RUBY_PLATFORM.match?(/mswin|mingw|cygwin/i)
                     File.join('C:', 'Program Files', 'NVIDIA GPU Computing Toolkit',
                               'CUDA', 'v13.0', 'bin', 'cudart64_130.dll')
                   else
                     'libcudart.so.13'
                   end

      @loaded = false
      @handle = nil
      @functions = {}

      class << self
        # @return [Fiddle::Handle, nil] the loaded DLL handle
        attr_reader :handle

        # Ensure the CUDA runtime is loaded and all functions are bound.
        # Does nothing once a previous call has completed.
        # @return [void]
        # @raise [Fiddle::DLError] if the library cannot be loaded
        def ensure_loaded!
          return if @loaded

          @handle = Fiddle::Handle.new(CUDART_LIB)
          attach_all_functions!
          @loaded = true
        end

        # @return [Boolean]
        def loaded?
          @loaded
        end

        # ================================================================
        # Device Management
        # ================================================================

        # @return [Integer] number of CUDA devices
        def get_device_count
          query_int(:cudaGetDeviceCount)
        end

        # @return [Integer] current device index
        def get_device
          query_int(:cudaGetDevice)
        end

        # @param device [Integer] device index to set
        # @return [void]
        def set_device(device)
          invoke!(:cudaSetDevice, 'cudaSetDevice', device)
        end

        # @return [void]
        def device_synchronize
          invoke!(:cudaDeviceSynchronize, 'cudaDeviceSynchronize')
        end

        # @return [void]
        def device_reset
          invoke!(:cudaDeviceReset, 'cudaDeviceReset')
        end

        # @param attr_id [Integer] CUDA device attribute ID
        # @param device [Integer] device index
        # @return [Integer] attribute value
        def device_get_attribute(attr_id, device)
          query_int(:cudaDeviceGetAttribute, attr_id, device)
        end

        # ================================================================
        # Memory Management (Hot Path)
        # ================================================================

        # Allocate device memory.
        # @param size [Integer] bytes to allocate
        # @return [Fiddle::Pointer] device pointer
        def malloc(size)
          query_pointer(:cudaMalloc, "cudaMalloc(#{size})", size)
        end

        # Free device memory.
        # @param ptr [Fiddle::Pointer] device pointer to free
        # @return [void]
        def free(ptr)
          invoke!(:cudaFree, 'cudaFree', ptr)
        end

        # Copy memory.
        # @param dst [Fiddle::Pointer] destination
        # @param src [Fiddle::Pointer] source
        # @param count [Integer] bytes to copy
        # @param kind [Integer] copy direction constant
        # @return [void]
        def memcpy(dst, src, count, kind)
          invoke!(:cudaMemcpy, "cudaMemcpy(#{count} bytes, kind=#{kind})", dst, src, count, kind)
        end

        # Async memory copy.
        # @param dst [Fiddle::Pointer]
        # @param src [Fiddle::Pointer]
        # @param count [Integer]
        # @param kind [Integer]
        # @param stream [Fiddle::Pointer] CUDA stream
        # @return [void]
        def memcpy_async(dst, src, count, kind, stream)
          invoke!(:cudaMemcpyAsync, "cudaMemcpyAsync(#{count} bytes)", dst, src, count, kind, stream)
        end

        # Set device memory.
        # @param ptr [Fiddle::Pointer] device pointer
        # @param value [Integer] byte value to set
        # @param count [Integer] bytes to set
        # @return [void]
        def memset(ptr, value, count)
          invoke!(:cudaMemset, "cudaMemset(#{count} bytes)", ptr, value, count)
        end

        # Allocate pinned host memory.
        # @param size [Integer] bytes
        # @param flags [Integer] allocation flags
        # @return [Fiddle::Pointer] host pointer
        def host_alloc(size, flags = HOST_ALLOC_DEFAULT)
          query_pointer(:cudaHostAlloc, "cudaHostAlloc(#{size})", size, flags)
        end

        # Free pinned host memory.
        # @param ptr [Fiddle::Pointer]
        # @return [void]
        def free_host(ptr)
          invoke!(:cudaFreeHost, 'cudaFreeHost', ptr)
        end

        # Query GPU memory info.
        # @return [Hash] {free_bytes:, total_bytes:}
        def mem_get_info
          free_out = alloc_out(Fiddle::SIZEOF_SIZE_T)
          total_out = alloc_out(Fiddle::SIZEOF_SIZE_T)
          invoke!(:cudaMemGetInfo, 'cudaMemGetInfo', free_out, total_out)
          # 'J' is the native pointer-width unsigned integer; size_t is assumed
          # to have the same width on the platforms this library targets.
          {
            free_bytes: free_out[0, Fiddle::SIZEOF_SIZE_T].unpack1('J'),
            total_bytes: total_out[0, Fiddle::SIZEOF_SIZE_T].unpack1('J')
          }
        end

        # ================================================================
        # Stream-Ordered Memory (CUDA 11.2+)
        # ================================================================

        # @param size [Integer] bytes
        # @param stream [Fiddle::Pointer] CUDA stream
        # @return [Fiddle::Pointer] device pointer
        def malloc_async(size, stream)
          query_pointer(:cudaMallocAsync, "cudaMallocAsync(#{size})", size, stream)
        end

        # @param ptr [Fiddle::Pointer] device pointer
        # @param stream [Fiddle::Pointer] CUDA stream
        # @return [void]
        def free_async(ptr, stream)
          invoke!(:cudaFreeAsync, 'cudaFreeAsync', ptr, stream)
        end

        # ================================================================
        # Stream Management
        # ================================================================

        # Create a CUDA stream.
        # @return [Fiddle::Pointer] stream handle
        def stream_create
          query_pointer(:cudaStreamCreate, 'cudaStreamCreate')
        end

        # Destroy a CUDA stream.
        # @param stream [Fiddle::Pointer]
        # @return [void]
        def stream_destroy(stream)
          invoke!(:cudaStreamDestroy, 'cudaStreamDestroy', stream)
        end

        # Synchronize a stream (blocks until all commands complete).
        # @param stream [Fiddle::Pointer]
        # @return [void]
        def stream_synchronize(stream)
          invoke!(:cudaStreamSynchronize, 'cudaStreamSynchronize', stream)
        end

        # Query stream completion status.
        #
        # Never raises on a CUDA status: every non-zero status is reported as
        # +false+, so a genuine error is indistinguishable from "still running".
        # @param stream [Fiddle::Pointer]
        # @return [Boolean] true if all work complete
        def stream_query(stream)
          fetch_function(:cudaStreamQuery).call(stream).zero?
        end

        # ================================================================
        # Event Management
        # ================================================================

        # Create a CUDA event.
        # @return [Fiddle::Pointer] event handle
        def event_create
          query_pointer(:cudaEventCreate, 'cudaEventCreate')
        end

        # Destroy a CUDA event.
        # @param event [Fiddle::Pointer]
        # @return [void]
        def event_destroy(event)
          invoke!(:cudaEventDestroy, 'cudaEventDestroy', event)
        end

        # Record an event in a stream.
        # @param event [Fiddle::Pointer]
        # @param stream [Fiddle::Pointer]
        # @return [void]
        def event_record(event, stream)
          invoke!(:cudaEventRecord, 'cudaEventRecord', event, stream)
        end

        # Block until event completes.
        # @param event [Fiddle::Pointer]
        # @return [void]
        def event_synchronize(event)
          invoke!(:cudaEventSynchronize, 'cudaEventSynchronize', event)
        end

        # Compute elapsed time between two events.
        # @param start_event [Fiddle::Pointer]
        # @param end_event [Fiddle::Pointer]
        # @return [Float] elapsed time in milliseconds
        def event_elapsed_time(start_event, end_event)
          ms_out = alloc_out(Fiddle::SIZEOF_FLOAT)
          invoke!(:cudaEventElapsedTime, 'cudaEventElapsedTime', ms_out, start_event, end_event)
          # 'f' decodes a native-endian single-precision float.
          ms_out[0, Fiddle::SIZEOF_FLOAT].unpack1('f')
        end

        # ================================================================
        # Error Handling
        # ================================================================

        # @return [Integer] last error code
        def get_last_error
          fetch_function(:cudaGetLastError).call
        end

        # @return [Integer] last error without clearing
        def peek_at_last_error
          fetch_function(:cudaPeekAtLastError).call
        end

        # Get error string for a status code.
        # @param status [Integer]
        # @return [String] error description
        def get_error_string(status)
          c_string = fetch_function(:cudaGetErrorString).call(status)
          Fiddle::Pointer.new(c_string).to_s
        end

        # Get error name for a status code.
        # @param status [Integer]
        # @return [String] error name
        def get_error_name(status)
          c_string = fetch_function(:cudaGetErrorName).call(status)
          Fiddle::Pointer.new(c_string).to_s
        end

        # ================================================================
        # Version Info
        # ================================================================

        # @return [Integer] CUDA runtime version
        def runtime_version
          query_int(:cudaRuntimeGetVersion)
        end

        # @return [Integer] CUDA driver version
        def driver_version
          query_int(:cudaDriverGetVersion)
        end

        # Check CUDA status and raise error if not success.
        # @param status [Integer]
        # @param context [String] label prepended to the error message
        # @return [void]
        # @raise [CudaRuntimeError] when +status+ is non-zero
        def check_status!(status, context = 'CUDA operation')
          return if status.zero?

          ensure_loaded!
          error_name = get_error_name(status)
          error_string = get_error_string(status)
          raise CudaRuntimeError.new(
            "#{context}: #{error_name} - #{error_string}",
            cuda_code: status
          )
        end

        # ================================================================
        # Backward-Compatible Shims
        # ================================================================
        # These methods maintain the old RuntimeAPI.cudaXxx call signatures
        # so existing collective/transport code continues to work without
        # modification. They call the underlying Fiddle::Function directly
        # and return the status code instead of raising on failure.
        #
        # Shims implemented with +optional_call+ return 0 when the symbol
        # could not be bound from the loaded library.

        # @!group Backward-Compatible Shims

        # @param device_id [Integer]
        # @return [Integer] CUDA status code
        def cudaSetDevice(device_id)
          fetch_function(:cudaSetDevice).call(device_id)
        end

        # @return [Integer] status
        def cudaDeviceSynchronize
          fetch_function(:cudaDeviceSynchronize).call
        end

        # @return [Integer] status
        def cudaDeviceReset
          fetch_function(:cudaDeviceReset).call
        end

        # @param ptr_out [Fiddle::Pointer, FFI::Pointer] pointer-to-int
        # @return [Integer] status
        def cudaGetDeviceCount(ptr_out)
          fetch_function(:cudaGetDeviceCount).call(ptr_out)
        end

        # @param ptr_out [Fiddle::Pointer, FFI::Pointer] pointer-to-int
        # @return [Integer] status
        def cudaGetDevice(ptr_out)
          fetch_function(:cudaGetDevice).call(ptr_out)
        end

        # @param ptr_out [Fiddle::Pointer] pointer-to-int
        # @param attr_id [Integer]
        # @param device [Integer]
        # @return [Integer] status
        def cudaDeviceGetAttribute(ptr_out, attr_id, device)
          fetch_function(:cudaDeviceGetAttribute).call(ptr_out, attr_id, device)
        end

        # @param ptr_ptr [Fiddle::Pointer] pointer-to-pointer
        # @param size [Integer]
        # @return [Integer] status
        def cudaMalloc(ptr_ptr, size)
          fetch_function(:cudaMalloc).call(ptr_ptr, size)
        end

        # @param ptr [Fiddle::Pointer]
        # @return [Integer] status
        def cudaFree(ptr)
          fetch_function(:cudaFree).call(ptr)
        end

        # @param kind [Symbol, Integer] copy direction (see MEMCPY_KINDS)
        # @return [Integer] status
        def cudaMemcpy(dst, src, count, kind)
          fetch_function(:cudaMemcpy).call(dst, src, count, resolve_memcpy_kind(kind))
        end

        # @param kind [Symbol, Integer] copy direction (see MEMCPY_KINDS)
        # @return [Integer] status
        def cudaMemcpyAsync(dst, src, count, kind, stream)
          fetch_function(:cudaMemcpyAsync).call(dst, src, count, resolve_memcpy_kind(kind), stream)
        end

        # @return [Integer] status
        def cudaMemset(ptr, value, count)
          fetch_function(:cudaMemset).call(ptr, value, count)
        end

        # @return [Integer] status
        def cudaMemsetAsync(ptr, value, count, stream)
          fetch_function(:cudaMemsetAsync).call(ptr, value, count, stream)
        end

        # @return [Integer] status
        def cudaMemGetInfo(free_ptr, total_ptr)
          fetch_function(:cudaMemGetInfo).call(free_ptr, total_ptr)
        end

        # @param ptr_ptr [Fiddle::Pointer] pointer-to-pointer
        # @param size [Integer]
        # @param flags [Integer]
        # @return [Integer] status
        def cudaHostAlloc(ptr_ptr, size, flags)
          fetch_function(:cudaHostAlloc).call(ptr_ptr, size, flags)
        end

        # @param ptr [Fiddle::Pointer]
        # @return [Integer] status
        def cudaFreeHost(ptr)
          fetch_function(:cudaFreeHost).call(ptr)
        end

        # @return [Integer] status
        def cudaStreamCreate(ptr_out)
          fetch_function(:cudaStreamCreate).call(ptr_out)
        end

        # @return [Integer] status
        def cudaStreamDestroy(stream)
          fetch_function(:cudaStreamDestroy).call(stream)
        end

        # @return [Integer] status
        def cudaStreamSynchronize(stream)
          fetch_function(:cudaStreamSynchronize).call(stream)
        end

        # @return [Integer] status
        def cudaEventCreate(ptr_out)
          fetch_function(:cudaEventCreate).call(ptr_out)
        end

        # @return [Integer] status
        def cudaEventDestroy(event)
          fetch_function(:cudaEventDestroy).call(event)
        end

        # @return [Integer] status
        def cudaEventRecord(event, stream)
          fetch_function(:cudaEventRecord).call(event, stream)
        end

        # @return [Integer] status
        def cudaEventSynchronize(event)
          fetch_function(:cudaEventSynchronize).call(event)
        end

        # @return [Integer] status
        def cudaEventElapsedTime(ms_ptr, start_event, end_event)
          fetch_function(:cudaEventElapsedTime).call(ms_ptr, start_event, end_event)
        end

        # @return [Integer] status
        def cudaGetLastError
          fetch_function(:cudaGetLastError).call
        end

        # @return [Integer] status
        def cudaPeekAtLastError
          fetch_function(:cudaPeekAtLastError).call
        end

        # @param status [Integer]
        # @return [String]
        def cudaGetErrorString(status)
          get_error_string(status)
        end

        # @param status [Integer]
        # @return [String]
        def cudaGetErrorName(status)
          get_error_name(status)
        end

        # @param ptr [Fiddle::Pointer] host pointer to register
        # @param size [Integer] bytes
        # @param flags [Integer]
        # @return [Integer] status (0 when the symbol is not bound)
        def cudaHostRegister(ptr, size, flags)
          optional_call(:cudaHostRegister, ptr, size, flags)
        end

        # @param ptr [Fiddle::Pointer] host pointer to unregister
        # @return [Integer] status (0 when the symbol is not bound)
        def cudaHostUnregister(ptr)
          optional_call(:cudaHostUnregister, ptr)
        end

        # Falls back to cudaHostAlloc with flags 0 when cudaMallocHost is not
        # bound.
        # @param ptr_ptr [Fiddle::Pointer] pointer-to-pointer
        # @param size [Integer]
        # @return [Integer] status
        def cudaMallocHost(ptr_ptr, size)
          ensure_loaded!
          # A status of 0 is truthy in Ruby, so the fallback only runs when the
          # function is missing (safe navigation yields nil).
          @functions[:cudaMallocHost]&.call(ptr_ptr, size) || cudaHostAlloc(ptr_ptr, size, 0)
        end

        # @param ptr_out [Fiddle::Pointer]
        # @param size [Integer]
        # @param stream [Fiddle::Pointer]
        # @return [Integer] status
        def cudaMallocAsync(ptr_out, size, stream)
          fetch_function(:cudaMallocAsync).call(ptr_out, size, stream)
        end

        # @param ptr [Fiddle::Pointer]
        # @param stream [Fiddle::Pointer]
        # @return [Integer] status
        def cudaFreeAsync(ptr, stream)
          fetch_function(:cudaFreeAsync).call(ptr, stream)
        end

        # @param ptr_out [Fiddle::Pointer] pointer to store version
        # @return [Integer] status
        def cudaRuntimeGetVersion(ptr_out)
          fetch_function(:cudaRuntimeGetVersion).call(ptr_out)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaMemPoolDestroy(pool)
          optional_call(:cudaMemPoolDestroy, pool)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaMallocFromPoolAsync(ptr_ptr, size, pool, stream)
          optional_call(:cudaMallocFromPoolAsync, ptr_ptr, size, pool, stream)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaDeviceGetDefaultMemPool(pool_ptr, device)
          optional_call(:cudaDeviceGetDefaultMemPool, pool_ptr, device)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaMemPoolSetAttribute(pool, attr, value_ptr)
          optional_call(:cudaMemPoolSetAttribute, pool, attr, value_ptr)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaMemPoolGetAttribute(pool, attr, value_ptr)
          optional_call(:cudaMemPoolGetAttribute, pool, attr, value_ptr)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaDeviceSetMemPool(device, pool)
          optional_call(:cudaDeviceSetMemPool, device, pool)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaMemPoolCreate(pool_ptr, props)
          optional_call(:cudaMemPoolCreate, pool_ptr, props)
        end

        # @return [Integer] status (0 when the symbol is not bound)
        def cudaGetDeviceProperties(prop_ptr, device)
          optional_call(:cudaGetDeviceProperties, prop_ptr, device)
        end

        # @!endgroup

        private

        # Bind all CUDA runtime functions via Fiddle::Function.
        # Symbols missing from the library are skipped with a warning (see +bind+).
        # @return [void]
        def attach_all_functions!
          int = Fiddle::TYPE_INT
          ptr = Fiddle::TYPE_VOIDP
          size = Fiddle::TYPE_SIZE_T

          # Device management
          bind(:cudaGetDeviceCount, [ptr], int)
          bind(:cudaGetDevice, [ptr], int)
          bind(:cudaSetDevice, [int], int)
          bind(:cudaDeviceSynchronize, [], int)
          bind(:cudaDeviceReset, [], int)
          bind(:cudaDeviceGetAttribute, [ptr, int, int], int)
          bind(:cudaGetDeviceProperties, [ptr, int], int)

          # Memory management (hot path)
          bind(:cudaMalloc, [ptr, size], int)
          bind(:cudaFree, [ptr], int)
          bind(:cudaMemcpy, [ptr, ptr, size, int], int)
          bind(:cudaMemcpyAsync, [ptr, ptr, size, int, ptr], int)
          bind(:cudaMemset, [ptr, int, size], int)
          bind(:cudaMemsetAsync, [ptr, int, size, ptr], int)
          bind(:cudaHostAlloc, [ptr, size, int], int)
          bind(:cudaFreeHost, [ptr], int)
          bind(:cudaMemGetInfo, [ptr, ptr], int)
          bind(:cudaHostRegister, [ptr, size, int], int)
          bind(:cudaHostUnregister, [ptr], int)
          bind(:cudaMallocHost, [ptr, size], int)

          # Stream-ordered memory
          bind(:cudaMallocAsync, [ptr, size, ptr], int)
          bind(:cudaFreeAsync, [ptr, ptr], int)
          bind(:cudaMallocFromPoolAsync, [ptr, size, ptr, ptr], int)
          bind(:cudaMemPoolCreate, [ptr, ptr], int)
          bind(:cudaMemPoolDestroy, [ptr], int)
          bind(:cudaMemPoolSetAttribute, [ptr, int, ptr], int)
          bind(:cudaMemPoolGetAttribute, [ptr, int, ptr], int)
          bind(:cudaDeviceGetDefaultMemPool, [ptr, int], int)
          bind(:cudaDeviceSetMemPool, [int, ptr], int)

          # Stream management
          bind(:cudaStreamCreate, [ptr], int)
          bind(:cudaStreamDestroy, [ptr], int)
          bind(:cudaStreamSynchronize, [ptr], int)
          bind(:cudaStreamQuery, [ptr], int)

          # Event management
          bind(:cudaEventCreate, [ptr], int)
          bind(:cudaEventDestroy, [ptr], int)
          bind(:cudaEventRecord, [ptr, ptr], int)
          bind(:cudaEventSynchronize, [ptr], int)
          bind(:cudaEventElapsedTime, [ptr, ptr, ptr], int)

          # Error handling (both return a C string)
          bind(:cudaGetLastError, [], int)
          bind(:cudaPeekAtLastError, [], int)
          bind(:cudaGetErrorString, [int], ptr)
          bind(:cudaGetErrorName, [int], ptr)

          # Version
          bind(:cudaRuntimeGetVersion, [ptr], int)
          bind(:cudaDriverGetVersion, [ptr], int)
        end

        # Bind a single CUDA runtime function via Fiddle.
        # A symbol that cannot be resolved is reported on stderr and left
        # unbound instead of aborting the whole load.
        # @param name [Symbol] function name
        # @param arg_types [Array<Integer>] Fiddle type constants for arguments
        # @param ret_type [Integer] Fiddle type constant for return
        # @return [void]
        def bind(name, arg_types, ret_type)
          @functions[name] = Fiddle::Function.new(
            @handle[name.to_s],
            arg_types,
            ret_type
          )
        rescue Fiddle::DLError
          warn "[RuntimeAPI] WARNING: #{name} not found in #{CUDART_LIB}"
        end

        # Look up a bound function, loading the library first if needed.
        # @param name [Symbol] CUDA function name
        # @return [#call] the bound function
        # @raise [Fiddle::DLError] if the symbol was not bound
        def fetch_function(name)
          ensure_loaded!
          @functions.fetch(name) do
            raise Fiddle::DLError, "#{name} is not available in #{CUDART_LIB}"
          end
        end

        # Call a function that may be absent from the loaded library.
        # @param name [Symbol] CUDA function name
        # @param args [Array] arguments forwarded to the function
        # @return [Integer] the status code, or 0 when the function is not bound
        def optional_call(name, *args)
          ensure_loaded!
          @functions[name]&.call(*args) || 0
        end

        # Call a required function and raise on a non-zero status.
        # @param name [Symbol] CUDA function name
        # @param context [String] label used in the error message
        # @param args [Array] arguments forwarded to the function
        # @return [void]
        # @raise [CudaRuntimeError] when the call reports a non-zero status
        def invoke!(name, context, *args)
          check_status!(fetch_function(name).call(*args), context)
        end

        # Allocate a scratch buffer for an out-parameter.
        # Passing Fiddle::RUBY_FREE ties the buffer's lifetime to the returned
        # Pointer object; without a free function the memory is never released.
        # @param size [Integer] bytes
        # @return [Fiddle::Pointer]
        def alloc_out(size)
          Fiddle::Pointer.malloc(size, Fiddle::RUBY_FREE)
        end

        # Call a function whose first argument is an `int *` out-parameter.
        # @param name [Symbol] CUDA function name (also used as error context)
        # @param args [Array] arguments following the out-parameter
        # @return [Integer] the value written by the callee
        def query_int(name, *args)
          out = alloc_out(Fiddle::SIZEOF_INT)
          invoke!(name, name.to_s, out, *args)
          out[0, Fiddle::SIZEOF_INT].unpack1('i')
        end

        # Call a function whose first argument is a `void **` out-parameter.
        # @param name [Symbol] CUDA function name
        # @param context [String] label used in the error message
        # @param args [Array] arguments following the out-parameter
        # @return [Fiddle::Pointer] pointer wrapping the address written by the callee
        def query_pointer(name, context, *args)
          out = alloc_out(Fiddle::SIZEOF_VOIDP)
          invoke!(name, context, out, *args)
          # 'J' decodes a native pointer-width unsigned integer.
          Fiddle::Pointer.new(out[0, Fiddle::SIZEOF_VOIDP].unpack1('J'))
        end

        # Resolve symbolic or integer memcpy kind.
        # Integers pass through; known symbols map via MEMCPY_KINDS; anything
        # else is coerced with +to_i+ when it supports that.
        # @param kind [Symbol, Integer]
        # @return [Integer]
        # @raise [ArgumentError] if +kind+ is unknown and cannot be coerced
        def resolve_memcpy_kind(kind)
          return kind if kind.is_a?(Integer)

          MEMCPY_KINDS.fetch(kind) do
            raise ArgumentError, "unknown memcpy kind: #{kind.inspect}" unless kind.respond_to?(:to_i)

            kind.to_i
          end
        end
      end
    end
  end
end
|