sgc-ruby-cuda 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +2 -0
- data/COPYING +674 -0
- data/README.rdoc +106 -0
- data/Rakefile +76 -0
- data/doc/devel.rdoc +77 -0
- data/doc/features.rdoc +55 -0
- data/lib/cuda/driver/context.rb +236 -0
- data/lib/cuda/driver/cu.rb +60 -0
- data/lib/cuda/driver/device.rb +155 -0
- data/lib/cuda/driver/deviceptr.rb +69 -0
- data/lib/cuda/driver/error.rb +182 -0
- data/lib/cuda/driver/event.rb +124 -0
- data/lib/cuda/driver/ffi-cu.rb +620 -0
- data/lib/cuda/driver/function.rb +293 -0
- data/lib/cuda/driver/init.rb +45 -0
- data/lib/cuda/driver/memory.rb +134 -0
- data/lib/cuda/driver/module.rb +142 -0
- data/lib/cuda/driver/rubycu.rb +37 -0
- data/lib/cuda/driver/stream.rb +128 -0
- data/lib/cuda/driver/version.rb +42 -0
- data/lib/cuda/runtime/cuda.rb +65 -0
- data/lib/cuda/runtime/device.rb +175 -0
- data/lib/cuda/runtime/error.rb +197 -0
- data/lib/cuda/runtime/event.rb +117 -0
- data/lib/cuda/runtime/ffi-cuda.rb +588 -0
- data/lib/cuda/runtime/function.rb +161 -0
- data/lib/cuda/runtime/memory.rb +110 -0
- data/lib/cuda/runtime/rubycuda.rb +34 -0
- data/lib/cuda/runtime/stream.rb +126 -0
- data/lib/cuda/runtime/thread.rb +81 -0
- data/lib/cuda/runtime/version.rb +51 -0
- data/lib/ffi/prettystruct.rb +32 -0
- data/lib/helpers/flags.rb +82 -0
- data/lib/helpers/interface/ienum.rb +45 -0
- data/lib/helpers/klass.rb +45 -0
- data/lib/memory/buffer.rb +125 -0
- data/lib/memory/interface/ibuffer.rb +63 -0
- data/lib/memory/pointer.rb +72 -0
- data/lib/rubycu.rb +1 -0
- data/lib/rubycuda.rb +1 -0
- data/test/bad.ptx +0 -0
- data/test/memory/test_buffer.rb +93 -0
- data/test/rubycu/test_cucontext.rb +148 -0
- data/test/rubycu/test_cudevice.rb +69 -0
- data/test/rubycu/test_cudeviceptr.rb +43 -0
- data/test/rubycu/test_cuevent.rb +81 -0
- data/test/rubycu/test_cufunction.rb +165 -0
- data/test/rubycu/test_cumemory.rb +113 -0
- data/test/rubycu/test_cumodule.rb +114 -0
- data/test/rubycu/test_custream.rb +77 -0
- data/test/rubycu/test_cuversion.rb +39 -0
- data/test/rubycu/testbase.rb +107 -0
- data/test/rubycuda/test_cudadevice.rb +125 -0
- data/test/rubycuda/test_cudaerror.rb +48 -0
- data/test/rubycuda/test_cudaevent.rb +78 -0
- data/test/rubycuda/test_cudafunction.rb +106 -0
- data/test/rubycuda/test_cudamemory.rb +90 -0
- data/test/rubycuda/test_cudastream.rb +72 -0
- data/test/rubycuda/test_cudathread.rb +69 -0
- data/test/rubycuda/test_cudaversion.rb +41 -0
- data/test/rubycuda/testbase.rb +67 -0
- data/test/vadd.cu +21 -0
- data/version.rb +1 -0
- metadata +180 -0
@@ -0,0 +1,197 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2010-2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'cuda/runtime/ffi-cuda'
|
26
|
+
|
27
|
+
|
28
|
+
module SGC
|
29
|
+
module Cuda
|
30
|
+
|
31
|
+
# Exception classes for the CUDA runtime API.
#
# CudaStandardError derives from RuntimeError and every other class in this
# module derives from CudaStandardError, so rescuing CudaStandardError catches
# all of them.
module Error

  # Common ancestor of every CUDA runtime exception declared here.
  class CudaStandardError < RuntimeError; end

  class CudaMissingConfigurationError < CudaStandardError; end
  class CudaMemoryAllocationError < CudaStandardError; end
  class CudaInitializationError < CudaStandardError; end
  class CudaLaunchFailureError < CudaStandardError; end
  # @deprecated
  class CudaPriorLaunchFailureError < CudaStandardError; end
  class CudaLaunchTimeoutError < CudaStandardError; end
  class CudaLaunchOutOfResourcesError < CudaStandardError; end
  class CudaInvalidDeviceFunctionError < CudaStandardError; end
  class CudaInvalidConfigurationError < CudaStandardError; end
  class CudaInvalidDeviceError < CudaStandardError; end
  class CudaInvalidValueError < CudaStandardError; end
  class CudaInvalidPitchValueError < CudaStandardError; end
  class CudaInvalidSymbolError < CudaStandardError; end
  class CudaMapBufferObjectFailedError < CudaStandardError; end
  class CudaUnmapBufferObjectFailedError < CudaStandardError; end
  class CudaInvalidHostPointerError < CudaStandardError; end
  class CudaInvalidDevicePointerError < CudaStandardError; end
  class CudaInvalidTextureError < CudaStandardError; end
  class CudaInvalidTextureBindingError < CudaStandardError; end
  class CudaInvalidChannelDescriptorError < CudaStandardError; end
  class CudaInvalidMemcpyDirectionError < CudaStandardError; end
  # @deprecated
  class CudaAddressOfConstantError < CudaStandardError; end
  # @deprecated
  class CudaTextureFetchFailedError < CudaStandardError; end
  # @deprecated
  class CudaTextureNotFoundError < CudaStandardError; end
  # @deprecated
  class CudaSynchronizationError < CudaStandardError; end
  class CudaInvalidFilterSettingError < CudaStandardError; end
  class CudaInvalidNormSettingError < CudaStandardError; end
  # @deprecated
  class CudaMixedDeviceExecutionError < CudaStandardError; end
  # @deprecated
  class CudaCudartUnloadingError < CudaStandardError; end
  class CudaUnknownError < CudaStandardError; end
  class CudaNotYetImplementedError < CudaStandardError; end
  # @deprecated
  class CudaMemoryValueTooLargeError < CudaStandardError; end
  class CudaInvalidResourceHandleError < CudaStandardError; end
  class CudaNotReadyError < CudaStandardError; end
  class CudaInsufficientDriverError < CudaStandardError; end
  class CudaSetOnActiveProcessError < CudaStandardError; end
  class CudaInvalidSurfaceError < CudaStandardError; end
  class CudaNoDeviceError < CudaStandardError; end
  class CudaECCUncorrectableError < CudaStandardError; end
  class CudaSharedObjectSymbolNotFoundError < CudaStandardError; end
  class CudaSharedObjectInitFailedError < CudaStandardError; end
  class CudaUnsupportedLimitError < CudaStandardError; end
  class CudaDuplicateVariableNameError < CudaStandardError; end
  class CudaDuplicateTextureNameError < CudaStandardError; end
  class CudaDuplicateSurfaceNameError < CudaStandardError; end
  class CudaDevicesUnavailableError < CudaStandardError; end
  class CudaInvalidKernelImageError < CudaStandardError; end
  class CudaNoKernelImageForDeviceError < CudaStandardError; end
  class CudaIncompatibleDriverContextError < CudaStandardError; end
  class CudaPeerAccessAlreadyEnabledError < CudaStandardError; end
  class CudaPeerAccessNotEnabledError < CudaStandardError; end
  class CudaDeviceAlreadyInUseError < CudaStandardError; end
  class CudaProfilerDisabledError < CudaStandardError; end
  class CudaProfilerNotInitializedError < CudaStandardError; end
  class CudaProfilerAlreadyStartedError < CudaStandardError; end
  class CudaProfilerAlreadyStoppedError < CudaStandardError; end
  class CudaStartupFailureError < CudaStandardError; end
  class CudaAPIFailureBaseError < CudaStandardError; end

end
|
94
|
+
|
95
|
+
|
96
|
+
# Look up the description of a CUDA error via cudaGetErrorString.
# @param [Integer, CudaError] e A CUDA error value or label.
# @return [String] The error string of _e_.
def get_error_string(e)
  API.cudaGetErrorString(e)
end
module_function :get_error_string
|
102
|
+
|
103
|
+
|
104
|
+
# Fetch the last CUDA error via cudaGetLastError.
# NOTE(review): the CUDA runtime documents cudaGetLastError as also resetting
# the stored error to success - confirm for the CUDA version in use.
# @return [Integer] The error value of the last CUDA error.
def get_last_error
  API.cudaGetLastError
end
module_function :get_last_error
|
109
|
+
|
110
|
+
|
111
|
+
# Fetch the last CUDA error via cudaPeekAtLastError, leaving the stored error
# in place (it is not reset).
# @return [Integer] The error value of the last CUDA error.
def peek_at_last_error
  API.cudaPeekAtLastError
end
module_function :peek_at_last_error
|
117
|
+
|
118
|
+
# @private
|
119
|
+
module Pvt
|
120
|
+
|
121
|
+
include Error
|
122
|
+
|
123
|
+
|
124
|
+
# Turn a CUDA runtime status value into a Ruby exception.
#
# @param [Integer] status The status returned by a CUDA runtime API call.
# @param [String, nil] msg Optional context appended to the CUDA error string.
# @return [nil] When _status_ equals CUDA_SUCCESS.
# @raise [CudaStandardError] The class registered for _status_ in
#   @error_class_by_enum, or CudaStandardError itself when none is registered.
def self.handle_error(status, msg = nil)
  return nil if status == CUDA_SUCCESS

  # A status without a registered class would otherwise reach `raise nil, ...`,
  # which fails with a TypeError and hides the actual CUDA error.
  error_class = @error_class_by_enum[API::CudaError[status]] || CudaStandardError

  # Only append the separator when there is context to show.
  description = API::cudaGetErrorString(status).to_s
  description = "#{description} : #{msg}" if msg
  raise error_class, description
end
|
128
|
+
|
129
|
+
|
130
|
+
CUDA_SUCCESS = API::CudaError[:SUCCESS]
|
131
|
+
CUDA_ERROR_NOT_READY = API::CudaError[:ERROR_NOT_READY]
|
132
|
+
|
133
|
+
@error_class_by_enum = {
|
134
|
+
ERROR_MISSING_CONFIGURATION: CudaMissingConfigurationError,
|
135
|
+
ERROR_MEMORY_ALLOCATION: CudaMemoryAllocationError,
|
136
|
+
ERROR_INITIALIZATION_ERROR: CudaInitializationError,
|
137
|
+
ERROR_LAUNCH_FAILURE: CudaLaunchFailureError,
|
138
|
+
ERROR_PRIOR_LAUNCH_FAILURE: CudaPriorLaunchFailureError,
|
139
|
+
ERROR_LAUNCH_TIMEOUT: CudaLaunchTimeoutError,
|
140
|
+
ERROR_LAUNCH_OUT_OF_RESOURCES: CudaLaunchOutOfResourcesError,
|
141
|
+
ERROR_INVALID_DEVICE_FUNCTION: CudaInvalidDeviceFunctionError,
|
142
|
+
ERROR_INVALID_CONFIGURATION: CudaInvalidConfigurationError,
|
143
|
+
ERROR_INVALID_DEVICE: CudaInvalidDeviceError,
|
144
|
+
ERROR_INVALID_VALUE: CudaInvalidValueError,
|
145
|
+
ERROR_INVALID_PITCH_VALUE: CudaInvalidPitchValueError,
|
146
|
+
ERROR_INVALID_SYMBOL: CudaInvalidSymbolError,
|
147
|
+
ERROR_MAP_BUFFER_OBJECT_FAILED: CudaMapBufferObjectFailedError,
|
148
|
+
ERROR_UNMAP_BUFFER_OBJECT_FAILED: CudaUnmapBufferObjectFailedError,
|
149
|
+
ERROR_INVALID_HOST_POINTER: CudaInvalidHostPointerError,
|
150
|
+
ERROR_INVALID_DEVICE_POINTER: CudaInvalidDevicePointerError,
|
151
|
+
ERROR_INVALID_TEXTURE: CudaInvalidTextureError,
|
152
|
+
ERROR_INVALID_TEXTURE_BINDING: CudaInvalidTextureBindingError,
|
153
|
+
ERROR_INVALID_CHANNEL_DESCRIPTOR: CudaInvalidChannelDescriptorError,
|
154
|
+
ERROR_INVALID_MEMCPY_DIRECTION: CudaInvalidMemcpyDirectionError,
|
155
|
+
ERROR_ADDRESS_OF_CONSTANT: CudaAddressOfConstantError,
|
156
|
+
ERROR_TEXTURE_FETCH_FAILED: CudaTextureFetchFailedError,
|
157
|
+
ERROR_TEXTURE_NOT_BOUND: CudaTextureNotFoundError,
|
158
|
+
ERROR_SYNCHRONIZATION_ERROR: CudaSynchronizationError,
|
159
|
+
ERROR_INVALID_FILTER_SETTING: CudaInvalidFilterSettingError,
|
160
|
+
ERROR_INVALID_NORM_SETTING: CudaInvalidNormSettingError,
|
161
|
+
ERROR_MIXED_DEVICE_EXECUTION: CudaMixedDeviceExecutionError,
|
162
|
+
ERROR_CUDART_UNLOADING: CudaCudartUnloadingError,
|
163
|
+
ERROR_UNKNOWN: CudaUnknownError,
|
164
|
+
ERROR_NOT_YET_IMPLEMENTED: CudaNotYetImplementedError,
|
165
|
+
ERROR_MEMORY_VALUE_TOO_LARGE: CudaMemoryValueTooLargeError,
|
166
|
+
ERROR_INVALID_RESOURCE_HANDLE: CudaInvalidResourceHandleError,
|
167
|
+
ERROR_NOT_READY: CudaNotReadyError,
|
168
|
+
ERROR_INSUFFICIENT_DRIVER: CudaInsufficientDriverError,
|
169
|
+
ERROR_SET_ON_ACTIVE_PROCESS: CudaSetOnActiveProcessError,
|
170
|
+
ERROR_INVALID_SURFACE: CudaInvalidSurfaceError,
|
171
|
+
ERROR_NO_DEVICE: CudaNoDeviceError,
|
172
|
+
ERROR_ECC_UNCORRECTABLE: CudaECCUncorrectableError,
|
173
|
+
ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: CudaSharedObjectSymbolNotFoundError,
|
174
|
+
ERROR_SHARED_OBJECT_INIT_FAILED: CudaSharedObjectInitFailedError,
|
175
|
+
ERROR_UNSUPPORTED_LIMIT: CudaUnsupportedLimitError,
|
176
|
+
ERROR_DUPLICATE_VARIABLE_NAME: CudaDuplicateVariableNameError,
|
177
|
+
ERROR_DUPLICATE_TEXTURE_NAME: CudaDuplicateTextureNameError,
|
178
|
+
ERROR_DUPLICATE_SURFACE_NAME: CudaDuplicateSurfaceNameError,
|
179
|
+
ERROR_DEVICES_UNAVAILABLE: CudaDevicesUnavailableError,
|
180
|
+
ERROR_INVALID_KERNEL_IMAGE: CudaInvalidKernelImageError,
|
181
|
+
ERROR_NO_KERNEL_IMAGE_FOR_DEVICE: CudaNoKernelImageForDeviceError,
|
182
|
+
ERROR_INCOMPATIBLE_DRIVER_CONTEXT: CudaIncompatibleDriverContextError,
|
183
|
+
ERROR_PEER_ACCESS_ALREADY_ENABLED: CudaPeerAccessAlreadyEnabledError,
|
184
|
+
ERROR_PEER_ACCESS_NOT_ENABLED: CudaPeerAccessNotEnabledError,
|
185
|
+
ERROR_DEVICE_ALREADY_IN_USE: CudaDeviceAlreadyInUseError,
|
186
|
+
ERROR_PROFILER_DISABLED: CudaProfilerDisabledError,
|
187
|
+
ERROR_PROFILER_NOT_INITIALIZED: CudaProfilerNotInitializedError,
|
188
|
+
ERROR_PROFILER_ALREADY_STARTED: CudaProfilerAlreadyStartedError,
|
189
|
+
ERROR_PROFILER_ALREADY_STOPPED: CudaProfilerAlreadyStoppedError,
|
190
|
+
ERROR_STARTUP_FAILURE: CudaStartupFailureError,
|
191
|
+
ERROR_API_FAILURE_BASE: CudaAPIFailureBaseError,
|
192
|
+
}
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
end # module
|
197
|
+
end # module
|
@@ -0,0 +1,117 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2010-2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'cuda/runtime/ffi-cuda'
|
26
|
+
require 'cuda/runtime/error'
|
27
|
+
|
28
|
+
|
29
|
+
module SGC
|
30
|
+
module Cuda
|
31
|
+
|
32
|
+
# Wrapper around a CUDA runtime event handle.
#
# Instances are obtained through CudaEvent.create; +new+ is private. The
# handle lives in the memory pointer stored in @pevent and is read back with
# API::read_cudaevent whenever it is passed to the runtime.
class CudaEvent

  # Create and return an event with _flags_.
  # @overload create
  # @overload create(flags)
  # @return [CudaEvent] An event created with _flags_.
  # @raise Whatever Pvt::handle_error raises for a failing status.
  def self.create(*flags)
    # No flags given: fall back to the :DEFAULT label.
    flags = :DEFAULT if flags.empty?
    p = FFI::MemoryPointer.new(:CudaEvent)
    f = CudaEventFlags.value(flags)
    status = API::cudaEventCreateWithFlags(p, f)
    Pvt::handle_error(status, "Failed to create event: flags = #{flags}")
    new(p)
  end


  # Destroy this event.
  # @return [nil]
  def destroy
    status = API::cudaEventDestroy(to_api)
    Pvt::handle_error(status, "Failed to destroy event.")
    # Overwrite the stored handle with 0 once the event has been destroyed.
    API::write_cudaevent(@pevent, 0)
    nil
  end


  # @return [Boolean] Return true if this event has been recorded. Otherwise, return false.
  def query
    status = API::cudaEventQuery(to_api)
    return true if status == Pvt::CUDA_SUCCESS
    return false if status == Pvt::CUDA_ERROR_NOT_READY

    Pvt::handle_error(status, "Failed to query event.")
    # Only reached if handle_error returns for a non-success status.
    # NOTE(review): CudaStandardError is declared in the Error module; confirm
    # it is resolvable from this class.
    raise CudaStandardError, "Error handling fails to catch this error."
  end


  # Record this event asynchronously on _stream_.
  # @param [Integer, CudaStream] stream The CUDA stream to record this event on.
  #     Setting _stream_ on anything other than an instance of CudaStream will record on any stream.
  # @return [CudaEvent] This event.
  def record(stream = 0)
    s = Pvt::parse_stream(stream)
    status = API::cudaEventRecord(to_api, s)
    Pvt::handle_error(status, "Failed to record event.")
    self
  end


  # Block the calling CPU thread until this event has been recorded.
  # @return [CudaEvent] This event.
  def synchronize
    status = API::cudaEventSynchronize(to_api)
    # Pass a context message like the other methods of this class do.
    Pvt::handle_error(status, "Failed to synchronize event.")
    self
  end


  # Compute the elapsed time (ms) from _event_start_ to _event_end_.
  # @param [CudaEvent] event_start The event that corresponds to the start time.
  # @param [CudaEvent] event_end The event that corresponds to the end time.
  # @return [Numeric] The elapsed time in ms.
  # @raise Whatever Pvt::handle_error raises for a failing status.
  def self.elapsed_time(event_start, event_end)
    t = FFI::MemoryPointer.new(:float)
    status = API::cudaEventElapsedTime(t, event_start.to_api, event_end.to_api)
    # Check the status before reading the buffer, so a failed call raises
    # instead of returning whatever the buffer holds.
    Pvt::handle_error(status, "Failed to compute the elapsed time between events.")
    t.read_float
  end


  # @private
  def initialize(ptr)
    @pevent = ptr
  end
  private_class_method :new


  # @private
  def to_api
    API::read_cudaevent(@pevent)
  end

end
|
115
|
+
|
116
|
+
end # module
|
117
|
+
end # module
|
@@ -0,0 +1,588 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2010-2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'ffi'
|
26
|
+
require 'ffi/prettystruct'
|
27
|
+
require 'helpers/interface/ienum'
|
28
|
+
require 'helpers/flags'
|
29
|
+
require 'helpers/klass'
|
30
|
+
|
31
|
+
|
32
|
+
module SGC
|
33
|
+
module Cuda
|
34
|
+
module API
|
35
|
+
|
36
|
+
extend FFI::Library
|
37
|
+
ffi_lib "cudart"
|
38
|
+
|
39
|
+
# Base class for enum wrapper classes.
#
# When a subclass is created, singleton methods +symbols+ and +[]+ are defined
# on it; both forward to the constant under SGC::Cuda::API whose name is what
# SGC::Helper.classname returns for the subclass.
# NOTE(review): presumably classname yields the unqualified class name, so the
# subclass maps to the FFI enum of the same name - confirm in helpers/klass.
class Enum
  extend SGC::Helper::IEnum
  extend SGC::Helper::FlagsValue

  def self.inherited(subclass)
    api_name = SGC::Helper.classname(subclass)
    subclass.instance_eval <<-DELEGATORS
      def symbols
        SGC::Cuda::API::#{api_name}.symbols
      end

      def [](*args)
        SGC::Cuda::API::#{api_name}[*args]
      end
    DELEGATORS
  end
end
|
55
|
+
|
56
|
+
CudaError = enum(
|
57
|
+
:SUCCESS, 0,
|
58
|
+
:ERROR_MISSING_CONFIGURATION, 1,
|
59
|
+
:ERROR_MEMORY_ALLOCATION, 2,
|
60
|
+
:ERROR_INITIALIZATION_ERROR, 3,
|
61
|
+
:ERROR_LAUNCH_FAILURE, 4,
|
62
|
+
:ERROR_PRIOR_LAUNCH_FAILURE, 5, # Deprecated.
|
63
|
+
:ERROR_LAUNCH_TIMEOUT, 6,
|
64
|
+
:ERROR_LAUNCH_OUT_OF_RESOURCES, 7,
|
65
|
+
:ERROR_INVALID_DEVICE_FUNCTION, 8,
|
66
|
+
:ERROR_INVALID_CONFIGURATION, 9,
|
67
|
+
:ERROR_INVALID_DEVICE, 10,
|
68
|
+
:ERROR_INVALID_VALUE, 11,
|
69
|
+
:ERROR_INVALID_PITCH_VALUE, 12,
|
70
|
+
:ERROR_INVALID_SYMBOL, 13,
|
71
|
+
:ERROR_MAP_BUFFER_OBJECT_FAILED, 14,
|
72
|
+
:ERROR_UNMAP_BUFFER_OBJECT_FAILED, 15,
|
73
|
+
:ERROR_INVALID_HOST_POINTER, 16,
|
74
|
+
:ERROR_INVALID_DEVICE_POINTER, 17,
|
75
|
+
:ERROR_INVALID_TEXTURE, 18,
|
76
|
+
:ERROR_INVALID_TEXTURE_BINDING, 19,
|
77
|
+
:ERROR_INVALID_CHANNEL_DESCRIPTOR, 20,
|
78
|
+
:ERROR_INVALID_MEMCPY_DIRECTION, 21,
|
79
|
+
:ERROR_ADDRESS_OF_CONSTANT, 22, # Deprecated.
|
80
|
+
:ERROR_TEXTURE_FETCH_FAILED, 23, # Deprecated.
|
81
|
+
:ERROR_TEXTURE_NOT_BOUND, 24, # Deprecated.
|
82
|
+
:ERROR_SYNCHRONIZATION_ERROR, 25, # Deprecated.
|
83
|
+
:ERROR_INVALID_FILTER_SETTING, 26,
|
84
|
+
:ERROR_INVALID_NORM_SETTING, 27,
|
85
|
+
:ERROR_MIXED_DEVICE_EXECUTION, 28, # Deprecated.
|
86
|
+
:ERROR_CUDART_UNLOADING, 29, # Deprecated.
|
87
|
+
:ERROR_UNKNOWN, 30,
|
88
|
+
:ERROR_NOT_YET_IMPLEMENTED, 31,
|
89
|
+
:ERROR_MEMORY_VALUE_TOO_LARGE, 32, # Deprecated.
|
90
|
+
:ERROR_INVALID_RESOURCE_HANDLE, 33,
|
91
|
+
:ERROR_NOT_READY, 34,
|
92
|
+
:ERROR_INSUFFICIENT_DRIVER, 35,
|
93
|
+
:ERROR_SET_ON_ACTIVE_PROCESS, 36,
|
94
|
+
:ERROR_INVALID_SURFACE, 37,
|
95
|
+
:ERROR_NO_DEVICE, 38,
|
96
|
+
:ERROR_ECC_UNCORRECTABLE, 39,
|
97
|
+
:ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, 40,
|
98
|
+
:ERROR_SHARED_OBJECT_INIT_FAILED, 41,
|
99
|
+
:ERROR_UNSUPPORTED_LIMIT, 42,
|
100
|
+
:ERROR_DUPLICATE_VARIABLE_NAME, 43,
|
101
|
+
:ERROR_DUPLICATE_TEXTURE_NAME, 44,
|
102
|
+
:ERROR_DUPLICATE_SURFACE_NAME, 45,
|
103
|
+
:ERROR_DEVICES_UNAVAILABLE, 46,
|
104
|
+
:ERROR_INVALID_KERNEL_IMAGE, 47,
|
105
|
+
:ERROR_NO_KERNEL_IMAGE_FOR_DEVICE, 48,
|
106
|
+
:ERROR_INCOMPATIBLE_DRIVER_CONTEXT, 49,
|
107
|
+
:ERROR_PEER_ACCESS_ALREADY_ENABLED, 50,
|
108
|
+
:ERROR_PEER_ACCESS_NOT_ENABLED, 51,
|
109
|
+
:ERROR_DEVICE_ALREADY_IN_USE, 54,
|
110
|
+
:ERROR_PROFILER_DISABLED, 55,
|
111
|
+
:ERROR_PROFILER_NOT_INITIALIZED, 56,
|
112
|
+
:ERROR_PROFILER_ALREADY_STARTED, 57,
|
113
|
+
:ERROR_PROFILER_ALREADY_STOPPED, 58,
|
114
|
+
:ERROR_STARTUP_FAILURE, 0x7F,
|
115
|
+
:ERROR_API_FAILURE_BASE, 10000,
|
116
|
+
)
|
117
|
+
|
118
|
+
CudaDeviceFlags = enum(
|
119
|
+
:SCHEDULE_AUTO, 0,
|
120
|
+
:SCHEDULE_SPIN, 1,
|
121
|
+
:SCHEDULE_YIELD, 2,
|
122
|
+
:SCHEDULE_BLOCKING_SYNC, 4,
|
123
|
+
:BLOCKING_SYNC, 4, # Deprecated. Use :SCHEDULE_BLOCKING_SYNC.
|
124
|
+
:MAP_HOST, 8,
|
125
|
+
:LMEM_RESIZE_TO_MAX, 16,
|
126
|
+
)
|
127
|
+
|
128
|
+
CudaEventFlags = enum(
|
129
|
+
:DEFAULT, 0,
|
130
|
+
:BLOCKING_SYNC, 1,
|
131
|
+
:DISABLE_TIMING, 2,
|
132
|
+
)
|
133
|
+
|
134
|
+
CudaHostAllocFlags = enum(
|
135
|
+
:DEFAULT, 0,
|
136
|
+
:PORTABLE, 1,
|
137
|
+
:MAPPED, 2,
|
138
|
+
:WRITE_COMBINED, 4,
|
139
|
+
)
|
140
|
+
|
141
|
+
CudaHostRegisterFlags = enum(
|
142
|
+
:DEFAULT, 0,
|
143
|
+
:PORTABLE, 1,
|
144
|
+
:MAPPED, 2,
|
145
|
+
)
|
146
|
+
|
147
|
+
CudaArrayFlags = enum(
|
148
|
+
:DEFAULT, 0x00,
|
149
|
+
:LAYERED, 0x01,
|
150
|
+
:SURFACE_LOAD_STORE, 0x02,
|
151
|
+
)
|
152
|
+
|
153
|
+
CudaMemoryType = enum(
|
154
|
+
:Host, 1,
|
155
|
+
:DEVICE, 2,
|
156
|
+
)
|
157
|
+
|
158
|
+
CudaMemcpyKind = enum(
|
159
|
+
:HOST_TO_HOST, 0,
|
160
|
+
:HOST_TO_DEVICE, 1,
|
161
|
+
:DEVICE_TO_HOST, 2,
|
162
|
+
:DEVICE_TO_DEVICE, 3,
|
163
|
+
:DEFAULT, 4,
|
164
|
+
)
|
165
|
+
|
166
|
+
CudaChannelFormatKind = enum(
|
167
|
+
:SIGNED, 0,
|
168
|
+
:UNSIGNED, 1,
|
169
|
+
:FLOAT, 2,
|
170
|
+
:None,3,
|
171
|
+
)
|
172
|
+
|
173
|
+
CudaFunctionCache = enum(
|
174
|
+
:PREFER_NONE, 0,
|
175
|
+
:PREFER_SHARED, 1,
|
176
|
+
:PREFER_L1, 2,
|
177
|
+
)
|
178
|
+
|
179
|
+
CudaLimit = enum(
|
180
|
+
:STACK_SIZE, 0x00,
|
181
|
+
:PRINTF_FIFO_SIZE, 0x01,
|
182
|
+
:MALLOC_HEAP_SIZE, 0x02,
|
183
|
+
)
|
184
|
+
|
185
|
+
CudaOutputMode = enum(
|
186
|
+
:KEY_VALUE_PAIR, 0x00,
|
187
|
+
:CSV, 0x01,
|
188
|
+
)
|
189
|
+
|
190
|
+
CudaComputeMode = enum(
|
191
|
+
:DEFAULT, 0,
|
192
|
+
:EXCLUSIVE, 1,
|
193
|
+
:PROHIBITED, 2,
|
194
|
+
:EXCLUSIVE_PROCESS, 3,
|
195
|
+
)
|
196
|
+
|
197
|
+
CudaSurfaceBoundaryMode = enum(
|
198
|
+
:ZERO, 0,
|
199
|
+
:CLAMP, 1,
|
200
|
+
:TRAP, 2,
|
201
|
+
)
|
202
|
+
|
203
|
+
CudaSurfaceFormatMode = enum(
|
204
|
+
:FORCED, 0,
|
205
|
+
:AUTO, 1,
|
206
|
+
)
|
207
|
+
|
208
|
+
CudaTextureAddressMode = enum(
|
209
|
+
:WRAP, 0,
|
210
|
+
:CLAMP, 1,
|
211
|
+
:MIRROR, 2,
|
212
|
+
:BORDER, 3,
|
213
|
+
)
|
214
|
+
|
215
|
+
CudaTextureFilterMode = enum(
|
216
|
+
:POINT, 0,
|
217
|
+
:LINEAR, 1,
|
218
|
+
)
|
219
|
+
|
220
|
+
CudaTextureReadMode = enum(
|
221
|
+
:ELEMENT_TYPE, 0,
|
222
|
+
:NORMALIZED_FLOAT, 1,
|
223
|
+
)
|
224
|
+
|
225
|
+
FFI::typedef :int, :enum
|
226
|
+
FFI::typedef :pointer, :CudaStream
|
227
|
+
FFI::typedef :pointer, :CudaEvent
|
228
|
+
|
229
|
+
# Return the result of calling +read_int+ on _ptr_.
def read_int(ptr)
  ptr.read_int
end
|
230
|
+
# Return the result of calling +read_long+ on _ptr_.
def read_long(ptr)
  ptr.read_long
end
|
231
|
+
# Return the result of calling +read_pointer+ on _ptr_.
def read_pointer(ptr)
  ptr.read_pointer
end
|
232
|
+
|
233
|
+
# Write _value_ to _ptr_ by calling +write_int+ on it. The value parameter is
# required: without it nothing can be written (this also backs write_enum).
def write_int(ptr, value); ptr.write_int(value); end
|
234
|
+
# Write _value_ to _ptr_ by calling +write_long+ on it. The value parameter is
# required: without it nothing can be written (this also backs write_size_t).
def write_long(ptr, value); ptr.write_long(value); end
|
235
|
+
# Write _value_ to _ptr_ by calling +write_pointer+ on it.
def write_pointer(ptr, value)
  ptr.write_pointer(value)
end
|
236
|
+
|
237
|
+
alias read_size_t read_long
|
238
|
+
alias read_enum read_int
|
239
|
+
alias read_cudastream read_pointer
|
240
|
+
alias read_cudaevent read_pointer
|
241
|
+
|
242
|
+
alias write_size_t write_long
|
243
|
+
alias write_enum write_int
|
244
|
+
alias write_cudastream write_pointer
|
245
|
+
alias write_cudaevent write_pointer
|
246
|
+
|
247
|
+
module_function :read_size_t
|
248
|
+
module_function :read_enum
|
249
|
+
module_function :read_cudastream
|
250
|
+
module_function :read_cudaevent
|
251
|
+
|
252
|
+
module_function :write_size_t
|
253
|
+
module_function :write_enum
|
254
|
+
module_function :write_cudastream
|
255
|
+
module_function :write_cudaevent
|
256
|
+
|
257
|
+
|
258
|
+
# FFI struct holding three unsigned ints in a single array field, exposed as
# x, y and z (indices 0, 1 and 2).
class Dim3 < FFI::Struct
  layout(
    :array, [:uint, 3]
  )

  # The public initialize, [] and []= are redefined below, so keep the
  # inherited versions reachable under private names first.
  alias_method :init, :initialize
  alias_method :get, :[]
  alias_method :set, :[]=
  private :init, :get, :set

  # @param x Value stored at index 0.
  # @param y Value stored at index 1.
  # @param z Value stored at index 2.
  def initialize(x, y, z)
    init
    @array = get(:array)
    [x, y, z].each_with_index { |component, axis| @array[axis] = component }
  end

  # Read the component at _index_ of the array field.
  def [](index)
    @array[index]
  end

  # Store _value_ at _index_ of the array field.
  def []=(index, value)
    @array[index] = value
  end

  def x
    @array[0]
  end

  def y
    @array[1]
  end

  def z
    @array[2]
  end

  def x=(value)
    @array[0] = value
  end

  def y=(value)
    @array[1] = value
  end

  def z=(value)
    @array[2] = value
  end
end
|
285
|
+
|
286
|
+
class CudaDeviceProp < FFI::PrettyStruct
|
287
|
+
layout(
|
288
|
+
:name, [:char, 256],
|
289
|
+
:total_global_mem, :size_t,
|
290
|
+
:shared_mem_per_block, :size_t,
|
291
|
+
:regs_per_block, :int,
|
292
|
+
:warp_size, :int,
|
293
|
+
:mem_pitch, :size_t,
|
294
|
+
:max_threads_per_block, :int,
|
295
|
+
:max_threads_dim, [:int, 3],
|
296
|
+
:max_grid_size, [:int, 3],
|
297
|
+
:clock_rate, :int,
|
298
|
+
:total_const_mem, :size_t,
|
299
|
+
:major, :int,
|
300
|
+
:minor, :int,
|
301
|
+
:texture_alignment, :size_t,
|
302
|
+
:device_overlap, :int, # Deprecated. Use :async_engine_count.
|
303
|
+
:multi_processor_count, :int,
|
304
|
+
:kernel_exec_timeout_enabled, :int,
|
305
|
+
:integrated, :int,
|
306
|
+
:can_map_host_memory, :int,
|
307
|
+
:compute_mode, :int,
|
308
|
+
:max_texture1d, :int,
|
309
|
+
:max_texture2d, [:int, 2],
|
310
|
+
:max_texture3d, [:int, 3],
|
311
|
+
:max_texture1d_layered, [:int, 2],
|
312
|
+
:max_texture2d_layered, [:int, 3],
|
313
|
+
:surface_alignment, :size_t,
|
314
|
+
:concurrent_kernels, :int,
|
315
|
+
:ecc_enabled, :int,
|
316
|
+
:pci_bus_id, :int,
|
317
|
+
:pci_device_id, :int,
|
318
|
+
:tcc_driver, :int,
|
319
|
+
:async_engine_count, :int,
|
320
|
+
:unified_addressing, :int,
|
321
|
+
:memory_clock_rate, :int,
|
322
|
+
:memory_bus_width, :int,
|
323
|
+
:l2_cache_size, :int,
|
324
|
+
:max_threads_per_multi_processor, :int,
|
325
|
+
)
|
326
|
+
end
|
327
|
+
|
328
|
+
class CudaFunctionAttributes < FFI::PrettyStruct
|
329
|
+
layout(
|
330
|
+
:shared_size_bytes, :size_t,
|
331
|
+
:const_size_bytes, :size_t,
|
332
|
+
:local_size_bytes, :size_t,
|
333
|
+
:max_threads_per_block, :int,
|
334
|
+
:num_regs, :int,
|
335
|
+
:ptx_version, :int,
|
336
|
+
:binary_version, :int,
|
337
|
+
)
|
338
|
+
end
|
339
|
+
|
340
|
+
class CudaPointerAttributes < FFI::PrettyStruct
|
341
|
+
layout(
|
342
|
+
:memory_type, CudaMemoryType,
|
343
|
+
:device, :int,
|
344
|
+
:device_pointer, :pointer,
|
345
|
+
:host_pointer, :pointer,
|
346
|
+
)
|
347
|
+
end
|
348
|
+
|
349
|
+
class CudaChannelFormatDesc < FFI::PrettyStruct
|
350
|
+
layout(
|
351
|
+
:x, :int,
|
352
|
+
:y, :int,
|
353
|
+
:z, :int,
|
354
|
+
:w, :int,
|
355
|
+
:f, CudaChannelFormatKind,
|
356
|
+
)
|
357
|
+
end
|
358
|
+
|
359
|
+
class CudaPitchedPtr < FFI::PrettyStruct
|
360
|
+
layout(
|
361
|
+
:ptr, :pointer,
|
362
|
+
:pitch, :size_t,
|
363
|
+
:xsize, :size_t,
|
364
|
+
:ysize, :size_t,
|
365
|
+
)
|
366
|
+
end
|
367
|
+
|
368
|
+
class CudaPos < FFI::PrettyStruct
|
369
|
+
layout(
|
370
|
+
:x, :size_t,
|
371
|
+
:y, :size_t,
|
372
|
+
:z, :size_t,
|
373
|
+
)
|
374
|
+
end
|
375
|
+
|
376
|
+
class CudaExtent < FFI::PrettyStruct
|
377
|
+
layout(
|
378
|
+
:width, :size_t,
|
379
|
+
:height, :size_t,
|
380
|
+
:depth, :size_t,
|
381
|
+
)
|
382
|
+
end
|
383
|
+
|
384
|
+
class CudaMemcpy3DParms < FFI::PrettyStruct
|
385
|
+
layout(
|
386
|
+
:src_array, :pointer,
|
387
|
+
:src_pos, CudaPos,
|
388
|
+
:src_ptr, CudaPitchedPtr,
|
389
|
+
:dst_array, :pointer,
|
390
|
+
:dst_pos, CudaPos,
|
391
|
+
:dst_ptr, CudaPitchedPtr,
|
392
|
+
:extent, CudaExtent,
|
393
|
+
:kind, CudaMemcpyKind,
|
394
|
+
)
|
395
|
+
end
|
396
|
+
|
397
|
+
class CudaMemcpy3DPeerParms < FFI::PrettyStruct
|
398
|
+
layout(
|
399
|
+
:src_array, :pointer,
|
400
|
+
:src_pos, CudaPos,
|
401
|
+
:src_ptr, CudaPitchedPtr,
|
402
|
+
:src_device, :int,
|
403
|
+
:dst_array, :pointer,
|
404
|
+
:dst_pos, CudaPos,
|
405
|
+
:dst_ptr, CudaPitchedPtr,
|
406
|
+
:dst_device, :int,
|
407
|
+
:extent, CudaExtent,
|
408
|
+
)
|
409
|
+
end
|
410
|
+
|
411
|
+
class TextureReference < FFI::PrettyStruct
|
412
|
+
layout(
|
413
|
+
:normalized, :int,
|
414
|
+
:filter_mode, CudaTextureFilterMode,
|
415
|
+
:address_mode, [CudaTextureAddressMode, 3],
|
416
|
+
:channel_desc, CudaChannelFormatDesc,
|
417
|
+
:srgb, :int,
|
418
|
+
:__cuda_reserved, [:int, 15],
|
419
|
+
)
|
420
|
+
end
|
421
|
+
|
422
|
+
# FFI struct for a surface reference; its only member is a
# CudaChannelFormatDesc (channel_desc).
class SurfaceReference < FFI::PrettyStruct
  layout(
    :channel_desc, CudaChannelFormatDesc,
  )
end
|
427
|
+
|
428
|
+
# CUDA Version Management.
# Both functions take a single :pointer argument and return :enum.
# NOTE(review): the pointer is presumably an int output buffer for the
# version number; confirm against the callers.
attach_function :cudaDriverGetVersion, [:pointer], :enum
attach_function :cudaRuntimeGetVersion, [:pointer], :enum
|
431
|
+
|
432
|
+
# CUDA Error Handling.
# cudaGetErrorString maps a CudaError to a :string; the other two take no
# arguments and return :enum.
attach_function :cudaGetErrorString, [CudaError], :string
attach_function :cudaGetLastError, [], :enum
attach_function :cudaPeekAtLastError, [], :enum
|
436
|
+
|
437
|
+
# CUDA Device Management.
# Every binding returns :enum. :pointer arguments are raw pointers supplied
# by the caller.
attach_function :cudaChooseDevice, [:pointer, :pointer], :enum
attach_function :cudaDeviceGetCacheConfig, [:pointer], :enum
attach_function :cudaDeviceGetLimit, [:pointer, CudaLimit], :enum
attach_function :cudaDeviceReset, [], :enum
attach_function :cudaDeviceSetCacheConfig, [CudaFunctionCache], :enum
attach_function :cudaDeviceSetLimit, [CudaLimit, :size_t], :enum
attach_function :cudaDeviceSynchronize, [], :enum
attach_function :cudaGetDevice, [:pointer], :enum
attach_function :cudaGetDeviceCount, [:pointer], :enum
attach_function :cudaGetDeviceProperties, [:pointer, :int], :enum
attach_function :cudaSetDevice, [:int], :enum
attach_function :cudaSetDeviceFlags, [:uint], :enum
attach_function :cudaSetValidDevices, [:pointer, :int], :enum
|
451
|
+
|
452
|
+
# CUDA Thread Management.
# Deprecated.
# The argument lists are identical to the cudaDevice* bindings of the same
# suffix (Get/SetCacheConfig, Get/SetLimit, Synchronize).
attach_function :cudaThreadExit, [], :enum
attach_function :cudaThreadGetCacheConfig, [:pointer], :enum
attach_function :cudaThreadGetLimit, [:pointer, CudaLimit], :enum
attach_function :cudaThreadSetCacheConfig, [CudaFunctionCache], :enum
attach_function :cudaThreadSetLimit, [CudaLimit, :size_t], :enum
attach_function :cudaThreadSynchronize, [], :enum
|
460
|
+
|
461
|
+
# CUDA Memory Management.
# Every binding returns :enum. Struct arguments written as `X.by_value` are
# passed by value; the 3D memcpy parameter structs are passed as :pointer.
# NOTE(review): :CudaStream is presumably an FFI typedef declared earlier in
# this file.
attach_function :cudaFree, [:pointer], :enum
attach_function :cudaFreeArray, [:pointer], :enum
attach_function :cudaFreeHost, [:pointer], :enum
attach_function :cudaGetSymbolAddress, [:pointer, :string], :enum
attach_function :cudaGetSymbolSize, [:pointer, :string], :enum
attach_function :cudaHostAlloc, [:pointer, :size_t, :uint], :enum
attach_function :cudaHostGetDevicePointer, [:pointer, :pointer, :uint], :enum
attach_function :cudaHostGetFlags, [:pointer, :pointer], :enum
attach_function :cudaHostRegister, [:pointer, :size_t, :uint], :enum
attach_function :cudaHostUnregister, [:pointer], :enum
attach_function :cudaMalloc, [:pointer, :size_t], :enum
attach_function :cudaMalloc3D, [:pointer, CudaExtent.by_value], :enum
attach_function :cudaMalloc3DArray, [:pointer, :pointer, CudaExtent.by_value, :uint], :enum
attach_function :cudaMallocArray, [:pointer, :pointer, :size_t, :size_t, :uint], :enum
attach_function :cudaMallocHost, [:pointer, :size_t], :enum
attach_function :cudaMallocPitch, [:pointer, :pointer, :size_t, :size_t], :enum
attach_function :cudaMemcpy, [:pointer, :pointer, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpy2D, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpy2DArrayToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpy2DAsync, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemcpy2DFromArray, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpy2DFromArrayAsync, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemcpy2DToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpy2DToArrayAsync, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemcpy3D, [:pointer], :enum
attach_function :cudaMemcpy3DAsync, [:pointer, :CudaStream], :enum
attach_function :cudaMemcpy3DPeer, [:pointer], :enum
attach_function :cudaMemcpy3DPeerAsync, [:pointer, :CudaStream], :enum
attach_function :cudaMemcpyArrayToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpyAsync, [:pointer, :pointer, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemcpyFromArray, [:pointer, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpyFromArrayAsync, [:pointer, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemcpyFromSymbol, [:pointer, :string, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpyFromSymbolAsync, [:pointer, :string, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemcpyPeer, [:pointer, :int, :pointer, :int, :size_t], :enum
attach_function :cudaMemcpyPeerAsync, [:pointer, :int, :pointer, :int, :size_t, :CudaStream], :enum
attach_function :cudaMemcpyToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpyToArrayAsync, [:pointer, :size_t, :size_t, :pointer, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemcpyToSymbol, [:string, :pointer, :size_t, :size_t, CudaMemcpyKind], :enum
attach_function :cudaMemcpyToSymbolAsync, [:string, :pointer, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :enum
attach_function :cudaMemGetInfo, [:pointer, :pointer], :enum
attach_function :cudaMemset, [:pointer, :int, :size_t], :enum
attach_function :cudaMemset2D, [:pointer, :size_t, :int, :size_t, :size_t], :enum
attach_function :cudaMemset2DAsync, [:pointer, :size_t, :int, :size_t, :size_t, :CudaStream], :enum
attach_function :cudaMemset3D, [CudaPitchedPtr.by_value, :int, CudaExtent.by_value], :enum
attach_function :cudaMemset3DAsync, [CudaPitchedPtr.by_value, :int, CudaExtent.by_value, :CudaStream], :enum
attach_function :cudaMemsetAsync, [:pointer, :int, :size_t, :CudaStream], :enum
# The make_* constructors are intentionally left unbound; this file defines
# pure-Ruby methods with the same names instead.
# NOTE(review): presumably they are header-only helpers that the shared
# library does not export; confirm before re-enabling these lines.
# attach_function :make_cudaExtent, [:size_t, :size_t, :size_t], CudaExtent
# attach_function :make_cudaPitchedPtr, [:pointer, :size_t, :size_t, :size_t], CudaPitchedPtr
# attach_function :make_cudaPos, [:size_t, :size_t, :size_t], CudaPos
|
512
|
+
|
513
|
+
# Build a CudaExtent populated with the given dimensions.
#
# @param w [Integer] value stored in the :width member
# @param h [Integer] value stored in the :height member
# @param d [Integer] value stored in the :depth member
# @return [CudaExtent] the newly created struct
def make_cudaExtent(w, h, d)
  extent = CudaExtent.new
  extent[:width] = w
  extent[:height] = h
  extent[:depth] = d
  extent
end
|
518
|
+
# Make make_cudaExtent callable directly on the enclosing module.
module_function :make_cudaExtent
|
519
|
+
|
520
|
+
# Build a CudaPitchedPtr populated with the given values.
#
# @param d [FFI::Pointer] value stored in the :ptr member
# @param p [Integer] value stored in the :pitch member
# @param xsz [Integer] value stored in the :xsize member
# @param ysz [Integer] value stored in the :ysize member
# @return [CudaPitchedPtr] the newly created struct
def make_cudaPitchedPtr(d, p, xsz, ysz)
  pitched = CudaPitchedPtr.new
  pitched[:ptr] = d
  pitched[:pitch] = p
  pitched[:xsize] = xsz
  pitched[:ysize] = ysz
  pitched
end
|
528
|
+
# Make make_cudaPitchedPtr callable directly on the enclosing module.
module_function :make_cudaPitchedPtr
|
529
|
+
|
530
|
+
# Build a CudaPos populated with the given coordinates.
#
# @param x [Integer] value stored in the :x member
# @param y [Integer] value stored in the :y member
# @param z [Integer] value stored in the :z member
# @return [CudaPos] the newly created struct
def make_cudaPos(x, y, z)
  # Named `pos` rather than `p` so the local does not shadow Kernel#p.
  pos = CudaPos.new
  pos[:x] = x
  pos[:y] = y
  pos[:z] = z
  pos
end
|
537
|
+
# Make make_cudaPos callable directly on the enclosing module.
module_function :make_cudaPos
|
538
|
+
|
539
|
+
# CUDA Unified Addressing.
# Takes two :pointer arguments and returns :enum.
attach_function :cudaPointerGetAttributes, [:pointer, :pointer], :enum
|
541
|
+
|
542
|
+
# CUDA Peer Device Memory Access.
# Every binding returns :enum; devices are passed as :int.
attach_function :cudaDeviceCanAccessPeer, [:pointer, :int, :int], :enum
attach_function :cudaDeviceDisablePeerAccess, [:int], :enum
attach_function :cudaDeviceEnablePeerAccess, [:int, :uint], :enum
|
546
|
+
|
547
|
+
# CUDA Execution Control.
# Every binding returns :enum. cudaConfigureCall takes its two Dim3
# arguments by value; functions are identified by :string.
attach_function :cudaConfigureCall, [Dim3.by_value, Dim3.by_value, :size_t, :CudaStream], :enum
attach_function :cudaFuncGetAttributes, [:pointer, :string], :enum
attach_function :cudaFuncSetCacheConfig, [:string, CudaFunctionCache], :enum
attach_function :cudaLaunch, [:string], :enum
attach_function :cudaSetDoubleForDevice, [:pointer], :enum
attach_function :cudaSetDoubleForHost, [:pointer], :enum
attach_function :cudaSetupArgument, [:pointer, :size_t, :size_t], :enum
|
555
|
+
|
556
|
+
# CUDA Stream Management.
# Every binding returns :enum. cudaStreamCreate takes a :pointer; the other
# bindings take the stream itself as :CudaStream.
attach_function :cudaStreamCreate, [:pointer], :enum
attach_function :cudaStreamDestroy, [:CudaStream], :enum
attach_function :cudaStreamQuery, [:CudaStream], :enum
attach_function :cudaStreamSynchronize, [:CudaStream], :enum
attach_function :cudaStreamWaitEvent, [:CudaStream, :CudaEvent, :uint], :enum
|
562
|
+
|
563
|
+
# CUDA Event Management.
# Every binding returns :enum. The two create functions take a :pointer as
# their first argument; the other bindings take events as :CudaEvent.
attach_function :cudaEventCreate, [:pointer], :enum
attach_function :cudaEventCreateWithFlags, [:pointer, :uint], :enum
attach_function :cudaEventDestroy, [:CudaEvent], :enum
attach_function :cudaEventElapsedTime, [:pointer, :CudaEvent, :CudaEvent], :enum
attach_function :cudaEventQuery, [:CudaEvent], :enum
attach_function :cudaEventRecord, [:CudaEvent, :CudaStream], :enum
attach_function :cudaEventSynchronize, [:CudaEvent], :enum
|
571
|
+
|
572
|
+
# CUDA Texture Reference Management.
# Every binding returns :enum except cudaCreateChannelDesc, which returns a
# CudaChannelFormatDesc struct by value.
attach_function :cudaBindTexture, [:pointer, :pointer, :pointer, :pointer, :size_t], :enum
attach_function :cudaBindTexture2D, [:pointer, :pointer, :pointer, :pointer, :size_t, :size_t, :size_t], :enum
attach_function :cudaBindTextureToArray, [:pointer, :pointer, :pointer], :enum
attach_function :cudaCreateChannelDesc, [:int, :int, :int, :int, CudaChannelFormatKind], CudaChannelFormatDesc.by_value
attach_function :cudaGetChannelDesc, [:pointer, :pointer], :enum
attach_function :cudaGetTextureAlignmentOffset, [:pointer, :pointer], :enum
attach_function :cudaGetTextureReference, [:pointer, :string], :enum
attach_function :cudaUnbindTexture, [:pointer], :enum
|
581
|
+
|
582
|
+
# CUDA Surface Reference Management.
# Both bindings return :enum.
attach_function :cudaBindSurfaceToArray, [:pointer, :pointer, :pointer], :enum
attach_function :cudaGetSurfaceReference, [:pointer, :string], :enum
|
585
|
+
|
586
|
+
end # module
|
587
|
+
end # module
|
588
|
+
end # module
|