sgc-ruby-cuda 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +2 -0
- data/COPYING +674 -0
- data/README.rdoc +106 -0
- data/Rakefile +76 -0
- data/doc/devel.rdoc +77 -0
- data/doc/features.rdoc +55 -0
- data/lib/cuda/driver/context.rb +236 -0
- data/lib/cuda/driver/cu.rb +60 -0
- data/lib/cuda/driver/device.rb +155 -0
- data/lib/cuda/driver/deviceptr.rb +69 -0
- data/lib/cuda/driver/error.rb +182 -0
- data/lib/cuda/driver/event.rb +124 -0
- data/lib/cuda/driver/ffi-cu.rb +620 -0
- data/lib/cuda/driver/function.rb +293 -0
- data/lib/cuda/driver/init.rb +45 -0
- data/lib/cuda/driver/memory.rb +134 -0
- data/lib/cuda/driver/module.rb +142 -0
- data/lib/cuda/driver/rubycu.rb +37 -0
- data/lib/cuda/driver/stream.rb +128 -0
- data/lib/cuda/driver/version.rb +42 -0
- data/lib/cuda/runtime/cuda.rb +65 -0
- data/lib/cuda/runtime/device.rb +175 -0
- data/lib/cuda/runtime/error.rb +197 -0
- data/lib/cuda/runtime/event.rb +117 -0
- data/lib/cuda/runtime/ffi-cuda.rb +588 -0
- data/lib/cuda/runtime/function.rb +161 -0
- data/lib/cuda/runtime/memory.rb +110 -0
- data/lib/cuda/runtime/rubycuda.rb +34 -0
- data/lib/cuda/runtime/stream.rb +126 -0
- data/lib/cuda/runtime/thread.rb +81 -0
- data/lib/cuda/runtime/version.rb +51 -0
- data/lib/ffi/prettystruct.rb +32 -0
- data/lib/helpers/flags.rb +82 -0
- data/lib/helpers/interface/ienum.rb +45 -0
- data/lib/helpers/klass.rb +45 -0
- data/lib/memory/buffer.rb +125 -0
- data/lib/memory/interface/ibuffer.rb +63 -0
- data/lib/memory/pointer.rb +72 -0
- data/lib/rubycu.rb +1 -0
- data/lib/rubycuda.rb +1 -0
- data/test/bad.ptx +0 -0
- data/test/memory/test_buffer.rb +93 -0
- data/test/rubycu/test_cucontext.rb +148 -0
- data/test/rubycu/test_cudevice.rb +69 -0
- data/test/rubycu/test_cudeviceptr.rb +43 -0
- data/test/rubycu/test_cuevent.rb +81 -0
- data/test/rubycu/test_cufunction.rb +165 -0
- data/test/rubycu/test_cumemory.rb +113 -0
- data/test/rubycu/test_cumodule.rb +114 -0
- data/test/rubycu/test_custream.rb +77 -0
- data/test/rubycu/test_cuversion.rb +39 -0
- data/test/rubycu/testbase.rb +107 -0
- data/test/rubycuda/test_cudadevice.rb +125 -0
- data/test/rubycuda/test_cudaerror.rb +48 -0
- data/test/rubycuda/test_cudaevent.rb +78 -0
- data/test/rubycuda/test_cudafunction.rb +106 -0
- data/test/rubycuda/test_cudamemory.rb +90 -0
- data/test/rubycuda/test_cudastream.rb +72 -0
- data/test/rubycuda/test_cudathread.rb +69 -0
- data/test/rubycuda/test_cudaversion.rb +41 -0
- data/test/rubycuda/testbase.rb +67 -0
- data/test/vadd.cu +21 -0
- data/version.rb +1 -0
- metadata +180 -0
@@ -0,0 +1,197 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2010-2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'cuda/runtime/ffi-cuda'
|
26
|
+
|
27
|
+
|
28
|
+
module SGC
|
29
|
+
module Cuda
|
30
|
+
|
31
|
+
# Exception hierarchy for CUDA runtime errors.
# Every class derives from CudaStandardError, which is itself a RuntimeError,
# so callers can rescue the whole family with a single clause.
module Error

    # Common ancestor of all CUDA runtime exceptions.
    CudaStandardError = Class.new(RuntimeError)

    CudaMissingConfigurationError = Class.new(CudaStandardError)
    CudaMemoryAllocationError = Class.new(CudaStandardError)
    CudaInitializationError = Class.new(CudaStandardError)
    CudaLaunchFailureError = Class.new(CudaStandardError)
    # @deprecated
    CudaPriorLaunchFailureError = Class.new(CudaStandardError)
    CudaLaunchTimeoutError = Class.new(CudaStandardError)
    CudaLaunchOutOfResourcesError = Class.new(CudaStandardError)
    CudaInvalidDeviceFunctionError = Class.new(CudaStandardError)
    CudaInvalidConfigurationError = Class.new(CudaStandardError)
    CudaInvalidDeviceError = Class.new(CudaStandardError)
    CudaInvalidValueError = Class.new(CudaStandardError)
    CudaInvalidPitchValueError = Class.new(CudaStandardError)
    CudaInvalidSymbolError = Class.new(CudaStandardError)
    CudaMapBufferObjectFailedError = Class.new(CudaStandardError)
    CudaUnmapBufferObjectFailedError = Class.new(CudaStandardError)
    CudaInvalidHostPointerError = Class.new(CudaStandardError)
    CudaInvalidDevicePointerError = Class.new(CudaStandardError)
    CudaInvalidTextureError = Class.new(CudaStandardError)
    CudaInvalidTextureBindingError = Class.new(CudaStandardError)
    CudaInvalidChannelDescriptorError = Class.new(CudaStandardError)
    CudaInvalidMemcpyDirectionError = Class.new(CudaStandardError)
    # @deprecated
    CudaAddressOfConstantError = Class.new(CudaStandardError)
    # @deprecated
    CudaTextureFetchFailedError = Class.new(CudaStandardError)
    # @deprecated
    CudaTextureNotFoundError = Class.new(CudaStandardError)
    # @deprecated
    CudaSynchronizationError = Class.new(CudaStandardError)
    CudaInvalidFilterSettingError = Class.new(CudaStandardError)
    CudaInvalidNormSettingError = Class.new(CudaStandardError)
    # @deprecated
    CudaMixedDeviceExecutionError = Class.new(CudaStandardError)
    # @deprecated
    CudaCudartUnloadingError = Class.new(CudaStandardError)
    CudaUnknownError = Class.new(CudaStandardError)
    CudaNotYetImplementedError = Class.new(CudaStandardError)
    # @deprecated
    CudaMemoryValueTooLargeError = Class.new(CudaStandardError)
    CudaInvalidResourceHandleError = Class.new(CudaStandardError)
    CudaNotReadyError = Class.new(CudaStandardError)
    CudaInsufficientDriverError = Class.new(CudaStandardError)
    CudaSetOnActiveProcessError = Class.new(CudaStandardError)
    CudaInvalidSurfaceError = Class.new(CudaStandardError)
    CudaNoDeviceError = Class.new(CudaStandardError)
    CudaECCUncorrectableError = Class.new(CudaStandardError)
    CudaSharedObjectSymbolNotFoundError = Class.new(CudaStandardError)
    CudaSharedObjectInitFailedError = Class.new(CudaStandardError)
    CudaUnsupportedLimitError = Class.new(CudaStandardError)
    CudaDuplicateVariableNameError = Class.new(CudaStandardError)
    CudaDuplicateTextureNameError = Class.new(CudaStandardError)
    CudaDuplicateSurfaceNameError = Class.new(CudaStandardError)
    CudaDevicesUnavailableError = Class.new(CudaStandardError)
    CudaInvalidKernelImageError = Class.new(CudaStandardError)
    CudaNoKernelImageForDeviceError = Class.new(CudaStandardError)
    CudaIncompatibleDriverContextError = Class.new(CudaStandardError)
    CudaPeerAccessAlreadyEnabledError = Class.new(CudaStandardError)
    CudaPeerAccessNotEnabledError = Class.new(CudaStandardError)
    CudaDeviceAlreadyInUseError = Class.new(CudaStandardError)
    CudaProfilerDisabledError = Class.new(CudaStandardError)
    CudaProfilerNotInitializedError = Class.new(CudaStandardError)
    CudaProfilerAlreadyStartedError = Class.new(CudaStandardError)
    CudaProfilerAlreadyStoppedError = Class.new(CudaStandardError)
    CudaStartupFailureError = Class.new(CudaStandardError)
    CudaAPIFailureBaseError = Class.new(CudaStandardError)

end
|
94
|
+
|
95
|
+
|
96
|
+
# Look up the message text for a CUDA error by delegating to cudaGetErrorString.
# @param [Integer, CudaError] e A CUDA error value or label.
# @return [String] The error string of _e_.
def get_error_string(e)
    API.cudaGetErrorString(e)
end
module_function :get_error_string
|
102
|
+
|
103
|
+
|
104
|
+
# Fetch the most recent CUDA error by delegating to cudaGetLastError.
# @return [Integer] The error value of the last CUDA error.
def get_last_error
    API.cudaGetLastError
end
module_function :get_last_error
|
109
|
+
|
110
|
+
|
111
|
+
# Inspect the most recent CUDA error without resetting it
# (delegates to cudaPeekAtLastError).
# @return [Integer] The error value of the last CUDA error.
def peek_at_last_error
    API.cudaPeekAtLastError
end
module_function :peek_at_last_error
|
117
|
+
|
118
|
+
# Internal helpers that turn CUDA status codes into Ruby exceptions.
# @private
module Pvt

    include Error


    # Raise the exception matching _status_ unless it signals success.
    #
    # @param [Integer] status The status value returned by a CUDA runtime call.
    # @param [String, nil] msg Optional context appended to the CUDA error string.
    # @return [nil] When _status_ equals CUDA_SUCCESS.
    # @raise [CudaStandardError] The subclass mapped to _status_, or
    #   CudaStandardError itself when _status_ has no mapping.
    def self.handle_error(status, msg = nil)
        return nil if status == CUDA_SUCCESS

        # A status that is missing from the enum or from the table below would
        # otherwise end up as `raise nil, ...`, which is a TypeError that hides
        # the real failure. Fall back to the generic error class instead.
        error_class = @error_class_by_enum[API::CudaError[status]] || CudaStandardError

        # Only append the separator when there is context to show.
        text = API::cudaGetErrorString(status)
        text = "#{text} : #{msg}" if msg
        raise error_class, text
    end


    CUDA_SUCCESS = API::CudaError[:SUCCESS]
    CUDA_ERROR_NOT_READY = API::CudaError[:ERROR_NOT_READY]

    # Maps each CudaError label to the exception class raised for it.
    @error_class_by_enum = {
        ERROR_MISSING_CONFIGURATION: CudaMissingConfigurationError,
        ERROR_MEMORY_ALLOCATION: CudaMemoryAllocationError,
        ERROR_INITIALIZATION_ERROR: CudaInitializationError,
        ERROR_LAUNCH_FAILURE: CudaLaunchFailureError,
        ERROR_PRIOR_LAUNCH_FAILURE: CudaPriorLaunchFailureError,
        ERROR_LAUNCH_TIMEOUT: CudaLaunchTimeoutError,
        ERROR_LAUNCH_OUT_OF_RESOURCES: CudaLaunchOutOfResourcesError,
        ERROR_INVALID_DEVICE_FUNCTION: CudaInvalidDeviceFunctionError,
        ERROR_INVALID_CONFIGURATION: CudaInvalidConfigurationError,
        ERROR_INVALID_DEVICE: CudaInvalidDeviceError,
        ERROR_INVALID_VALUE: CudaInvalidValueError,
        ERROR_INVALID_PITCH_VALUE: CudaInvalidPitchValueError,
        ERROR_INVALID_SYMBOL: CudaInvalidSymbolError,
        ERROR_MAP_BUFFER_OBJECT_FAILED: CudaMapBufferObjectFailedError,
        ERROR_UNMAP_BUFFER_OBJECT_FAILED: CudaUnmapBufferObjectFailedError,
        ERROR_INVALID_HOST_POINTER: CudaInvalidHostPointerError,
        ERROR_INVALID_DEVICE_POINTER: CudaInvalidDevicePointerError,
        ERROR_INVALID_TEXTURE: CudaInvalidTextureError,
        ERROR_INVALID_TEXTURE_BINDING: CudaInvalidTextureBindingError,
        ERROR_INVALID_CHANNEL_DESCRIPTOR: CudaInvalidChannelDescriptorError,
        ERROR_INVALID_MEMCPY_DIRECTION: CudaInvalidMemcpyDirectionError,
        ERROR_ADDRESS_OF_CONSTANT: CudaAddressOfConstantError,
        ERROR_TEXTURE_FETCH_FAILED: CudaTextureFetchFailedError,
        ERROR_TEXTURE_NOT_BOUND: CudaTextureNotFoundError,
        ERROR_SYNCHRONIZATION_ERROR: CudaSynchronizationError,
        ERROR_INVALID_FILTER_SETTING: CudaInvalidFilterSettingError,
        ERROR_INVALID_NORM_SETTING: CudaInvalidNormSettingError,
        ERROR_MIXED_DEVICE_EXECUTION: CudaMixedDeviceExecutionError,
        ERROR_CUDART_UNLOADING: CudaCudartUnloadingError,
        ERROR_UNKNOWN: CudaUnknownError,
        ERROR_NOT_YET_IMPLEMENTED: CudaNotYetImplementedError,
        ERROR_MEMORY_VALUE_TOO_LARGE: CudaMemoryValueTooLargeError,
        ERROR_INVALID_RESOURCE_HANDLE: CudaInvalidResourceHandleError,
        ERROR_NOT_READY: CudaNotReadyError,
        ERROR_INSUFFICIENT_DRIVER: CudaInsufficientDriverError,
        ERROR_SET_ON_ACTIVE_PROCESS: CudaSetOnActiveProcessError,
        ERROR_INVALID_SURFACE: CudaInvalidSurfaceError,
        ERROR_NO_DEVICE: CudaNoDeviceError,
        ERROR_ECC_UNCORRECTABLE: CudaECCUncorrectableError,
        ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: CudaSharedObjectSymbolNotFoundError,
        ERROR_SHARED_OBJECT_INIT_FAILED: CudaSharedObjectInitFailedError,
        ERROR_UNSUPPORTED_LIMIT: CudaUnsupportedLimitError,
        ERROR_DUPLICATE_VARIABLE_NAME: CudaDuplicateVariableNameError,
        ERROR_DUPLICATE_TEXTURE_NAME: CudaDuplicateTextureNameError,
        ERROR_DUPLICATE_SURFACE_NAME: CudaDuplicateSurfaceNameError,
        ERROR_DEVICES_UNAVAILABLE: CudaDevicesUnavailableError,
        ERROR_INVALID_KERNEL_IMAGE: CudaInvalidKernelImageError,
        ERROR_NO_KERNEL_IMAGE_FOR_DEVICE: CudaNoKernelImageForDeviceError,
        ERROR_INCOMPATIBLE_DRIVER_CONTEXT: CudaIncompatibleDriverContextError,
        ERROR_PEER_ACCESS_ALREADY_ENABLED: CudaPeerAccessAlreadyEnabledError,
        ERROR_PEER_ACCESS_NOT_ENABLED: CudaPeerAccessNotEnabledError,
        ERROR_DEVICE_ALREADY_IN_USE: CudaDeviceAlreadyInUseError,
        ERROR_PROFILER_DISABLED: CudaProfilerDisabledError,
        ERROR_PROFILER_NOT_INITIALIZED: CudaProfilerNotInitializedError,
        ERROR_PROFILER_ALREADY_STARTED: CudaProfilerAlreadyStartedError,
        ERROR_PROFILER_ALREADY_STOPPED: CudaProfilerAlreadyStoppedError,
        ERROR_STARTUP_FAILURE: CudaStartupFailureError,
        ERROR_API_FAILURE_BASE: CudaAPIFailureBaseError,
    }

end
|
195
|
+
|
196
|
+
end # module
|
197
|
+
end # module
|
@@ -0,0 +1,117 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2010-2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'cuda/runtime/ffi-cuda'
|
26
|
+
require 'cuda/runtime/error'
|
27
|
+
|
28
|
+
|
29
|
+
module SGC
|
30
|
+
module Cuda
|
31
|
+
|
32
|
+
# Wrapper around a CUDA runtime event. The native handle is kept in an FFI
# memory pointer and read back through #to_api whenever a runtime call needs it.
# Instances are obtained through CudaEvent.create; CudaEvent.new is private.
class CudaEvent

    # Create and return an event with _flags_.
    # @overload create
    # @overload create(flags)
    # @return [CudaEvent] An event created with _flags_.
    # @raise [CudaStandardError] If the runtime reports a failure.
    def self.create(*flags)
        flags = :DEFAULT if flags.empty?
        p = FFI::MemoryPointer.new(:CudaEvent)
        f = CudaEventFlags.value(flags)
        status = API::cudaEventCreateWithFlags(p, f)
        Pvt::handle_error(status, "Failed to create event: flags = #{flags}")
        new(p)
    end


    # Destroy this event.
    # @return [nil]
    # @raise [CudaStandardError] If the runtime reports a failure.
    def destroy
        status = API::cudaEventDestroy(self.to_api)
        Pvt::handle_error(status, "Failed to destroy event.")
        # Clear the stored handle so the destroyed event is not referenced again.
        API::write_cudaevent(@pevent, 0)
        nil
    end


    # @return [Boolean] Return true if this event has been recorded. Otherwise, return false.
    # @raise [CudaStandardError] If the runtime reports anything other than success or "not ready".
    def query
        status = API::cudaEventQuery(self.to_api)
        return true if status == Pvt::CUDA_SUCCESS
        return false if status == Pvt::CUDA_ERROR_NOT_READY

        Pvt::handle_error(status, "Failed to query event.")
        # handle_error raises for every non-success status, so this is a safety net only.
        raise CudaStandardError, "Error handling fails to catch this error."
    end


    # Record this event asynchronously on _stream_.
    # @param [Integer, CudaStream] stream The CUDA stream to record this event on.
    #     Setting _stream_ on anything other than an instance of CudaStream will record on any stream.
    # @return [CudaEvent] This event.
    # @raise [CudaStandardError] If the runtime reports a failure.
    def record(stream = 0)
        s = Pvt::parse_stream(stream)
        status = API::cudaEventRecord(self.to_api, s)
        Pvt::handle_error(status, "Failed to record event.")
        self
    end


    # Block the calling CPU thread until this event has been recorded.
    # @return [CudaEvent] This event.
    # @raise [CudaStandardError] If the runtime reports a failure.
    def synchronize
        status = API::cudaEventSynchronize(self.to_api)
        Pvt::handle_error(status, "Failed to synchronize event.")
        self
    end


    # Compute the elapsed time (ms) from _event_start_ to _event_end_.
    # @param [CudaEvent] event_start The event corresponds to the start time.
    # @param [CudaEvent] event_end The event corresponds to the end time.
    # @return [Numeric] The elapsed time in ms.
    # @raise [CudaStandardError] If the runtime reports a failure; the output
    #     buffer is only read after the call succeeded.
    def self.elapsed_time(event_start, event_end)
        t = FFI::MemoryPointer.new(:float)
        status = API::cudaEventElapsedTime(t, event_start.to_api, event_end.to_api)
        Pvt::handle_error(status, "Failed to compute elapsed time.")
        t.read_float
    end


    # @private
    # @param ptr The pointer object that holds the native event handle.
    def initialize(ptr)
        @pevent = ptr
    end
    private_class_method :new


    # @private
    # @return The native event handle read from the stored pointer.
    def to_api
        API::read_cudaevent(@pevent)
    end

end
|
115
|
+
|
116
|
+
end # module
|
117
|
+
end # module
|
@@ -0,0 +1,588 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2010-2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'ffi'
|
26
|
+
require 'ffi/prettystruct'
|
27
|
+
require 'helpers/interface/ienum'
|
28
|
+
require 'helpers/flags'
|
29
|
+
require 'helpers/klass'
|
30
|
+
|
31
|
+
|
32
|
+
module SGC
|
33
|
+
module Cuda
|
34
|
+
module API
|
35
|
+
|
36
|
+
extend FFI::Library
|
37
|
+
ffi_lib "cudart"
|
38
|
+
|
39
|
+
# Base class for enum wrapper classes. Subclasses get class-level `symbols`
# and `[]` methods that forward to the API constant carrying the name returned
# by SGC::Helper.classname for that subclass.
class Enum
    extend SGC::Helper::IEnum
    extend SGC::Helper::FlagsValue

    # Inheritance hook: defines the forwarding singleton methods on _subclass_.
    # @param [Class] subclass The class that has just inherited from Enum.
    def self.inherited(subclass)
        target = "SGC::Cuda::API::#{SGC::Helper.classname(subclass)}"
        subclass.instance_eval <<-RUBY
            def symbols
                #{target}.symbols
            end

            def [](*args)
                #{target}[*args]
            end
        RUBY
    end
end
|
55
|
+
|
56
|
+
CudaError = enum(
|
57
|
+
:SUCCESS, 0,
|
58
|
+
:ERROR_MISSING_CONFIGURATION, 1,
|
59
|
+
:ERROR_MEMORY_ALLOCATION, 2,
|
60
|
+
:ERROR_INITIALIZATION_ERROR, 3,
|
61
|
+
:ERROR_LAUNCH_FAILURE, 4,
|
62
|
+
:ERROR_PRIOR_LAUNCH_FAILURE, 5, # Deprecated.
|
63
|
+
:ERROR_LAUNCH_TIMEOUT, 6,
|
64
|
+
:ERROR_LAUNCH_OUT_OF_RESOURCES, 7,
|
65
|
+
:ERROR_INVALID_DEVICE_FUNCTION, 8,
|
66
|
+
:ERROR_INVALID_CONFIGURATION, 9,
|
67
|
+
:ERROR_INVALID_DEVICE, 10,
|
68
|
+
:ERROR_INVALID_VALUE, 11,
|
69
|
+
:ERROR_INVALID_PITCH_VALUE, 12,
|
70
|
+
:ERROR_INVALID_SYMBOL, 13,
|
71
|
+
:ERROR_MAP_BUFFER_OBJECT_FAILED, 14,
|
72
|
+
:ERROR_UNMAP_BUFFER_OBJECT_FAILED, 15,
|
73
|
+
:ERROR_INVALID_HOST_POINTER, 16,
|
74
|
+
:ERROR_INVALID_DEVICE_POINTER, 17,
|
75
|
+
:ERROR_INVALID_TEXTURE, 18,
|
76
|
+
:ERROR_INVALID_TEXTURE_BINDING, 19,
|
77
|
+
:ERROR_INVALID_CHANNEL_DESCRIPTOR, 20,
|
78
|
+
:ERROR_INVALID_MEMCPY_DIRECTION, 21,
|
79
|
+
:ERROR_ADDRESS_OF_CONSTANT, 22, # Deprecated.
|
80
|
+
:ERROR_TEXTURE_FETCH_FAILED, 23, # Deprecated.
|
81
|
+
:ERROR_TEXTURE_NOT_BOUND, 24, # Deprecated.
|
82
|
+
:ERROR_SYNCHRONIZATION_ERROR, 25, # Deprecated.
|
83
|
+
:ERROR_INVALID_FILTER_SETTING, 26,
|
84
|
+
:ERROR_INVALID_NORM_SETTING, 27,
|
85
|
+
:ERROR_MIXED_DEVICE_EXECUTION, 28, # Deprecated.
|
86
|
+
:ERROR_CUDART_UNLOADING, 29, # Deprecated.
|
87
|
+
:ERROR_UNKNOWN, 30,
|
88
|
+
:ERROR_NOT_YET_IMPLEMENTED, 31,
|
89
|
+
:ERROR_MEMORY_VALUE_TOO_LARGE, 32, # Deprecated.
|
90
|
+
:ERROR_INVALID_RESOURCE_HANDLE, 33,
|
91
|
+
:ERROR_NOT_READY, 34,
|
92
|
+
:ERROR_INSUFFICIENT_DRIVER, 35,
|
93
|
+
:ERROR_SET_ON_ACTIVE_PROCESS, 36,
|
94
|
+
:ERROR_INVALID_SURFACE, 37,
|
95
|
+
:ERROR_NO_DEVICE, 38,
|
96
|
+
:ERROR_ECC_UNCORRECTABLE, 39,
|
97
|
+
:ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, 40,
|
98
|
+
:ERROR_SHARED_OBJECT_INIT_FAILED, 41,
|
99
|
+
:ERROR_UNSUPPORTED_LIMIT, 42,
|
100
|
+
:ERROR_DUPLICATE_VARIABLE_NAME, 43,
|
101
|
+
:ERROR_DUPLICATE_TEXTURE_NAME, 44,
|
102
|
+
:ERROR_DUPLICATE_SURFACE_NAME, 45,
|
103
|
+
:ERROR_DEVICES_UNAVAILABLE, 46,
|
104
|
+
:ERROR_INVALID_KERNEL_IMAGE, 47,
|
105
|
+
:ERROR_NO_KERNEL_IMAGE_FOR_DEVICE, 48,
|
106
|
+
:ERROR_INCOMPATIBLE_DRIVER_CONTEXT, 49,
|
107
|
+
:ERROR_PEER_ACCESS_ALREADY_ENABLED, 50,
|
108
|
+
:ERROR_PEER_ACCESS_NOT_ENABLED, 51,
|
109
|
+
:ERROR_DEVICE_ALREADY_IN_USE, 54,
|
110
|
+
:ERROR_PROFILER_DISABLED, 55,
|
111
|
+
:ERROR_PROFILER_NOT_INITIALIZED, 56,
|
112
|
+
:ERROR_PROFILER_ALREADY_STARTED, 57,
|
113
|
+
:ERROR_PROFILER_ALREADY_STOPPED, 58,
|
114
|
+
:ERROR_STARTUP_FAILURE, 0x7F,
|
115
|
+
:ERROR_API_FAILURE_BASE, 10000,
|
116
|
+
)
|
117
|
+
|
118
|
+
CudaDeviceFlags = enum(
|
119
|
+
:SCHEDULE_AUTO, 0,
|
120
|
+
:SCHEDULE_SPIN, 1,
|
121
|
+
:SCHEDULE_YIELD, 2,
|
122
|
+
:SCHEDULE_BLOCKING_SYNC, 4,
|
123
|
+
:BLOCKING_SYNC, 4, # Deprecated. Use :SCHEDULE_BLOCKING_SYNC.
|
124
|
+
:MAP_HOST, 8,
|
125
|
+
:LMEM_RESIZE_TO_MAX, 16,
|
126
|
+
)
|
127
|
+
|
128
|
+
CudaEventFlags = enum(
|
129
|
+
:DEFAULT, 0,
|
130
|
+
:BLOCKING_SYNC, 1,
|
131
|
+
:DISABLE_TIMING, 2,
|
132
|
+
)
|
133
|
+
|
134
|
+
CudaHostAllocFlags = enum(
|
135
|
+
:DEFAULT, 0,
|
136
|
+
:PORTABLE, 1,
|
137
|
+
:MAPPED, 2,
|
138
|
+
:WRITE_COMBINED, 4,
|
139
|
+
)
|
140
|
+
|
141
|
+
CudaHostRegisterFlags = enum(
|
142
|
+
:DEFAULT, 0,
|
143
|
+
:PORTABLE, 1,
|
144
|
+
:MAPPED, 2,
|
145
|
+
)
|
146
|
+
|
147
|
+
CudaArrayFlags = enum(
|
148
|
+
:DEFAULT, 0x00,
|
149
|
+
:LAYERED, 0x01,
|
150
|
+
:SURFACE_LOAD_STORE, 0x02,
|
151
|
+
)
|
152
|
+
|
153
|
+
CudaMemoryType = enum(
|
154
|
+
:Host, 1,
|
155
|
+
:DEVICE, 2,
|
156
|
+
)
|
157
|
+
|
158
|
+
CudaMemcpyKind = enum(
|
159
|
+
:HOST_TO_HOST, 0,
|
160
|
+
:HOST_TO_DEVICE, 1,
|
161
|
+
:DEVICE_TO_HOST, 2,
|
162
|
+
:DEVICE_TO_DEVICE, 3,
|
163
|
+
:DEFAULT, 4,
|
164
|
+
)
|
165
|
+
|
166
|
+
CudaChannelFormatKind = enum(
|
167
|
+
:SIGNED, 0,
|
168
|
+
:UNSIGNED, 1,
|
169
|
+
:FLOAT, 2,
|
170
|
+
:None,3,
|
171
|
+
)
|
172
|
+
|
173
|
+
CudaFunctionCache = enum(
|
174
|
+
:PREFER_NONE, 0,
|
175
|
+
:PREFER_SHARED, 1,
|
176
|
+
:PREFER_L1, 2,
|
177
|
+
)
|
178
|
+
|
179
|
+
CudaLimit = enum(
|
180
|
+
:STACK_SIZE, 0x00,
|
181
|
+
:PRINTF_FIFO_SIZE, 0x01,
|
182
|
+
:MALLOC_HEAP_SIZE, 0x02,
|
183
|
+
)
|
184
|
+
|
185
|
+
CudaOutputMode = enum(
|
186
|
+
:KEY_VALUE_PAIR, 0x00,
|
187
|
+
:CSV, 0x01,
|
188
|
+
)
|
189
|
+
|
190
|
+
CudaComputeMode = enum(
|
191
|
+
:DEFAULT, 0,
|
192
|
+
:EXCLUSIVE, 1,
|
193
|
+
:PROHIBITED, 2,
|
194
|
+
:EXCLUSIVE_PROCESS, 3,
|
195
|
+
)
|
196
|
+
|
197
|
+
CudaSurfaceBoundaryMode = enum(
|
198
|
+
:ZERO, 0,
|
199
|
+
:CLAMP, 1,
|
200
|
+
:TRAP, 2,
|
201
|
+
)
|
202
|
+
|
203
|
+
CudaSurfaceFormatMode = enum(
|
204
|
+
:FORCED, 0,
|
205
|
+
:AUTO, 1,
|
206
|
+
)
|
207
|
+
|
208
|
+
CudaTextureAddressMode = enum(
|
209
|
+
:WRAP, 0,
|
210
|
+
:CLAMP, 1,
|
211
|
+
:MIRROR, 2,
|
212
|
+
:BORDER, 3,
|
213
|
+
)
|
214
|
+
|
215
|
+
CudaTextureFilterMode = enum(
|
216
|
+
:POINT, 0,
|
217
|
+
:LINEAR, 1,
|
218
|
+
)
|
219
|
+
|
220
|
+
CudaTextureReadMode = enum(
|
221
|
+
:ELEMENT_TYPE, 0,
|
222
|
+
:NORMALIZED_FLOAT, 1,
|
223
|
+
)
|
224
|
+
|
225
|
+
FFI::typedef :int, :enum
|
226
|
+
FFI::typedef :pointer, :CudaStream
|
227
|
+
FFI::typedef :pointer, :CudaEvent
|
228
|
+
|
229
|
+
# Thin accessors used to read values out of, and write values into, pointer
# objects. Each one forwards to the pointer method of the same name.
# Only the typed aliases further down are exposed as module functions.

# @param ptr The pointer object to read from.
# @return The value returned by ptr.read_int.
def read_int(ptr); ptr.read_int; end

# @param ptr The pointer object to read from.
# @return The value returned by ptr.read_long.
def read_long(ptr); ptr.read_long; end

# @param ptr The pointer object to read from.
# @return The value returned by ptr.read_pointer.
def read_pointer(ptr); ptr.read_pointer; end

# @param ptr The pointer object to write to.
# @param value The value handed to ptr.write_int.
def write_int(ptr, value); ptr.write_int(value); end

# @param ptr The pointer object to write to.
# @param value The value handed to ptr.write_long.
def write_long(ptr, value); ptr.write_long(value); end

# @param ptr The pointer object to write to.
# @param value The value handed to ptr.write_pointer.
def write_pointer(ptr, value); ptr.write_pointer(value); end

# Typed names for the generic readers.
# NOTE(review): read_size_t/write_size_t go through the long accessors; this
# assumes long and size_t have the same width on the target platform - confirm.
alias read_size_t read_long
alias read_enum read_int
alias read_cudastream read_pointer
alias read_cudaevent read_pointer

# Typed names for the generic writers.
alias write_size_t write_long
alias write_enum write_int
alias write_cudastream write_pointer
alias write_cudaevent write_pointer

module_function :read_size_t
module_function :read_enum
module_function :read_cudastream
module_function :read_cudaevent

module_function :write_size_t
module_function :write_enum
module_function :write_cudastream
module_function :write_cudaevent
|
256
|
+
|
257
|
+
|
258
|
+
# FFI struct made of one field, :array, holding three unsigned integers that
# are exposed as x, y and z (and by index 0..2).
class Dim3 < FFI::Struct
    layout(
        :array, [:uint, 3],
    )

    # Keep private handles on the inherited methods before they are redefined below.
    alias :init :initialize
    alias :get :[]
    alias :set :[]=
    private :init, :get, :set

    # @param x Value stored at index 0.
    # @param y Value stored at index 1.
    # @param z Value stored at index 2.
    def initialize(x, y, z)
        init
        # Cache the inline array accessor so the component methods can use it directly.
        @array = get(:array)
        @array[0] = x
        @array[1] = y
        @array[2] = z
    end

    # @param index Position in the three-element array.
    # @return The component stored at _index_.
    def [](index)
        @array[index]
    end

    # @param index Position in the three-element array.
    # @param value The new component value.
    def []=(index, value)
        @array[index] = value
    end

    # @return The first component.
    def x
        @array[0]
    end

    # @return The second component.
    def y
        @array[1]
    end

    # @return The third component.
    def z
        @array[2]
    end

    # @param value The new first component.
    def x=(value)
        @array[0] = value
    end

    # @param value The new second component.
    def y=(value)
        @array[1] = value
    end

    # @param value The new third component.
    def z=(value)
        @array[2] = value
    end
end
|
285
|
+
|
286
|
+
class CudaDeviceProp < FFI::PrettyStruct
|
287
|
+
layout(
|
288
|
+
:name, [:char, 256],
|
289
|
+
:total_global_mem, :size_t,
|
290
|
+
:shared_mem_per_block, :size_t,
|
291
|
+
:regs_per_block, :int,
|
292
|
+
:warp_size, :int,
|
293
|
+
:mem_pitch, :size_t,
|
294
|
+
:max_threads_per_block, :int,
|
295
|
+
:max_threads_dim, [:int, 3],
|
296
|
+
:max_grid_size, [:int, 3],
|
297
|
+
:clock_rate, :int,
|
298
|
+
:total_const_mem, :size_t,
|
299
|
+
:major, :int,
|
300
|
+
:minor, :int,
|
301
|
+
:texture_alignment, :size_t,
|
302
|
+
:device_overlap, :int, # Deprecated. Use :async_engine_count.
|
303
|
+
:multi_processor_count, :int,
|
304
|
+
:kernel_exec_timeout_enabled, :int,
|
305
|
+
:integrated, :int,
|
306
|
+
:can_map_host_memory, :int,
|
307
|
+
:compute_mode, :int,
|
308
|
+
:max_texture1d, :int,
|
309
|
+
:max_texture2d, [:int, 2],
|
310
|
+
:max_texture3d, [:int, 3],
|
311
|
+
:max_texture1d_layered, [:int, 2],
|
312
|
+
:max_texture2d_layered, [:int, 3],
|
313
|
+
:surface_alignment, :size_t,
|
314
|
+
:concurrent_kernels, :int,
|
315
|
+
:ecc_enabled, :int,
|
316
|
+
:pci_bus_id, :int,
|
317
|
+
:pci_device_id, :int,
|
318
|
+
:tcc_driver, :int,
|
319
|
+
:async_engine_count, :int,
|
320
|
+
:unified_addressing, :int,
|
321
|
+
:memory_clock_rate, :int,
|
322
|
+
:memory_bus_width, :int,
|
323
|
+
:l2_cache_size, :int,
|
324
|
+
:max_threads_per_multi_processor, :int,
|
325
|
+
)
|
326
|
+
end
|
327
|
+
|
328
|
+
class CudaFunctionAttributes < FFI::PrettyStruct
|
329
|
+
layout(
|
330
|
+
:shared_size_bytes, :size_t,
|
331
|
+
:const_size_bytes, :size_t,
|
332
|
+
:local_size_bytes, :size_t,
|
333
|
+
:max_threads_per_block, :int,
|
334
|
+
:num_regs, :int,
|
335
|
+
:ptx_version, :int,
|
336
|
+
:binary_version, :int,
|
337
|
+
)
|
338
|
+
end
|
339
|
+
|
340
|
+
class CudaPointerAttributes < FFI::PrettyStruct
|
341
|
+
layout(
|
342
|
+
:memory_type, CudaMemoryType,
|
343
|
+
:device, :int,
|
344
|
+
:device_pointer, :pointer,
|
345
|
+
:host_pointer, :pointer,
|
346
|
+
)
|
347
|
+
end
|
348
|
+
|
349
|
+
class CudaChannelFormatDesc < FFI::PrettyStruct
|
350
|
+
layout(
|
351
|
+
:x, :int,
|
352
|
+
:y, :int,
|
353
|
+
:z, :int,
|
354
|
+
:w, :int,
|
355
|
+
:f, CudaChannelFormatKind,
|
356
|
+
)
|
357
|
+
end
|
358
|
+
|
359
|
+
class CudaPitchedPtr < FFI::PrettyStruct
|
360
|
+
layout(
|
361
|
+
:ptr, :pointer,
|
362
|
+
:pitch, :size_t,
|
363
|
+
:xsize, :size_t,
|
364
|
+
:ysize, :size_t,
|
365
|
+
)
|
366
|
+
end
|
367
|
+
|
368
|
+
class CudaPos < FFI::PrettyStruct
|
369
|
+
layout(
|
370
|
+
:x, :size_t,
|
371
|
+
:y, :size_t,
|
372
|
+
:z, :size_t,
|
373
|
+
)
|
374
|
+
end
|
375
|
+
|
376
|
+
class CudaExtent < FFI::PrettyStruct
|
377
|
+
layout(
|
378
|
+
:width, :size_t,
|
379
|
+
:height, :size_t,
|
380
|
+
:depth, :size_t,
|
381
|
+
)
|
382
|
+
end
|
383
|
+
|
384
|
+
class CudaMemcpy3DParms < FFI::PrettyStruct
|
385
|
+
layout(
|
386
|
+
:src_array, :pointer,
|
387
|
+
:src_pos, CudaPos,
|
388
|
+
:src_ptr, CudaPitchedPtr,
|
389
|
+
:dst_array, :pointer,
|
390
|
+
:dst_pos, CudaPos,
|
391
|
+
:dst_ptr, CudaPitchedPtr,
|
392
|
+
:extent, CudaExtent,
|
393
|
+
:kind, CudaMemcpyKind,
|
394
|
+
)
|
395
|
+
end
|
396
|
+
|
397
|
+
class CudaMemcpy3DPeerParms < FFI::PrettyStruct
|
398
|
+
layout(
|
399
|
+
:src_array, :pointer,
|
400
|
+
:src_pos, CudaPos,
|
401
|
+
:src_ptr, CudaPitchedPtr,
|
402
|
+
:src_device, :int,
|
403
|
+
:dst_array, :pointer,
|
404
|
+
:dst_pos, CudaPos,
|
405
|
+
:dst_ptr, CudaPitchedPtr,
|
406
|
+
:dst_device, :int,
|
407
|
+
:extent, CudaExtent,
|
408
|
+
)
|
409
|
+
end
|
410
|
+
|
411
|
+
class TextureReference < FFI::PrettyStruct
|
412
|
+
layout(
|
413
|
+
:normalized, :int,
|
414
|
+
:filter_mode, CudaTextureFilterMode,
|
415
|
+
:address_mode, [CudaTextureAddressMode, 3],
|
416
|
+
:channel_desc, CudaChannelFormatDesc,
|
417
|
+
:srgb, :int,
|
418
|
+
:__cuda_reserved, [:int, 15],
|
419
|
+
)
|
420
|
+
end
|
421
|
+
|
422
|
+
# FFI struct for a surface reference; its only member is an embedded
# CudaChannelFormatDesc.
# NOTE(review): presumably mirrors the CUDA runtime's surfaceReference - confirm.
class SurfaceReference < FFI::PrettyStruct
  layout :channel_desc, CudaChannelFormatDesc
end
|
427
|
+
|
428
|
+
# CUDA Version Management.
# Both functions take a single pointer argument and are attached with an
# :enum return type.
[:cudaDriverGetVersion, :cudaRuntimeGetVersion].each do |name|
  attach_function name, [:pointer], :enum
end
|
431
|
+
|
432
|
+
# CUDA Error Handling.
# Rows are [function name, parameter types, return type]. cudaGetErrorString
# is the only binding here that returns a :string instead of an :enum.
[
  [:cudaGetErrorString,  [CudaError], :string],
  [:cudaGetLastError,    [],          :enum],
  [:cudaPeekAtLastError, [],          :enum],
].each { |binding| attach_function(*binding) }
|
436
|
+
|
437
|
+
# CUDA Device Management.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type.
[
  [:cudaChooseDevice,         [:pointer, :pointer]],
  [:cudaDeviceGetCacheConfig, [:pointer]],
  [:cudaDeviceGetLimit,       [:pointer, CudaLimit]],
  [:cudaDeviceReset,          []],
  [:cudaDeviceSetCacheConfig, [CudaFunctionCache]],
  [:cudaDeviceSetLimit,       [CudaLimit, :size_t]],
  [:cudaDeviceSynchronize,    []],
  [:cudaGetDevice,            [:pointer]],
  [:cudaGetDeviceCount,       [:pointer]],
  [:cudaGetDeviceProperties,  [:pointer, :int]],
  [:cudaSetDevice,            [:int]],
  [:cudaSetDeviceFlags,       [:uint]],
  [:cudaSetValidDevices,      [:pointer, :int]],
].each do |name, params|
  attach_function name, params, :enum
end
|
451
|
+
|
452
|
+
# CUDA Thread Management.
# Deprecated.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type.
[
  [:cudaThreadExit,           []],
  [:cudaThreadGetCacheConfig, [:pointer]],
  [:cudaThreadGetLimit,       [:pointer, CudaLimit]],
  [:cudaThreadSetCacheConfig, [CudaFunctionCache]],
  [:cudaThreadSetLimit,       [CudaLimit, :size_t]],
  [:cudaThreadSynchronize,    []],
].each do |name, params|
  attach_function name, params, :enum
end
|
460
|
+
|
461
|
+
# CUDA Memory Management.
# Allocation, release, symbol lookup and host-memory bindings.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type. CudaExtent is passed by value.
[
  [:cudaFree,                 [:pointer]],
  [:cudaFreeArray,            [:pointer]],
  [:cudaFreeHost,             [:pointer]],
  [:cudaGetSymbolAddress,     [:pointer, :string]],
  [:cudaGetSymbolSize,        [:pointer, :string]],
  [:cudaHostAlloc,            [:pointer, :size_t, :uint]],
  [:cudaHostGetDevicePointer, [:pointer, :pointer, :uint]],
  [:cudaHostGetFlags,         [:pointer, :pointer]],
  [:cudaHostRegister,         [:pointer, :size_t, :uint]],
  [:cudaHostUnregister,       [:pointer]],
  [:cudaMalloc,               [:pointer, :size_t]],
  [:cudaMalloc3D,             [:pointer, CudaExtent.by_value]],
  [:cudaMalloc3DArray,        [:pointer, :pointer, CudaExtent.by_value, :uint]],
  [:cudaMallocArray,          [:pointer, :pointer, :size_t, :size_t, :uint]],
  [:cudaMallocHost,           [:pointer, :size_t]],
  [:cudaMallocPitch,          [:pointer, :pointer, :size_t, :size_t]],
].each do |name, params|
  attach_function name, params, :enum
end
|
478
|
+
# Memory copy bindings (plain, 2D, 3D, array, symbol and peer variants).
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type. The *Async variants take a trailing
# :CudaStream argument.
[
  [:cudaMemcpy,
   [:pointer, :pointer, :size_t, CudaMemcpyKind]],
  [:cudaMemcpy2D,
   [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpy2DArrayToArray,
   [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpy2DAsync,
   [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream]],
  [:cudaMemcpy2DFromArray,
   [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpy2DFromArrayAsync,
   [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream]],
  [:cudaMemcpy2DToArray,
   [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpy2DToArrayAsync,
   [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream]],
  [:cudaMemcpy3D,
   [:pointer]],
  [:cudaMemcpy3DAsync,
   [:pointer, :CudaStream]],
  [:cudaMemcpy3DPeer,
   [:pointer]],
  [:cudaMemcpy3DPeerAsync,
   [:pointer, :CudaStream]],
  [:cudaMemcpyArrayToArray,
   [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpyAsync,
   [:pointer, :pointer, :size_t, CudaMemcpyKind, :CudaStream]],
  [:cudaMemcpyFromArray,
   [:pointer, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpyFromArrayAsync,
   [:pointer, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream]],
  [:cudaMemcpyFromSymbol,
   [:pointer, :string, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpyFromSymbolAsync,
   [:pointer, :string, :size_t, :size_t, CudaMemcpyKind, :CudaStream]],
  [:cudaMemcpyPeer,
   [:pointer, :int, :pointer, :int, :size_t]],
  [:cudaMemcpyPeerAsync,
   [:pointer, :int, :pointer, :int, :size_t, :CudaStream]],
  [:cudaMemcpyToArray,
   [:pointer, :size_t, :size_t, :pointer, :size_t, CudaMemcpyKind]],
  [:cudaMemcpyToArrayAsync,
   [:pointer, :size_t, :size_t, :pointer, :size_t, CudaMemcpyKind, :CudaStream]],
  [:cudaMemcpyToSymbol,
   [:string, :pointer, :size_t, :size_t, CudaMemcpyKind]],
  [:cudaMemcpyToSymbolAsync,
   [:string, :pointer, :size_t, :size_t, CudaMemcpyKind, :CudaStream]],
].each do |name, params|
  attach_function name, params, :enum
end
|
502
|
+
# Memory info and memset bindings.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type. The 3D variants take CudaPitchedPtr and
# CudaExtent by value.
[
  [:cudaMemGetInfo,    [:pointer, :pointer]],
  [:cudaMemset,        [:pointer, :int, :size_t]],
  [:cudaMemset2D,      [:pointer, :size_t, :int, :size_t, :size_t]],
  [:cudaMemset2DAsync, [:pointer, :size_t, :int, :size_t, :size_t, :CudaStream]],
  [:cudaMemset3D,      [CudaPitchedPtr.by_value, :int, CudaExtent.by_value]],
  [:cudaMemset3DAsync, [CudaPitchedPtr.by_value, :int, CudaExtent.by_value, :CudaStream]],
  [:cudaMemsetAsync,   [:pointer, :int, :size_t, :CudaStream]],
].each do |name, params|
  attach_function name, params, :enum
end
|
509
|
+
# attach_function :make_cudaExtent, [:size_t, :size_t, :size_t], CudaExtent
|
510
|
+
# attach_function :make_cudaPitchedPtr, [:pointer, :size_t, :size_t, :size_t], CudaPitchedPtr
|
511
|
+
# attach_function :make_cudaPos, [:size_t, :size_t, :size_t], CudaPos
|
512
|
+
|
513
|
+
# Builds a CudaExtent from its three dimensions.
#
# @param w value stored in the :width member
# @param h value stored in the :height member
# @param d value stored in the :depth member
# @return [CudaExtent] the newly created, populated struct
def make_cudaExtent(w, h, d)
  CudaExtent.new.tap do |extent|
    extent[:width]  = w
    extent[:height] = h
    extent[:depth]  = d
  end
end
module_function :make_cudaExtent
|
519
|
+
|
520
|
+
# Builds a CudaPitchedPtr from a pointer and its pitch/size values.
#
# @param d   value stored in the :ptr member
# @param p   value stored in the :pitch member
# @param xsz value stored in the :xsize member
# @param ysz value stored in the :ysize member
# @return [CudaPitchedPtr] the newly created, populated struct
def make_cudaPitchedPtr(d, p, xsz, ysz)
  pitched = CudaPitchedPtr.new
  # Members are written left to right: ptr, pitch, xsize, ysize.
  pitched[:ptr], pitched[:pitch], pitched[:xsize], pitched[:ysize] = d, p, xsz, ysz
  pitched
end
module_function :make_cudaPitchedPtr
|
529
|
+
|
530
|
+
# Builds a CudaPos from its three coordinates.
#
# @param x value stored in the :x member
# @param y value stored in the :y member
# @param z value stored in the :z member
# @return [CudaPos] the newly created, populated struct
def make_cudaPos(x, y, z)
  pos = CudaPos.new
  [:x, :y, :z].zip([x, y, z]) { |member, value| pos[member] = value }
  pos
end
module_function :make_cudaPos
|
538
|
+
|
539
|
+
# CUDA Unified Addressing.
# Takes two pointer arguments and is attached with an :enum return type.
attach_function(:cudaPointerGetAttributes, [:pointer, :pointer], :enum)
|
541
|
+
|
542
|
+
# CUDA Peer Device Memory Access.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type.
[
  [:cudaDeviceCanAccessPeer,     [:pointer, :int, :int]],
  [:cudaDeviceDisablePeerAccess, [:int]],
  [:cudaDeviceEnablePeerAccess,  [:int, :uint]],
].each do |name, params|
  attach_function name, params, :enum
end
|
546
|
+
|
547
|
+
# CUDA Execution Control.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type. cudaConfigureCall takes its two Dim3
# arguments by value.
[
  [:cudaConfigureCall,      [Dim3.by_value, Dim3.by_value, :size_t, :CudaStream]],
  [:cudaFuncGetAttributes,  [:pointer, :string]],
  [:cudaFuncSetCacheConfig, [:string, CudaFunctionCache]],
  [:cudaLaunch,             [:string]],
  [:cudaSetDoubleForDevice, [:pointer]],
  [:cudaSetDoubleForHost,   [:pointer]],
  [:cudaSetupArgument,      [:pointer, :size_t, :size_t]],
].each do |name, params|
  attach_function name, params, :enum
end
|
555
|
+
|
556
|
+
# CUDA Stream Management.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type.
[
  [:cudaStreamCreate,      [:pointer]],
  [:cudaStreamDestroy,     [:CudaStream]],
  [:cudaStreamQuery,       [:CudaStream]],
  [:cudaStreamSynchronize, [:CudaStream]],
  [:cudaStreamWaitEvent,   [:CudaStream, :CudaEvent, :uint]],
].each do |name, params|
  attach_function name, params, :enum
end
|
562
|
+
|
563
|
+
# CUDA Event Management.
# Rows are [function name, parameter types]; every function in this table is
# attached with an :enum return type.
[
  [:cudaEventCreate,          [:pointer]],
  [:cudaEventCreateWithFlags, [:pointer, :uint]],
  [:cudaEventDestroy,         [:CudaEvent]],
  [:cudaEventElapsedTime,     [:pointer, :CudaEvent, :CudaEvent]],
  [:cudaEventQuery,           [:CudaEvent]],
  [:cudaEventRecord,          [:CudaEvent, :CudaStream]],
  [:cudaEventSynchronize,     [:CudaEvent]],
].each do |name, params|
  attach_function name, params, :enum
end
|
571
|
+
|
572
|
+
# CUDA Texture Reference Management.
# Rows are [function name, parameter types, return type]. All bindings return
# an :enum except cudaCreateChannelDesc, which returns a
# CudaChannelFormatDesc struct by value.
[
  [:cudaBindTexture,
   [:pointer, :pointer, :pointer, :pointer, :size_t],
   :enum],
  [:cudaBindTexture2D,
   [:pointer, :pointer, :pointer, :pointer, :size_t, :size_t, :size_t],
   :enum],
  [:cudaBindTextureToArray,
   [:pointer, :pointer, :pointer],
   :enum],
  [:cudaCreateChannelDesc,
   [:int, :int, :int, :int, CudaChannelFormatKind],
   CudaChannelFormatDesc.by_value],
  [:cudaGetChannelDesc,
   [:pointer, :pointer],
   :enum],
  [:cudaGetTextureAlignmentOffset,
   [:pointer, :pointer],
   :enum],
  [:cudaGetTextureReference,
   [:pointer, :string],
   :enum],
  [:cudaUnbindTexture,
   [:pointer],
   :enum],
].each { |binding| attach_function(*binding) }
|
581
|
+
|
582
|
+
# CUDA Surface Reference Management.
# Rows are [function name, parameter types]; both functions are attached with
# an :enum return type.
[
  [:cudaBindSurfaceToArray,  [:pointer, :pointer, :pointer]],
  [:cudaGetSurfaceReference, [:pointer, :string]],
].each do |name, params|
  attach_function name, params, :enum
end
|
585
|
+
|
586
|
+
end # module
|
587
|
+
end # module
|
588
|
+
end # module
|