sgc-ruby-cuda 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +46 -1
- data/doc/features.rdoc +2 -2
- data/lib/cuda/driver/context.rb +25 -0
- data/lib/cuda/driver/cu.rb +16 -7
- data/lib/cuda/driver/device.rb +20 -1
- data/lib/cuda/driver/ffi-cu.rb +102 -53
- data/lib/cuda/driver/function.rb +5 -3
- data/lib/cuda/runtime/cuda.rb +24 -11
- data/lib/cuda/runtime/device.rb +39 -1
- data/lib/cuda/runtime/ffi-cuda.rb +4 -3
- data/lib/cuda/runtime/function.rb +2 -2
- data/lib/ffi/typedef.rb +36 -0
- data/lib/helpers/struct.rb +51 -0
- data/lib/memory/buffer.rb +1 -1
- data/lib/memory/pointer.rb +2 -2
- data/test/rubycu/test_cucontext.rb +24 -0
- data/test/rubycu/test_cudevice.rb +11 -0
- data/test/rubycu/test_cumemory.rb +0 -2
- data/test/rubycuda/test_cudadevice.rb +31 -0
- data/test/rubycuda/testbase.rb +2 -1
- data/version.rb +1 -1
- metadata +5 -3
data/README.rdoc
CHANGED
@@ -54,6 +54,20 @@ interpretors.
|
|
54
54
|
|
55
55
|
== How to get SGC-Ruby-CUDA
|
56
56
|
|
57
|
+
The SGC-Ruby-CUDA Ruby gem is available at rubygems.org and rubyforge.org:
|
58
|
+
|
59
|
+
# Install the gem with the following command.
|
60
|
+
$ gem install sgc-ruby-cuda
|
61
|
+
|
62
|
+
or
|
63
|
+
|
64
|
+
# Download the gem from the following URL and install.
|
65
|
+
http://rubyforge.org/frs/?group_id=9719
|
66
|
+
|
67
|
+
The online documentation is available at the following URL:
|
68
|
+
|
69
|
+
http://rubydoc.info/github/xman/sgc-ruby-cuda/master/frames
|
70
|
+
|
57
71
|
The SGC-Ruby-CUDA git repository can be found at the following URL:
|
58
72
|
|
59
73
|
http://github.com/xman/sgc-ruby-cuda
|
@@ -76,12 +90,18 @@ minimize unnecessary fixing commits.
|
|
76
90
|
# Setup the environment. Assuming the CUDA Toolkit is installed in
|
77
91
|
# the default path /usr/local/cuda.
|
78
92
|
|
79
|
-
# For 64bit Linux:
|
93
|
+
# For 64bit Linux only:
|
80
94
|
export CPATH="/usr/local/cuda/include"
|
81
95
|
export LIBRARY_PATH="/usr/local/cuda/lib64"
|
82
96
|
export LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH"
|
83
97
|
export PATH="/usr/local/cuda/bin:$PATH"
|
84
98
|
|
99
|
+
# For Mac OSX only:
|
100
|
+
export CPATH="/usr/local/cuda/include"
|
101
|
+
export LIBRARY_PATH="/usr/local/cuda/lib"
|
102
|
+
export DYLD_LIBRARY_PATH="/usr/local/cuda/lib:$DYLD_LIBRARY_PATH"
|
103
|
+
export PATH="/usr/local/cuda/bin:$PATH"
|
104
|
+
|
85
105
|
gem install ffi
|
86
106
|
cd sgc-ruby-cuda
|
87
107
|
rake test
|
@@ -93,6 +113,31 @@ minimize unnecessary fixing commits.
|
|
93
113
|
# Check out the generated documentation in html/index.html with a browser.
|
94
114
|
|
95
115
|
|
116
|
+
== How to contribute
|
117
|
+
|
118
|
+
1. Create an account on GitHub at http://github.com
|
119
|
+
2. Fork this project SGC-Ruby-CUDA at http://github.com/xman/sgc-ruby-cuda
|
120
|
+
3. Make changes and submit commits to your local repository.
|
121
|
+
4. At your local repository, set up and send a pull request.
|
122
|
+
|
123
|
+
Ideally, a commit should do only one task. We like clean commits.
|
124
|
+
Commits should be applied to the master branch.
|
125
|
+
|
126
|
+
|
127
|
+
== How to file a bug report
|
128
|
+
|
129
|
+
Submit new issues at https://github.com/xman/sgc-ruby-cuda/issues
|
130
|
+
|
131
|
+
Please provide information on the SGC-Ruby-CUDA version or the commit hash,
|
132
|
+
OS platform, Ruby version, FFI version, how to reproduce the issue, and the error
|
133
|
+
messages.
|
134
|
+
|
135
|
+
|
136
|
+
== Mailing list
|
137
|
+
|
138
|
+
http://groups.google.com/group/sgc-ruby-cuda
|
139
|
+
|
140
|
+
|
96
141
|
== License
|
97
142
|
|
98
143
|
SGC-Ruby-CUDA is released under the GNU GPLv3. See the file COPYING.
|
data/doc/features.rdoc
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
Module Management +
|
13
13
|
Memory Management +
|
14
14
|
Unified Addressing -
|
15
|
-
Peer Context Memory Access
|
15
|
+
Peer Context Memory Access *
|
16
16
|
Execution Control +
|
17
17
|
Stream Management *
|
18
18
|
Event Management *
|
@@ -40,7 +40,7 @@
|
|
40
40
|
Thread Management *
|
41
41
|
Memory Management +
|
42
42
|
Unified Addressing -
|
43
|
-
Peer Device Memory Access
|
43
|
+
Peer Device Memory Access *
|
44
44
|
Execution Control +
|
45
45
|
Stream Management *
|
46
46
|
Event Management *
|
data/lib/cuda/driver/context.rb
CHANGED
@@ -218,6 +218,31 @@ class CUContext
|
|
218
218
|
end
|
219
219
|
|
220
220
|
|
221
|
+
# Enable the current context to access the memory of the peer context.
|
222
|
+
# @param [CUContext] peer_context The peer context whose memory is to be accessed.
|
223
|
+
# @param [Integer] flags Currently flags must be set to zero.
|
224
|
+
# @return [Class] This class.
|
225
|
+
#
|
226
|
+
# @since CUDA 4.0
|
227
|
+
def self.enable_peer_access(peer_context, flags = 0)
|
228
|
+
status = API::cuCtxEnablePeerAccess(peer_context.to_api, flags)
|
229
|
+
Pvt::handle_error(status, "Failed to enable peer access: flags = #{flags}.")
|
230
|
+
self
|
231
|
+
end
|
232
|
+
|
233
|
+
|
234
|
+
# Disable the current context from accessing the memory of the peer context.
|
235
|
+
# @param [CUContext] peer_context The peer context.
|
236
|
+
# @return [Class] This class.
|
237
|
+
#
|
238
|
+
# @since CUDA 4.0
|
239
|
+
def self.disable_peer_access(peer_context)
|
240
|
+
status = API::cuCtxDisablePeerAccess(peer_context.to_api)
|
241
|
+
Pvt::handle_error(status, "Failed to disable peer access.")
|
242
|
+
self
|
243
|
+
end
|
244
|
+
|
245
|
+
|
221
246
|
# @private
|
222
247
|
def initialize(ptr)
|
223
248
|
@pcontext = ptr
|
data/lib/cuda/driver/cu.rb
CHANGED
@@ -22,8 +22,10 @@
|
|
22
22
|
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
23
|
#
|
24
24
|
|
25
|
+
require 'delegate'
|
25
26
|
require 'cuda/driver/ffi-cu'
|
26
27
|
require 'memory/buffer'
|
28
|
+
require 'helpers/struct'
|
27
29
|
|
28
30
|
|
29
31
|
module SGC
|
@@ -42,19 +44,26 @@ module CU
|
|
42
44
|
class CUAddressMode < API::Enum; end # @see API::CUAddressMode
|
43
45
|
class CUFilterMode < API::Enum; end # @see API::CUFilterMode
|
44
46
|
class CUTexRefFlags < API::Enum; end # @see API::CUTexRefFlags
|
45
|
-
class CUArrayFormat < API::Enum; end
|
47
|
+
class CUArrayFormat < API::Enum; end # @see API::CUArrayFormat
|
46
48
|
class CUMemoryType < API::Enum; end # @see API::CUMemoryType
|
47
49
|
class CUPointerAttribute < API::Enum; end # @see API::CUPointerAttribute
|
48
50
|
class CUJitOption < API::Enum; end # @see API::CUJitOption
|
49
51
|
class CUJitFallBack < API::Enum; end # @see API::CUJitFallBack
|
50
52
|
class CUJitTarget < API::Enum; end # @see API::CUJitTarget
|
51
53
|
|
52
|
-
class CUDevProp < API::CUDevProp; end
|
53
|
-
class CudaMemcpy2D < API::CudaMemcpy2D; end
|
54
|
-
class CudaMemcpy3D < API::CudaMemcpy3D; end
|
55
|
-
class CudaMemcpy3DPeer < API::CudaMemcpy3DPeer; end
|
56
|
-
class CudaArrayDescriptor < API::CudaArrayDescriptor; end
|
57
|
-
class CudaArray3DDescriptor < API::CudaArray3DDescriptor; end
|
54
|
+
class CUDevProp < DelegateClass(API::CUDevProp); end # See {file:lib/cuda/driver/ffi-cu.rb}
|
55
|
+
class CudaMemcpy2D < DelegateClass(API::CudaMemcpy2D); end # See {file:lib/cuda/driver/ffi-cu.rb}
|
56
|
+
class CudaMemcpy3D < DelegateClass(API::CudaMemcpy3D); end # See {file:lib/cuda/driver/ffi-cu.rb}
|
57
|
+
class CudaMemcpy3DPeer < DelegateClass(API::CudaMemcpy3DPeer); end # See {file:lib/cuda/driver/ffi-cu.rb}
|
58
|
+
class CudaArrayDescriptor < DelegateClass(API::CudaArrayDescriptor); end # See {file:lib/cuda/driver/ffi-cu.rb}
|
59
|
+
class CudaArray3DDescriptor < DelegateClass(API::CudaArray3DDescriptor); end # See {file:lib/cuda/driver/ffi-cu.rb}
|
60
|
+
|
61
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CUDevProp, API::CUDevProp)
|
62
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaMemcpy2D, API::CudaMemcpy2D)
|
63
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaMemcpy3D, API::CudaMemcpy3D)
|
64
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaMemcpy3DPeer, API::CudaMemcpy3DPeer)
|
65
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaArrayDescriptor, API::CudaArrayDescriptor)
|
66
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaArray3DDescriptor, API::CudaArray3DDescriptor)
|
58
67
|
|
59
68
|
end # module
|
60
69
|
end # module
|
data/lib/cuda/driver/device.rb
CHANGED
@@ -25,6 +25,7 @@
|
|
25
25
|
require 'cuda/driver/ffi-cu'
|
26
26
|
require 'cuda/driver/cu'
|
27
27
|
require 'cuda/driver/error'
|
28
|
+
require 'cuda/driver/context'
|
28
29
|
|
29
30
|
|
30
31
|
module SGC
|
@@ -99,7 +100,7 @@ class CUDevice
|
|
99
100
|
# * :texture_align
|
100
101
|
# * :total_constant_memory
|
101
102
|
def properties
|
102
|
-
prop = CUDevProp.new
|
103
|
+
prop = API::CUDevProp.new
|
103
104
|
status = API::cuDeviceGetProperties(prop.to_ptr, self.to_api)
|
104
105
|
Pvt::handle_error(status, "Failed to get device properties.")
|
105
106
|
h = {}
|
@@ -137,6 +138,24 @@ class CUDevice
|
|
137
138
|
end
|
138
139
|
|
139
140
|
|
141
|
+
# @param [CUDevice] dev The device which is to access the memory of the device _peer_dev_.
|
142
|
+
# @param [CUDevice] peer_dev The device whose memory is to be accessed by the device _dev_.
|
143
|
+
# @return [Boolean] True if device _dev_ may directly access the memory of device _peer_dev_.
|
144
|
+
#
|
145
|
+
# @since CUDA 4.0
|
146
|
+
def self.can_access_peer?(dev, peer_dev = nil)
|
147
|
+
# TODO: Remove the following workaround for JRuby when the default argument bug is fixed.
|
148
|
+
if peer_dev.nil?
|
149
|
+
peer_dev = dev
|
150
|
+
dev = CUContext.device
|
151
|
+
end
|
152
|
+
b = FFI::MemoryPointer.new(:int)
|
153
|
+
status = API::cuDeviceCanAccessPeer(b, dev.to_api, peer_dev.to_api)
|
154
|
+
Pvt::handle_error(status, "Failed to query can access peer.")
|
155
|
+
b.read_int == 1 ? true : false
|
156
|
+
end
|
157
|
+
|
158
|
+
|
140
159
|
# @private
|
141
160
|
def initialize(ptr)
|
142
161
|
@pdev = ptr
|
data/lib/cuda/driver/ffi-cu.rb
CHANGED
@@ -24,6 +24,7 @@
|
|
24
24
|
|
25
25
|
require 'ffi'
|
26
26
|
require 'ffi/prettystruct'
|
27
|
+
require 'ffi/typedef'
|
27
28
|
require 'helpers/interface/ienum'
|
28
29
|
require 'helpers/flags'
|
29
30
|
require 'helpers/klass'
|
@@ -268,7 +269,7 @@ module API
|
|
268
269
|
|
269
270
|
FFI::typedef :int, :enum
|
270
271
|
FFI::typedef :int, :CUDevice
|
271
|
-
FFI::typedef :
|
272
|
+
FFI::typedef :ulong_long, :CUDevicePtr
|
272
273
|
FFI::typedef :pointer, :CUContext
|
273
274
|
FFI::typedef :pointer, :CUModule
|
274
275
|
FFI::typedef :pointer, :CUFunction
|
@@ -280,16 +281,18 @@ module API
|
|
280
281
|
|
281
282
|
def read_int(ptr); ptr.read_int; end
|
282
283
|
def read_long(ptr); ptr.read_long; end
|
284
|
+
def read_ulong_long(ptr); ptr.read_ulong_long; end
|
283
285
|
def read_pointer(ptr); ptr.read_pointer; end
|
284
286
|
|
285
|
-
def write_int(ptr); ptr.write_int; end
|
286
|
-
def write_long(ptr); ptr.write_long; end
|
287
|
-
def
|
287
|
+
def write_int(ptr, value); ptr.write_int(value); end
|
288
|
+
def write_long(ptr, value); ptr.write_long(value); end
|
289
|
+
def write_ulong_long(ptr, value); ptr.write_ulong_long(value); end
|
290
|
+
def write_pointer(ptr, value); ptr.write_long(value.to_i); end
|
288
291
|
|
289
292
|
alias read_size_t read_long
|
290
293
|
alias read_enum read_int
|
291
294
|
alias read_cudevice read_int
|
292
|
-
alias read_cudeviceptr
|
295
|
+
alias read_cudeviceptr read_ulong_long
|
293
296
|
alias read_cucontext read_pointer
|
294
297
|
alias read_cumodule read_pointer
|
295
298
|
alias read_cufunction read_pointer
|
@@ -302,7 +305,7 @@ module API
|
|
302
305
|
alias write_size_t write_long
|
303
306
|
alias write_enum write_int
|
304
307
|
alias write_cudevice write_int
|
305
|
-
alias write_cudeviceptr
|
308
|
+
alias write_cudeviceptr write_ulong_long
|
306
309
|
alias write_cucontext write_pointer
|
307
310
|
alias write_cumodule write_pointer
|
308
311
|
alias write_cufunction write_pointer
|
@@ -468,92 +471,132 @@ module API
|
|
468
471
|
attach_function :cuDeviceGetCount, [:pointer], :enum
|
469
472
|
attach_function :cuDeviceGetName, [:pointer, :int, :CUDevice], :enum
|
470
473
|
attach_function :cuDeviceGetProperties, [:pointer, :CUDevice], :enum
|
471
|
-
attach_function :
|
474
|
+
attach_function :cuDeviceTotalMem_v2, [:pointer, :CUDevice], :enum
|
475
|
+
class << self; alias_method :cuDeviceTotalMem, :cuDeviceTotalMem_v2; end
|
472
476
|
|
473
477
|
# CU Context Management.
|
474
|
-
attach_function :
|
475
|
-
attach_function :
|
478
|
+
attach_function :cuCtxCreate_v2, [:pointer, :uint, :CUDevice], :enum
|
479
|
+
attach_function :cuCtxDestroy_v2, [:CUContext], :enum
|
476
480
|
attach_function :cuCtxGetApiVersion, [:CUContext, :pointer], :enum
|
477
481
|
attach_function :cuCtxGetCacheConfig, [:pointer], :enum
|
478
482
|
attach_function :cuCtxGetCurrent, [:pointer], :enum
|
479
483
|
attach_function :cuCtxGetDevice, [:pointer], :enum
|
480
484
|
attach_function :cuCtxGetLimit, [:pointer, CULimit], :enum
|
481
|
-
attach_function :
|
482
|
-
attach_function :
|
485
|
+
attach_function :cuCtxPopCurrent_v2, [:pointer], :enum
|
486
|
+
attach_function :cuCtxPushCurrent_v2, [:CUContext], :enum
|
483
487
|
attach_function :cuCtxSetCacheConfig, [CUFunctionCache], :enum
|
484
488
|
attach_function :cuCtxSetCurrent, [:CUContext], :enum
|
485
489
|
attach_function :cuCtxSetLimit, [CULimit, :size_t], :enum
|
486
490
|
attach_function :cuCtxSynchronize, [], :enum
|
491
|
+
class << self; alias_method :cuCtxDestroy, :cuCtxDestroy_v2; end
|
492
|
+
class << self; alias_method :cuCtxCreate, :cuCtxCreate_v2; end
|
493
|
+
class << self; alias_method :cuCtxPopCurrent, :cuCtxPopCurrent_v2; end
|
494
|
+
class << self; alias_method :cuCtxPushCurrent, :cuCtxPushCurrent_v2; end
|
487
495
|
# Deprecated.
|
488
496
|
attach_function :cuCtxAttach, [:pointer, :uint], :enum
|
489
497
|
attach_function :cuCtxDetach, [:CUContext], :enum
|
490
498
|
|
491
499
|
# CU Memory Management.
|
492
|
-
attach_function :
|
493
|
-
attach_function :
|
494
|
-
attach_function :
|
500
|
+
attach_function :cuArray3DCreate_v2, [:pointer, :pointer], :enum
|
501
|
+
attach_function :cuArray3DGetDescriptor_v2, [:pointer, :CUArray], :enum
|
502
|
+
attach_function :cuArrayCreate_v2, [:pointer, :pointer], :enum
|
495
503
|
attach_function :cuArrayDestroy, [:CUArray], :enum
|
496
|
-
attach_function :
|
497
|
-
attach_function :
|
498
|
-
attach_function :
|
499
|
-
attach_function :
|
504
|
+
attach_function :cuArrayGetDescriptor_v2, [:pointer, :CUArray], :enum
|
505
|
+
attach_function :cuMemAlloc_v2, [:pointer, :size_t], :enum
|
506
|
+
attach_function :cuMemAllocHost_v2, [:pointer, :size_t], :enum
|
507
|
+
attach_function :cuMemAllocPitch_v2, [:pointer, :pointer, :size_t, :size_t, :uint], :enum
|
500
508
|
attach_function :cuMemcpy, [:CUDevicePtr, :CUDevicePtr, :size_t], :enum
|
501
|
-
attach_function :
|
502
|
-
attach_function :
|
503
|
-
attach_function :
|
504
|
-
attach_function :
|
505
|
-
attach_function :
|
509
|
+
attach_function :cuMemcpy2D_v2, [:pointer], :enum
|
510
|
+
attach_function :cuMemcpy2DAsync_v2, [:pointer, :CUStream], :enum
|
511
|
+
attach_function :cuMemcpy2DUnaligned_v2, [:pointer], :enum
|
512
|
+
attach_function :cuMemcpy3D_v2, [:pointer], :enum
|
513
|
+
attach_function :cuMemcpy3DAsync_v2, [:pointer, :CUStream], :enum
|
506
514
|
attach_function :cuMemcpy3DPeer, [:pointer], :enum
|
507
515
|
attach_function :cuMemcpy3DPeerAsync, [:pointer, :CUStream], :enum
|
508
516
|
attach_function :cuMemcpyAsync, [:CUDevicePtr, :CUDevicePtr, :size_t, :CUStream], :enum
|
509
|
-
attach_function :
|
510
|
-
attach_function :
|
511
|
-
attach_function :
|
512
|
-
attach_function :
|
513
|
-
attach_function :
|
514
|
-
attach_function :
|
515
|
-
attach_function :
|
516
|
-
attach_function :
|
517
|
-
attach_function :
|
518
|
-
attach_function :
|
519
|
-
attach_function :
|
520
|
-
attach_function :
|
521
|
-
attach_function :
|
517
|
+
attach_function :cuMemcpyAtoA_v2, [:CUArray, :size_t, :CUArray, :size_t, :size_t], :enum
|
518
|
+
attach_function :cuMemcpyAtoD_v2, [:CUDevicePtr, :CUArray, :size_t, :size_t], :enum
|
519
|
+
attach_function :cuMemcpyAtoH_v2, [:pointer, :CUArray, :size_t, :size_t], :enum
|
520
|
+
attach_function :cuMemcpyAtoHAsync_v2, [:pointer, :CUArray, :size_t, :size_t, :CUStream], :enum
|
521
|
+
attach_function :cuMemcpyDtoA_v2, [:CUArray, :size_t, :CUDevicePtr, :size_t], :enum
|
522
|
+
attach_function :cuMemcpyDtoD_v2, [:CUDevicePtr, :CUDevicePtr, :size_t], :enum
|
523
|
+
attach_function :cuMemcpyDtoDAsync_v2, [:CUDevicePtr, :CUDevicePtr, :size_t, :CUStream], :enum
|
524
|
+
attach_function :cuMemcpyDtoH_v2, [:pointer, :CUDevicePtr, :size_t], :enum
|
525
|
+
attach_function :cuMemcpyDtoHAsync_v2, [:pointer, :CUDevicePtr, :size_t, :CUStream], :enum
|
526
|
+
attach_function :cuMemcpyHtoA_v2, [:CUArray, :size_t, :pointer, :size_t], :enum
|
527
|
+
attach_function :cuMemcpyHtoAAsync_v2, [:CUArray, :size_t, :pointer, :size_t, :CUStream], :enum
|
528
|
+
attach_function :cuMemcpyHtoD_v2, [:CUDevicePtr, :pointer, :size_t], :enum
|
529
|
+
attach_function :cuMemcpyHtoDAsync_v2, [:CUDevicePtr, :pointer, :size_t, :CUStream], :enum
|
522
530
|
attach_function :cuMemcpyPeer, [:CUDevicePtr, :CUContext, :CUDevicePtr, :CUContext, :size_t], :enum
|
523
531
|
attach_function :cuMemcpyPeerAsync, [:CUDevicePtr, :CUContext, :CUDevicePtr, :CUContext, :size_t, :CUStream], :enum
|
524
|
-
attach_function :
|
532
|
+
attach_function :cuMemFree_v2, [:CUDevicePtr], :enum
|
525
533
|
attach_function :cuMemFreeHost, [:pointer], :enum
|
526
|
-
attach_function :
|
527
|
-
attach_function :
|
534
|
+
attach_function :cuMemGetAddressRange_v2, [:pointer, :pointer, :CUDevicePtr], :enum
|
535
|
+
attach_function :cuMemGetInfo_v2, [:pointer, :pointer], :enum
|
528
536
|
attach_function :cuMemHostAlloc, [:pointer, :size_t, :uint], :enum
|
529
|
-
attach_function :
|
537
|
+
attach_function :cuMemHostGetDevicePointer_v2, [:pointer, :pointer, :uint], :enum
|
530
538
|
attach_function :cuMemHostGetFlags, [:pointer, :pointer], :enum
|
531
539
|
attach_function :cuMemHostRegister, [:pointer, :size_t, :uint], :enum
|
532
540
|
attach_function :cuMemHostUnregister, [:pointer], :enum
|
533
|
-
attach_function :
|
541
|
+
attach_function :cuMemsetD16_v2, [:CUDevicePtr, :ushort, :size_t], :enum
|
534
542
|
attach_function :cuMemsetD16Async, [:CUDevicePtr, :ushort, :size_t, :CUStream], :enum
|
535
|
-
attach_function :
|
543
|
+
attach_function :cuMemsetD2D16_v2, [:CUDevicePtr, :size_t, :ushort, :size_t, :size_t], :enum
|
536
544
|
attach_function :cuMemsetD2D16Async, [:CUDevicePtr, :size_t, :ushort, :size_t, :size_t, :CUStream], :enum
|
537
|
-
attach_function :
|
545
|
+
attach_function :cuMemsetD2D32_v2, [:CUDevicePtr, :size_t, :uint, :size_t, :size_t], :enum
|
538
546
|
attach_function :cuMemsetD2D32Async, [:CUDevicePtr, :size_t, :uint, :size_t, :size_t, :CUStream], :enum
|
539
|
-
attach_function :
|
547
|
+
attach_function :cuMemsetD2D8_v2, [:CUDevicePtr, :size_t, :uchar, :size_t, :size_t], :enum
|
540
548
|
attach_function :cuMemsetD2D8Async, [:CUDevicePtr, :size_t, :uchar, :size_t, :size_t, :CUStream], :enum
|
541
|
-
attach_function :
|
549
|
+
attach_function :cuMemsetD32_v2, [:CUDevicePtr, :uint, :size_t], :enum
|
542
550
|
attach_function :cuMemsetD32Async, [:CUDevicePtr, :uint, :size_t, :CUStream], :enum
|
543
|
-
attach_function :
|
551
|
+
attach_function :cuMemsetD8_v2, [:CUDevicePtr, :uchar, :size_t], :enum
|
544
552
|
attach_function :cuMemsetD8Async, [:CUDevicePtr, :uchar, :size_t, :CUStream], :enum
|
553
|
+
class << self; alias_method :cuArray3DCreate, :cuArray3DCreate_v2; end
|
554
|
+
class << self; alias_method :cuArray3DGetDescriptor, :cuArray3DGetDescriptor_v2; end
|
555
|
+
class << self; alias_method :cuArrayCreate, :cuArrayCreate_v2; end
|
556
|
+
class << self; alias_method :cuArrayGetDescriptor, :cuArrayGetDescriptor_v2; end
|
557
|
+
class << self; alias_method :cuMemAlloc, :cuMemAlloc_v2; end
|
558
|
+
class << self; alias_method :cuMemAllocHost, :cuMemAllocHost_v2; end
|
559
|
+
class << self; alias_method :cuMemAllocPitch, :cuMemAllocPitch_v2; end
|
560
|
+
class << self; alias_method :cuMemcpy2D, :cuMemcpy2D_v2; end
|
561
|
+
class << self; alias_method :cuMemcpy2DAsync, :cuMemcpy2DAsync_v2 ; end
|
562
|
+
class << self; alias_method :cuMemcpy2DUnaligned, :cuMemcpy2DUnaligned_v2; end
|
563
|
+
class << self; alias_method :cuMemcpy3D, :cuMemcpy3D_v2; end
|
564
|
+
class << self; alias_method :cuMemcpy3DAsync, :cuMemcpy3DAsync_v2; end
|
565
|
+
class << self; alias_method :cuMemcpyAtoA, :cuMemcpyAtoA_v2; end
|
566
|
+
class << self; alias_method :cuMemcpyAtoD, :cuMemcpyAtoD_v2; end
|
567
|
+
class << self; alias_method :cuMemcpyAtoH, :cuMemcpyAtoH_v2; end
|
568
|
+
class << self; alias_method :cuMemcpyAtoHAsync, :cuMemcpyAtoHAsync_v2; end
|
569
|
+
class << self; alias_method :cuMemcpyDtoA, :cuMemcpyDtoA_v2; end
|
570
|
+
class << self; alias_method :cuMemcpyDtoD, :cuMemcpyDtoD_v2; end
|
571
|
+
class << self; alias_method :cuMemcpyDtoDAsync, :cuMemcpyDtoDAsync_v2; end
|
572
|
+
class << self; alias_method :cuMemcpyDtoH, :cuMemcpyDtoH_v2; end
|
573
|
+
class << self; alias_method :cuMemcpyDtoHAsync, :cuMemcpyDtoHAsync_v2; end
|
574
|
+
class << self; alias_method :cuMemcpyHtoA, :cuMemcpyHtoA_v2; end
|
575
|
+
class << self; alias_method :cuMemcpyHtoAAsync, :cuMemcpyHtoAAsync_v2; end
|
576
|
+
class << self; alias_method :cuMemcpyHtoD, :cuMemcpyHtoD_v2; end
|
577
|
+
class << self; alias_method :cuMemcpyHtoDAsync, :cuMemcpyHtoDAsync_v2; end
|
578
|
+
class << self; alias_method :cuMemFree, :cuMemFree_v2; end
|
579
|
+
class << self; alias_method :cuMemGetAddressRange, :cuMemGetAddressRange_v2; end
|
580
|
+
class << self; alias_method :cuMemGetInfo, :cuMemGetInfo_v2; end
|
581
|
+
class << self; alias_method :cuMemHostGetDevicePointer, :cuMemHostGetDevicePointer_v2; end
|
582
|
+
class << self; alias_method :cuMemsetD16, :cuMemsetD16_v2; end
|
583
|
+
class << self; alias_method :cuMemsetD2D16, :cuMemsetD2D16_v2; end
|
584
|
+
class << self; alias_method :cuMemsetD2D32, :cuMemsetD2D32_v2; end
|
585
|
+
class << self; alias_method :cuMemsetD2D8, :cuMemsetD2D8_v2; end
|
586
|
+
class << self; alias_method :cuMemsetD32, :cuMemsetD32_v2; end
|
587
|
+
class << self; alias_method :cuMemsetD8, :cuMemsetD8_v2; end
|
545
588
|
|
546
589
|
# CU Unified Addressing.
|
547
590
|
attach_function :cuPointerGetAttribute, [:pointer, CUPointerAttribute, :CUDevicePtr], :enum
|
548
591
|
|
549
592
|
# CU Peer Context Memory Access.
|
550
593
|
attach_function :cuCtxDisablePeerAccess, [:CUContext], :enum
|
551
|
-
attach_function :cuCtxEnablePeerAccess, [:CUContext], :enum
|
594
|
+
attach_function :cuCtxEnablePeerAccess, [:CUContext, :uint], :enum
|
552
595
|
attach_function :cuDeviceCanAccessPeer, [:pointer, :CUDevice, :CUDevice], :enum
|
553
596
|
|
554
597
|
# CU Module Management.
|
555
598
|
attach_function :cuModuleGetFunction, [:pointer, :CUModule, :string], :enum
|
556
|
-
attach_function :
|
599
|
+
attach_function :cuModuleGetGlobal_v2, [:pointer, :pointer, :CUModule, :string], :enum
|
557
600
|
attach_function :cuModuleGetSurfRef, [:pointer, :CUModule, :string], :enum
|
558
601
|
attach_function :cuModuleGetTexRef, [:pointer, :CUModule, :string], :enum
|
559
602
|
attach_function :cuModuleLoad, [:pointer, :string], :enum
|
@@ -561,6 +604,7 @@ module API
|
|
561
604
|
attach_function :cuModuleLoadDataEx, [:pointer, :pointer, :uint, :pointer, :pointer], :enum
|
562
605
|
attach_function :cuModuleLoadFatBinary, [:pointer, :pointer], :enum
|
563
606
|
attach_function :cuModuleUnload, [:CUModule], :enum
|
607
|
+
class << self; alias_method :cuModuleGetGlobal, :cuModuleGetGlobal_v2; end
|
564
608
|
|
565
609
|
# CU Execution Control.
|
566
610
|
attach_function :cuFuncGetAttribute, [:pointer, CUFunctionAttribute, :CUFunction], :enum
|
@@ -580,33 +624,38 @@ module API
|
|
580
624
|
|
581
625
|
# CU Stream Management.
|
582
626
|
attach_function :cuStreamCreate, [:pointer, :uint], :enum
|
583
|
-
attach_function :
|
627
|
+
attach_function :cuStreamDestroy_v2, [:CUStream], :enum
|
584
628
|
attach_function :cuStreamQuery, [:CUStream], :enum
|
585
629
|
attach_function :cuStreamSynchronize, [:CUStream], :enum
|
586
630
|
attach_function :cuStreamWaitEvent, [:CUStream, :CUEvent, :uint], :enum
|
631
|
+
class << self; alias_method :cuStreamDestroy, :cuStreamDestroy_v2; end
|
587
632
|
|
588
633
|
# CU Event Management.
|
589
634
|
attach_function :cuEventCreate, [:pointer, :uint], :enum
|
590
|
-
attach_function :
|
635
|
+
attach_function :cuEventDestroy_v2, [:CUEvent], :enum
|
591
636
|
attach_function :cuEventElapsedTime, [:pointer, :CUEvent, :CUEvent], :enum
|
592
637
|
attach_function :cuEventQuery, [:CUEvent], :enum
|
593
638
|
attach_function :cuEventRecord, [:CUEvent, :CUStream], :enum
|
594
639
|
attach_function :cuEventSynchronize, [:CUEvent], :enum
|
640
|
+
class << self; alias_method :cuEventDestroy, :cuEventDestroy_v2; end
|
595
641
|
|
596
642
|
# CU Texture Reference Management.
|
597
|
-
attach_function :
|
643
|
+
attach_function :cuTexRefGetAddress_v2, [:pointer, :CUTexRef], :enum
|
598
644
|
attach_function :cuTexRefGetAddressMode, [:pointer, :CUTexRef, :int], :enum
|
599
645
|
attach_function :cuTexRefGetArray, [:pointer, :CUTexRef], :enum
|
600
646
|
attach_function :cuTexRefGetFilterMode, [:pointer, :CUTexRef], :enum
|
601
647
|
attach_function :cuTexRefGetFlags, [:pointer, :CUTexRef], :enum
|
602
648
|
attach_function :cuTexRefGetFormat, [:pointer, :pointer, :CUTexRef], :enum
|
603
|
-
attach_function :
|
604
|
-
attach_function :
|
649
|
+
attach_function :cuTexRefSetAddress_v2, [:pointer, :CUTexRef, :CUDevicePtr, :size_t], :enum
|
650
|
+
attach_function :cuTexRefSetAddress2D_v2, [:CUTexRef, :pointer, :CUDevicePtr, :size_t], :enum
|
605
651
|
attach_function :cuTexRefSetAddressMode, [:CUTexRef, :int, CUAddressMode], :enum
|
606
652
|
attach_function :cuTexRefSetArray, [:CUTexRef, :CUArray, :uint], :enum
|
607
653
|
attach_function :cuTexRefSetFilterMode, [:CUTexRef, CUFilterMode], :enum
|
608
654
|
attach_function :cuTexRefSetFlags, [:CUTexRef, :uint], :enum
|
609
655
|
attach_function :cuTexRefSetFormat, [:CUTexRef, CUArrayFormat, :int], :enum
|
656
|
+
class << self; alias_method :cuTexRefGetAddress, :cuTexRefGetAddress_v2; end
|
657
|
+
class << self; alias_method :cuTexRefSetAddress, :cuTexRefSetAddress_v2; end
|
658
|
+
class << self; alias_method :cuTexRefSetAddress2D, :cuTexRefSetAddress2D_v2; end
|
610
659
|
# Deprecated.
|
611
660
|
attach_function :cuTexRefCreate, [:pointer], :enum
|
612
661
|
attach_function :cuTexRefDestroy, [:CUTexRef], :enum
|
data/lib/cuda/driver/function.rb
CHANGED
@@ -49,7 +49,7 @@ class CUFunction
|
|
49
49
|
size = 4
|
50
50
|
when CUDevicePtr
|
51
51
|
p = FFI::MemoryPointer.new(:CUDevicePtr)
|
52
|
-
API::write_cudeviceptr(p, x.to_api
|
52
|
+
API::write_cudeviceptr(p, x.to_api)
|
53
53
|
size = p.size
|
54
54
|
else
|
55
55
|
raise TypeError, "Invalid type of argument #{x.to_s}."
|
@@ -99,7 +99,9 @@ class CUFunction
|
|
99
99
|
# @param [Integer] nbytes The size of the arbitrary data in bytes.
|
100
100
|
# @return [CUFunction] This function.
|
101
101
|
def param_setv(offset, ptr, nbytes)
|
102
|
-
|
102
|
+
p = FFI::MemoryPointer.new(:pointer)
|
103
|
+
API::write_size_t(p, ptr.to_api.to_i) # Workaround broken p.write_pointer() on 64bit pointer.
|
104
|
+
status = API::cuParamSetv(self.to_api, offset, p, nbytes)
|
103
105
|
Pvt::handle_error(status, "Failed to set function arbitrary parameter: offset = #{offset}, size = #{nbytes}.")
|
104
106
|
self
|
105
107
|
end
|
@@ -272,7 +274,7 @@ private
|
|
272
274
|
FFI::MemoryPointer.new(:float).write_float(x)
|
273
275
|
when CUDevicePtr
|
274
276
|
ptr = FFI::MemoryPointer.new(:CUDevicePtr)
|
275
|
-
API::write_cudeviceptr(ptr, x.to_api
|
277
|
+
API::write_cudeviceptr(ptr, x.to_api)
|
276
278
|
ptr
|
277
279
|
else
|
278
280
|
raise TypeError, "Invalid type of kernel parameter #{x.to_s}."
|
data/lib/cuda/runtime/cuda.rb
CHANGED
@@ -22,8 +22,10 @@
|
|
22
22
|
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
23
|
#
|
24
24
|
|
25
|
+
require 'delegate'
|
25
26
|
require 'cuda/runtime/ffi-cuda'
|
26
27
|
require 'memory/buffer'
|
28
|
+
require 'helpers/struct'
|
27
29
|
|
28
30
|
|
29
31
|
module SGC
|
@@ -37,7 +39,7 @@ module Cuda
|
|
37
39
|
class CudaHostAllocFlags < API::Enum; end # @see API::CudaHostAllocFlags
|
38
40
|
class CudaHostRegisterFlags < API::Enum; end # @see API::CudaHostRegisterFlags
|
39
41
|
class CudaArrayFlags < API::Enum; end # @see API::CudaArrayFlags
|
40
|
-
class CudaMemoryType < API::Enum; end
|
42
|
+
class CudaMemoryType < API::Enum; end # @see API::CudaMemoryType
|
41
43
|
class CudaMemcpyKind < API::Enum; end # @see API::CudaMemcpyKind
|
42
44
|
class CudaChannelFormatKind < API::Enum; end # @see API::CudaChannelFormatKind
|
43
45
|
class CudaFunctionCache < API::Enum; end # @see API::CudaFunctionCache
|
@@ -50,16 +52,27 @@ module Cuda
|
|
50
52
|
class CudaTextureFilterMode < API::Enum; end # @see API::CudaTextureFilterMode
|
51
53
|
class CudaTextureReadMode < API::Enum; end # @see API::CudaTextureReadMode
|
52
54
|
|
53
|
-
class Dim3 < API::Dim3; end
|
54
|
-
class CudaDeviceProp < API::CudaDeviceProp; end
|
55
|
-
class CudaFunctionAttributes < API::CudaFunctionAttributes; end
|
56
|
-
class CudaChannelFormatDesc < API::CudaChannelFormatDesc; end
|
57
|
-
class CudaPitchedPtr < API::CudaPitchedPtr; end
|
58
|
-
class CudaPos < API::CudaPos; end
|
59
|
-
class CudaExtent < API::CudaExtent; end
|
60
|
-
class CudaMemcpy3DParms < API::CudaMemcpy3DParms; end
|
61
|
-
class TextureReference < API::TextureReference; end
|
62
|
-
class SurfaceReference < API::SurfaceReference; end
|
55
|
+
class Dim3 < DelegateClass(API::Dim3); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
56
|
+
class CudaDeviceProp < DelegateClass(API::CudaDeviceProp); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
57
|
+
class CudaFunctionAttributes < DelegateClass(API::CudaFunctionAttributes); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
58
|
+
class CudaChannelFormatDesc < DelegateClass(API::CudaChannelFormatDesc); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
59
|
+
class CudaPitchedPtr < DelegateClass(API::CudaPitchedPtr); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
60
|
+
class CudaPos < DelegateClass(API::CudaPos); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
61
|
+
class CudaExtent < DelegateClass(API::CudaExtent); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
62
|
+
class CudaMemcpy3DParms < DelegateClass(API::CudaMemcpy3DParms); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
63
|
+
class TextureReference < DelegateClass(API::TextureReference); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
64
|
+
class SurfaceReference < DelegateClass(API::SurfaceReference); end # See {file:lib/cuda/runtime/ffi-cuda.rb}
|
65
|
+
|
66
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(Dim3, API::Dim3)
|
67
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaDeviceProp, API::CudaDeviceProp)
|
68
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaFunctionAttributes, API::CudaFunctionAttributes)
|
69
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaChannelFormatDesc, API::CudaChannelFormatDesc)
|
70
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaPitchedPtr, API::CudaPitchedPtr)
|
71
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaPos, API::CudaPos)
|
72
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaExtent, API::CudaExtent)
|
73
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(CudaMemcpy3DParms, API::CudaMemcpy3DParms)
|
74
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(TextureReference, API::TextureReference)
|
75
|
+
SGC::Helper::Struct::Pvt::define_delegated_struct_methods(SurfaceReference, API::SurfaceReference)
|
63
76
|
|
64
77
|
end # module
|
65
78
|
end # module
|
data/lib/cuda/runtime/device.rb
CHANGED
@@ -72,7 +72,7 @@ class CudaDevice
|
|
72
72
|
|
73
73
|
|
74
74
|
# @param [Integer] devid The index of the device to query.
|
75
|
-
# @return [
|
75
|
+
# @return [CudaDeviceProp] The properties of the device _devid_.
|
76
76
|
def self.properties(devid = self.get)
|
77
77
|
prop = CudaDeviceProp.new
|
78
78
|
status = API::cudaGetDeviceProperties(prop.to_ptr, devid)
|
@@ -169,6 +169,44 @@ class CudaDevice
|
|
169
169
|
self
|
170
170
|
end
|
171
171
|
|
172
|
+
|
173
|
+
# Query whether one device can directly access the memory of another device.
#
# @param [Integer] devid The ID of the device that would access the memory of the device _peer_devid_.
#     Defaults to the value returned by +self.get+.
# @param [Integer] peer_devid The ID of the device whose memory would be accessed by the device _devid_.
# @return [Boolean] True if device _devid_ is capable of directly accessing memory from device _peer_devid_.
#
# @since CUDA 4.0
def self.can_access_peer?(devid = self.get, peer_devid)
  b = FFI::MemoryPointer.new(:int)
  status = API::cudaDeviceCanAccessPeer(b, devid, peer_devid)
  Pvt::handle_error(status, "Failed to query can access peer: devid = #{devid}, peer_devid = #{peer_devid}.")
  # The comparison already yields a Boolean; no ternary is needed.
  b.read_int == 1
end
|
184
|
+
|
185
|
+
|
186
|
+
# Grant the current device access to the memory of a peer device.
#
# @param [Integer] peer_devid The ID of the peer device.
# @param [Integer] flags Currently flags must be set to zero.
# @return [Class] This class.
#
# @since CUDA 4.0
def self.enable_peer_access(peer_devid, flags = 0)
  Pvt::handle_error(
    API::cudaDeviceEnablePeerAccess(peer_devid, flags),
    "Failed to enable peer access: peer_devid = #{peer_devid}, flags = #{flags}."
  )
  self
end
|
197
|
+
|
198
|
+
|
199
|
+
# Revoke the current device's access to the memory of a peer device.
#
# @param [Integer] peer_devid The ID of the peer device.
# @return [Class] This class.
#
# @since CUDA 4.0
def self.disable_peer_access(peer_devid)
  Pvt::handle_error(
    API::cudaDeviceDisablePeerAccess(peer_devid),
    "Failed to disable peer access: peer_devid = #{peer_devid}."
  )
  self
end
|
209
|
+
|
172
210
|
end
|
173
211
|
|
174
212
|
end # module
|
@@ -24,6 +24,7 @@
|
|
24
24
|
|
25
25
|
require 'ffi'
|
26
26
|
require 'ffi/prettystruct'
|
27
|
+
require 'ffi/typedef'
|
27
28
|
require 'helpers/interface/ienum'
|
28
29
|
require 'helpers/flags'
|
29
30
|
require 'helpers/klass'
|
@@ -230,9 +231,9 @@ module API
|
|
230
231
|
def read_long(ptr); ptr.read_long; end
|
231
232
|
def read_pointer(ptr); ptr.read_pointer; end
|
232
233
|
|
233
|
-
def write_int(ptr); ptr.write_int; end
|
234
|
-
def write_long(ptr); ptr.write_long; end
|
235
|
-
def write_pointer(ptr, value); ptr.
|
234
|
+
# Store _value_ at _ptr_ by forwarding to ptr.write_int.
def write_int(ptr, value)
  ptr.write_int(value)
end
|
235
|
+
# Store _value_ at _ptr_ by forwarding to ptr.write_long.
def write_long(ptr, value)
  ptr.write_long(value)
end
|
236
|
+
# Store the integer form of _value_ (value.to_i) at _ptr_ using ptr.write_long.
# NOTE(review): presumably a workaround for a limitation in FFI's own pointer
# writer - confirm before switching to a pointer-specific write.
def write_pointer(ptr, value)
  address = value.to_i
  ptr.write_long(address)
end
|
236
237
|
|
237
238
|
alias read_size_t read_long
|
238
239
|
alias read_enum read_int
|
@@ -81,7 +81,7 @@ class CudaFunction
|
|
81
81
|
# @return [Class] This class.
|
82
82
|
def self.configure(grid_dim, block_dim, shared_mem_size = 0, stream = 0)
|
83
83
|
s = Pvt::parse_stream(stream)
|
84
|
-
status = API::cudaConfigureCall(grid_dim, block_dim, shared_mem_size, s)
|
84
|
+
status = API::cudaConfigureCall(grid_dim.to_api, block_dim.to_api, shared_mem_size, s)
|
85
85
|
Pvt::handle_error(status, "Failed to configure kernel function launch settings.\n" +
|
86
86
|
"* #{grid_dim.x} x #{grid_dim.y} x #{grid_dim.z} grid\n" +
|
87
87
|
"* #{block_dim.x} x #{block_dim.y} x #{block_dim.z} blocks\n" +
|
@@ -105,7 +105,7 @@ class CudaFunction
|
|
105
105
|
size = 4
|
106
106
|
when SGC::Memory::MemoryPointer
|
107
107
|
p = x.ref
|
108
|
-
size = FFI::
|
108
|
+
size = FFI::TypeDefs[:pointer].size
|
109
109
|
else
|
110
110
|
raise TypeError, "Invalid type of kernel parameters #{x}."
|
111
111
|
end
|
data/lib/ffi/typedef.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
require 'ffi'
|
26
|
+
|
27
|
+
|
28
|
+
# Compatibility shim: when FFI does not already provide FFI.typedef, expose it
# as an alias of FFI.add_typedef.
module FFI

  unless defined?(self.typedef)
    class << self
      alias typedef add_typedef
    end
  end

end # module
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2011 Chung Shin Yee
|
3
|
+
#
|
4
|
+
# shinyee@speedgocomputing.com
|
5
|
+
# http://www.speedgocomputing.com
|
6
|
+
# http://github.com/xman/sgc-ruby-cuda
|
7
|
+
# http://rubyforge.org/projects/rubycuda
|
8
|
+
#
|
9
|
+
# This file is part of SGC-Ruby-CUDA.
|
10
|
+
#
|
11
|
+
# SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
13
|
+
# the Free Software Foundation, either version 3 of the License, or
|
14
|
+
# (at your option) any later version.
|
15
|
+
#
|
16
|
+
# SGC-Ruby-CUDA is distributed in the hope that it will be useful,
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
+
# GNU General Public License for more details.
|
20
|
+
#
|
21
|
+
# You should have received a copy of the GNU General Public License
|
22
|
+
# along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
|
23
|
+
#
|
24
|
+
|
25
|
+
module SGC
module Helper

module Struct

# @private
module Pvt

    # Define the methods that let _klassA_ act as a delegating wrapper around
    # a freshly created _klassD_ instance:
    #
    # * #initialize(*args) builds a _klassD_ from _args_, keeps it in @source
    #   and passes it to the superclass initializer.
    # * #to_api returns that underlying _klassD_ instance.
    #
    # The methods are defined with closures rather than an evaluated string, so
    # _klassD_ does not need a constant name (anonymous classes work too).
    #
    # @param [Class] klassA The wrapper class to receive the methods.
    # @param [Class] klassD The class instantiated and wrapped by _klassA_.
    def self.define_delegated_struct_methods(klassA, klassD)
        klassA.class_eval do
            define_method(:initialize) do |*args|
                @source = klassD.new(*args)
                # Explicit arguments are required: implicit super is not
                # supported inside define_method bodies.
                super(@source)
            end

            define_method(:to_api) do
                @source
            end
        end
    end

end

end

end # module
end # module
|
data/lib/memory/buffer.rb
CHANGED
data/lib/memory/pointer.rb
CHANGED
@@ -29,6 +29,7 @@ module SGC
|
|
29
29
|
module Memory
|
30
30
|
|
31
31
|
# A memory pointer class.
|
32
|
+
# TODO: Use #write_pointer() once FFI fixes its use of FIX2INT().
|
32
33
|
class MemoryPointer
|
33
34
|
|
34
35
|
# @param [Integer] addr Memory address _addr_ to initialize to.
|
@@ -49,8 +50,7 @@ class MemoryPointer
|
|
49
50
|
# @param [Integer] addr Memory address to set to.
|
50
51
|
# @return _addr_.
|
51
52
|
def ptr=(addr)
|
52
|
-
@p.
|
53
|
-
addr
|
53
|
+
@p.write_long(addr.to_i)
|
54
54
|
end
|
55
55
|
|
56
56
|
|
@@ -145,4 +145,28 @@ class TestCUContext < Test::Unit::TestCase
|
|
145
145
|
assert_nil(s)
|
146
146
|
end
|
147
147
|
|
148
|
+
|
149
|
+
# For every device, create a context on it and check that enabling and
# disabling peer access either succeeds (when the current device can access
# that device) or raises the expected errors (when it cannot).
def test_context_enable_disable_peer_access
  current_dev = CUContext.device
  count = CUDevice.count
  (0...count).each do |devid|
    dev = CUDevice.get(devid)
    ctx = CUContext.create(dev)
    begin
      if CUDevice.can_access_peer?(current_dev, dev)
        assert_nothing_raised do
          CUContext.enable_peer_access(ctx)
          CUContext.disable_peer_access(ctx)
        end
      else
        assert_raise(CUInvalidDeviceError) do
          CUContext.enable_peer_access(ctx)
        end
        assert_raise(CUPeerAccessNotEnabledError) do
          CUContext.disable_peer_access(ctx)
        end
      end
    ensure
      # Release the context even when an assertion above fails, so a failing
      # iteration does not leak it.
      ctx.destroy
    end
  end
end
|
171
|
+
|
148
172
|
end
|
@@ -66,4 +66,15 @@ class TestCUDevice < Test::Unit::TestCase
|
|
66
66
|
end
|
67
67
|
end
|
68
68
|
|
69
|
+
|
70
|
+
# Smoke test: query peer accessibility for every device, once with the
# default source device and once with the current context's device.
def test_device_can_access_peer
  current_dev = CUContext.device
  (0...CUDevice.count).each do |index|
    peer = CUDevice.get(index)
    CUDevice.can_access_peer?(peer)
    CUDevice.can_access_peer?(current_dev, peer)
  end
end
|
79
|
+
|
69
80
|
end
|
@@ -82,7 +82,6 @@ class TestCUMemory < Test::Unit::TestCase
|
|
82
82
|
assert_equal(b[i], e[i])
|
83
83
|
end
|
84
84
|
|
85
|
-
if false # FIXME: The memcpy is not working.
|
86
85
|
if @dev.attribute(:UNIFIED_ADDRESSING) > 0
|
87
86
|
(0...size).each do |i|
|
88
87
|
b[i] = i
|
@@ -96,7 +95,6 @@ class TestCUMemory < Test::Unit::TestCase
|
|
96
95
|
assert_equal(b[i], c[i])
|
97
96
|
end
|
98
97
|
end
|
99
|
-
end
|
100
98
|
|
101
99
|
p.free
|
102
100
|
q.free
|
@@ -122,4 +122,35 @@ class TestCudaDevice < Test::Unit::TestCase
|
|
122
122
|
assert_equal(CudaDevice, r)
|
123
123
|
end
|
124
124
|
|
125
|
+
|
126
|
+
# Smoke test: query peer accessibility for every device ID, once with the
# default source device and once with the current device ID given explicitly.
def test_device_can_access_peer
  current_devid = CudaDevice.get
  (0...CudaDevice.count).each do |peer_devid|
    CudaDevice.can_access_peer?(peer_devid)
    CudaDevice.can_access_peer?(current_devid, peer_devid)
  end
end
|
134
|
+
|
135
|
+
|
136
|
+
# For every device ID, check that enabling and disabling peer access either
# succeeds (when the default source device can access that device) or raises
# the expected errors (when it cannot).
def test_device_enable_disable_peer_access
  count = CudaDevice.count
  (0...count).each do |devid|
    if CudaDevice.can_access_peer?(devid)
      assert_nothing_raised do
        CudaDevice.enable_peer_access(devid)
        CudaDevice.disable_peer_access(devid)
      end
    else
      assert_raise(CudaInvalidDeviceError) do
        CudaDevice.enable_peer_access(devid)
      end
      assert_raise(CudaPeerAccessNotEnabledError) do
        CudaDevice.disable_peer_access(devid)
      end
    end
  end
end
|
155
|
+
|
125
156
|
end
|
data/test/rubycuda/testbase.rb
CHANGED
@@ -23,6 +23,7 @@
|
|
23
23
|
#-----------------------------------------------------------------------
|
24
24
|
|
25
25
|
require 'tempfile'
|
26
|
+
require 'rbconfig'
|
26
27
|
require 'rubycuda'
|
27
28
|
|
28
29
|
include SGC::Cuda
|
@@ -50,7 +51,7 @@ module CudaTestBase
|
|
50
51
|
end
|
51
52
|
|
52
53
|
def nvcc_build_dynamic_library(src_path, lib_path)
|
53
|
-
case
|
54
|
+
case Config::CONFIG['target_os']
|
54
55
|
when /darwin/ # Build universal binary for i386 and x86_64 platforms.
|
55
56
|
f32 = Tempfile.new("rubycuda_test32.so")
|
56
57
|
f64 = Tempfile.new("rubycuda_test64.so")
|
data/version.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
SGC_RUBY_CUDA_VERSION = "0.1.
|
1
|
+
# Version string of the SGC-Ruby-CUDA gem; frozen so it cannot be mutated in place.
SGC_RUBY_CUDA_VERSION = "0.1.1".freeze
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
version: 0.1.
|
8
|
+
- 1
|
9
|
+
version: 0.1.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Chung Shin Yee
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-05-08 00:00:00 +08:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -82,9 +82,11 @@ files:
|
|
82
82
|
- lib/cuda/runtime/thread.rb
|
83
83
|
- lib/cuda/runtime/version.rb
|
84
84
|
- lib/ffi/prettystruct.rb
|
85
|
+
- lib/ffi/typedef.rb
|
85
86
|
- lib/helpers/flags.rb
|
86
87
|
- lib/helpers/interface/ienum.rb
|
87
88
|
- lib/helpers/klass.rb
|
89
|
+
- lib/helpers/struct.rb
|
88
90
|
- lib/memory/buffer.rb
|
89
91
|
- lib/memory/interface/ibuffer.rb
|
90
92
|
- lib/memory/pointer.rb
|