sgc-ruby-cuda 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.yardopts +2 -0
  2. data/COPYING +674 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +76 -0
  5. data/doc/devel.rdoc +77 -0
  6. data/doc/features.rdoc +55 -0
  7. data/lib/cuda/driver/context.rb +236 -0
  8. data/lib/cuda/driver/cu.rb +60 -0
  9. data/lib/cuda/driver/device.rb +155 -0
  10. data/lib/cuda/driver/deviceptr.rb +69 -0
  11. data/lib/cuda/driver/error.rb +182 -0
  12. data/lib/cuda/driver/event.rb +124 -0
  13. data/lib/cuda/driver/ffi-cu.rb +620 -0
  14. data/lib/cuda/driver/function.rb +293 -0
  15. data/lib/cuda/driver/init.rb +45 -0
  16. data/lib/cuda/driver/memory.rb +134 -0
  17. data/lib/cuda/driver/module.rb +142 -0
  18. data/lib/cuda/driver/rubycu.rb +37 -0
  19. data/lib/cuda/driver/stream.rb +128 -0
  20. data/lib/cuda/driver/version.rb +42 -0
  21. data/lib/cuda/runtime/cuda.rb +65 -0
  22. data/lib/cuda/runtime/device.rb +175 -0
  23. data/lib/cuda/runtime/error.rb +197 -0
  24. data/lib/cuda/runtime/event.rb +117 -0
  25. data/lib/cuda/runtime/ffi-cuda.rb +588 -0
  26. data/lib/cuda/runtime/function.rb +161 -0
  27. data/lib/cuda/runtime/memory.rb +110 -0
  28. data/lib/cuda/runtime/rubycuda.rb +34 -0
  29. data/lib/cuda/runtime/stream.rb +126 -0
  30. data/lib/cuda/runtime/thread.rb +81 -0
  31. data/lib/cuda/runtime/version.rb +51 -0
  32. data/lib/ffi/prettystruct.rb +32 -0
  33. data/lib/helpers/flags.rb +82 -0
  34. data/lib/helpers/interface/ienum.rb +45 -0
  35. data/lib/helpers/klass.rb +45 -0
  36. data/lib/memory/buffer.rb +125 -0
  37. data/lib/memory/interface/ibuffer.rb +63 -0
  38. data/lib/memory/pointer.rb +72 -0
  39. data/lib/rubycu.rb +1 -0
  40. data/lib/rubycuda.rb +1 -0
  41. data/test/bad.ptx +0 -0
  42. data/test/memory/test_buffer.rb +93 -0
  43. data/test/rubycu/test_cucontext.rb +148 -0
  44. data/test/rubycu/test_cudevice.rb +69 -0
  45. data/test/rubycu/test_cudeviceptr.rb +43 -0
  46. data/test/rubycu/test_cuevent.rb +81 -0
  47. data/test/rubycu/test_cufunction.rb +165 -0
  48. data/test/rubycu/test_cumemory.rb +113 -0
  49. data/test/rubycu/test_cumodule.rb +114 -0
  50. data/test/rubycu/test_custream.rb +77 -0
  51. data/test/rubycu/test_cuversion.rb +39 -0
  52. data/test/rubycu/testbase.rb +107 -0
  53. data/test/rubycuda/test_cudadevice.rb +125 -0
  54. data/test/rubycuda/test_cudaerror.rb +48 -0
  55. data/test/rubycuda/test_cudaevent.rb +78 -0
  56. data/test/rubycuda/test_cudafunction.rb +106 -0
  57. data/test/rubycuda/test_cudamemory.rb +90 -0
  58. data/test/rubycuda/test_cudastream.rb +72 -0
  59. data/test/rubycuda/test_cudathread.rb +69 -0
  60. data/test/rubycuda/test_cudaversion.rb +41 -0
  61. data/test/rubycuda/testbase.rb +67 -0
  62. data/test/vadd.cu +21 -0
  63. data/version.rb +1 -0
  64. metadata +180 -0
@@ -0,0 +1,161 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/cuda'
27
+ require 'cuda/runtime/error'
28
+ require 'cuda/runtime/stream'
29
+ require 'memory/pointer'
30
+ require 'dl'
31
+
32
+
33
+ module SGC
34
+ module Cuda
35
+
36
class CudaFunction

  # Handles of the dynamic libraries opened through load_lib_file.
  # Declared first so that every class method below can rely on it.
  @@libs = [] # @private

  # The function name given at construction; it is passed unchanged to the
  # CUDA runtime calls made by this instance.
  attr_reader :name


  # Create an instance to function _name_.
  # @param name The identifier of the kernel function.
  def initialize(name)
    @name = name
  end


  # @return [CudaFunctionAttributes] The attributes of this kernel function.
  def attributes
    attrs = CudaFunctionAttributes.new
    status = API::cudaFuncGetAttributes(attrs.to_ptr, @name)
    Pvt::handle_error(status, "Failed to query function attributes.")
    attrs
  end


  # Set the preferred cache configuration to use for next launch on this kernel function.
  # @param [CudaFunctionCache] conf The preferred cache configuration.
  def cache_config=(conf)
    status = API::cudaFuncSetCacheConfig(@name, conf)
    Pvt::handle_error(status, "Failed to set function cache config: config = #{conf}.")
  end


  # Launch this kernel function with pre-configured settings.
  # @return [CudaFunction] This kernel function instance.
  #
  # @see .configure
  def launch
    status = API::cudaLaunch(@name)
    Pvt::handle_error(status, "Failed to launch kernel function: name = #{@name}.")
    self
  end


  # Configure the settings for the next kernel launch.
  # @param [Dim3] grid_dim The 3D grid dimensions x, y, z to launch.
  # @param [Dim3] block_dim The 3D block dimensions x, y, z to launch.
  # @param [Integer] shared_mem_size Number of bytes of dynamic shared memory for each thread block.
  # @param [Integer, CudaStream] stream The stream to launch this kernel function on.
  #     Setting _stream_ to anything other than an instance of CudaStream will execute on the default stream 0.
  # @return [Class] This class.
  def self.configure(grid_dim, block_dim, shared_mem_size = 0, stream = 0)
    s = Pvt::parse_stream(stream)
    status = API::cudaConfigureCall(grid_dim, block_dim, shared_mem_size, s)
    Pvt::handle_error(status, "Failed to configure kernel function launch settings.\n" +
      "* #{grid_dim.x} x #{grid_dim.y} x #{grid_dim.z} grid\n" +
      "* #{block_dim.x} x #{block_dim.y} x #{block_dim.z} blocks\n" +
      "* shared memory size = #{shared_mem_size}")
    self
  end


  # Set the argument list of subsequent kernel function launch.
  #
  # Each argument is placed at the current offset rounded up to a multiple of
  # its own size, then the offset advances by that size.
  #
  # @param [Array] *args The list of arguments to pass to the kernel.
  #     Supported types are Integer (passed as a 4-byte int), Float (passed as
  #     a 4-byte float) and SGC::Memory::MemoryPointer (passed as a pointer).
  # @return [Class] This class.
  # @raise [TypeError] If an argument is of an unsupported type.
  def self.setup(*args)
    offset = 0
    args.each do |x|
      case x
      when Integer
        # Integer rather than Fixnum: the Fixnum/Bignum split depends on the
        # platform word size and both are deprecated aliases in newer Rubies.
        # NOTE(review): values outside the C int range are not range-checked here.
        p = FFI::MemoryPointer.new(:int).write_int(x)
        size = 4
      when Float
        p = FFI::MemoryPointer.new(:float).write_float(x)
        size = 4
      when SGC::Memory::MemoryPointer
        p = x.ref
        size = FFI::MemoryPointer.size
      else
        raise TypeError, "Invalid type of kernel parameters #{x}."
      end
      offset = align_up(offset, size)
      status = API::cudaSetupArgument(p, size, offset)
      Pvt::handle_error(status, "Failed to setup kernel argument for #{x}.")
      offset += size
    end
    self
  end


  # Load a dynamic library with _name_ from dynamic library path.
  # @param [String] name The name of the dynamic library to load.
  #     For library libcudart.so, its name is cudart.
  # @return [Class] This class.
  # @raise [NotImplementedError] Always; this lookup is not implemented.
  def self.load_lib(name)
    raise NotImplementedError
  end


  # Load a dynamic library from the given path.
  # The opened handle is remembered so that unload_all_libs can close it.
  # @param [String] path The path of the dynamic library to load.
  # @return [Class] This class.
  def self.load_lib_file(path)
    @@libs << DL::dlopen(path)
    self
  end


  # Unload all the loaded dynamic libraries.
  # @return [Class] This class.
  def self.unload_all_libs
    @@libs.each { |handle| handle.close }
    @@libs = []
    self
  end


  # Round _offset_ up to the next multiple of _alignment_.
  # The bit mask is only valid when _alignment_ is a power of two.
  def self.align_up(offset, alignment)
    (offset + alignment - 1) & ~(alignment - 1)
  end
  # A bare `private` does not apply to `def self.` methods, so the helper is
  # hidden explicitly.
  private_class_method :align_up

end
159
+
160
+ end # module
161
+ end # module
@@ -0,0 +1,110 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/error'
27
+ require 'memory/pointer'
28
+
29
+
30
+ module SGC
31
+ module Cuda
32
+
33
class CudaDeviceMemory

  # Allocate memory on the device.
  # @param [Integer] nbytes The number of bytes of memory to allocate.
  # @return [*SGC::Memory::MemoryPointer] A memory pointer to the allocated device memory.
  #
  # @note The returned memory pointer is enabled to call _free_ method on itself.
  def self.malloc(nbytes)
    devptr = SGC::Memory::MemoryPointer.new
    status = API::cudaMalloc(devptr.ref, nbytes)
    Pvt::handle_error(status, "Failed to allocate memory on the device: nbytes = #{nbytes}")
    # Attach a per-object +free+ that delegates to CudaDeviceMemory.free.
    # A block-defined singleton method is used instead of evaluating a string;
    # inside the block +self+ is the pointer the method is invoked on.
    devptr.define_singleton_method(:free) { CudaDeviceMemory.free(self) }
    devptr
  end


  # Free the device memory at _devptr_.
  # After a successful free the pointer value of _devptr_ is reset to 0.
  # @param [*SGC::Memory::MemoryPointer] devptr The memory pointer pointing to the device memory to be freed.
  # @return [nil]
  def self.free(devptr)
    status = API::cudaFree(devptr.ptr)
    Pvt::handle_error(status, "Failed to free the device memory.")
    devptr.ptr = 0
    nil
  end

end
63
+
64
+
65
module CudaMemory

  # Every method defined below is exposed as a module function.
  module_function

  # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_.
  # @param [#ptr] dst_ptr Destination of the memory copy.
  # @param [#ptr] src_ptr Source of the memory copy.
  # @param [Integer] nbytes The number of bytes to copy.
  # @param [Symbol] memcpy_kind The direction of the memory copy specified with one of the following:
  #     * :HOST_TO_HOST
  #     * :HOST_TO_DEVICE
  #     * :DEVICE_TO_HOST
  #     * :DEVICE_TO_DEVICE
  def memcpy(dst_ptr, src_ptr, nbytes, memcpy_kind)
    result = API::cudaMemcpy(dst_ptr.ptr, src_ptr.ptr, nbytes, memcpy_kind)
    Pvt::handle_error(result, "Failed to copy memory.")
  end

  # Host-to-host copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_htoh(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :HOST_TO_HOST)
  end

  # Host-to-device copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_htod(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :HOST_TO_DEVICE)
  end

  # Device-to-host copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_dtoh(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :DEVICE_TO_HOST)
  end

  # Device-to-device copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_dtod(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :DEVICE_TO_DEVICE)
  end

end
108
+
109
+ end # module
110
+ end # module
@@ -0,0 +1,34 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/cuda'
27
+ require 'cuda/runtime/error'
28
+ require 'cuda/runtime/version'
29
+ require 'cuda/runtime/device'
30
+ require 'cuda/runtime/thread'
31
+ require 'cuda/runtime/memory'
32
+ require 'cuda/runtime/function'
33
+ require 'cuda/runtime/stream'
34
+ require 'cuda/runtime/event'
@@ -0,0 +1,126 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/error'
27
+
28
+
29
+ module SGC
30
+ module Cuda
31
+
32
class CudaStream

  # Create and return a CUDA stream.
  # @return [CudaStream] A CUDA stream.
  def self.create
    pstream = FFI::MemoryPointer.new(:CudaStream)
    status = API::cudaStreamCreate(pstream)
    Pvt::handle_error(status, "Failed to create CUDA stream.")
    new(pstream)
  end


  # Destroy this CUDA stream.
  # After a successful destroy the stored stream handle is overwritten with 0.
  # @return [nil]
  def destroy
    status = API::cudaStreamDestroy(to_api)
    Pvt::handle_error(status, "Failed to destroy this CUDA stream.")
    API::write_cudastream(@pstream, 0)
    nil
  end


  # @return [Boolean] Return true if all operations in this CUDA stream have completed. Otherwise, return false.
  # @raise [CudaStandardError] If the status is neither success nor not-ready
  #     and the error handler does not raise on its own.
  def query
    status = API::cudaStreamQuery(to_api)
    return true if status == Pvt::CUDA_SUCCESS
    return false if status == Pvt::CUDA_ERROR_NOT_READY

    Pvt::handle_error(status, "Failed to query stream.")
    # Reaching this line means the handler did not raise for this status.
    raise CudaStandardError, "Error handling fails to catch this error."
  end


  # Block the calling CPU thread until all operations in this CUDA stream complete.
  # @return [CudaStream] This CUDA stream.
  def synchronize
    status = API::cudaStreamSynchronize(to_api)
    # Pass a descriptive message, as every other method of this class does.
    Pvt::handle_error(status, "Failed to synchronize this CUDA stream.")
    self
  end


  # Let all future operations submitted to this CUDA stream wait until _event_ complete before beginning execution.
  # @overload wait_event(event)
  # @overload wait_event(event, flags)
  # @param [CudaEvent] event The event to wait for.
  # @param [Integer] flags Currently _flags_ must be set to zero.
  # @return [CudaStream] This CUDA stream.
  def wait_event(event, flags = 0)
    status = API::cudaStreamWaitEvent(to_api, event.to_api, flags)
    Pvt::handle_error(status, "Failed to make this CUDA stream's future operations to wait event: flags = #{flags}.")
    self
  end


  # Let all future operations submitted to any CUDA stream wait until _event_ complete before beginning execution.
  # The stream argument handed to the runtime is nil.
  # @overload wait_event(event)
  # @overload wait_event(event, flags)
  # @param (see CudaStream#wait_event)
  # @return [nil]
  def self.wait_event(event, flags = 0)
    status = API::cudaStreamWaitEvent(nil, event.to_api, flags)
    Pvt::handle_error(status, "Failed to make any CUDA stream's future operations to wait event: flags = #{flags}.")
    nil
  end


  # Wrap the pointer that holds the stream handle. Instances are obtained
  # through CudaStream.create; +new+ is not callable from outside.
  # @private
  def initialize(ptr)
    @pstream = ptr
  end
  private_class_method :new


  # Read the stream handle out of the wrapped pointer.
  # @private
  def to_api
    API::read_cudastream(@pstream)
  end

end
112
+
113
+ # @private
114
module Pvt

  # Translate a user-supplied stream argument into the value handed to the API.
  # @param stream A CudaStream instance, or any other object.
  # @return The result of CudaStream#to_api for a CudaStream instance;
  #     nil for anything else.
  def self.parse_stream(stream)
    stream.kind_of?(CudaStream) ? stream.to_api : nil
  end

end
124
+
125
+ end # module
126
+ end # module
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/cuda'
27
+ require 'cuda/runtime/error'
28
+
29
+
30
+ module SGC
31
+ module Cuda
32
+
33
+ # @deprecated
34
class CudaThread

  class << self

    # Call cudaThreadExit and check the returned status.
    # @return [Class] This class.
    def exit
      Pvt::handle_error(API::cudaThreadExit)
      self
    end


    # Query the cache configuration through cudaThreadGetCacheConfig.
    # @return The CudaFunctionCache entry looked up with the integer written by the API call.
    def cache_config
      out = FFI::MemoryPointer.new(:int)
      Pvt::handle_error(API::cudaThreadGetCacheConfig(out))
      CudaFunctionCache[out.read_int]
    end


    # Set the cache configuration through cudaThreadSetCacheConfig.
    # @param config The cache configuration, passed unchanged to the API call.
    def cache_config=(config)
      Pvt::handle_error(API::cudaThreadSetCacheConfig(config))
    end


    # Query the value of _limit_ through cudaThreadGetLimit.
    # @param limit The limit to query, passed unchanged to the API call.
    # @return [Integer] The value read back from the size_t-sized output buffer.
    def limit(limit)
      out = FFI::MemoryPointer.new(:size_t)
      Pvt::handle_error(API::cudaThreadGetLimit(out, limit))
      out.read_long
    end


    # Set a limit through cudaThreadSetLimit.
    # The assigned value is flattened, and its first two elements are used as
    # the limit and its new value, e.g. <tt>CudaThread.limit = [limit, value]</tt>.
    def limit=(*limit_value_pair)
      which, amount = limit_value_pair.flatten
      Pvt::handle_error(API::cudaThreadSetLimit(which, amount))
    end


    # Call cudaThreadSynchronize and check the returned status.
    # @return [Class] This class.
    def synchronize
      Pvt::handle_error(API::cudaThreadSynchronize)
      self
    end

  end

end
79
+
80
+ end # module
81
+ end # module