sgc-ruby-cuda 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/.yardopts +2 -0
  2. data/COPYING +674 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +76 -0
  5. data/doc/devel.rdoc +77 -0
  6. data/doc/features.rdoc +55 -0
  7. data/lib/cuda/driver/context.rb +236 -0
  8. data/lib/cuda/driver/cu.rb +60 -0
  9. data/lib/cuda/driver/device.rb +155 -0
  10. data/lib/cuda/driver/deviceptr.rb +69 -0
  11. data/lib/cuda/driver/error.rb +182 -0
  12. data/lib/cuda/driver/event.rb +124 -0
  13. data/lib/cuda/driver/ffi-cu.rb +620 -0
  14. data/lib/cuda/driver/function.rb +293 -0
  15. data/lib/cuda/driver/init.rb +45 -0
  16. data/lib/cuda/driver/memory.rb +134 -0
  17. data/lib/cuda/driver/module.rb +142 -0
  18. data/lib/cuda/driver/rubycu.rb +37 -0
  19. data/lib/cuda/driver/stream.rb +128 -0
  20. data/lib/cuda/driver/version.rb +42 -0
  21. data/lib/cuda/runtime/cuda.rb +65 -0
  22. data/lib/cuda/runtime/device.rb +175 -0
  23. data/lib/cuda/runtime/error.rb +197 -0
  24. data/lib/cuda/runtime/event.rb +117 -0
  25. data/lib/cuda/runtime/ffi-cuda.rb +588 -0
  26. data/lib/cuda/runtime/function.rb +161 -0
  27. data/lib/cuda/runtime/memory.rb +110 -0
  28. data/lib/cuda/runtime/rubycuda.rb +34 -0
  29. data/lib/cuda/runtime/stream.rb +126 -0
  30. data/lib/cuda/runtime/thread.rb +81 -0
  31. data/lib/cuda/runtime/version.rb +51 -0
  32. data/lib/ffi/prettystruct.rb +32 -0
  33. data/lib/helpers/flags.rb +82 -0
  34. data/lib/helpers/interface/ienum.rb +45 -0
  35. data/lib/helpers/klass.rb +45 -0
  36. data/lib/memory/buffer.rb +125 -0
  37. data/lib/memory/interface/ibuffer.rb +63 -0
  38. data/lib/memory/pointer.rb +72 -0
  39. data/lib/rubycu.rb +1 -0
  40. data/lib/rubycuda.rb +1 -0
  41. data/test/bad.ptx +0 -0
  42. data/test/memory/test_buffer.rb +93 -0
  43. data/test/rubycu/test_cucontext.rb +148 -0
  44. data/test/rubycu/test_cudevice.rb +69 -0
  45. data/test/rubycu/test_cudeviceptr.rb +43 -0
  46. data/test/rubycu/test_cuevent.rb +81 -0
  47. data/test/rubycu/test_cufunction.rb +165 -0
  48. data/test/rubycu/test_cumemory.rb +113 -0
  49. data/test/rubycu/test_cumodule.rb +114 -0
  50. data/test/rubycu/test_custream.rb +77 -0
  51. data/test/rubycu/test_cuversion.rb +39 -0
  52. data/test/rubycu/testbase.rb +107 -0
  53. data/test/rubycuda/test_cudadevice.rb +125 -0
  54. data/test/rubycuda/test_cudaerror.rb +48 -0
  55. data/test/rubycuda/test_cudaevent.rb +78 -0
  56. data/test/rubycuda/test_cudafunction.rb +106 -0
  57. data/test/rubycuda/test_cudamemory.rb +90 -0
  58. data/test/rubycuda/test_cudastream.rb +72 -0
  59. data/test/rubycuda/test_cudathread.rb +69 -0
  60. data/test/rubycuda/test_cudaversion.rb +41 -0
  61. data/test/rubycuda/testbase.rb +67 -0
  62. data/test/vadd.cu +21 -0
  63. data/version.rb +1 -0
  64. metadata +180 -0
@@ -0,0 +1,161 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
require 'cuda/runtime/ffi-cuda'
require 'cuda/runtime/cuda'
require 'cuda/runtime/error'
require 'cuda/runtime/stream'
require 'memory/pointer'
# DL was removed from the standard library in Ruby 2.2; prefer Fiddle and
# only fall back to DL on interpreters that do not ship Fiddle.
begin
  require 'fiddle'
rescue LoadError
  require 'dl'
end
31
+
32
+
33
+ module SGC
34
+ module Cuda
35
+
36
# A handle to a CUDA kernel function, identified by _name_, together with
# class-level helpers to configure a launch, push kernel arguments and
# manage the dynamic libraries that contain the kernels.
class CudaFunction

  # Handles of the dynamic libraries opened through .load_lib_file.
  @@libs = [] # @private

  # @return [Object] The name this instance was created with.
  attr_reader :name


  # Create an instance to function _name_.
  # @param name The kernel function name; it is passed unchanged to the
  #   runtime API calls made by this instance.
  def initialize(name)
    @name = name
  end


  # @return [CudaFunctionAttributes] The attributes of this kernel function.
  def attributes
    a = CudaFunctionAttributes.new
    status = API::cudaFuncGetAttributes(a.to_ptr, @name)
    Pvt::handle_error(status, "Failed to query function attributes.")
    a
  end


  # Set the preferred cache configuration to use for next launch on this kernel function.
  # @param [CudaFunctionCache] conf The preferred cache configuration.
  def cache_config=(conf)
    status = API::cudaFuncSetCacheConfig(@name, conf)
    Pvt::handle_error(status, "Failed to set function cache config: config = #{conf}.")
  end


  # Launch this kernel function with pre-configured settings.
  # @return [CudaFunction] This kernel function instance.
  #
  # @see .configure
  def launch
    status = API::cudaLaunch(@name)
    Pvt::handle_error(status, "Failed to launch kernel function: name = #{@name}.")
    self
  end


  # Configure the settings for the next kernel launch.
  # @param [Dim3] grid_dim The 3D grid dimensions x, y, z to launch.
  # @param [Dim3] block_dim The 3D block dimensions x, y, z to launch.
  # @param [Integer] shared_mem_size Number of bytes of dynamic shared memory for each thread block.
  # @param [Integer, CudaStream] stream The stream to launch this kernel function on.
  #   Setting _stream_ to anything other than an instance of CudaStream will execute on the default stream 0.
  # @return [Class] This class.
  def self.configure(grid_dim, block_dim, shared_mem_size = 0, stream = 0)
    s = Pvt::parse_stream(stream)
    status = API::cudaConfigureCall(grid_dim, block_dim, shared_mem_size, s)
    Pvt::handle_error(status, "Failed to configure kernel function launch settings.\n" +
      "* #{grid_dim.x} x #{grid_dim.y} x #{grid_dim.z} grid\n" +
      "* #{block_dim.x} x #{block_dim.y} x #{block_dim.z} blocks\n" +
      "* shared memory size = #{shared_mem_size}")
    self
  end


  # Set the argument list of subsequent kernel function launch.
  #
  # Each argument is placed at the running offset rounded up to a multiple of
  # its own size, then the offset advances by that size.
  #
  # @param [Array] *args The list of arguments to pass to the kernel.
  #   Supported element types: Integer (written as a 4-byte int), Float
  #   (written as a 4-byte float) and SGC::Memory::MemoryPointer.
  # @return [Class] This class.
  # @raise [TypeError] If an argument is of an unsupported type.
  def self.setup(*args)
    offset = 0
    args.each do |x|
      case x
      when Integer
        # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and
        # removed in Ruby 3.2, and every former Fixnum is an Integer.
        p = FFI::MemoryPointer.new(:int).write_int(x)
        size = 4
      when Float
        p = FFI::MemoryPointer.new(:float).write_float(x)
        size = 4
      when SGC::Memory::MemoryPointer
        p = x.ref
        size = FFI::MemoryPointer.size
      else
        raise TypeError, "Invalid type of kernel parameters #{x}."
      end
      offset = align_up(offset, size)
      status = API::cudaSetupArgument(p, size, offset)
      Pvt::handle_error(status, "Failed to setup kernel argument for #{x}.")
      offset += size
    end
    self
  end


  # Load a dynamic library with _name_ from dynamic library path.
  # @param [String] name The name of the dynamic library to load.
  #   For library libcudart.so, its name is cudart.
  # @return [Class] This class.
  # @raise [NotImplementedError] Always; lookup by name is not implemented.
  def self.load_lib(name)
    raise NotImplementedError
  end


  # Load a dynamic library from the given path.
  # The opened handle is remembered so that .unload_all_libs can close it.
  # @param [String] path The path of the dynamic library to load.
  # @return [Class] This class.
  def self.load_lib_file(path)
    @@libs << dl_backend.dlopen(path)
    self
  end


  # Unload all the loaded dynamic libraries.
  # @return [Class] This class.
  def self.unload_all_libs
    @@libs.each(&:close)
    @@libs = []
    self
  end


  # Round _offset_ up to the next multiple of _alignment_.
  # The bit-mask arithmetic is only correct when _alignment_ is a power of two.
  #
  # NOTE(review): the original placed this under a bare `private`, which has no
  # effect on `def self.` methods, so it has always been publicly callable.
  # Visibility is left unchanged here to stay backward-compatible.
  #
  # @private
  def self.align_up(offset, alignment)
    (offset + alignment - 1) & ~(alignment - 1)
  end


  # Pick the dynamic-loading module: Fiddle when it is loaded, otherwise the
  # legacy DL module the original code relied on.
  # @private
  def self.dl_backend
    defined?(::Fiddle) ? ::Fiddle : ::DL
  end
  private_class_method :dl_backend

end
159
+
160
+ end # module
161
+ end # module
@@ -0,0 +1,110 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/error'
27
+ require 'memory/pointer'
28
+
29
+
30
+ module SGC
31
+ module Cuda
32
+
33
# Allocation and release of device memory through the CUDA Runtime API.
class CudaDeviceMemory

  # Allocate memory on the device.
  # @param [Integer] nbytes The number of bytes of memory to allocate.
  # @return [*SGC::Memory::MemoryPointer] A memory pointer to the allocated device memory.
  #
  # @note The returned memory pointer is enabled to call _free_ method on itself.
  def self.malloc(nbytes)
    p = SGC::Memory::MemoryPointer.new
    status = API::cudaMalloc(p.ref, nbytes)
    Pvt::handle_error(status, "Failed to allocate memory on the device: nbytes = #{nbytes}")
    # Attach a per-object #free that delegates to CudaDeviceMemory.free.
    # define_singleton_method replaces the former string instance_eval: it
    # defines the same singleton method without evaluating source text.
    p.define_singleton_method(:free) { CudaDeviceMemory.free(self) }
    p
  end


  # Free the device memory at _devptr_.
  # After the runtime call, _devptr_.ptr is reset to 0.
  # @param [*SGC::Memory::MemoryPointer] devptr The memory pointer pointing to the device memory to be freed.
  # @return [nil]
  def self.free(devptr)
    status = API::cudaFree(devptr.ptr)
    Pvt::handle_error(status, "Failed to free the device memory.")
    devptr.ptr = 0
    nil
  end

end
63
+
64
+
65
# Memory copy helpers built on API::cudaMemcpy. Every method is available
# both as a module function (CudaMemory.memcpy) and as a private instance
# method for includers.
module CudaMemory

  module_function

  # Copy _nbytes_ from the memory at _src_ptr_ to the memory at _dst_ptr_.
  # @param [#ptr] dst_ptr Destination of the memory copy.
  # @param [#ptr] src_ptr Source of the memory copy.
  # @param [Integer] nbytes The number of bytes to copy.
  # @param [Symbol] memcpy_kind The direction of the memory copy specified with one of the following:
  #   * :HOST_TO_HOST
  #   * :HOST_TO_DEVICE
  #   * :DEVICE_TO_HOST
  #   * :DEVICE_TO_DEVICE
  def memcpy(dst_ptr, src_ptr, nbytes, memcpy_kind)
    result = API::cudaMemcpy(dst_ptr.ptr, src_ptr.ptr, nbytes, memcpy_kind)
    Pvt::handle_error(result, "Failed to copy memory.")
  end

  # Host-to-host copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_htoh(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :HOST_TO_HOST)
  end

  # Host-to-device copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_htod(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :HOST_TO_DEVICE)
  end

  # Device-to-host copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_dtoh(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :DEVICE_TO_HOST)
  end

  # Device-to-device copy of _nbytes_ from _src_ptr_ to _dst_ptr_.
  def memcpy_dtod(dst_ptr, src_ptr, nbytes)
    memcpy(dst_ptr, src_ptr, nbytes, :DEVICE_TO_DEVICE)
  end

end
108
+
109
+ end # module
110
+ end # module
@@ -0,0 +1,34 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/cuda'
27
+ require 'cuda/runtime/error'
28
+ require 'cuda/runtime/version'
29
+ require 'cuda/runtime/device'
30
+ require 'cuda/runtime/thread'
31
+ require 'cuda/runtime/memory'
32
+ require 'cuda/runtime/function'
33
+ require 'cuda/runtime/stream'
34
+ require 'cuda/runtime/event'
@@ -0,0 +1,126 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/error'
27
+
28
+
29
+ module SGC
30
+ module Cuda
31
+
32
# Wrapper around a CUDA Runtime API stream. Instances are obtained through
# CudaStream.create; CudaStream.new is private.
class CudaStream

  class << self

    # Create and return a CUDA stream.
    # @return [CudaStream] A CUDA stream.
    def create
      handle = FFI::MemoryPointer.new(:CudaStream)
      rc = API::cudaStreamCreate(handle)
      Pvt::handle_error(rc, "Failed to create CUDA stream.")
      new(handle)
    end


    # Let all future operations submitted to any CUDA stream wait until _event_ complete before beginning execution.
    # @overload wait_event(event)
    # @overload wait_event(event, flags)
    # @param (see CudaStream#wait_event)
    def wait_event(event, flags = 0)
      rc = API::cudaStreamWaitEvent(nil, event.to_api, flags)
      Pvt::handle_error(rc, "Failed to make any CUDA stream's future operations to wait event: flags = #{flags}.")
      nil
    end

  end


  # @private
  def initialize(ptr)
    @pstream = ptr
  end
  private_class_method :new


  # Destroy this CUDA stream and overwrite the stored handle with 0.
  # @return [nil]
  def destroy
    rc = API::cudaStreamDestroy(to_api)
    Pvt::handle_error(rc, "Failed to destroy this CUDA stream.")
    API::write_cudastream(@pstream, 0)
    nil
  end


  # @return [Boolean] Return true if all operations in this CUDA stream have completed. Otherwise, return false.
  def query
    rc = API::cudaStreamQuery(to_api)
    return true if rc == Pvt::CUDA_SUCCESS
    return false if rc == Pvt::CUDA_ERROR_NOT_READY

    # Any other status is handed to the error handler; the raise below is
    # only reached if that handler returns normally.
    Pvt::handle_error(rc, "Failed to query stream.")
    raise CudaStandardError, "Error handling fails to catch this error."
  end


  # Block the calling CPU thread until all operations in this CUDA stream complete.
  # @return [CudaStream] This CUDA stream.
  def synchronize
    Pvt::handle_error(API::cudaStreamSynchronize(to_api))
    self
  end


  # Let all future operations submitted to this CUDA stream wait until _event_ complete before beginning execution.
  # @overload wait_event(event)
  # @overload wait_event(event, flags)
  # @param [CudaEvent] event The event to wait for.
  # @param [Integer] flags Currently _flags_ must be set to zero.
  # @return [CudaStream] This CUDA stream.
  def wait_event(event, flags = 0)
    rc = API::cudaStreamWaitEvent(to_api, event.to_api, flags)
    Pvt::handle_error(rc, "Failed to make this CUDA stream's future operations to wait event: flags = #{flags}.")
    self
  end


  # Read the raw stream handle out of the stored pointer.
  # @private
  def to_api
    API::read_cudastream(@pstream)
  end

end
112
+
113
# @private
module Pvt

  # Translate a user-supplied stream argument into the value handed to the
  # runtime API: the raw handle for a CudaStream, nil for anything else.
  def self.parse_stream(stream)
    stream.kind_of?(CudaStream) ? stream.to_api : nil
  end

end
124
+
125
+ end # module
126
+ end # module
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright (c) 2010-2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/runtime/ffi-cuda'
26
+ require 'cuda/runtime/cuda'
27
+ require 'cuda/runtime/error'
28
+
29
+
30
+ module SGC
31
+ module Cuda
32
+
33
# Class-level wrappers around the cudaThread* runtime calls.
# @deprecated
class CudaThread

  class << self

    # Call cudaThreadExit.
    # @return [Class] This class.
    def exit
      Pvt::handle_error(API::cudaThreadExit)
      self
    end


    # Read the current cache configuration via cudaThreadGetCacheConfig.
    # @return The CudaFunctionCache entry looked up from the integer read back.
    def cache_config
      out = FFI::MemoryPointer.new(:int)
      Pvt::handle_error(API::cudaThreadGetCacheConfig(out))
      CudaFunctionCache[out.read_int]
    end


    # Set the cache configuration via cudaThreadSetCacheConfig.
    # @param config The cache configuration passed through to the runtime call.
    def cache_config=(config)
      Pvt::handle_error(API::cudaThreadSetCacheConfig(config))
    end


    # Read the value of _limit_ via cudaThreadGetLimit.
    # @param limit The limit identifier passed through to the runtime call.
    # @return [Integer] The value read back from the runtime.
    def limit(limit)
      out = FFI::MemoryPointer.new(:size_t)
      Pvt::handle_error(API::cudaThreadGetLimit(out, limit))
      out.read_long
    end


    # Set a limit via cudaThreadSetLimit.
    # @param limit_value_pair A (limit, value) pair, e.g. assigned as
    #   <tt>CudaThread.limit = [limit, value]</tt>; nested arrays are flattened.
    def limit=(*limit_value_pair)
      pair = limit_value_pair.flatten
      Pvt::handle_error(API::cudaThreadSetLimit(pair[0], pair[1]))
    end


    # Call cudaThreadSynchronize.
    # @return [Class] This class.
    def synchronize
      Pvt::handle_error(API::cudaThreadSynchronize)
      self
    end

  end

end
79
+
80
+ end # module
81
+ end # module