sgc-ruby-cuda 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.yardopts +2 -0
  2. data/COPYING +674 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +76 -0
  5. data/doc/devel.rdoc +77 -0
  6. data/doc/features.rdoc +55 -0
  7. data/lib/cuda/driver/context.rb +236 -0
  8. data/lib/cuda/driver/cu.rb +60 -0
  9. data/lib/cuda/driver/device.rb +155 -0
  10. data/lib/cuda/driver/deviceptr.rb +69 -0
  11. data/lib/cuda/driver/error.rb +182 -0
  12. data/lib/cuda/driver/event.rb +124 -0
  13. data/lib/cuda/driver/ffi-cu.rb +620 -0
  14. data/lib/cuda/driver/function.rb +293 -0
  15. data/lib/cuda/driver/init.rb +45 -0
  16. data/lib/cuda/driver/memory.rb +134 -0
  17. data/lib/cuda/driver/module.rb +142 -0
  18. data/lib/cuda/driver/rubycu.rb +37 -0
  19. data/lib/cuda/driver/stream.rb +128 -0
  20. data/lib/cuda/driver/version.rb +42 -0
  21. data/lib/cuda/runtime/cuda.rb +65 -0
  22. data/lib/cuda/runtime/device.rb +175 -0
  23. data/lib/cuda/runtime/error.rb +197 -0
  24. data/lib/cuda/runtime/event.rb +117 -0
  25. data/lib/cuda/runtime/ffi-cuda.rb +588 -0
  26. data/lib/cuda/runtime/function.rb +161 -0
  27. data/lib/cuda/runtime/memory.rb +110 -0
  28. data/lib/cuda/runtime/rubycuda.rb +34 -0
  29. data/lib/cuda/runtime/stream.rb +126 -0
  30. data/lib/cuda/runtime/thread.rb +81 -0
  31. data/lib/cuda/runtime/version.rb +51 -0
  32. data/lib/ffi/prettystruct.rb +32 -0
  33. data/lib/helpers/flags.rb +82 -0
  34. data/lib/helpers/interface/ienum.rb +45 -0
  35. data/lib/helpers/klass.rb +45 -0
  36. data/lib/memory/buffer.rb +125 -0
  37. data/lib/memory/interface/ibuffer.rb +63 -0
  38. data/lib/memory/pointer.rb +72 -0
  39. data/lib/rubycu.rb +1 -0
  40. data/lib/rubycuda.rb +1 -0
  41. data/test/bad.ptx +0 -0
  42. data/test/memory/test_buffer.rb +93 -0
  43. data/test/rubycu/test_cucontext.rb +148 -0
  44. data/test/rubycu/test_cudevice.rb +69 -0
  45. data/test/rubycu/test_cudeviceptr.rb +43 -0
  46. data/test/rubycu/test_cuevent.rb +81 -0
  47. data/test/rubycu/test_cufunction.rb +165 -0
  48. data/test/rubycu/test_cumemory.rb +113 -0
  49. data/test/rubycu/test_cumodule.rb +114 -0
  50. data/test/rubycu/test_custream.rb +77 -0
  51. data/test/rubycu/test_cuversion.rb +39 -0
  52. data/test/rubycu/testbase.rb +107 -0
  53. data/test/rubycuda/test_cudadevice.rb +125 -0
  54. data/test/rubycuda/test_cudaerror.rb +48 -0
  55. data/test/rubycuda/test_cudaevent.rb +78 -0
  56. data/test/rubycuda/test_cudafunction.rb +106 -0
  57. data/test/rubycuda/test_cudamemory.rb +90 -0
  58. data/test/rubycuda/test_cudastream.rb +72 -0
  59. data/test/rubycuda/test_cudathread.rb +69 -0
  60. data/test/rubycuda/test_cudaversion.rb +41 -0
  61. data/test/rubycuda/testbase.rb +67 -0
  62. data/test/vadd.cu +21 -0
  63. data/version.rb +1 -0
  64. metadata +180 -0
@@ -0,0 +1,124 @@
1
+ #
2
+ # Copyright (c) 2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/driver/ffi-cu'
26
+ require 'cuda/driver/cu'
27
+ require 'cuda/driver/error'
28
+ require 'cuda/driver/stream'
29
+ require 'helpers/flags'
30
+
31
+
32
+ module SGC
33
+ module CU
34
+
35
# A CUDA driver API event.
#
# An instance holds the pointer that received the native event handle from
# cuEventCreate. Instances are obtained through CUEvent.create; +new+ is private.
# Every driver call made here reports its status through Pvt::handle_error.
class CUEvent

  # Create and return an event with _flags_ (CUEventFlags).
  # @overload create
  # @overload create(flags)
  # @return [CUEvent] An event created with _flags_.
  #
  # @example Create events with flags.
  #     CUEvent.create                    #=> event
  #     CUEvent.create(:DEFAULT)          #=> event
  #     CUEvent.create(:BLOCKING_SYNC)    #=> event
  def self.create(*flags)
    # No flags given: fall back to :DEFAULT.
    flags = :DEFAULT if flags.empty?
    p = FFI::MemoryPointer.new(:CUEvent)
    f = CUEventFlags.value(flags)
    status = API::cuEventCreate(p, f)
    Pvt::handle_error(status, "Failed to create event: flags = #{flags}.")
    new(p)
  end


  # Destroy this event.
  # @return [nil]
  def destroy
    status = API::cuEventDestroy(to_api)
    Pvt::handle_error(status, "Failed to destroy event.")
    nil
  end


  # @return [Boolean] Return true if this event has been recorded. Otherwise, return false.
  def query
    status = API::cuEventQuery(to_api)
    return true if status == Pvt::CUDA_SUCCESS
    return false if status == Pvt::CUDA_ERROR_NOT_READY

    # Any other status is a genuine failure and is expected to be raised by handle_error.
    Pvt::handle_error(status, "Failed to query event.")
    # Safety net in case handle_error returns instead of raising.
    raise CUStandardError, "Error handling fails to catch this error."
  end


  # Record this event asynchronously on _stream_.
  # @param [Integer, CUStream] stream The CUDA stream to record this event on.
  #     Setting _stream_ to anything other than an instance of CUStream will record on any stream.
  # @return [CUEvent] This event.
  def record(stream = 0)
    s = Pvt::parse_stream(stream)
    status = API::cuEventRecord(to_api, s)
    Pvt::handle_error(status, "Failed to record event.")
    self
  end


  # Block the calling CPU thread until this event has been recorded.
  # @return [CUEvent] This event.
  def synchronize
    status = API::cuEventSynchronize(to_api)
    Pvt::handle_error(status, "Failed to synchronize event.")
    self
  end


  # Compute the elapsed time (ms) from _event_start_ (CUEvent) to _event_end_ (CUEvent).
  # @param [CUEvent] event_start The event corresponding to the start time.
  # @param [CUEvent] event_end The event corresponding to the end time.
  # @return [Numeric] The elapsed time in ms.
  def self.elapsed_time(event_start, event_end)
    t = FFI::MemoryPointer.new(:float)
    status = API::cuEventElapsedTime(t, event_start.to_api, event_end.to_api)
    # Check the status like every other call in this class, so that a failed
    # query is reported instead of returning whatever is in the buffer.
    Pvt::handle_error(status, "Failed to compute elapsed time between events.")
    t.read_float
  end


  # @private
  # @param ptr The pointer that holds the native event handle.
  def initialize(ptr)
    @pevent = ptr
  end
  private_class_method :new


  # @private
  # @return The native event handle read from the stored pointer.
  def to_api
    API::read_cuevent(@pevent)
  end

end
122
+
123
+ end # module
124
+ end # module
@@ -0,0 +1,620 @@
1
+ #
2
+ # Copyright (c) 2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'ffi'
26
+ require 'ffi/prettystruct'
27
+ require 'helpers/interface/ienum'
28
+ require 'helpers/flags'
29
+ require 'helpers/klass'
30
+
31
+
32
+ module SGC
33
+ module CU
34
+ module API
35
+
36
+ extend FFI::Library
37
+ ffi_lib "cuda"
38
+
39
# Base class for enum wrapper classes.
#
# It is extended with SGC::Helper::IEnum and SGC::Helper::FlagsValue. Whenever
# a class inherits from it, the subclass receives singleton methods +symbols+
# and +[]+ that forward to the constant under SGC::CU::API whose name is
# given by SGC::Helper.classname(subclass).
class Enum
  extend SGC::Helper::IEnum
  extend SGC::Helper::FlagsValue

  class << self
    # Inheritance hook: define the forwarding singleton methods on _subclass_.
    def inherited(subclass)
      # Name of the SGC::CU::API constant to delegate to.
      # NOTE(review): presumably the unqualified class name - confirm in helpers/klass.
      target = SGC::Helper.classname(subclass)
      subclass.instance_eval %{
        def symbols
          SGC::CU::API::#{target}.symbols
        end

        def [](*args)
          SGC::CU::API::#{target}[*args]
        end
      }
    end
  end
end
55
+
56
+ CUResult = enum(
57
+ :SUCCESS, 0,
58
+ :ERROR_INVALID_VALUE, 1,
59
+ :ERROR_OUT_OF_MEMORY, 2,
60
+ :ERROR_NOT_INITIALIZED, 3,
61
+ :ERROR_DEINITIALIZED, 4,
62
+ :ERROR_PROFILER_DISABLED, 5,
63
+ :ERROR_PROFILER_NOT_INITIALIZED, 6,
64
+ :ERROR_PROFILER_ALREADY_STARTED, 7,
65
+ :ERROR_PROFILER_ALREADY_STOPPED, 8,
66
+ :ERROR_NO_DEVICE, 100,
67
+ :ERROR_INVALID_DEVICE, 101,
68
+ :ERROR_INVALID_IMAGE, 200,
69
+ :ERROR_INVALID_CONTEXT, 201,
70
+ :ERROR_CONTEXT_ALREADY_CURRENT, 202, # Deprecated.
71
+ :ERROR_MAP_FAILED, 205,
72
+ :ERROR_UNMAP_FAILED, 206,
73
+ :ERROR_ARRAY_IS_MAPPED, 207,
74
+ :ERROR_ALREADY_MAPPED, 208,
75
+ :ERROR_NO_BINARY_FOR_GPU, 209,
76
+ :ERROR_ALREADY_ACQUIRED, 210,
77
+ :ERROR_NOT_MAPPED, 211,
78
+ :ERROR_NOT_MAPPED_AS_ARRAY, 212,
79
+ :ERROR_NOT_MAPPED_AS_POINTER, 213,
80
+ :ERROR_ECC_UNCORRECTABLE, 214,
81
+ :ERROR_UNSUPPORTED_LIMIT, 215,
82
+ :ERROR_CONTEXT_ALREADY_IN_USE, 216,
83
+ :ERROR_INVALID_SOURCE, 300,
84
+ :ERROR_FILE_NOT_FOUND, 301,
85
+ :ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, 302,
86
+ :ERROR_SHARED_OBJECT_INIT_FAILED, 303,
87
+ :ERROR_OPERATING_SYSTEM, 304,
88
+ :ERROR_INVALID_HANDLE, 400,
89
+ :ERROR_NOT_FOUND, 500,
90
+ :ERROR_NOT_READY, 600,
91
+ :ERROR_LAUNCH_FAILED, 700,
92
+ :ERROR_LAUNCH_OUT_OF_RESOURCES, 701,
93
+ :ERROR_LAUNCH_TIMEOUT, 702,
94
+ :ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, 703,
95
+ :ERROR_PEER_ACCESS_ALREADY_ENABLED, 704,
96
+ :ERROR_PEER_ACCESS_NOT_ENABLED, 705,
97
+ :ERROR_PRIMARY_CONTEXT_ACTIVE, 708,
98
+ :ERROR_CONTEXT_IS_DESTROYED, 709,
99
+ :ERROR_UNKNOWN, 999,
100
+ )
101
+
102
+ CUComputeMode = enum(
103
+ :DEFAULT, 0,
104
+ :EXCLUSIVE, 1,
105
+ :PROHIBITED, 2,
106
+ :EXCLUSIVE_PROCESS, 3,
107
+ )
108
+
109
# Device attribute identifiers, passed to cuDeviceGetAttribute.
# Several values carry two names: the current one and a deprecated alias,
# with the deprecated alias listed second.
CUDeviceAttribute = enum(
  :MAX_THREADS_PER_BLOCK, 1,
  :MAX_BLOCK_DIM_X, 2,
  :MAX_BLOCK_DIM_Y, 3,
  :MAX_BLOCK_DIM_Z, 4,
  :MAX_GRID_DIM_X, 5,
  :MAX_GRID_DIM_Y, 6,
  :MAX_GRID_DIM_Z, 7,
  :MAX_SHARED_MEMORY_PER_BLOCK, 8,
  :SHARED_MEMORY_PER_BLOCK, 8, # Deprecated. Use :MAX_SHARED_MEMORY_PER_BLOCK.
  :TOTAL_CONSTANT_MEMORY, 9,
  :WARP_SIZE, 10,
  :MAX_PITCH, 11,
  :MAX_REGISTERS_PER_BLOCK, 12,
  :REGISTERS_PER_BLOCK, 12, # Deprecated. Use :MAX_REGISTERS_PER_BLOCK.
  :CLOCK_RATE, 13,
  :TEXTURE_ALIGNMENT, 14,
  :GPU_OVERLAP, 15, # Deprecated. Use :ASYNC_ENGINE_COUNT.
  :MULTIPROCESSOR_COUNT, 16,
  :KERNEL_EXEC_TIMEOUT, 17,
  :INTEGRATED, 18,
  :CAN_MAP_HOST_MEMORY, 19,
  :COMPUTE_MODE, 20,
  :MAXIMUM_TEXTURE1D_WIDTH, 21,
  :MAXIMUM_TEXTURE2D_WIDTH, 22,
  :MAXIMUM_TEXTURE2D_HEIGHT, 23,
  :MAXIMUM_TEXTURE3D_WIDTH, 24,
  :MAXIMUM_TEXTURE3D_HEIGHT, 25,
  :MAXIMUM_TEXTURE3D_DEPTH, 26,
  :MAXIMUM_TEXTURE2D_LAYERED_WIDTH, 27,
  :MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, 28,
  :MAXIMUM_TEXTURE2D_LAYERED_LAYERS, 29,
  :MAXIMUM_TEXTURE2D_ARRAY_WIDTH, 27, # Deprecated. Use :MAXIMUM_TEXTURE2D_LAYERED_WIDTH.
  :MAXIMUM_TEXTURE2D_ARRAY_HEIGHT, 28, # Deprecated. Use :MAXIMUM_TEXTURE2D_LAYERED_HEIGHT.
  :MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES, 29, # Deprecated. Use :MAXIMUM_TEXTURE2D_LAYERED_LAYERS.
  :SURFACE_ALIGNMENT, 30,
  :CONCURRENT_KERNELS, 31,
  :ECC_ENABLED, 32,
  :PCI_BUS_ID, 33,
  :PCI_DEVICE_ID, 34,
  :TCC_DRIVER, 35,
  :MEMORY_CLOCK_RATE, 36,
  :GLOBAL_MEMORY_BUS_WIDTH, 37,
  :L2_CACHE_SIZE, 38,
  :MAX_THREADS_PER_MULTIPROCESSOR, 39,
  :ASYNC_ENGINE_COUNT, 40,
  :UNIFIED_ADDRESSING, 41,
  :MAXIMUM_TEXTURE1D_LAYERED_WIDTH, 42,
  :MAXIMUM_TEXTURE1D_LAYERED_LAYERS, 43,
  # Misspelled name kept so existing callers keep working. It stays after the
  # correct name, like the other deprecated aliases above.
  # NOTE(review): presumably this also leaves value-to-symbol lookups for 43
  # unchanged - confirm against the FFI version in use.
  :MAXINUM_TEXTURE1D_LAYERED_LAYERS, 43, # Deprecated. Use :MAXIMUM_TEXTURE1D_LAYERED_LAYERS.
)
159
+
160
+ CUContextFlags = enum(
161
+ :SCHED_AUTO, 0x00,
162
+ :SCHED_SPIN, 0x01,
163
+ :SCHED_YIELD, 0x02,
164
+ :SCHED_BLOCKING_SYNC, 0x04,
165
+ :BLOCKING_SYNC, 0x04, # Deprecated. Use :SCHED_BLOCKING_SYNC.
166
+ :MAP_HOST, 0x08,
167
+ :LMEM_RESIZE_TO_MAX, 0x10,
168
+ )
169
+
170
+ CULimit = enum(
171
+ :STACK_SIZE, 0x00,
172
+ :PRINTF_FIFO_SIZE, 0x01,
173
+ :MALLOC_HEAP_SIZE, 0x02,
174
+ )
175
+
176
+ CUFunctionAttribute = enum(
177
+ :MAX_THREADS_PER_BLOCK, 0,
178
+ :SHARED_SIZE_BYTES, 1,
179
+ :CONST_SIZE_BYTES, 2,
180
+ :LOCAL_SIZE_BYTES, 3,
181
+ :NUM_REGS, 4,
182
+ :PTX_VERSION, 5,
183
+ :BINARY_VERSION, 6,
184
+ )
185
+
186
+ CUFunctionCache = enum(
187
+ :PREFER_NONE, 0x00,
188
+ :PREFER_SHARED, 0x01,
189
+ :PREFER_L1, 0x02,
190
+ )
191
+
192
+ CUEventFlags = enum(
193
+ :DEFAULT, 0,
194
+ :BLOCKING_SYNC, 1,
195
+ :DISABLE_TIMING, 2,
196
+ )
197
+
198
+ CUAddressMode = enum(
199
+ :WRAP, 0,
200
+ :CLAMP, 1,
201
+ :MIRROR, 2,
202
+ :BORDER, 3,
203
+ )
204
+
205
+ CUFilterMode = enum(
206
+ :POINT, 0,
207
+ :LINEAR, 1,
208
+ )
209
+
210
+ CUTexRefFlags = enum(
211
+ :READ_AS_INTEGER, 0x01,
212
+ :NORMALIZED_COORDINATES, 0x02,
213
+ :SRGB, 0x10,
214
+ )
215
+
216
+ CUArrayFormat = enum(
217
+ :UNSIGNED_INT8, 0x01,
218
+ :UNSIGNED_INT16, 0x02,
219
+ :UNSIGNED_INT32, 0x03,
220
+ :SIGNED_INT8, 0x08,
221
+ :SIGNED_INT16, 0x09,
222
+ :SIGNED_INT32, 0x0a,
223
+ :HALF, 0x10,
224
+ :FLOAT, 0x20,
225
+ )
226
+
227
+ CUMemoryType = enum(
228
+ :HOST, 0x01,
229
+ :DEVICE, 0x02,
230
+ :ARRAY, 0x03,
231
+ :UNIFIED, 0x04,
232
+ )
233
+
234
+ CUPointerAttribute = enum(
235
+ :CONTEXT, 1,
236
+ :MEMORY_TYPE, 2,
237
+ :DEVICE_POINTER, 3,
238
+ :HOST_POINTER, 4,
239
+ )
240
+
241
+ CUJitOption = enum(
242
+ :MAX_REGISTERS, 0,
243
+ :THREADS_PER_BLOCK,
244
+ :WALL_TIME,
245
+ :INFO_LOG_BUFFER,
246
+ :INFO_LOG_BUFFER_SIZE_BYTES,
247
+ :ERROR_LOG_BUFFER,
248
+ :ERROR_LOG_BUFFER_SIZE_BYTES,
249
+ :OPTIMIZATION_LEVEL,
250
+ :TARGET_FROM_CUCONTEXT,
251
+ :TARGET,
252
+ :FALLBACK_STRATEGY,
253
+ )
254
+
255
+ CUJitFallBack = enum(
256
+ :PREFER_PTX, 0,
257
+ :PREFER_BINARY,
258
+ )
259
+
260
+ CUJitTarget = enum(
261
+ :COMPUTE_10, 0,
262
+ :COMPUTE_11,
263
+ :COMPUTE_12,
264
+ :COMPUTE_13,
265
+ :COMPUTE_20,
266
+ :COMPUTE_21,
267
+ )
268
+
269
+ FFI::typedef :int, :enum
270
+ FFI::typedef :int, :CUDevice
271
+ FFI::typedef :pointer, :CUDevicePtr
272
+ FFI::typedef :pointer, :CUContext
273
+ FFI::typedef :pointer, :CUModule
274
+ FFI::typedef :pointer, :CUFunction
275
+ FFI::typedef :pointer, :CUArray
276
+ FFI::typedef :pointer, :CUTexRef
277
+ FFI::typedef :pointer, :CUSurfRef
278
+ FFI::typedef :pointer, :CUEvent
279
+ FFI::typedef :pointer, :CUStream
280
+
281
# Read helpers: return the value of the named native type stored at _ptr_.
# _ptr_ is any object responding to the matching read method
# (an FFI pointer in this file's usage).
def read_int(ptr); ptr.read_int; end
def read_long(ptr); ptr.read_long; end
def read_pointer(ptr); ptr.read_pointer; end

# Write helpers: store _value_ at _ptr_ as the named native type and return
# whatever the underlying write method returns.
# write_int and write_long take _value_ just like write_pointer; without it
# the underlying write call has nothing to store.
def write_int(ptr, value); ptr.write_int(value); end
def write_long(ptr, value); ptr.write_long(value); end
def write_pointer(ptr, value); ptr.write_pointer(value); end
288
+
289
+ alias read_size_t read_long
290
+ alias read_enum read_int
291
+ alias read_cudevice read_int
292
+ alias read_cudeviceptr read_pointer
293
+ alias read_cucontext read_pointer
294
+ alias read_cumodule read_pointer
295
+ alias read_cufunction read_pointer
296
+ alias read_cuarray read_pointer
297
+ alias read_cutexref read_pointer
298
+ alias read_cusurfref read_pointer
299
+ alias read_cuevent read_pointer
300
+ alias read_custream read_pointer
301
+
302
+ alias write_size_t write_long
303
+ alias write_enum write_int
304
+ alias write_cudevice write_int
305
+ alias write_cudeviceptr write_pointer
306
+ alias write_cucontext write_pointer
307
+ alias write_cumodule write_pointer
308
+ alias write_cufunction write_pointer
309
+ alias write_cuarray write_pointer
310
+ alias write_cutexref write_pointer
311
+ alias write_cusurfref write_pointer
312
+ alias write_cuevent write_pointer
313
+ alias write_custream write_pointer
314
+
315
+ module_function :read_size_t
316
+ module_function :read_enum
317
+ module_function :read_cudevice
318
+ module_function :read_cudeviceptr
319
+ module_function :read_cucontext
320
+ module_function :read_cumodule
321
+ module_function :read_cufunction
322
+ module_function :read_cuarray
323
+ module_function :read_cutexref
324
+ module_function :read_cusurfref
325
+ module_function :read_cuevent
326
+ module_function :read_custream
327
+
328
+ module_function :write_size_t
329
+ module_function :write_enum
330
+ module_function :write_cudevice
331
+ module_function :write_cudeviceptr
332
+ module_function :write_cucontext
333
+ module_function :write_cumodule
334
+ module_function :write_cufunction
335
+ module_function :write_cuarray
336
+ module_function :write_cutexref
337
+ module_function :write_cusurfref
338
+ module_function :write_cuevent
339
+ module_function :write_custream
340
+
341
+
342
+ class CUDevProp < FFI::PrettyStruct
343
+ layout(
344
+ :maxThreadsPerBlock, :int,
345
+ :maxThreadsDim, [:int, 3],
346
+ :maxGridSize, [:int, 3],
347
+ :sharedMemPerBlock, :int,
348
+ :totalConstantMemory, :int,
349
+ :SIMDWidth, :int,
350
+ :memPitch, :int,
351
+ :regsPerBlock, :int,
352
+ :clockRate, :int,
353
+ :textureAlign, :int,
354
+ )
355
+ end
356
+
357
+ class CudaMemcpy2D < FFI::PrettyStruct
358
+ layout(
359
+ :srcXInBytes, :size_t,
360
+ :srcY, :size_t,
361
+ :srcMemoryType, CUMemoryType,
362
+ :srcHost, :pointer,
363
+ :srcDevice, :CUDevicePtr,
364
+ :srcArray, :CUArray,
365
+ :srcPitch, :size_t,
366
+ :dstXInBytes, :size_t,
367
+ :dstY, :size_t,
368
+ :dstMemoryType, CUMemoryType,
369
+ :dstHost, :pointer,
370
+ :dstDevice, :CUDevicePtr,
371
+ :dstArray, :CUArray,
372
+ :dstPitch, :size_t,
373
+ :WidthInBytes, :size_t,
374
+ :Height, :size_t,
375
+ )
376
+ end
377
+
378
+ class CudaMemcpy3D < FFI::PrettyStruct
379
+ layout(
380
+ :srcXInBytes, :size_t,
381
+ :srcY, :size_t,
382
+ :srcZ, :size_t,
383
+ :srcLOD, :size_t,
384
+ :srcMemoryType, CUMemoryType,
385
+ :srcHost, :pointer,
386
+ :srcDevice, :CUDevicePtr,
387
+ :srcArray, :CUArray,
388
+ :reserved0, :pointer,
389
+ :srcPitch, :size_t,
390
+ :srcHeight, :size_t,
391
+ :dstXInBytes, :size_t,
392
+ :dstY, :size_t,
393
+ :dstZ, :size_t,
394
+ :dstLOD, :size_t,
395
+ :dstMemoryType, CUMemoryType,
396
+ :dstHost, :pointer,
397
+ :dstDevice, :CUDevicePtr,
398
+ :dstArray, :CUArray,
399
+ :reserved1, :pointer,
400
+ :dstPitch, :size_t,
401
+ :dstHeight, :size_t,
402
+ :WidthInBytes, :size_t,
403
+ :Height, :size_t,
404
+ :Depth, :size_t,
405
+ )
406
+ end
407
+
408
+ class CudaMemcpy3DPeer < FFI::PrettyStruct
409
+ layout(
410
+ :srcXInBytes, :size_t,
411
+ :srcY, :size_t,
412
+ :srcZ, :size_t,
413
+ :srcLOD, :size_t,
414
+ :srcMemoryType, CUMemoryType,
415
+ :srcHost, :pointer,
416
+ :srcDevice, :CUDevicePtr,
417
+ :srcArray, :CUArray,
418
+ :srcContext, :CUContext,
419
+ :srcPitch, :size_t,
420
+ :srcHeight, :size_t,
421
+ :dstXInBytes, :size_t,
422
+ :dstY, :size_t,
423
+ :dstZ, :size_t,
424
+ :dstLOD, :size_t,
425
+ :dstMemoryType, CUMemoryType,
426
+ :dstHost, :pointer,
427
+ :dstDevice, :CUDevicePtr,
428
+ :dstArray, :CUArray,
429
+ :dstContext, :CUContext,
430
+ :dstPitch, :size_t,
431
+ :dstHeight, :size_t,
432
+ :WidthInBytes, :size_t,
433
+ :Height, :size_t,
434
+ :Depth, :size_t,
435
+ )
436
+ end
437
+
438
+ class CudaArrayDescriptor < FFI::PrettyStruct
439
+ layout(
440
+ :Width, :size_t,
441
+ :Height, :size_t,
442
+ :Format, CUArrayFormat,
443
+ :NumChannels, :uint,
444
+ )
445
+ end
446
+
447
+ class CudaArray3DDescriptor < FFI::PrettyStruct
448
+ layout(
449
+ :Width, :size_t,
450
+ :Height, :size_t,
451
+ :Depth, :size_t,
452
+ :Format, CUArrayFormat,
453
+ :NumChannels, :uint,
454
+ :Flags, :uint,
455
+ )
456
+ end
457
+
458
+ # Initialization.
459
+ attach_function :cuInit, [:uint], :enum
460
+
461
+ # CU Version Management.
462
+ attach_function :cuDriverGetVersion, [:pointer], :enum
463
+
464
+ # CU Device Management.
465
+ attach_function :cuDeviceComputeCapability, [:pointer, :pointer, :CUDevice], :enum
466
+ attach_function :cuDeviceGet, [:pointer, :int], :enum
467
+ attach_function :cuDeviceGetAttribute, [:pointer, CUDeviceAttribute, :CUDevice], :enum
468
+ attach_function :cuDeviceGetCount, [:pointer], :enum
469
+ attach_function :cuDeviceGetName, [:pointer, :int, :CUDevice], :enum
470
+ attach_function :cuDeviceGetProperties, [:pointer, :CUDevice], :enum
471
+ attach_function :cuDeviceTotalMem, [:pointer, :CUDevice], :enum
472
+
473
+ # CU Context Management.
474
+ attach_function :cuCtxCreate, [:pointer, :uint, :CUDevice], :enum
475
+ attach_function :cuCtxDestroy, [:CUContext], :enum
476
+ attach_function :cuCtxGetApiVersion, [:CUContext, :pointer], :enum
477
+ attach_function :cuCtxGetCacheConfig, [:pointer], :enum
478
+ attach_function :cuCtxGetCurrent, [:pointer], :enum
479
+ attach_function :cuCtxGetDevice, [:pointer], :enum
480
+ attach_function :cuCtxGetLimit, [:pointer, CULimit], :enum
481
+ attach_function :cuCtxPopCurrent, [:pointer], :enum
482
+ attach_function :cuCtxPushCurrent, [:CUContext], :enum
483
+ attach_function :cuCtxSetCacheConfig, [CUFunctionCache], :enum
484
+ attach_function :cuCtxSetCurrent, [:CUContext], :enum
485
+ attach_function :cuCtxSetLimit, [CULimit, :size_t], :enum
486
+ attach_function :cuCtxSynchronize, [], :enum
487
+ # Deprecated.
488
+ attach_function :cuCtxAttach, [:pointer, :uint], :enum
489
+ attach_function :cuCtxDetach, [:CUContext], :enum
490
+
491
+ # CU Memory Management.
492
+ attach_function :cuArray3DCreate, [:pointer, :pointer], :enum
493
+ attach_function :cuArray3DGetDescriptor, [:pointer, :CUArray], :enum
494
+ attach_function :cuArrayCreate, [:pointer, :pointer], :enum
495
+ attach_function :cuArrayDestroy, [:CUArray], :enum
496
+ attach_function :cuArrayGetDescriptor, [:pointer, :CUArray], :enum
497
+ attach_function :cuMemAlloc, [:pointer, :size_t], :enum
498
+ attach_function :cuMemAllocHost, [:pointer, :size_t], :enum
499
+ attach_function :cuMemAllocPitch, [:pointer, :pointer, :size_t, :size_t, :uint], :enum
500
+ attach_function :cuMemcpy, [:CUDevicePtr, :CUDevicePtr, :size_t], :enum
501
+ attach_function :cuMemcpy2D, [:pointer], :enum
502
+ attach_function :cuMemcpy2DAsync, [:pointer, :CUStream], :enum
503
+ attach_function :cuMemcpy2DUnaligned, [:pointer], :enum
504
+ attach_function :cuMemcpy3D, [:pointer], :enum
505
+ attach_function :cuMemcpy3DAsync, [:pointer, :CUStream], :enum
506
+ attach_function :cuMemcpy3DPeer, [:pointer], :enum
507
+ attach_function :cuMemcpy3DPeerAsync, [:pointer, :CUStream], :enum
508
+ attach_function :cuMemcpyAsync, [:CUDevicePtr, :CUDevicePtr, :size_t, :CUStream], :enum
509
+ attach_function :cuMemcpyAtoA, [:CUArray, :size_t, :CUArray, :size_t, :size_t], :enum
510
+ attach_function :cuMemcpyAtoD, [:CUDevicePtr, :CUArray, :size_t, :size_t], :enum
511
+ attach_function :cuMemcpyAtoH, [:pointer, :CUArray, :size_t, :size_t], :enum
512
+ attach_function :cuMemcpyAtoHAsync, [:pointer, :CUArray, :size_t, :size_t, :CUStream], :enum
513
+ attach_function :cuMemcpyDtoA, [:CUArray, :size_t, :CUDevicePtr, :size_t], :enum
514
+ attach_function :cuMemcpyDtoD, [:CUDevicePtr, :CUDevicePtr, :size_t], :enum
515
+ attach_function :cuMemcpyDtoDAsync, [:CUDevicePtr, :CUDevicePtr, :size_t, :CUStream], :enum
516
+ attach_function :cuMemcpyDtoH, [:pointer, :CUDevicePtr, :size_t], :enum
517
+ attach_function :cuMemcpyDtoHAsync, [:pointer, :CUDevicePtr, :size_t, :CUStream], :enum
518
+ attach_function :cuMemcpyHtoA, [:CUArray, :size_t, :pointer, :size_t], :enum
519
+ attach_function :cuMemcpyHtoAAsync, [:CUArray, :size_t, :pointer, :size_t, :CUStream], :enum
520
+ attach_function :cuMemcpyHtoD, [:CUDevicePtr, :pointer, :size_t], :enum
521
+ attach_function :cuMemcpyHtoDAsync, [:CUDevicePtr, :pointer, :size_t, :CUStream], :enum
522
+ attach_function :cuMemcpyPeer, [:CUDevicePtr, :CUContext, :CUDevicePtr, :CUContext, :size_t], :enum
523
+ attach_function :cuMemcpyPeerAsync, [:CUDevicePtr, :CUContext, :CUDevicePtr, :CUContext, :size_t, :CUStream], :enum
524
+ attach_function :cuMemFree, [:CUDevicePtr], :enum
525
+ attach_function :cuMemFreeHost, [:pointer], :enum
526
+ attach_function :cuMemGetAddressRange, [:pointer, :pointer, :CUDevicePtr], :enum
527
+ attach_function :cuMemGetInfo, [:pointer, :pointer], :enum
528
+ attach_function :cuMemHostAlloc, [:pointer, :size_t, :uint], :enum
529
+ attach_function :cuMemHostGetDevicePointer, [:pointer, :pointer, :uint], :enum
530
+ attach_function :cuMemHostGetFlags, [:pointer, :pointer], :enum
531
+ attach_function :cuMemHostRegister, [:pointer, :size_t, :uint], :enum
532
+ attach_function :cuMemHostUnregister, [:pointer], :enum
533
+ attach_function :cuMemsetD16, [:CUDevicePtr, :ushort, :size_t], :enum
534
+ attach_function :cuMemsetD16Async, [:CUDevicePtr, :ushort, :size_t, :CUStream], :enum
535
+ attach_function :cuMemsetD2D16, [:CUDevicePtr, :size_t, :ushort, :size_t, :size_t], :enum
536
+ attach_function :cuMemsetD2D16Async, [:CUDevicePtr, :size_t, :ushort, :size_t, :size_t, :CUStream], :enum
537
+ attach_function :cuMemsetD2D32, [:CUDevicePtr, :size_t, :uint, :size_t, :size_t], :enum
538
+ attach_function :cuMemsetD2D32Async, [:CUDevicePtr, :size_t, :uint, :size_t, :size_t, :CUStream], :enum
539
+ attach_function :cuMemsetD2D8, [:CUDevicePtr, :size_t, :uchar, :size_t, :size_t], :enum
540
+ attach_function :cuMemsetD2D8Async, [:CUDevicePtr, :size_t, :uchar, :size_t, :size_t, :CUStream], :enum
541
+ attach_function :cuMemsetD32, [:CUDevicePtr, :uint, :size_t], :enum
542
+ attach_function :cuMemsetD32Async, [:CUDevicePtr, :uint, :size_t, :CUStream], :enum
543
+ attach_function :cuMemsetD8, [:CUDevicePtr, :uchar, :size_t], :enum
544
+ attach_function :cuMemsetD8Async, [:CUDevicePtr, :uchar, :size_t, :CUStream], :enum
545
+
546
+ # CU Unified Addressing.
547
+ attach_function :cuPointerGetAttribute, [:pointer, CUPointerAttribute, :CUDevicePtr], :enum
548
+
549
+ # CU Peer Context Memory Access.
550
+ attach_function :cuCtxDisablePeerAccess, [:CUContext], :enum
551
+ attach_function :cuCtxEnablePeerAccess, [:CUContext], :enum
552
+ attach_function :cuDeviceCanAccessPeer, [:pointer, :CUDevice, :CUDevice], :enum
553
+
554
+ # CU Module Management.
555
+ attach_function :cuModuleGetFunction, [:pointer, :CUModule, :string], :enum
556
+ attach_function :cuModuleGetGlobal, [:pointer, :pointer, :CUModule, :string], :enum
557
+ attach_function :cuModuleGetSurfRef, [:pointer, :CUModule, :string], :enum
558
+ attach_function :cuModuleGetTexRef, [:pointer, :CUModule, :string], :enum
559
+ attach_function :cuModuleLoad, [:pointer, :string], :enum
560
+ attach_function :cuModuleLoadData, [:pointer, :pointer], :enum
561
+ attach_function :cuModuleLoadDataEx, [:pointer, :pointer, :uint, :pointer, :pointer], :enum
562
+ attach_function :cuModuleLoadFatBinary, [:pointer, :pointer], :enum
563
+ attach_function :cuModuleUnload, [:CUModule], :enum
564
+
565
+ # CU Execution Control.
566
+ attach_function :cuFuncGetAttribute, [:pointer, CUFunctionAttribute, :CUFunction], :enum
567
+ attach_function :cuFuncSetCacheConfig, [:CUFunction, CUFunctionCache], :enum
568
+ attach_function :cuLaunchKernel, [:CUFunction, :uint, :uint, :uint, :uint, :uint, :uint, :uint, :CUStream, :pointer, :pointer], :enum
569
+ # Deprecated.
570
+ attach_function :cuFuncSetBlockShape, [:CUFunction, :int, :int, :int], :enum
571
+ attach_function :cuFuncSetSharedSize, [:CUFunction, :uint], :enum
572
+ attach_function :cuLaunch, [:CUFunction], :enum
573
+ attach_function :cuLaunchGrid, [:CUFunction, :int, :int], :enum
574
+ attach_function :cuLaunchGridAsync, [:CUFunction, :int, :int, :CUStream], :enum
575
+ attach_function :cuParamSetf, [:CUFunction, :int, :float], :enum
576
+ attach_function :cuParamSeti, [:CUFunction, :int, :uint], :enum
577
+ attach_function :cuParamSetSize, [:CUFunction, :uint], :enum
578
+ attach_function :cuParamSetTexRef, [:CUFunction, :int, :CUTexRef], :enum
579
+ attach_function :cuParamSetv, [:CUFunction, :int, :pointer, :uint], :enum
580
+
581
+ # CU Stream Management.
582
+ attach_function :cuStreamCreate, [:pointer, :uint], :enum
583
+ attach_function :cuStreamDestroy, [:CUStream], :enum
584
+ attach_function :cuStreamQuery, [:CUStream], :enum
585
+ attach_function :cuStreamSynchronize, [:CUStream], :enum
586
+ attach_function :cuStreamWaitEvent, [:CUStream, :CUEvent, :uint], :enum
587
+
588
+ # CU Event Management.
589
+ attach_function :cuEventCreate, [:pointer, :uint], :enum
590
+ attach_function :cuEventDestroy, [:CUEvent], :enum
591
+ attach_function :cuEventElapsedTime, [:pointer, :CUEvent, :CUEvent], :enum
592
+ attach_function :cuEventQuery, [:CUEvent], :enum
593
+ attach_function :cuEventRecord, [:CUEvent, :CUStream], :enum
594
+ attach_function :cuEventSynchronize, [:CUEvent], :enum
595
+
596
+ # CU Texture Reference Management.
597
+ attach_function :cuTexRefGetAddress, [:pointer, :CUTexRef], :enum
598
+ attach_function :cuTexRefGetAddressMode, [:pointer, :CUTexRef, :int], :enum
599
+ attach_function :cuTexRefGetArray, [:pointer, :CUTexRef], :enum
600
+ attach_function :cuTexRefGetFilterMode, [:pointer, :CUTexRef], :enum
601
+ attach_function :cuTexRefGetFlags, [:pointer, :CUTexRef], :enum
602
+ attach_function :cuTexRefGetFormat, [:pointer, :pointer, :CUTexRef], :enum
603
+ attach_function :cuTexRefSetAddress, [:pointer, :CUTexRef, :CUDevicePtr, :size_t], :enum
604
+ attach_function :cuTexRefSetAddress2D, [:CUTexRef, :pointer, :CUDevicePtr, :size_t], :enum
605
+ attach_function :cuTexRefSetAddressMode, [:CUTexRef, :int, CUAddressMode], :enum
606
+ attach_function :cuTexRefSetArray, [:CUTexRef, :CUArray, :uint], :enum
607
+ attach_function :cuTexRefSetFilterMode, [:CUTexRef, CUFilterMode], :enum
608
+ attach_function :cuTexRefSetFlags, [:CUTexRef, :uint], :enum
609
+ attach_function :cuTexRefSetFormat, [:CUTexRef, CUArrayFormat, :int], :enum
610
+ # Deprecated.
611
+ attach_function :cuTexRefCreate, [:pointer], :enum
612
+ attach_function :cuTexRefDestroy, [:CUTexRef], :enum
613
+
614
+ # CU Surface Reference Management.
615
+ attach_function :cuSurfRefGetArray, [:pointer, :CUSurfRef], :enum
616
+ attach_function :cuSurfRefSetArray, [:CUSurfRef, :CUArray, :uint], :enum
617
+
618
+ end # module
619
+ end # module
620
+ end # module