sgc-ruby-cuda 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/.yardopts +2 -0
  2. data/COPYING +674 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +76 -0
  5. data/doc/devel.rdoc +77 -0
  6. data/doc/features.rdoc +55 -0
  7. data/lib/cuda/driver/context.rb +236 -0
  8. data/lib/cuda/driver/cu.rb +60 -0
  9. data/lib/cuda/driver/device.rb +155 -0
  10. data/lib/cuda/driver/deviceptr.rb +69 -0
  11. data/lib/cuda/driver/error.rb +182 -0
  12. data/lib/cuda/driver/event.rb +124 -0
  13. data/lib/cuda/driver/ffi-cu.rb +620 -0
  14. data/lib/cuda/driver/function.rb +293 -0
  15. data/lib/cuda/driver/init.rb +45 -0
  16. data/lib/cuda/driver/memory.rb +134 -0
  17. data/lib/cuda/driver/module.rb +142 -0
  18. data/lib/cuda/driver/rubycu.rb +37 -0
  19. data/lib/cuda/driver/stream.rb +128 -0
  20. data/lib/cuda/driver/version.rb +42 -0
  21. data/lib/cuda/runtime/cuda.rb +65 -0
  22. data/lib/cuda/runtime/device.rb +175 -0
  23. data/lib/cuda/runtime/error.rb +197 -0
  24. data/lib/cuda/runtime/event.rb +117 -0
  25. data/lib/cuda/runtime/ffi-cuda.rb +588 -0
  26. data/lib/cuda/runtime/function.rb +161 -0
  27. data/lib/cuda/runtime/memory.rb +110 -0
  28. data/lib/cuda/runtime/rubycuda.rb +34 -0
  29. data/lib/cuda/runtime/stream.rb +126 -0
  30. data/lib/cuda/runtime/thread.rb +81 -0
  31. data/lib/cuda/runtime/version.rb +51 -0
  32. data/lib/ffi/prettystruct.rb +32 -0
  33. data/lib/helpers/flags.rb +82 -0
  34. data/lib/helpers/interface/ienum.rb +45 -0
  35. data/lib/helpers/klass.rb +45 -0
  36. data/lib/memory/buffer.rb +125 -0
  37. data/lib/memory/interface/ibuffer.rb +63 -0
  38. data/lib/memory/pointer.rb +72 -0
  39. data/lib/rubycu.rb +1 -0
  40. data/lib/rubycuda.rb +1 -0
  41. data/test/bad.ptx +0 -0
  42. data/test/memory/test_buffer.rb +93 -0
  43. data/test/rubycu/test_cucontext.rb +148 -0
  44. data/test/rubycu/test_cudevice.rb +69 -0
  45. data/test/rubycu/test_cudeviceptr.rb +43 -0
  46. data/test/rubycu/test_cuevent.rb +81 -0
  47. data/test/rubycu/test_cufunction.rb +165 -0
  48. data/test/rubycu/test_cumemory.rb +113 -0
  49. data/test/rubycu/test_cumodule.rb +114 -0
  50. data/test/rubycu/test_custream.rb +77 -0
  51. data/test/rubycu/test_cuversion.rb +39 -0
  52. data/test/rubycu/testbase.rb +107 -0
  53. data/test/rubycuda/test_cudadevice.rb +125 -0
  54. data/test/rubycuda/test_cudaerror.rb +48 -0
  55. data/test/rubycuda/test_cudaevent.rb +78 -0
  56. data/test/rubycuda/test_cudafunction.rb +106 -0
  57. data/test/rubycuda/test_cudamemory.rb +90 -0
  58. data/test/rubycuda/test_cudastream.rb +72 -0
  59. data/test/rubycuda/test_cudathread.rb +69 -0
  60. data/test/rubycuda/test_cudaversion.rb +41 -0
  61. data/test/rubycuda/testbase.rb +67 -0
  62. data/test/vadd.cu +21 -0
  63. data/version.rb +1 -0
  64. metadata +180 -0
@@ -0,0 +1,124 @@
1
+ #
2
+ # Copyright (c) 2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'cuda/driver/ffi-cu'
26
+ require 'cuda/driver/cu'
27
+ require 'cuda/driver/error'
28
+ require 'cuda/driver/stream'
29
+ require 'helpers/flags'
30
+
31
+
32
+ module SGC
33
+ module CU
34
+
35
# Ruby wrapper around a CUDA driver event handle.
#
# Instances are created with CUEvent.create; +new+ is private. The wrapped
# value is the FFI memory that cuEventCreate filled in, and #to_api reads
# the raw handle back out of it for every driver call.
class CUEvent

  # Create and return an event with _flags_ (CUEventFlags).
  # @overload create
  # @overload create(flags)
  # @return [CUEvent] An event created with _flags_.
  #
  # @example Create events with flags.
  #     CUEvent.create                    #=> event
  #     CUEvent.create(:DEFAULT)          #=> event
  #     CUEvent.create(:BLOCKING_SYNC)    #=> event
  def self.create(*flags)
    # No flags given: fall back to the :DEFAULT flag.
    flags = :DEFAULT if flags.empty?
    p = FFI::MemoryPointer.new(:CUEvent)
    f = CUEventFlags.value(flags)
    status = API::cuEventCreate(p, f)
    Pvt::handle_error(status, "Failed to create event: flags = #{flags}.")
    new(p)
  end


  # Destroy this event.
  # @return [nil]
  def destroy
    status = API::cuEventDestroy(to_api)
    Pvt::handle_error(status, "Failed to destroy event.")
    nil
  end


  # @return [Boolean] Return true if this event has been recorded. Otherwise, return false.
  def query
    status = API::cuEventQuery(to_api)
    return true if status == Pvt::CUDA_SUCCESS
    return false if status == Pvt::CUDA_ERROR_NOT_READY

    # Any other status is a genuine failure and is expected to raise here.
    Pvt::handle_error(status, "Failed to query event.")
    # Safety net in case the error handler returned instead of raising.
    raise CUStandardError, "Error handling fails to catch this error."
  end


  # Record this event asynchronously on _stream_.
  # @param [Integer, CUStream] stream The CUDA stream to record this event on.
  #     Setting _stream_ to anything other than an instance of CUStream will record on any stream.
  # @return [CUEvent] This event.
  def record(stream = 0)
    s = Pvt::parse_stream(stream)
    status = API::cuEventRecord(to_api, s)
    Pvt::handle_error(status, "Failed to record event.")
    self
  end


  # Block the calling CPU thread until this event has been recorded.
  # @return [CUEvent] This event.
  def synchronize
    status = API::cuEventSynchronize(to_api)
    Pvt::handle_error(status, "Failed to synchronize event.")
    self
  end


  # Compute the elapsed time (ms) from _event_start_ (CUEvent) to _event_end_ (CUEvent).
  # @param [CUEvent] event_start The event corresponds to the start time.
  # @param [CUEvent] event_end The event corresponds to the end time.
  # @return [Numeric] The elapsed time in ms.
  #
  # The driver status is checked like in every other method of this class, so
  # a failed query is reported through Pvt::handle_error instead of returning
  # whatever happens to be in the result buffer.
  def self.elapsed_time(event_start, event_end)
    t = FFI::MemoryPointer.new(:float)
    status = API::cuEventElapsedTime(t, event_start.to_api, event_end.to_api)
    Pvt::handle_error(status, "Failed to compute elapsed time between events.")
    t.read_float
  end


  # @private
  # @param ptr Memory holding the event handle written by cuEventCreate.
  def initialize(ptr)
    @pevent = ptr
  end
  private_class_method :new


  # @private
  # @return The raw event handle read from the wrapped memory.
  def to_api
    API::read_cuevent(@pevent)
  end

end
122
+
123
+ end # module
124
+ end # module
@@ -0,0 +1,620 @@
1
+ #
2
+ # Copyright (c) 2011 Chung Shin Yee
3
+ #
4
+ # shinyee@speedgocomputing.com
5
+ # http://www.speedgocomputing.com
6
+ # http://github.com/xman/sgc-ruby-cuda
7
+ # http://rubyforge.org/projects/rubycuda
8
+ #
9
+ # This file is part of SGC-Ruby-CUDA.
10
+ #
11
+ # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23
+ #
24
+
25
+ require 'ffi'
26
+ require 'ffi/prettystruct'
27
+ require 'helpers/interface/ienum'
28
+ require 'helpers/flags'
29
+ require 'helpers/klass'
30
+
31
+
32
+ module SGC
33
+ module CU
34
+ module API
35
+
36
# Pull the FFI binding DSL (enum, attach_function, ...) into this module and
# point it at the library named "cuda".
extend FFI::Library
ffi_lib 'cuda'
38
+
39
# Base class for enum wrapper classes. Each subclass gets +symbols+ and +[]+
# class methods that forward to the constant of the same name defined in
# SGC::CU::API (the FFI enums in this module).
class Enum
  extend SGC::Helper::IEnum
  extend SGC::Helper::FlagsValue

  # Subclassing hook: defines the two forwarding class methods on _subclass_.
  # The target constant is resolved when the generated methods are called,
  # not when the subclass is declared.
  def self.inherited(subclass)
    target = "SGC::CU::API::#{SGC::Helper.classname(subclass)}"
    subclass.instance_eval <<-RUBY
      def symbols
        #{target}.symbols
      end

      def [](*args)
        #{target}[*args]
      end
    RUBY
  end
end
55
+
56
# Status codes returned by the driver functions bound in this module.
CUResult = enum(
  :SUCCESS,                               0,
  :ERROR_INVALID_VALUE,                   1,
  :ERROR_OUT_OF_MEMORY,                   2,
  :ERROR_NOT_INITIALIZED,                 3,
  :ERROR_DEINITIALIZED,                   4,
  :ERROR_PROFILER_DISABLED,               5,
  :ERROR_PROFILER_NOT_INITIALIZED,        6,
  :ERROR_PROFILER_ALREADY_STARTED,        7,
  :ERROR_PROFILER_ALREADY_STOPPED,        8,

  :ERROR_NO_DEVICE,                       100,
  :ERROR_INVALID_DEVICE,                  101,

  :ERROR_INVALID_IMAGE,                   200,
  :ERROR_INVALID_CONTEXT,                 201,
  :ERROR_CONTEXT_ALREADY_CURRENT,         202, # Deprecated.
  :ERROR_MAP_FAILED,                      205,
  :ERROR_UNMAP_FAILED,                    206,
  :ERROR_ARRAY_IS_MAPPED,                 207,
  :ERROR_ALREADY_MAPPED,                  208,
  :ERROR_NO_BINARY_FOR_GPU,               209,
  :ERROR_ALREADY_ACQUIRED,                210,
  :ERROR_NOT_MAPPED,                      211,
  :ERROR_NOT_MAPPED_AS_ARRAY,             212,
  :ERROR_NOT_MAPPED_AS_POINTER,           213,
  :ERROR_ECC_UNCORRECTABLE,               214,
  :ERROR_UNSUPPORTED_LIMIT,               215,
  :ERROR_CONTEXT_ALREADY_IN_USE,          216,

  :ERROR_INVALID_SOURCE,                  300,
  :ERROR_FILE_NOT_FOUND,                  301,
  :ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,  302,
  :ERROR_SHARED_OBJECT_INIT_FAILED,       303,
  :ERROR_OPERATING_SYSTEM,                304,

  :ERROR_INVALID_HANDLE,                  400,
  :ERROR_NOT_FOUND,                       500,
  :ERROR_NOT_READY,                       600,

  :ERROR_LAUNCH_FAILED,                   700,
  :ERROR_LAUNCH_OUT_OF_RESOURCES,         701,
  :ERROR_LAUNCH_TIMEOUT,                  702,
  :ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,   703,
  :ERROR_PEER_ACCESS_ALREADY_ENABLED,     704,
  :ERROR_PEER_ACCESS_NOT_ENABLED,         705,
  :ERROR_PRIMARY_CONTEXT_ACTIVE,          708,
  :ERROR_CONTEXT_IS_DESTROYED,            709,

  :ERROR_UNKNOWN,                         999
)
101
+
102
# Values of the device compute-mode setting.
CUComputeMode = enum(
  :DEFAULT,            0,
  :EXCLUSIVE,          1,
  :PROHIBITED,         2,
  :EXCLUSIVE_PROCESS,  3
)
108
+
109
# Attribute selectors accepted by cuDeviceGetAttribute.
#
# Several values are listed twice: the second symbol is a deprecated or
# legacy spelling kept so that existing callers continue to work.
CUDeviceAttribute = enum(
  :MAX_THREADS_PER_BLOCK, 1,
  :MAX_BLOCK_DIM_X, 2,
  :MAX_BLOCK_DIM_Y, 3,
  :MAX_BLOCK_DIM_Z, 4,
  :MAX_GRID_DIM_X, 5,
  :MAX_GRID_DIM_Y, 6,
  :MAX_GRID_DIM_Z, 7,
  :MAX_SHARED_MEMORY_PER_BLOCK, 8,
  :SHARED_MEMORY_PER_BLOCK, 8,    # Deprecated. Use :MAX_SHARED_MEMORY_PER_BLOCK.
  :TOTAL_CONSTANT_MEMORY, 9,
  :WARP_SIZE, 10,
  :MAX_PITCH, 11,
  :MAX_REGISTERS_PER_BLOCK, 12,
  :REGISTERS_PER_BLOCK, 12,   # Deprecated. Use :MAX_REGISTERS_PER_BLOCK.
  :CLOCK_RATE, 13,
  :TEXTURE_ALIGNMENT, 14,
  :GPU_OVERLAP, 15,   # Deprecated. Use :ASYNC_ENGINE_COUNT.
  :MULTIPROCESSOR_COUNT, 16,
  :KERNEL_EXEC_TIMEOUT, 17,
  :INTEGRATED, 18,
  :CAN_MAP_HOST_MEMORY, 19,
  :COMPUTE_MODE, 20,
  :MAXIMUM_TEXTURE1D_WIDTH, 21,
  :MAXIMUM_TEXTURE2D_WIDTH, 22,
  :MAXIMUM_TEXTURE2D_HEIGHT, 23,
  :MAXIMUM_TEXTURE3D_WIDTH, 24,
  :MAXIMUM_TEXTURE3D_HEIGHT, 25,
  :MAXIMUM_TEXTURE3D_DEPTH, 26,
  :MAXIMUM_TEXTURE2D_LAYERED_WIDTH, 27,
  :MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, 28,
  :MAXIMUM_TEXTURE2D_LAYERED_LAYERS, 29,
  :MAXIMUM_TEXTURE2D_ARRAY_WIDTH, 27,     # Deprecated. Use :MAXIMUM_TEXTURE2D_LAYERED_WIDTH.
  :MAXIMUM_TEXTURE2D_ARRAY_HEIGHT, 28,    # Deprecated. Use :MAXIMUM_TEXTURE2D_LAYERED_HEIGHT.
  :MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES, 29, # Deprecated. Use :MAXIMUM_TEXTURE2D_LAYERED_LAYERS.
  :SURFACE_ALIGNMENT, 30,
  :CONCURRENT_KERNELS, 31,
  :ECC_ENABLED, 32,
  :PCI_BUS_ID, 33,
  :PCI_DEVICE_ID, 34,
  :TCC_DRIVER, 35,
  :MEMORY_CLOCK_RATE, 36,
  :GLOBAL_MEMORY_BUS_WIDTH, 37,
  :L2_CACHE_SIZE, 38,
  :MAX_THREADS_PER_MULTIPROCESSOR, 39,
  :ASYNC_ENGINE_COUNT, 40,
  :UNIFIED_ADDRESSING, 41,
  :MAXIMUM_TEXTURE1D_LAYERED_WIDTH, 42,
  :MAXIMUM_TEXTURE1D_LAYERED_LAYERS, 43,
  # Misspelled name from the first release, kept as an alias. It is listed
  # after the correct spelling, like the deprecated aliases above.
  :MAXINUM_TEXTURE1D_LAYERED_LAYERS, 43,  # Deprecated. Use :MAXIMUM_TEXTURE1D_LAYERED_LAYERS.
)
159
+
160
# Flag bits for context creation.
CUContextFlags = enum(
  :SCHED_AUTO,           0x00,
  :SCHED_SPIN,           0x01,
  :SCHED_YIELD,          0x02,
  :SCHED_BLOCKING_SYNC,  0x04,
  :BLOCKING_SYNC,        0x04, # Deprecated. Use :SCHED_BLOCKING_SYNC.
  :MAP_HOST,             0x08,
  :LMEM_RESIZE_TO_MAX,   0x10
)

# Limit selectors used by cuCtxGetLimit / cuCtxSetLimit.
CULimit = enum(
  :STACK_SIZE,        0x00,
  :PRINTF_FIFO_SIZE,  0x01,
  :MALLOC_HEAP_SIZE,  0x02
)
175
+
176
# Attribute selectors accepted by cuFuncGetAttribute.
CUFunctionAttribute = enum(
  :MAX_THREADS_PER_BLOCK,  0,
  :SHARED_SIZE_BYTES,      1,
  :CONST_SIZE_BYTES,       2,
  :LOCAL_SIZE_BYTES,       3,
  :NUM_REGS,               4,
  :PTX_VERSION,            5,
  :BINARY_VERSION,         6
)

# Cache preferences used by cuCtxSetCacheConfig / cuFuncSetCacheConfig.
CUFunctionCache = enum(
  :PREFER_NONE,    0x00,
  :PREFER_SHARED,  0x01,
  :PREFER_L1,      0x02
)
191
+
192
# Flags for event creation.
CUEventFlags = enum(
  :DEFAULT,         0,
  :BLOCKING_SYNC,   1,
  :DISABLE_TIMING,  2
)
197
+
198
# Texture addressing modes (see cuTexRefSetAddressMode).
CUAddressMode = enum(
  :WRAP,    0,
  :CLAMP,   1,
  :MIRROR,  2,
  :BORDER,  3
)

# Texture filtering modes (see cuTexRefSetFilterMode).
CUFilterMode = enum(
  :POINT,   0,
  :LINEAR,  1
)

# Flag bits for texture references.
CUTexRefFlags = enum(
  :READ_AS_INTEGER,         0x01,
  :NORMALIZED_COORDINATES,  0x02,
  :SRGB,                    0x10
)

# Element formats for arrays (used by the array descriptors and
# cuTexRefSetFormat).
CUArrayFormat = enum(
  :UNSIGNED_INT8,   0x01,
  :UNSIGNED_INT16,  0x02,
  :UNSIGNED_INT32,  0x03,
  :SIGNED_INT8,     0x08,
  :SIGNED_INT16,    0x09,
  :SIGNED_INT32,    0x0a,
  :HALF,            0x10,
  :FLOAT,           0x20
)
226
+
227
# Memory kinds named by the src/dst memory-type fields of the memcpy structs.
CUMemoryType = enum(
  :HOST,     0x01,
  :DEVICE,   0x02,
  :ARRAY,    0x03,
  :UNIFIED,  0x04
)

# Attribute selectors accepted by cuPointerGetAttribute.
CUPointerAttribute = enum(
  :CONTEXT,         1,
  :MEMORY_TYPE,     2,
  :DEVICE_POINTER,  3,
  :HOST_POINTER,    4
)
240
+
241
# JIT compiler options. The values are consecutive starting at 0; they are
# spelled out here instead of relying on the enum's automatic numbering.
CUJitOption = enum(
  :MAX_REGISTERS,                0,
  :THREADS_PER_BLOCK,            1,
  :WALL_TIME,                    2,
  :INFO_LOG_BUFFER,              3,
  :INFO_LOG_BUFFER_SIZE_BYTES,   4,
  :ERROR_LOG_BUFFER,             5,
  :ERROR_LOG_BUFFER_SIZE_BYTES,  6,
  :OPTIMIZATION_LEVEL,           7,
  :TARGET_FROM_CUCONTEXT,        8,
  :TARGET,                       9,
  :FALLBACK_STRATEGY,            10
)

# JIT fallback strategies.
CUJitFallBack = enum(
  :PREFER_PTX,     0,
  :PREFER_BINARY,  1
)

# JIT compilation targets.
CUJitTarget = enum(
  :COMPUTE_10,  0,
  :COMPUTE_11,  1,
  :COMPUTE_12,  2,
  :COMPUTE_13,  3,
  :COMPUTE_20,  4,
  :COMPUTE_21,  5
)
268
+
269
# Type aliases used in the signatures below. Status codes and device
# ordinals are C ints.
FFI.typedef(:int, :enum)
FFI.typedef(:int, :CUDevice)

# All remaining CUDA handle types are registered as pointers.
[
  :CUDevicePtr,
  :CUContext,
  :CUModule,
  :CUFunction,
  :CUArray,
  :CUTexRef,
  :CUSurfRef,
  :CUEvent,
  :CUStream,
].each do |handle_type|
  FFI.typedef(:pointer, handle_type)
end
280
+
281
# Primitive accessors that the typed read_*/write_* aliases are built from.
# Each one simply forwards to the method of the same name on _ptr_.

# @param ptr Pointer-like object to read from.
# @return The int stored at _ptr_.
def read_int(ptr)
  ptr.read_int
end

# @param ptr Pointer-like object to read from.
# @return The long stored at _ptr_.
def read_long(ptr)
  ptr.read_long
end

# @param ptr Pointer-like object to read from.
# @return The pointer value stored at _ptr_.
def read_pointer(ptr)
  ptr.read_pointer
end

# Store _value_ as an int at _ptr_.
# The previous definition took no value and called +ptr.write_int+ without
# an argument, so it could never succeed.
# @param ptr Pointer-like object to write to.
# @param value The int to store.
def write_int(ptr, value)
  ptr.write_int(value)
end

# Store _value_ as a long at _ptr_ (same fix as write_int).
# @param ptr Pointer-like object to write to.
# @param value The long to store.
def write_long(ptr, value)
  ptr.write_long(value)
end

# Store the pointer _value_ at _ptr_.
# @param ptr Pointer-like object to write to.
# @param value The pointer to store.
def write_pointer(ptr, value)
  ptr.write_pointer(value)
end
288
+
289
# Typed reader names, one per typedef above, mapped onto the primitive readers.
# NOTE(review): size_t is read as a C long here; that assumes long and
# size_t have the same width on the target platform - confirm.
alias read_size_t       read_long
alias read_enum         read_int
alias read_cudevice     read_int
alias read_cudeviceptr  read_pointer
alias read_cucontext    read_pointer
alias read_cumodule     read_pointer
alias read_cufunction   read_pointer
alias read_cuarray      read_pointer
alias read_cutexref     read_pointer
alias read_cusurfref    read_pointer
alias read_cuevent      read_pointer
alias read_custream     read_pointer

# Typed writer names, mirroring the readers.
alias write_size_t       write_long
alias write_enum         write_int
alias write_cudevice     write_int
alias write_cudeviceptr  write_pointer
alias write_cucontext    write_pointer
alias write_cumodule     write_pointer
alias write_cufunction   write_pointer
alias write_cuarray      write_pointer
alias write_cutexref     write_pointer
alias write_cusurfref    write_pointer
alias write_cuevent      write_pointer
alias write_custream     write_pointer

# Expose the typed names as module-level functions (API::read_cuevent, ...).
module_function :read_size_t,
                :read_enum,
                :read_cudevice,
                :read_cudeviceptr,
                :read_cucontext,
                :read_cumodule,
                :read_cufunction,
                :read_cuarray,
                :read_cutexref,
                :read_cusurfref,
                :read_cuevent,
                :read_custream

module_function :write_size_t,
                :write_enum,
                :write_cudevice,
                :write_cudeviceptr,
                :write_cucontext,
                :write_cumodule,
                :write_cufunction,
                :write_cuarray,
                :write_cutexref,
                :write_cusurfref,
                :write_cuevent,
                :write_custream
340
+
341
+
342
# Struct of device properties; every member is a C int (two of them
# 3-element int arrays). Presumably the buffer filled by
# cuDeviceGetProperties - confirm against the caller.
class CUDevProp < FFI::PrettyStruct
  layout :maxThreadsPerBlock,   :int,
         :maxThreadsDim,        [:int, 3],
         :maxGridSize,          [:int, 3],
         :sharedMemPerBlock,    :int,
         :totalConstantMemory,  :int,
         :SIMDWidth,            :int,
         :memPitch,             :int,
         :regsPerBlock,         :int,
         :clockRate,            :int,
         :textureAlign,         :int
end
356
+
357
# Parameter struct describing a 2D copy: a source part, a destination part
# and the extent of the copied region.
class CudaMemcpy2D < FFI::PrettyStruct
  layout :srcXInBytes,    :size_t,
         :srcY,           :size_t,
         :srcMemoryType,  CUMemoryType,
         :srcHost,        :pointer,
         :srcDevice,      :CUDevicePtr,
         :srcArray,       :CUArray,
         :srcPitch,       :size_t,

         :dstXInBytes,    :size_t,
         :dstY,           :size_t,
         :dstMemoryType,  CUMemoryType,
         :dstHost,        :pointer,
         :dstDevice,      :CUDevicePtr,
         :dstArray,       :CUArray,
         :dstPitch,       :size_t,

         :WidthInBytes,   :size_t,
         :Height,         :size_t
end
377
+
378
# Parameter struct describing a 3D copy: a source part, a destination part
# (each with one reserved pointer slot) and the extent of the copied region.
class CudaMemcpy3D < FFI::PrettyStruct
  layout :srcXInBytes,    :size_t,
         :srcY,           :size_t,
         :srcZ,           :size_t,
         :srcLOD,         :size_t,
         :srcMemoryType,  CUMemoryType,
         :srcHost,        :pointer,
         :srcDevice,      :CUDevicePtr,
         :srcArray,       :CUArray,
         :reserved0,      :pointer,
         :srcPitch,       :size_t,
         :srcHeight,      :size_t,

         :dstXInBytes,    :size_t,
         :dstY,           :size_t,
         :dstZ,           :size_t,
         :dstLOD,         :size_t,
         :dstMemoryType,  CUMemoryType,
         :dstHost,        :pointer,
         :dstDevice,      :CUDevicePtr,
         :dstArray,       :CUArray,
         :reserved1,      :pointer,
         :dstPitch,       :size_t,
         :dstHeight,      :size_t,

         :WidthInBytes,   :size_t,
         :Height,         :size_t,
         :Depth,          :size_t
end
407
+
408
# Parameter struct describing a 3D copy between contexts. Same shape as
# CudaMemcpy3D, except that each side carries a context handle where
# CudaMemcpy3D has a reserved pointer.
class CudaMemcpy3DPeer < FFI::PrettyStruct
  layout :srcXInBytes,    :size_t,
         :srcY,           :size_t,
         :srcZ,           :size_t,
         :srcLOD,         :size_t,
         :srcMemoryType,  CUMemoryType,
         :srcHost,        :pointer,
         :srcDevice,      :CUDevicePtr,
         :srcArray,       :CUArray,
         :srcContext,     :CUContext,
         :srcPitch,       :size_t,
         :srcHeight,      :size_t,

         :dstXInBytes,    :size_t,
         :dstY,           :size_t,
         :dstZ,           :size_t,
         :dstLOD,         :size_t,
         :dstMemoryType,  CUMemoryType,
         :dstHost,        :pointer,
         :dstDevice,      :CUDevicePtr,
         :dstArray,       :CUArray,
         :dstContext,     :CUContext,
         :dstPitch,       :size_t,
         :dstHeight,      :size_t,

         :WidthInBytes,   :size_t,
         :Height,         :size_t,
         :Depth,          :size_t
end
437
+
438
# Descriptor of a 1D/2D array: extent, element format and channel count.
class CudaArrayDescriptor < FFI::PrettyStruct
  layout :Width,        :size_t,
         :Height,       :size_t,
         :Format,       CUArrayFormat,
         :NumChannels,  :uint
end
446
+
447
# Descriptor of a 3D array: extent, element format, channel count and flags.
class CudaArray3DDescriptor < FFI::PrettyStruct
  layout :Width,        :size_t,
         :Height,       :size_t,
         :Depth,        :size_t,
         :Format,       CUArrayFormat,
         :NumChannels,  :uint,
         :Flags,        :uint
end
457
+
458
# Initialization, CU Version Management and CU Device Management.
# Each row is [driver function, argument types]; every function returns a
# status code (:enum).
[
  # Initialization.
  [:cuInit,                     [:uint]],

  # CU Version Management.
  [:cuDriverGetVersion,         [:pointer]],

  # CU Device Management.
  [:cuDeviceComputeCapability,  [:pointer, :pointer, :CUDevice]],
  [:cuDeviceGet,                [:pointer, :int]],
  [:cuDeviceGetAttribute,       [:pointer, CUDeviceAttribute, :CUDevice]],
  [:cuDeviceGetCount,           [:pointer]],
  [:cuDeviceGetName,            [:pointer, :int, :CUDevice]],
  [:cuDeviceGetProperties,      [:pointer, :CUDevice]],
  [:cuDeviceTotalMem,           [:pointer, :CUDevice]],
].each do |function_name, argument_types|
  attach_function function_name, argument_types, :enum
end
472
+
473
# CU Context Management.
# Each row is [driver function, argument types]; every function returns a
# status code (:enum).
[
  [:cuCtxCreate,          [:pointer, :uint, :CUDevice]],
  [:cuCtxDestroy,         [:CUContext]],
  [:cuCtxGetApiVersion,   [:CUContext, :pointer]],
  [:cuCtxGetCacheConfig,  [:pointer]],
  [:cuCtxGetCurrent,      [:pointer]],
  [:cuCtxGetDevice,       [:pointer]],
  [:cuCtxGetLimit,        [:pointer, CULimit]],
  [:cuCtxPopCurrent,      [:pointer]],
  [:cuCtxPushCurrent,     [:CUContext]],
  [:cuCtxSetCacheConfig,  [CUFunctionCache]],
  [:cuCtxSetCurrent,      [:CUContext]],
  [:cuCtxSetLimit,        [CULimit, :size_t]],
  [:cuCtxSynchronize,     []],
  # Deprecated.
  [:cuCtxAttach,          [:pointer, :uint]],
  [:cuCtxDetach,          [:CUContext]],
].each do |function_name, argument_types|
  attach_function function_name, argument_types, :enum
end
490
+
491
# CU Memory Management.
# Each row is [driver function, argument types]; every function returns a
# status code (:enum).
# NOTE(review): newer cuda.h headers redirect several of these names (for
# example cuMemAlloc and the cuMemcpy*/cuMemset* family) to *_v2 symbols via
# macros, while the names bound here are the literal ones - confirm these
# are the intended entry points for the supported driver versions.
[
  [:cuArray3DCreate,            [:pointer, :pointer]],
  [:cuArray3DGetDescriptor,     [:pointer, :CUArray]],
  [:cuArrayCreate,              [:pointer, :pointer]],
  [:cuArrayDestroy,             [:CUArray]],
  [:cuArrayGetDescriptor,       [:pointer, :CUArray]],
  [:cuMemAlloc,                 [:pointer, :size_t]],
  [:cuMemAllocHost,             [:pointer, :size_t]],
  [:cuMemAllocPitch,            [:pointer, :pointer, :size_t, :size_t, :uint]],
  [:cuMemcpy,                   [:CUDevicePtr, :CUDevicePtr, :size_t]],
  [:cuMemcpy2D,                 [:pointer]],
  [:cuMemcpy2DAsync,            [:pointer, :CUStream]],
  [:cuMemcpy2DUnaligned,        [:pointer]],
  [:cuMemcpy3D,                 [:pointer]],
  [:cuMemcpy3DAsync,            [:pointer, :CUStream]],
  [:cuMemcpy3DPeer,             [:pointer]],
  [:cuMemcpy3DPeerAsync,        [:pointer, :CUStream]],
  [:cuMemcpyAsync,              [:CUDevicePtr, :CUDevicePtr, :size_t, :CUStream]],
  [:cuMemcpyAtoA,               [:CUArray, :size_t, :CUArray, :size_t, :size_t]],
  [:cuMemcpyAtoD,               [:CUDevicePtr, :CUArray, :size_t, :size_t]],
  [:cuMemcpyAtoH,               [:pointer, :CUArray, :size_t, :size_t]],
  [:cuMemcpyAtoHAsync,          [:pointer, :CUArray, :size_t, :size_t, :CUStream]],
  [:cuMemcpyDtoA,               [:CUArray, :size_t, :CUDevicePtr, :size_t]],
  [:cuMemcpyDtoD,               [:CUDevicePtr, :CUDevicePtr, :size_t]],
  [:cuMemcpyDtoDAsync,          [:CUDevicePtr, :CUDevicePtr, :size_t, :CUStream]],
  [:cuMemcpyDtoH,               [:pointer, :CUDevicePtr, :size_t]],
  [:cuMemcpyDtoHAsync,          [:pointer, :CUDevicePtr, :size_t, :CUStream]],
  [:cuMemcpyHtoA,               [:CUArray, :size_t, :pointer, :size_t]],
  [:cuMemcpyHtoAAsync,          [:CUArray, :size_t, :pointer, :size_t, :CUStream]],
  [:cuMemcpyHtoD,               [:CUDevicePtr, :pointer, :size_t]],
  [:cuMemcpyHtoDAsync,          [:CUDevicePtr, :pointer, :size_t, :CUStream]],
  [:cuMemcpyPeer,               [:CUDevicePtr, :CUContext, :CUDevicePtr, :CUContext, :size_t]],
  [:cuMemcpyPeerAsync,          [:CUDevicePtr, :CUContext, :CUDevicePtr, :CUContext, :size_t, :CUStream]],
  [:cuMemFree,                  [:CUDevicePtr]],
  [:cuMemFreeHost,              [:pointer]],
  [:cuMemGetAddressRange,       [:pointer, :pointer, :CUDevicePtr]],
  [:cuMemGetInfo,               [:pointer, :pointer]],
  [:cuMemHostAlloc,             [:pointer, :size_t, :uint]],
  [:cuMemHostGetDevicePointer,  [:pointer, :pointer, :uint]],
  [:cuMemHostGetFlags,          [:pointer, :pointer]],
  [:cuMemHostRegister,          [:pointer, :size_t, :uint]],
  [:cuMemHostUnregister,        [:pointer]],
  [:cuMemsetD16,                [:CUDevicePtr, :ushort, :size_t]],
  [:cuMemsetD16Async,           [:CUDevicePtr, :ushort, :size_t, :CUStream]],
  [:cuMemsetD2D16,              [:CUDevicePtr, :size_t, :ushort, :size_t, :size_t]],
  [:cuMemsetD2D16Async,         [:CUDevicePtr, :size_t, :ushort, :size_t, :size_t, :CUStream]],
  [:cuMemsetD2D32,              [:CUDevicePtr, :size_t, :uint, :size_t, :size_t]],
  [:cuMemsetD2D32Async,         [:CUDevicePtr, :size_t, :uint, :size_t, :size_t, :CUStream]],
  [:cuMemsetD2D8,               [:CUDevicePtr, :size_t, :uchar, :size_t, :size_t]],
  [:cuMemsetD2D8Async,          [:CUDevicePtr, :size_t, :uchar, :size_t, :size_t, :CUStream]],
  [:cuMemsetD32,                [:CUDevicePtr, :uint, :size_t]],
  [:cuMemsetD32Async,           [:CUDevicePtr, :uint, :size_t, :CUStream]],
  [:cuMemsetD8,                 [:CUDevicePtr, :uchar, :size_t]],
  [:cuMemsetD8Async,            [:CUDevicePtr, :uchar, :size_t, :CUStream]],

  # CU Unified Addressing.
  [:cuPointerGetAttribute,      [:pointer, CUPointerAttribute, :CUDevicePtr]],
].each do |function_name, argument_types|
  attach_function function_name, argument_types, :enum
end
548
+
549
# CU Peer Context Memory Access.
attach_function :cuCtxDisablePeerAccess, [:CUContext], :enum

# The driver function is cuCtxEnablePeerAccess(CUcontext peerContext,
# unsigned int Flags). It used to be bound with the context argument only,
# which left Flags undefined at the call. The full signature is bound under
# a separate Ruby name, and the original name is kept as a wrapper so that
# existing one-argument callers keep working.
attach_function :cuCtxEnablePeerAccessWithFlags, :cuCtxEnablePeerAccess, [:CUContext, :uint], :enum

# Enable access from the current context to _peer_context_.
# @param peer_context The raw handle of the peer context.
# @param [Integer] flags Passed through as the driver's Flags argument;
#     defaults to 0.
# @return The driver status code.
def self.cuCtxEnablePeerAccess(peer_context, flags = 0)
  cuCtxEnablePeerAccessWithFlags(peer_context, flags)
end

attach_function :cuDeviceCanAccessPeer, [:pointer, :CUDevice, :CUDevice], :enum
553
+
554
# CU Module Management and CU Execution Control.
# Each row is [driver function, argument types]; every function returns a
# status code (:enum).
[
  # CU Module Management.
  [:cuModuleGetFunction,    [:pointer, :CUModule, :string]],
  [:cuModuleGetGlobal,      [:pointer, :pointer, :CUModule, :string]],
  [:cuModuleGetSurfRef,     [:pointer, :CUModule, :string]],
  [:cuModuleGetTexRef,      [:pointer, :CUModule, :string]],
  [:cuModuleLoad,           [:pointer, :string]],
  [:cuModuleLoadData,       [:pointer, :pointer]],
  [:cuModuleLoadDataEx,     [:pointer, :pointer, :uint, :pointer, :pointer]],
  [:cuModuleLoadFatBinary,  [:pointer, :pointer]],
  [:cuModuleUnload,         [:CUModule]],

  # CU Execution Control.
  [:cuFuncGetAttribute,     [:pointer, CUFunctionAttribute, :CUFunction]],
  [:cuFuncSetCacheConfig,   [:CUFunction, CUFunctionCache]],
  [:cuLaunchKernel,         [:CUFunction,
                             :uint, :uint, :uint,
                             :uint, :uint, :uint,
                             :uint, :CUStream, :pointer, :pointer]],
  # Deprecated.
  [:cuFuncSetBlockShape,    [:CUFunction, :int, :int, :int]],
  [:cuFuncSetSharedSize,    [:CUFunction, :uint]],
  [:cuLaunch,               [:CUFunction]],
  [:cuLaunchGrid,           [:CUFunction, :int, :int]],
  [:cuLaunchGridAsync,      [:CUFunction, :int, :int, :CUStream]],
  [:cuParamSetf,            [:CUFunction, :int, :float]],
  [:cuParamSeti,            [:CUFunction, :int, :uint]],
  [:cuParamSetSize,         [:CUFunction, :uint]],
  [:cuParamSetTexRef,       [:CUFunction, :int, :CUTexRef]],
  [:cuParamSetv,            [:CUFunction, :int, :pointer, :uint]],
].each do |function_name, argument_types|
  attach_function function_name, argument_types, :enum
end
580
+
581
# CU Stream Management and CU Event Management.
# Each row is [driver function, argument types]; every function returns a
# status code (:enum).
[
  # CU Stream Management.
  [:cuStreamCreate,       [:pointer, :uint]],
  [:cuStreamDestroy,      [:CUStream]],
  [:cuStreamQuery,        [:CUStream]],
  [:cuStreamSynchronize,  [:CUStream]],
  [:cuStreamWaitEvent,    [:CUStream, :CUEvent, :uint]],

  # CU Event Management.
  [:cuEventCreate,        [:pointer, :uint]],
  [:cuEventDestroy,       [:CUEvent]],
  [:cuEventElapsedTime,   [:pointer, :CUEvent, :CUEvent]],
  [:cuEventQuery,         [:CUEvent]],
  [:cuEventRecord,        [:CUEvent, :CUStream]],
  [:cuEventSynchronize,   [:CUEvent]],
].each do |function_name, argument_types|
  attach_function function_name, argument_types, :enum
end
595
+
596
# CU Texture Reference Management and CU Surface Reference Management.
# Each row is [driver function, argument types]; every function returns a
# status code (:enum).
[
  # CU Texture Reference Management.
  [:cuTexRefGetAddress,      [:pointer, :CUTexRef]],
  [:cuTexRefGetAddressMode,  [:pointer, :CUTexRef, :int]],
  [:cuTexRefGetArray,        [:pointer, :CUTexRef]],
  [:cuTexRefGetFilterMode,   [:pointer, :CUTexRef]],
  [:cuTexRefGetFlags,        [:pointer, :CUTexRef]],
  [:cuTexRefGetFormat,       [:pointer, :pointer, :CUTexRef]],
  [:cuTexRefSetAddress,      [:pointer, :CUTexRef, :CUDevicePtr, :size_t]],
  [:cuTexRefSetAddress2D,    [:CUTexRef, :pointer, :CUDevicePtr, :size_t]],
  [:cuTexRefSetAddressMode,  [:CUTexRef, :int, CUAddressMode]],
  [:cuTexRefSetArray,        [:CUTexRef, :CUArray, :uint]],
  [:cuTexRefSetFilterMode,   [:CUTexRef, CUFilterMode]],
  [:cuTexRefSetFlags,        [:CUTexRef, :uint]],
  [:cuTexRefSetFormat,       [:CUTexRef, CUArrayFormat, :int]],
  # Deprecated.
  [:cuTexRefCreate,          [:pointer]],
  [:cuTexRefDestroy,         [:CUTexRef]],

  # CU Surface Reference Management.
  [:cuSurfRefGetArray,       [:pointer, :CUSurfRef]],
  [:cuSurfRefSetArray,       [:CUSurfRef, :CUArray, :uint]],
].each do |function_name, argument_types|
  attach_function function_name, argument_types, :enum
end
617
+
618
+ end # module
619
+ end # module
620
+ end # module