opencl_ruby_ffi 1.3.4 → 1.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/opencl_ruby_ffi.rb +1 -0
- data/lib/opencl_ruby_ffi/Buffer.rb +7 -1
- data/lib/opencl_ruby_ffi/CommandQueue.rb +27 -12
- data/lib/opencl_ruby_ffi/Context.rb +79 -7
- data/lib/opencl_ruby_ffi/Device.rb +95 -13
- data/lib/opencl_ruby_ffi/Event.rb +13 -4
- data/lib/opencl_ruby_ffi/Image.rb +7 -1
- data/lib/opencl_ruby_ffi/Kernel.rb +30 -10
- data/lib/opencl_ruby_ffi/Mem.rb +25 -9
- data/lib/opencl_ruby_ffi/Pipe.rb +8 -0
- data/lib/opencl_ruby_ffi/Platform.rb +33 -6
- data/lib/opencl_ruby_ffi/Program.rb +57 -16
- data/lib/opencl_ruby_ffi/SVM.rb +23 -11
- data/lib/opencl_ruby_ffi/Sampler.rb +2 -1
- data/lib/opencl_ruby_ffi/ext.rb +1 -0
- data/lib/opencl_ruby_ffi/intel/accelerator.rb +152 -0
- data/lib/opencl_ruby_ffi/intel/advanced_motion_estimation.rb +65 -0
- data/lib/opencl_ruby_ffi/intel/driver_diagnostics.rb +19 -0
- data/lib/opencl_ruby_ffi/intel/kernel_profiling.rb +38 -0
- data/lib/opencl_ruby_ffi/intel/motion_estimation.rb +26 -0
- data/lib/opencl_ruby_ffi/intel/unified_shared_memory_preview.rb +586 -0
- data/lib/opencl_ruby_ffi/intel/unofficial.rb +95 -0
- data/lib/opencl_ruby_ffi/khr/device_uuid.rb +119 -0
- data/lib/opencl_ruby_ffi/opencl_ruby_ffi_base.rb +277 -38
- data/lib/opencl_ruby_ffi/opencl_ruby_ffi_base_gen.rb +84 -174
- data/lib/opencl_ruby_ffi/opencl_ruby_ffi_library.rb +181 -0
- data/lib/opencl_ruby_ffi/opencl_types.rb +4 -0
- data/opencl_ruby_ffi.gemspec +4 -5
- metadata +14 -6
@@ -0,0 +1,586 @@
|
|
1
|
+
# Numeric constants, command-type codes and FFI type aliases used by the
# cl_intel_unified_shared_memory_preview extension bindings.
module OpenCL
  # Device info parameters (capability queries)
  DEVICE_HOST_MEM_CAPABILITIES_INTEL = 0x4190
  DEVICE_DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
  DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
  DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
  DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194

  # Capability bits
  UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
  UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
  UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
  UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)

  # Allocation property key and its flag bit
  MEM_ALLOC_FLAGS_INTEL = 0x4195

  MEM_ALLOC_WRITE_COMBINED_INTEL = (1 << 0)

  # Allocation kinds
  MEM_TYPE_UNKNOWN_INTEL = 0x4196
  MEM_TYPE_HOST_INTEL = 0x4197
  MEM_TYPE_DEVICE_INTEL = 0x4198
  MEM_TYPE_SHARED_INTEL = 0x4199

  # Allocation info parameters
  MEM_ALLOC_TYPE_INTEL = 0x419A
  MEM_ALLOC_BASE_PTR_INTEL = 0x419B
  MEM_ALLOC_SIZE_INTEL = 0x419C
  MEM_ALLOC_DEVICE_INTEL = 0x419D
  MEM_ALLOC_INFO_TBD1_INTEL = 0x419E
  MEM_ALLOC_INFO_TBD2_INTEL = 0x419F

  # Kernel exec info parameters
  KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
  KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
  KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
  KERNEL_EXEC_INFO_USM_PTRS_INTEL = 0x4203

  # Migration flag bits
  MIGRATE_MEM_OBJECT_HOST_INTEL = (1 << 0)
  MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED_INTEL = (1 << 1)

  # Command type codes
  COMMAND_MEMFILL_INTEL = 0x4204
  COMMAND_MEMCPY_INTEL = 0x4205
  COMMAND_MIGRATEMEM_INTEL = 0x4206
  COMMAND_MEMADVISE_INTEL = 0x4207

  # Adds the extension's command codes to the CommandType class and to its
  # code => name table (@codes is expected to be set up where the class is
  # first defined).
  class CommandType
    MEMFILL_INTEL = 0x4204
    MEMCPY_INTEL = 0x4205
    MIGRATEMEM_INTEL = 0x4206
    MEMADVISE_INTEL = 0x4207

    [
      [0x4204, 'MEMFILL_INTEL'],
      [0x4205, 'MEMCPY_INTEL'],
      [0x4206, 'MIGRATEMEM_INTEL'],
      [0x4207, 'MEMADVISE_INTEL']
    ].each do |code, label|
      @codes[code] = label
    end
  end

  # Memory advice codes
  MEM_ADVICE_TBD0_INTEL = 0x4208
  MEM_ADVICE_TBD1_INTEL = 0x4209
  MEM_ADVICE_TBD2_INTEL = 0x420A
  MEM_ADVICE_TBD3_INTEL = 0x420B
  MEM_ADVICE_TBD4_INTEL = 0x420C
  MEM_ADVICE_TBD5_INTEL = 0x420D
  MEM_ADVICE_TBD6_INTEL = 0x420E
  MEM_ADVICE_TBD7_INTEL = 0x420F

  # Each pair is [existing type, new alias]; the alias is registered with typedef.
  extension_typedefs = [
    [:cl_bitfield, :cl_mem_properties_intel],
    [:cl_bitfield, :cl_mem_migration_flags_intel],
    [:cl_bitfield, :cl_mem_alloc_flags_intel],
    [:cl_uint, :cl_mem_info_intel],
    [:cl_uint, :cl_mem_advice_intel],
    [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
    [:cl_uint, :cl_unified_shared_memory_type_intel]
  ]
  extension_typedefs.each do |base_type, alias_type|
    typedef base_type, alias_type
  end
end
|
74
|
+
|
75
|
+
# Gives FFI::Pointer put/get/write/read (and array) accessors for the
# extension's type aliases by aliasing the accessors of the underlying type.
# With a Ruby major version of 2 or more this is done through a refinement;
# otherwise FFI::Pointer is reopened directly.
if RUBY_VERSION.to_i >= 2
  module OpenCLRefinements
    refine FFI::Pointer do
      accessor_kinds = [:put, :get, :write, :read, :put_array_of, :get_array_of]
      type_aliases = [
        [:cl_bitfield, :cl_mem_properties_intel],
        [:cl_bitfield, :cl_mem_migration_flags_intel],
        [:cl_bitfield, :cl_mem_alloc_flags_intel],
        [:cl_uint, :cl_mem_info_intel],
        [:cl_uint, :cl_mem_advice_intel],
        [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
        [:cl_uint, :cl_unified_shared_memory_type_intel]
      ]
      type_aliases.each do |base_type, new_type|
        accessor_kinds.each do |kind|
          alias_method :"#{kind}_#{new_type}", :"#{kind}_#{base_type}"
        end
      end
    end
  end
  using OpenCLRefinements
else
  class FFI::Pointer
    accessor_kinds = [:put, :get, :write, :read, :put_array_of, :get_array_of]
    type_aliases = [
      [:cl_bitfield, :cl_mem_properties_intel],
      [:cl_bitfield, :cl_mem_migration_flags_intel],
      [:cl_bitfield, :cl_mem_alloc_flags_intel],
      [:cl_uint, :cl_mem_info_intel],
      [:cl_uint, :cl_mem_advice_intel],
      [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
      [:cl_uint, :cl_unified_shared_memory_type_intel]
    ]
    type_aliases.each do |base_type, new_type|
      accessor_kinds.each do |kind|
        alias_method :"#{kind}_#{new_type}", :"#{kind}_#{base_type}"
      end
    end
  end
end
|
111
|
+
|
112
|
+
module OpenCL
|
113
|
+
|
114
|
+
class UnifiedSharedMemoryCapabilitiesINTEL < Bitfield
|
115
|
+
UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
|
116
|
+
UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
|
117
|
+
UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
|
118
|
+
UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)
|
119
|
+
|
120
|
+
# Returns an Array of String naming the capability flags set in this bitfield.
#
# Each candidate name is resolved to its constant on the receiver's class and
# kept when the receiver reports it through include?.
def names
  # A single `.each`/`.select` call: the former `..each` built a Range whose
  # upper bound was `self.each { ... }`, so the method raised instead of iterating.
  %w( UNIFIED_SHARED_MEMORY_ACCESS_INTEL UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL ).select { |f|
    self.include?( self.class.const_get(f) )
  }
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# Registers the capabilities bitfield class as the converter for the
# :cl_unified_shared_memory_capabilities_intel type.
module InnerInterface
  TYPE_CONVERTER[ :cl_unified_shared_memory_capabilities_intel ] = UnifiedSharedMemoryCapabilitiesINTEL
end
|
132
|
+
|
133
|
+
class Mem
|
134
|
+
ALLOC_FLAGS_INTEL = 0x4195
|
135
|
+
|
136
|
+
TYPE_UNKNOWN_INTEL = 0x4196
|
137
|
+
TYPE_HOST_INTEL = 0x4197
|
138
|
+
TYPE_DEVICE_INTEL = 0x4198
|
139
|
+
TYPE_SHARED_INTEL = 0x4199
|
140
|
+
|
141
|
+
ALLOC_WRITE_COMBINED_INTEL = (1 << 0)
|
142
|
+
|
143
|
+
ALLOC_TYPE_INTEL = 0x419A
|
144
|
+
ALLOC_BASE_PTR_INTEL = 0x419B
|
145
|
+
ALLOC_SIZE_INTEL = 0x419C
|
146
|
+
ALLOC_DEVICE_INTEL = 0x419D
|
147
|
+
ALLOC_INFO_TBD1_INTEL = 0x419E
|
148
|
+
ALLOC_INFO_TBD2_INTEL = 0x419F
|
149
|
+
|
150
|
+
# Enumeration of unified shared memory allocation kinds, with a
# code => name table in @codes.
class UnifiedSharedMemoryTypeINTEL < EnumInt
  UNKNOWN_INTEL = 0x4196
  HOST_INTEL = 0x4197
  DEVICE_INTEL = 0x4198
  SHARED_INTEL = 0x4199
  @codes = {
    0x4196 => 'UNKNOWN_INTEL',
    0x4197 => 'HOST_INTEL',
    0x4198 => 'DEVICE_INTEL',
    0x4199 => 'SHARED_INTEL'
  }
end
|
161
|
+
|
162
|
+
class AllocFlagsINTEL < Bitfield
|
163
|
+
DEFAULT_INTEL = 0
|
164
|
+
WRITE_COMBINED_INTEL = (1 << 0)
|
165
|
+
# Returns an Array of String naming the allocation flags set in this bitfield.
# DEFAULT_INTEL (0) is not a candidate, so an empty bitfield yields [].
def names
  candidates = %w( WRITE_COMBINED_INTEL )
  candidates.select do |flag_name|
    flag_value = self.class.const_get(flag_name)
    self.include?(flag_value)
  end
end
|
172
|
+
end
|
173
|
+
|
174
|
+
# Enumeration of memory advice codes (all still placeholder TBD values),
# with a code => name table in @codes.
class AdviceINTEL < EnumInt
  TBD0_INTEL = 0x4208
  TBD1_INTEL = 0x4209
  TBD2_INTEL = 0x420A
  TBD3_INTEL = 0x420B
  TBD4_INTEL = 0x420C
  TBD5_INTEL = 0x420D
  TBD6_INTEL = 0x420E
  TBD7_INTEL = 0x420F
  @codes = {
    0x4208 => 'TBD0_INTEL',
    0x4209 => 'TBD1_INTEL',
    0x420A => 'TBD2_INTEL',
    0x420B => 'TBD3_INTEL',
    0x420C => 'TBD4_INTEL',
    0x420D => 'TBD5_INTEL',
    0x420E => 'TBD6_INTEL',
    0x420F => 'TBD7_INTEL'
  }
end
|
193
|
+
|
194
|
+
class MigrationFlagsINTEL < Bitfield
|
195
|
+
HOST_INTEL = (1 << 0)
|
196
|
+
CONTENT_UNDEFINED_INTEL = (1 << 1)
|
197
|
+
# Returns an Array of String representing the different flags set.
#
# The candidate names must match the constants declared on this class
# (HOST_INTEL, CONTENT_UNDEFINED_INTEL); the former unsuffixed names made
# const_get raise NameError.
def names
  %w( HOST_INTEL CONTENT_UNDEFINED_INTEL ).select { |f|
    self.include?( self.class.const_get(f) )
  }
end
|
205
|
+
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
209
|
+
# Registers the Mem enum/bitfield classes as converters for the
# corresponding extension types.
module InnerInterface
  [
    [:cl_unified_shared_memory_type_intel, Mem::UnifiedSharedMemoryTypeINTEL],
    [:cl_mem_alloc_flags_intel, Mem::AllocFlagsINTEL],
    [:cl_mem_advice_intel, Mem::AdviceINTEL],
    [:cl_mem_migration_flags_intel, Mem::MigrationFlagsINTEL]
  ].each do |type_name, converter|
    TYPE_CONVERTER[type_name] = converter
  end
end
|
215
|
+
|
216
|
+
# Device-side additions for cl_intel_unified_shared_memory_preview:
# capability query constants and their generated getters.
class Device
  HOST_MEM_CAPABILITIES_INTEL = 0x4190
  DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
  SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
  CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
  SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194

  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # NOTE(review): get_info is defined elsewhere; this assumes it names the
    # generated getter after its last argument and queries the Device constant
    # with the same upper-cased name. Under that assumption the getter names
    # must match the constants above, which the former
    # "device_shared_mem_capabilities_intel" and
    # "cross_device_mem_capabilities_intel" did not.
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "host_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "device_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "single_device_shared_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "cross_device_shared_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "shared_system_mem_capabilities_intel")

    # Former getter name, kept so existing callers continue to work.
    def device_shared_mem_capabilities_intel
      single_device_shared_mem_capabilities_intel
    end

    # Former getter name, kept so existing callers continue to work.
    def cross_device_mem_capabilities_intel
      cross_device_shared_mem_capabilities_intel
    end
  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
|
234
|
+
|
235
|
+
# Pointer to a unified shared memory allocation that remembers the Context
# it was allocated from, so it can be freed and queried through that Context.
class USMPointer < Pointer

  # address:: address (or pointer) handed to Pointer's constructor
  # context:: the Context used for the free and alloc_* queries below
  def initialize(address, context)
    super(address)
    @context = context
  end

  # Returns a String showing the class name, hexadecimal address and size.
  def inspect
    return "#<#{self.class.name}: 0x#{address.to_s(16)} (#{size})>"
  end

  # Returns a new pointer of the same class covering +size+ bytes starting at
  # +offset+, bound to the same context.
  def slice(offset, size)
    res = super(offset, size)
    # The context lives in @context; no `context` reader is defined on this class.
    self.class.new(res, @context)
  end

  # Returns a pointer to the remainder of this region starting at +offset+.
  def +( offset )
    self.slice(offset, self.size - offset)
  end

  # Frees the allocation through the context, using the allocation's base
  # pointer rather than this (possibly offset) pointer.
  def free
    @context.mem_free_intel(alloc_base_ptr_intel)
  end

  # Returns the result of Context#mem_alloc_type_intel for this pointer.
  def alloc_type_intel
    @context.mem_alloc_type_intel(self)
  end

  # Returns the result of Context#mem_alloc_flags_intel for this pointer.
  def alloc_flags_intel
    @context.mem_alloc_flags_intel(self)
  end

  # Returns the result of Context#mem_alloc_base_ptr_intel for this pointer.
  def alloc_base_ptr_intel
    @context.mem_alloc_base_ptr_intel(self)
  end

  # Returns the result of Context#mem_alloc_size_intel for this pointer.
  def alloc_size_intel
    @context.mem_alloc_size_intel(self)
  end

  # Returns the result of Context#mem_alloc_device_intel for this pointer.
  def alloc_device_intel
    @context.mem_alloc_device_intel(self)
  end
end
|
279
|
+
|
280
|
+
# Context-side entry points of cl_intel_unified_shared_memory_preview:
# lazily resolved extension functions, USM allocation/free, and allocation
# info queries.
class Context
  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # Returns the memoized clGetMemAllocInfoINTEL handle obtained from the
    # platform; reports OpenCL::INVALID_OPERATION via error_check when the
    # lookup yields nothing.
    def clGetMemAllocInfoINTEL
      return @_clGetMemAllocInfoINTEL if @_clGetMemAllocInfoINTEL
      @_clGetMemAllocInfoINTEL = platform.get_extension_function("clGetMemAllocInfoINTEL", :cl_int, [Context, :pointer, :cl_mem_info_intel, :size_t, :pointer, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clGetMemAllocInfoINTEL
      return @_clGetMemAllocInfoINTEL
    end

    # Same contract as clGetMemAllocInfoINTEL, for clHostMemAllocINTEL.
    def clHostMemAllocINTEL
      return @_clHostMemAllocINTEL if @_clHostMemAllocINTEL
      @_clHostMemAllocINTEL = platform.get_extension_function("clHostMemAllocINTEL", :pointer, [Context, :pointer, :size_t, :cl_uint, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clHostMemAllocINTEL
      return @_clHostMemAllocINTEL
    end

    # Same contract as clGetMemAllocInfoINTEL, for clDeviceMemAllocINTEL.
    def clDeviceMemAllocINTEL
      return @_clDeviceMemAllocINTEL if @_clDeviceMemAllocINTEL
      @_clDeviceMemAllocINTEL = platform.get_extension_function("clDeviceMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clDeviceMemAllocINTEL
      return @_clDeviceMemAllocINTEL
    end

    # Same contract as clGetMemAllocInfoINTEL, for clSharedMemAllocINTEL.
    def clSharedMemAllocINTEL
      return @_clSharedMemAllocINTEL if @_clSharedMemAllocINTEL
      @_clSharedMemAllocINTEL = platform.get_extension_function("clSharedMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clSharedMemAllocINTEL
      return @_clSharedMemAllocINTEL
    end

    # Same contract as clGetMemAllocInfoINTEL, for clMemFreeINTEL.
    def clMemFreeINTEL
      return @_clMemFreeINTEL if @_clMemFreeINTEL
      @_clMemFreeINTEL = platform.get_extension_function("clMemFreeINTEL", :cl_int, [Context, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clMemFreeINTEL
      return @_clMemFreeINTEL
    end

    # Packs +properties+ (a value or nested Array of values) into a
    # zero-terminated cl_mem_properties_intel array.
    # Returns nil when +properties+ is nil or false.
    def get_mem_properties_intel(properties)
      return nil unless properties
      properties = [properties].flatten
      props = MemoryPointer::new(:cl_mem_properties_intel, properties.length + 1)
      properties.each_with_index { |e, i|
        props[i].write_cl_mem_properties_intel(e)
      }
      # trailing 0 terminates the list
      props[properties.length].write_cl_mem_properties_intel(0)
      return props
    end

    private :get_mem_properties_intel

    # Allocates +size+ bytes with clHostMemAllocINTEL.
    #
    # options:: :properties (see get_mem_properties_intel), :alignment (defaults to 0)
    #
    # Returns a USMPointer of +size+ bytes bound to this context; the error
    # code written by the call goes through error_check first.
    def host_mem_alloc_intel(size, options = {})
      properties = get_mem_properties_intel(options[:properties])
      alignment = options[:alignment] || 0
      error = MemoryPointer::new( :cl_int )
      ptr = clHostMemAllocINTEL.call(self, properties, size, alignment, error)
      error_check(error.read_cl_int)
      return USMPointer::new(ptr.slice(0, size), self)
    end

    # Same as host_mem_alloc_intel but calls clDeviceMemAllocINTEL with +device+.
    def device_mem_alloc_intel(device, size, options = {})
      properties = get_mem_properties_intel(options[:properties])
      alignment = options[:alignment] || 0
      error = MemoryPointer::new( :cl_int )
      ptr = clDeviceMemAllocINTEL.call(self, device, properties, size, alignment, error)
      error_check(error.read_cl_int)
      return USMPointer::new(ptr.slice(0, size), self)
    end

    # Same as host_mem_alloc_intel but calls clSharedMemAllocINTEL with +device+.
    def shared_mem_alloc_intel(device, size, options = {})
      properties = get_mem_properties_intel(options[:properties])
      alignment = options[:alignment] || 0
      error = MemoryPointer::new( :cl_int )
      ptr = clSharedMemAllocINTEL.call(self, device, properties, size, alignment, error)
      error_check(error.read_cl_int)
      return USMPointer::new(ptr.slice(0, size), self)
    end

    # Frees +ptr+ with clMemFreeINTEL and returns self.
    def mem_free_intel(ptr)
      error = clMemFreeINTEL.call(self, ptr)
      error_check(error)
      return self
    end

    # Queries ALLOC_TYPE_INTEL for +ptr+; returns a Mem::UnifiedSharedMemoryTypeINTEL.
    def mem_alloc_type_intel(ptr)
      ptr_res = MemoryPointer::new(:cl_unified_shared_memory_type_intel)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_TYPE_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return OpenCL::Mem::UnifiedSharedMemoryTypeINTEL::new(ptr_res.read_cl_unified_shared_memory_type_intel)
    end

    # Queries ALLOC_FLAGS_INTEL for +ptr+; returns a Mem::AllocFlagsINTEL.
    def mem_alloc_flags_intel(ptr)
      ptr_res = MemoryPointer::new(:cl_mem_alloc_flags_intel)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_FLAGS_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return OpenCL::Mem::AllocFlagsINTEL::new(ptr_res.read_cl_mem_alloc_flags_intel)
    end

    # Queries ALLOC_BASE_PTR_INTEL for +ptr+; returns the pointer read back.
    def mem_alloc_base_ptr_intel(ptr)
      ptr_res = MemoryPointer::new(:pointer)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_BASE_PTR_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return ptr_res.read_pointer
    end

    # Queries ALLOC_SIZE_INTEL for +ptr+; returns the size_t value read back.
    def mem_alloc_size_intel(ptr)
      ptr_res = MemoryPointer::new(:size_t)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_SIZE_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return ptr_res.read_size_t
    end

    # Queries ALLOC_DEVICE_INTEL for +ptr+; returns a Device wrapping the
    # handle read back.
    def mem_alloc_device_intel(ptr)
      ptr_res = MemoryPointer::new( Device )
      # The function handle comes from the getter on this object, as in the
      # sibling queries; it is not a method of the OpenCL module.
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_DEVICE_INTEL, Device.size, ptr_res, nil)
      error_check(error)
      return Device::new(ptr_res.read_pointer)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
|
406
|
+
|
407
|
+
# Kernel-side additions for cl_intel_unified_shared_memory_preview:
# exec-info constants, USM pointer arguments and exec-info setters.
class Kernel
  EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
  EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
  EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
  EXEC_INFO_USM_PTRS_INTEL = 0x4203

  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # Returns the memoized clSetKernelArgMemPointerINTEL handle obtained from
    # the context's platform; reports OpenCL::INVALID_OPERATION via
    # error_check when the lookup yields nothing.
    def clSetKernelArgMemPointerINTEL
      return @_clSetKernelArgMemPointerINTEL if @_clSetKernelArgMemPointerINTEL
      # Argument types are passed as one Array, as in every other
      # get_extension_function call of this extension.
      @_clSetKernelArgMemPointerINTEL = context.platform.get_extension_function("clSetKernelArgMemPointerINTEL", :cl_int, [Kernel, :cl_uint, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clSetKernelArgMemPointerINTEL
      return @_clSetKernelArgMemPointerINTEL
    end

    # Sets kernel argument +index+ to +usm_pointer+; returns self.
    def set_arg_mem_pointer_intel(index, usm_pointer)
      error = clSetKernelArgMemPointerINTEL.call(self, index, usm_pointer)
      error_check(error)
      return self
    end

    # Passes +ptrs+ (one pointer or a nested Array of pointers) to
    # clSetKernelExecInfo under EXEC_INFO_USM_PTRS_INTEL; returns self.
    def set_usm_ptrs_intel( ptrs )
      pointers = [ptrs].flatten
      pt = MemoryPointer::new( :pointer, pointers.length )
      pointers.each_with_index { |p, i|
        pt[i].write_pointer(p)
      }
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_USM_PTRS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL to the cl_bool +flag+; returns self.
    def set_indirect_host_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL to the cl_bool +flag+; returns self.
    def set_indirect_device_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL to the cl_bool +flag+; returns self.
    # The method name is kept for existing callers; the constant is the one
    # actually declared on Kernel (EXEC_INFO_SHARED_DEVICE_ACCESS_INTEL does
    # not exist).
    def set_shared_device_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets every kernel argument from +args+ and enqueues the kernel on
    # +command_queue+ over +global_work_size+.
    #
    # +args+ must hold exactly num_args values, optionally followed by one
    # options Hash forwarded to enqueue_ndrange_kernel; any other count is
    # reported as INVALID_KERNEL_ARGS via error_check.
    # SVMPointer values (on platforms with version_number >= 2.0) and
    # USMPointer values use their dedicated setters; everything else goes
    # through set_arg.
    def enqueue_with_args(command_queue, global_work_size, *args)
      n = self.num_args
      error_check(INVALID_KERNEL_ARGS) if args.length < n
      error_check(INVALID_KERNEL_ARGS) if args.length > n + 1
      if args.length == n + 1
        options = args.last
      else
        options = {}
      end
      n.times { |i|
        if args[i].class == SVMPointer and self.context.platform.version_number >= 2.0 then
          self.set_arg_svm_pointer(i, args[i])
        elsif args[i].class == USMPointer then
          self.set_arg_mem_pointer_intel(i, args[i])
        else
          self.set_arg(i, args[i])
        end
      }
      command_queue.enqueue_ndrange_kernel(self, global_work_size, options)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
|
488
|
+
|
489
|
+
# Makes Kernel::Arg#set aware of USM pointers when the
# cl_intel_unified_shared_memory_preview extension is registered.
class Kernel
  class Arg
    module UnifiedSharedMemoryPreviewINTEL
      # Sets this kernel argument to +value+.
      #
      # An SVMPointer (on platforms with version_number >= 2.0) is set with
      # OpenCL.set_kernel_arg_svm_pointer, a USMPointer with
      # Kernel#set_arg_mem_pointer_intel, and anything else with
      # OpenCL.set_kernel_arg, which also receives +size+.
      def set(value, size = nil)
        if value.class == SVMPointer and @kernel.context.platform.version_number >= 2.0 then
          OpenCL.set_kernel_arg_svm_pointer(@kernel, @index, value)
        elsif value.class == USMPointer then
          # Tests +value+: this method has no `args` or `i` in scope.
          @kernel.set_arg_mem_pointer_intel(@index, value)
        else
          OpenCL.set_kernel_arg(@kernel, @index, value, size)
        end
      end
    end
    register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
  end
end
|
505
|
+
|
506
|
+
# CommandQueue-side additions for cl_intel_unified_shared_memory_preview:
# lazily resolved extension functions and the fill / copy / migrate / advise
# enqueue commands on USM pointers.
class CommandQueue
  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # Returns the memoized clEnqueueMemFillINTEL handle obtained from the
    # platform; reports OpenCL::INVALID_OPERATION via error_check when the
    # lookup yields nothing.
    def clEnqueueMemFillINTEL
      return @_clEnqueueMemFillINTEL if @_clEnqueueMemFillINTEL
      @_clEnqueueMemFillINTEL = platform.get_extension_function("clEnqueueMemFillINTEL", :cl_int, [CommandQueue, :pointer, :pointer, :size_t, :size_t, :cl_uint, :pointer, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMemFillINTEL
      return @_clEnqueueMemFillINTEL
    end

    # Same contract as clEnqueueMemFillINTEL, for clEnqueueMemcpyINTEL.
    def clEnqueueMemcpyINTEL
      return @_clEnqueueMemcpyINTEL if @_clEnqueueMemcpyINTEL
      @_clEnqueueMemcpyINTEL = platform.get_extension_function("clEnqueueMemcpyINTEL", :cl_int, [CommandQueue, :cl_bool, :pointer, :pointer, :size_t, :cl_uint, :pointer, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMemcpyINTEL
      return @_clEnqueueMemcpyINTEL
    end

    # Same contract as clEnqueueMemFillINTEL, for clEnqueueMigrateMemINTEL.
    def clEnqueueMigrateMemINTEL
      return @_clEnqueueMigrateMemINTEL if @_clEnqueueMigrateMemINTEL
      @_clEnqueueMigrateMemINTEL = platform.get_extension_function("clEnqueueMigrateMemINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_migration_flags_intel, :cl_uint, :pointer, :pointer])
      # Check and return the migrate handle itself, not the memcpy one.
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMigrateMemINTEL
      return @_clEnqueueMigrateMemINTEL
    end

    # Same contract as clEnqueueMemFillINTEL, for clEnqueueMemAdviseINTEL.
    def clEnqueueMemAdviseINTEL
      return @_clEnqueueMemAdviseINTEL if @_clEnqueueMemAdviseINTEL
      @_clEnqueueMemAdviseINTEL = platform.get_extension_function("clEnqueueMemAdviseINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_advice_intel, :cl_uint, :pointer, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMemAdviseINTEL
      return @_clEnqueueMemAdviseINTEL
    end

    # Enqueues a fill of +usm_ptr+ with +pattern+.
    #
    # options:: :pattern_size (defaults to pattern.size), :size (defaults to
    #           usm_ptr.size); the Hash is also handed to get_event_wait_list
    #
    # Returns the Event created for the command.
    def enqueue_mem_fill_intel(usm_ptr, pattern, options = {})
      num_events, events = get_event_wait_list( options )
      pattern_size = options[:pattern_size] || pattern.size
      size = options[:size] || usm_ptr.size
      event = MemoryPointer::new( Event )
      error = clEnqueueMemFillINTEL.call(self, usm_ptr, pattern, pattern_size, size, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues a copy from +src_ptr+ to +dst_ptr+.
    #
    # options:: :blocking or :blocking_copy (either makes the call blocking),
    #           :size (defaults to the smaller of the two pointer sizes); the
    #           Hash is also handed to get_event_wait_list
    #
    # Returns the Event created for the command.
    def enqueue_memcpy_intel(dst_ptr, src_ptr, options = {})
      num_events, events = get_event_wait_list( options )
      blocking = FALSE
      blocking = TRUE if options[:blocking] or options[:blocking_copy]
      size = options[:size] || [dst_ptr.size, src_ptr.size].min
      event = MemoryPointer::new( Event )
      error = clEnqueueMemcpyINTEL.call(self, blocking, dst_ptr, src_ptr, size, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues a migration of +usm_ptr+.
    #
    # options:: :size (defaults to usm_ptr.size); the Hash is also handed to
    #           get_flags and get_event_wait_list
    #
    # Returns the Event created for the command.
    def enqueue_migrate_mem_intel(usm_ptr, options = {})
      num_events, events = get_event_wait_list( options )
      flags = get_flags( options )
      size = options[:size] || usm_ptr.size
      event = MemoryPointer::new( Event )
      error = clEnqueueMigrateMemINTEL.call(self, usm_ptr, size, flags, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues memory +advice+ for +usm_ptr+.
    #
    # options:: :size (defaults to usm_ptr.size); the Hash is also handed to
    #           get_event_wait_list
    #
    # Returns the Event created for the command.
    def enqueue_mem_advise_intel(usm_ptr, advice, options = {})
      num_events, events = get_event_wait_list( options )
      size = options[:size] || usm_ptr.size
      event = MemoryPointer::new( Event )
      # The getter takes no arguments; the handle it returns is invoked with .call.
      error = clEnqueueMemAdviseINTEL.call(self, usm_ptr, size, advice, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "device.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
|
586
|
+
end
|