opencl_ruby_ffi 1.3.6 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/opencl_ruby_ffi/CommandQueue.rb +16 -9
- data/lib/opencl_ruby_ffi/Context.rb +13 -5
- data/lib/opencl_ruby_ffi/Device.rb +11 -9
- data/lib/opencl_ruby_ffi/Event.rb +13 -4
- data/lib/opencl_ruby_ffi/Kernel.rb +29 -10
- data/lib/opencl_ruby_ffi/Mem.rb +19 -9
- data/lib/opencl_ruby_ffi/Platform.rb +8 -6
- data/lib/opencl_ruby_ffi/Program.rb +57 -16
- data/lib/opencl_ruby_ffi/SVM.rb +23 -11
- data/lib/opencl_ruby_ffi/Sampler.rb +2 -1
- data/lib/opencl_ruby_ffi/intel/kernel_profiling.rb +38 -0
- data/lib/opencl_ruby_ffi/intel/unified_shared_memory_preview.rb +586 -0
- data/lib/opencl_ruby_ffi/intel/unofficial.rb +95 -0
- data/lib/opencl_ruby_ffi/opencl_ruby_ffi_base.rb +109 -24
- data/opencl_ruby_ffi.gemspec +1 -2
- metadata +6 -4
@@ -63,6 +63,7 @@ module OpenCL
|
|
63
63
|
|
64
64
|
# Returns an Array of Platform containing the available OpenCL platforms
|
65
65
|
def self.get_platforms
|
66
|
+
return @_platforms if @_platforms
|
66
67
|
ptr1 = MemoryPointer::new(:cl_uint , 1)
|
67
68
|
|
68
69
|
error = clGetPlatformIDs(0, nil, ptr1)
|
@@ -70,9 +71,10 @@ module OpenCL
|
|
70
71
|
ptr2 = MemoryPointer::new(:pointer, ptr1.read_uint)
|
71
72
|
error = clGetPlatformIDs(ptr1.read_uint(), ptr2, nil)
|
72
73
|
error_check(error)
|
73
|
-
|
74
|
+
@_platforms = ptr2.get_array_of_pointer(0,ptr1.read_uint()).collect { |platform_ptr|
|
74
75
|
Platform::new(platform_ptr, false)
|
75
76
|
}
|
77
|
+
return @_platforms
|
76
78
|
end
|
77
79
|
|
78
80
|
class << self
|
@@ -88,11 +90,11 @@ module OpenCL
|
|
88
90
|
return "#<#{self.class.name}: #{self.name}>"
|
89
91
|
end
|
90
92
|
|
91
|
-
get_info("Platform", :string, "profile")
|
92
|
-
get_info("Platform", :string, "version")
|
93
|
-
get_info("Platform", :string, "name")
|
93
|
+
get_info("Platform", :string, "profile", true)
|
94
|
+
get_info("Platform", :string, "version", true)
|
95
|
+
get_info("Platform", :string, "name", true)
|
94
96
|
alias to_s name
|
95
|
-
get_info("Platform", :string, "vendor")
|
97
|
+
get_info("Platform", :string, "vendor", true)
|
96
98
|
|
97
99
|
# Returns an Array of string corresponding to the Platform extensions
|
98
100
|
def extensions
|
@@ -175,7 +177,7 @@ module OpenCL
|
|
175
177
|
module OpenCL21
|
176
178
|
extend InnerGenerator
|
177
179
|
|
178
|
-
get_info("Platform", :cl_ulong, "host_timer_resolution")
|
180
|
+
get_info("Platform", :cl_ulong, "host_timer_resolution", true)
|
179
181
|
|
180
182
|
end
|
181
183
|
|
@@ -15,12 +15,20 @@ module OpenCL
|
|
15
15
|
# * +:options+ - a String containing the options to use for the build
|
16
16
|
# * +:user_data+ - a Pointer (or convertible to Pointer using to_ptr) to the memory area to pass to the callback
|
17
17
|
def self.build_program(program, options = {}, &block)
|
18
|
-
|
18
|
+
if block
|
19
|
+
wrapper_block = lambda { |p, u|
|
20
|
+
block.call(p, u)
|
21
|
+
@@callbacks.delete(wrapper_block)
|
22
|
+
}
|
23
|
+
@@callbacks[wrapper_block] = options[:user_data]
|
24
|
+
else
|
25
|
+
wrapper_block = nil
|
26
|
+
end
|
19
27
|
num_devices, devices_p = get_device_list( options )
|
20
28
|
opt = ""
|
21
29
|
opt = options[:options] if options[:options]
|
22
30
|
options_p = MemoryPointer.from_string(opt)
|
23
|
-
error = clBuildProgram(program, num_devices, devices_p, options_p,
|
31
|
+
error = clBuildProgram(program, num_devices, devices_p, options_p, wrapper_block, options[:user_data] )
|
24
32
|
error_check(error)
|
25
33
|
return program
|
26
34
|
end
|
@@ -40,7 +48,15 @@ module OpenCL
|
|
40
48
|
# * +:options+ - a String containing the options to use for the build
|
41
49
|
# * +:user_data+ - a Pointer (or convertible to Pointer using to_ptr) to the memory area to pass to the callback
|
42
50
|
def self.link_program(context, input_programs, options = {}, &block)
|
43
|
-
|
51
|
+
if block
|
52
|
+
wrapper_block = lambda { |p, u|
|
53
|
+
block.call(p, u)
|
54
|
+
@@callbacks.delete(wrapper_block)
|
55
|
+
}
|
56
|
+
@@callbacks[wrapper_block] = options[:user_data]
|
57
|
+
else
|
58
|
+
wrapper_block = nil
|
59
|
+
end
|
44
60
|
num_devices, devices_p = get_device_list( options )
|
45
61
|
opt = ""
|
46
62
|
opt = options[:options] if options[:options]
|
@@ -50,7 +66,7 @@ module OpenCL
|
|
50
66
|
programs_p = MemoryPointer::new( Program, num_programs )
|
51
67
|
programs_p.write_array_of_pointer(programs)
|
52
68
|
error = MemoryPointer::new( :cl_int )
|
53
|
-
prog = clLinkProgram( context, num_devices, devices_p, options_p, num_programs, programs_p,
|
69
|
+
prog = clLinkProgram( context, num_devices, devices_p, options_p, num_programs, programs_p, wrapper_block, options[:user_data], error)
|
54
70
|
error_check(error.read_cl_int)
|
55
71
|
return Program::new( prog, false )
|
56
72
|
end
|
@@ -70,7 +86,15 @@ module OpenCL
|
|
70
86
|
# * +:options+ - a String containing the options to use for the compilation
|
71
87
|
# * +:input_headers+ - a Hash containing pairs of : String: header_include_name => Program: header
|
72
88
|
def self.compile_program(program, options = {}, &block)
|
73
|
-
|
89
|
+
if block
|
90
|
+
wrapper_block = lambda { |p, u|
|
91
|
+
block.call(p, u)
|
92
|
+
@@callbacks.delete(wrapper_block)
|
93
|
+
}
|
94
|
+
@@callbacks[wrapper_block] = options[:user_data]
|
95
|
+
else
|
96
|
+
wrapper_block = nil
|
97
|
+
end
|
74
98
|
num_devices, devices_p = get_device_list( options )
|
75
99
|
opt = ""
|
76
100
|
opt = options[:options] if options[:options]
|
@@ -80,7 +104,7 @@ module OpenCL
|
|
80
104
|
header_include_names = nil
|
81
105
|
num_headers = 0
|
82
106
|
num_headers = headers.length if headers
|
83
|
-
if num_headers then
|
107
|
+
if num_headers > 0 then
|
84
108
|
headers_p = MemoryPointer::new( Program, num_headers )
|
85
109
|
header_include_names = MemoryPointer::new( :pointer, num_headers )
|
86
110
|
indx = 0
|
@@ -90,7 +114,7 @@ module OpenCL
|
|
90
114
|
indx = indx + 1
|
91
115
|
}
|
92
116
|
end
|
93
|
-
error = clCompileProgram(program, num_devices, devices_p, options_p, num_headers, headers_p, header_include_names,
|
117
|
+
error = clCompileProgram(program, num_devices, devices_p, options_p, num_headers, headers_p, header_include_names, wrapper_block, options[:user_data] )
|
94
118
|
error_check(error)
|
95
119
|
return program
|
96
120
|
end
|
@@ -125,10 +149,10 @@ module OpenCL
|
|
125
149
|
# * +device_list+ - an Array of Device to create the program for. Can throw an Error::INVALID_VALUE if the number of supplied devices is different from the number of supplied binaries.
|
126
150
|
# * +binaries+ - Array of binaries
|
127
151
|
def self.create_program_with_binary(context, device_list, binaries)
|
128
|
-
|
129
|
-
num_devices =
|
152
|
+
binaries = [binaries].flatten
|
153
|
+
num_devices = binaries.length
|
130
154
|
devices = [device_list].flatten
|
131
|
-
error_check(INVALID_VALUE) if devices.length !=
|
155
|
+
error_check(INVALID_VALUE) if devices.length != binaries.length
|
132
156
|
devices_p = MemoryPointer::new( Device, num_devices )
|
133
157
|
lengths = MemoryPointer::new( :size_t, num_devices )
|
134
158
|
binaries_p = MemoryPointer::new( :pointer, num_devices )
|
@@ -215,8 +239,16 @@ module OpenCL
|
|
215
239
|
#
|
216
240
|
# * +:user_data+ - a Pointer (or convertible to Pointer using to_ptr) to the memory area to pass to the callback
|
217
241
|
def self.set_program_release_callback( program, options = {}, &block )
|
218
|
-
|
219
|
-
|
242
|
+
if block
|
243
|
+
wrapper_block = lambda { |p, u|
|
244
|
+
block.call(p, u)
|
245
|
+
@@callbacks.delete(wrapper_block)
|
246
|
+
}
|
247
|
+
@@callbacks[wrapper_block] = options[:user_data]
|
248
|
+
else
|
249
|
+
wrapper_block = nil
|
250
|
+
end
|
251
|
+
error = clSetProgramReleaseCallback( program, wrapper_block, options[:user_data] )
|
220
252
|
error_check(error)
|
221
253
|
return program
|
222
254
|
end
|
@@ -270,26 +302,35 @@ module OpenCL
|
|
270
302
|
end
|
271
303
|
end
|
272
304
|
|
305
|
+
# Returns the Platform associated with the Program
|
306
|
+
def platform
|
307
|
+
return @_platform if @_platform
|
308
|
+
@_platform = self.context.platform
|
309
|
+
end
|
310
|
+
|
273
311
|
# Returns the Context the Program is associated to
|
274
312
|
def context
|
313
|
+
return @_context if @_context
|
275
314
|
ptr = MemoryPointer::new( Context )
|
276
315
|
error = OpenCL.clGetProgramInfo(self, CONTEXT, Context.size, ptr, nil)
|
277
316
|
error_check(error)
|
278
|
-
|
317
|
+
@_context = Context::new( ptr.read_pointer )
|
279
318
|
end
|
280
319
|
|
281
|
-
get_info("Program", :cl_uint, "num_devices")
|
320
|
+
get_info("Program", :cl_uint, "num_devices", true)
|
282
321
|
get_info("Program", :cl_uint, "reference_count")
|
283
322
|
|
284
323
|
# Returns the Array of Device the Program is associated with
|
285
324
|
def devices
|
325
|
+
return @_devices if @_devices
|
286
326
|
n = self.num_devices
|
287
327
|
ptr2 = MemoryPointer::new( Device, n )
|
288
328
|
error = OpenCL.clGetProgramInfo(self, DEVICES, Device.size*n, ptr2, nil)
|
289
329
|
error_check(error)
|
290
|
-
|
330
|
+
@_devices = ptr2.get_array_of_pointer(0, n).collect { |device_ptr|
|
291
331
|
Device::new(device_ptr)
|
292
332
|
}
|
333
|
+
return @_devices
|
293
334
|
end
|
294
335
|
|
295
336
|
get_info("Program", :string, "source")
|
@@ -462,7 +503,7 @@ module OpenCL
|
|
462
503
|
il_size = MemoryPointer::new( :size_t )
|
463
504
|
error = OpenCL.clGetProgramInfo(self, IL, 0, nil, il_size)
|
464
505
|
error_check(error)
|
465
|
-
return nil if il_size == 0
|
506
|
+
return nil if il_size.read_size_t == 0
|
466
507
|
length = il_size.read_size_t
|
467
508
|
il_p = MemoryPointer::new( length )
|
468
509
|
error = OpenCL.clGetProgramInfo(self, IL, length, il_p, nil)
|
data/lib/opencl_ruby_ffi/SVM.rb
CHANGED
@@ -5,7 +5,7 @@ module OpenCL
|
|
5
5
|
class SVMPointer < Pointer
|
6
6
|
|
7
7
|
# create a new SVMPointer from its address and the context it pertains to
|
8
|
-
def initialize( address, context,
|
8
|
+
def initialize( address, context, base = nil )
|
9
9
|
super( address )
|
10
10
|
@context = context
|
11
11
|
if base then
|
@@ -13,16 +13,20 @@ module OpenCL
|
|
13
13
|
else
|
14
14
|
@base = address
|
15
15
|
end
|
16
|
-
@size = size
|
17
16
|
end
|
18
17
|
|
19
18
|
def inspect
|
20
|
-
return "#<#{self.class.name}: #{
|
19
|
+
return "#<#{self.class.name}: 0x#{address.to_s(16)} (#{size})>"
|
20
|
+
end
|
21
|
+
|
22
|
+
def slice(offset, size)
|
23
|
+
res = super(offset, size)
|
24
|
+
return self.class.new( res, @context, @base ) # re-wrap the sliced region, keeping the owning context and base
|
21
25
|
end
|
22
26
|
|
23
27
|
# creates a new SVMPointer relative to an existing one from an offset
|
24
28
|
def +( offset )
|
25
|
-
|
29
|
+
self.slice(offset, self.size - offset)
|
26
30
|
end
|
27
31
|
|
28
32
|
# frees the parent memory region associated to this SVMPointer
|
@@ -51,7 +55,7 @@ module OpenCL
|
|
51
55
|
alignment = options[:alignment] if options[:alignment]
|
52
56
|
ptr = clSVMAlloc( context, flags, size, alignment )
|
53
57
|
error_check(MEM_OBJECT_ALLOCATION_FAILURE) if ptr.null?
|
54
|
-
return SVMPointer::new( ptr,
|
58
|
+
return SVMPointer::new( ptr.slice(0, size), context )
|
55
59
|
end
|
56
60
|
|
57
61
|
# Frees an SVMPointer
|
@@ -112,11 +116,11 @@ module OpenCL
|
|
112
116
|
# * +command_queue+ - CommandQueue used to execute the write command
|
113
117
|
# * +dst_ptr+ - the Pointer (or convertible to Pointer using to_ptr) or SVMPointer to be written to
|
114
118
|
# * +src_ptr+ - the Pointer (or convertible to Pointer using to_ptr) or SVMPointer to be read from
|
115
|
-
# * +size+ - the size of data to copy
|
116
119
|
# * +options+ - a hash containing named options
|
117
120
|
#
|
118
121
|
# ==== Options
|
119
122
|
#
|
123
|
+
# * +:size+ - the size of data to copy
|
120
124
|
# * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
|
121
125
|
# * +:blocking_copy+ - if provided indicates if the command blocks until the copy finishes
|
122
126
|
# * +:blocking+ - if provided indicates if the command blocks until the copy finishes
|
@@ -124,10 +128,12 @@ module OpenCL
|
|
124
128
|
# ==== Returns
|
125
129
|
#
|
126
130
|
# the Event associated with the command
|
127
|
-
def self.enqueue_svm_memcpy(command_queue, dst_ptr, src_ptr,
|
131
|
+
def self.enqueue_svm_memcpy(command_queue, dst_ptr, src_ptr, options = {})
|
128
132
|
error_check(INVALID_OPERATION) if command_queue.context.platform.version_number < 2.0
|
129
133
|
blocking = FALSE
|
130
134
|
blocking = TRUE if options[:blocking] or options[:blocking_copy]
|
135
|
+
size = [dst_ptr.size, src_ptr.size].min
|
136
|
+
size = options[:size] if options[:size]
|
131
137
|
num_events, events = get_event_wait_list( options )
|
132
138
|
event = MemoryPointer::new( Event )
|
133
139
|
error = clEnqueueSVMMemcpy(command_queue, blocking, dst_ptr, src_ptr, size, num_events, events, event)
|
@@ -142,10 +148,10 @@ module OpenCL
|
|
142
148
|
# * +command_queue+ - CommandQueue used to execute the write command
|
143
149
|
# * +svm_ptr+ - the SVMPointer to the area to fill
|
144
150
|
# * +pattern+ - the Pointer (or convertible to Pointer using to_ptr) to the memory area where the pattern is stored
|
145
|
-
# * +size+ - the size of the area to fill
|
146
151
|
#
|
147
152
|
# ==== Options
|
148
153
|
#
|
154
|
+
# * +:size+ - the size of the area to fill
|
149
155
|
# * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
|
150
156
|
# * +:pattern_size+ - if provided indicates the size of the pattern, else the maximum pattern data is used
|
151
157
|
#
|
@@ -157,24 +163,28 @@ module OpenCL
|
|
157
163
|
num_events, events = get_event_wait_list( options )
|
158
164
|
pattern_size = pattern.size
|
159
165
|
pattern_size = options[:pattern_size] if options[:pattern_size]
|
166
|
+
size = svm_ptr.size
|
167
|
+
size = options[:size] if options[:size]
|
160
168
|
event = MemoryPointer::new( Event )
|
161
169
|
error = clEnqueueSVMMemFill(command_queue, svm_ptr, pattern, pattern_size, size, num_events, events, event)
|
162
170
|
error_check(error)
|
163
171
|
return Event::new(event.read_pointer, false)
|
164
172
|
end
|
165
173
|
|
174
|
+
singleton_class.send(:alias_method, :enqueue_svm_mem_fill, :enqueue_svm_memfill)
|
175
|
+
|
166
176
|
# Enqueues a command to map an SVM memory region into host memory
|
167
177
|
#
|
168
178
|
# ==== Attributes
|
169
179
|
#
|
170
180
|
# * +command_queue+ - CommandQueue used to execute the map command
|
171
181
|
# * +svm_ptr+ - the SVMPointer to the area to map
|
172
|
-
# * +size+ - the size of the region to map
|
173
182
|
# * +map_flags+ - a single or an Array of :cl_map_flags flags
|
174
183
|
# * +options+ - a hash containing named options
|
175
184
|
#
|
176
185
|
# ==== Options
|
177
186
|
#
|
187
|
+
# * +:size+ - the size of the region to map
|
178
188
|
# * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
|
179
189
|
# * +:blocking_map+ - if provided indicates if the command blocks until the region is mapped
|
180
190
|
# * +:blocking+ - if provided indicates if the command blocks until the region is mapped
|
@@ -182,11 +192,13 @@ module OpenCL
|
|
182
192
|
# ==== Returns
|
183
193
|
#
|
184
194
|
# the Event associated with the command
|
185
|
-
def self.enqueue_svm_map( command_queue, svm_ptr,
|
195
|
+
def self.enqueue_svm_map( command_queue, svm_ptr, map_flags, options = {} )
|
186
196
|
error_check(INVALID_OPERATION) if command_queue.context.platform.version_number < 2.0
|
187
197
|
blocking = FALSE
|
188
198
|
blocking = TRUE if options[:blocking] or options[:blocking_map]
|
189
199
|
flags = get_flags( {:flags => map_flags} )
|
200
|
+
size = svm_ptr.size
|
201
|
+
size = options[:size] if options[:size]
|
190
202
|
num_events, events = get_event_wait_list( options )
|
191
203
|
event = MemoryPointer::new( Event )
|
192
204
|
error = clEnqueueSVMMap( command_queue, blocking, flags, svm_ptr, size, num_events, events, event )
|
@@ -251,7 +263,7 @@ module OpenCL
|
|
251
263
|
sizes_p[i].write_size_t(sizes[i])
|
252
264
|
}
|
253
265
|
event = MemoryPointer::new( Event )
|
254
|
-
error =
|
266
|
+
error = clEnqueueSVMMigrateMem( command_queue, num_svm_pointers, svn_ptrs_p, sizes_p, flags, num_events, events, event )
|
255
267
|
error_check( error )
|
256
268
|
return Event::new( event.read_pointer, false )
|
257
269
|
end
|
@@ -71,10 +71,11 @@ module OpenCL
|
|
71
71
|
|
72
72
|
# Returns the context associated with the Sampler
|
73
73
|
def context
|
74
|
+
return @_context if @_context
|
74
75
|
ptr = MemoryPointer::new( Context )
|
75
76
|
error = OpenCL.clGetSamplerInfo(self, CONTEXT, Context.size, ptr, nil)
|
76
77
|
error_check(error)
|
77
|
-
|
78
|
+
@_context = Context::new( ptr.read_pointer )
|
78
79
|
end
|
79
80
|
|
80
81
|
get_info("Sampler", :cl_uint, "reference_count")
|
@@ -0,0 +1,38 @@
|
|
1
|
+
using OpenCLRefinements if RUBY_VERSION.scan(/\d+/).collect(&:to_i).first >= 2
|
2
|
+
module OpenCL
|
3
|
+
|
4
|
+
CONTEXT_KERNEL_PROFILING_MODES_COUNT_INTEL = 0x407A
|
5
|
+
CONTEXT_KERNEL_PROFILING_MODE_INFO_INTEL = 0x407B
|
6
|
+
KERNEL_IL_SYMBOLS_INTEL = 0x407C
|
7
|
+
KERNEL_BINARY_PROGRAM_INTEL = 0x407D
|
8
|
+
|
9
|
+
class Kernel
|
10
|
+
IL_SYMBOLS_INTEL = 0x407C
|
11
|
+
BINARY_PROGRAM_INTEL = 0x407D
|
12
|
+
module KernelProfilingINTEL
|
13
|
+
def binary_program_intel(device = program.devices.first)
|
14
|
+
ptr_bin = nil
|
15
|
+
begin
|
16
|
+
ptr = MemoryPointer::new( :size_t )
|
17
|
+
error = OpenCL.clGetKernelWorkGroupInfo(self, device, BINARY_PROGRAM_INTEL, 0, nil, ptr)
|
18
|
+
error_check(error)
|
19
|
+
bin_size = ptr.read_size_t
|
20
|
+
ptr_bin = MemoryPointer::new(bin_size)
|
21
|
+
error = OpenCL.clGetKernelWorkGroupInfo(self, device, BINARY_PROGRAM_INTEL, bin_size, ptr_bin, nil)
|
22
|
+
error_check(error)
|
23
|
+
rescue OpenCL::Error::INVALID_VALUE
|
24
|
+
ptr = MemoryPointer::new( :size_t )
|
25
|
+
error = OpenCL.clGetKernelInfo(self, BINARY_PROGRAM_INTEL, 0, nil, ptr)
|
26
|
+
error_check(error)
|
27
|
+
bin_size = ptr.read_size_t
|
28
|
+
ptr_bin = MemoryPointer::new(bin_size)
|
29
|
+
error = OpenCL.clGetKernelInfo(self, BINARY_PROGRAM_INTEL, bin_size, ptr_bin, nil)
|
30
|
+
error_check(error)
|
31
|
+
end
|
32
|
+
return ptr_bin.read_bytes(bin_size)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
register_extension(:cl_intel_kernel_profiling, KernelProfilingINTEL, "true")
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -0,0 +1,586 @@
|
|
1
|
+
module OpenCL
|
2
|
+
DEVICE_HOST_MEM_CAPABILITIES_INTEL = 0x4190
|
3
|
+
DEVICE_DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
|
4
|
+
DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
|
5
|
+
DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
|
6
|
+
DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194
|
7
|
+
|
8
|
+
UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
|
9
|
+
UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
|
10
|
+
UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
|
11
|
+
UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)
|
12
|
+
|
13
|
+
MEM_ALLOC_FLAGS_INTEL = 0x4195
|
14
|
+
|
15
|
+
MEM_ALLOC_WRITE_COMBINED_INTEL = (1 << 0)
|
16
|
+
|
17
|
+
MEM_TYPE_UNKNOWN_INTEL = 0x4196
|
18
|
+
MEM_TYPE_HOST_INTEL = 0x4197
|
19
|
+
MEM_TYPE_DEVICE_INTEL = 0x4198
|
20
|
+
MEM_TYPE_SHARED_INTEL = 0x4199
|
21
|
+
|
22
|
+
MEM_ALLOC_TYPE_INTEL = 0x419A
|
23
|
+
MEM_ALLOC_BASE_PTR_INTEL = 0x419B
|
24
|
+
MEM_ALLOC_SIZE_INTEL = 0x419C
|
25
|
+
MEM_ALLOC_DEVICE_INTEL = 0x419D
|
26
|
+
MEM_ALLOC_INFO_TBD1_INTEL = 0x419E
|
27
|
+
MEM_ALLOC_INFO_TBD2_INTEL = 0x419F
|
28
|
+
|
29
|
+
KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
|
30
|
+
KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
|
31
|
+
KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
|
32
|
+
KERNEL_EXEC_INFO_USM_PTRS_INTEL = 0x4203
|
33
|
+
|
34
|
+
MIGRATE_MEM_OBJECT_HOST_INTEL = (1 << 0)
|
35
|
+
MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED_INTEL = (1 << 1)
|
36
|
+
|
37
|
+
COMMAND_MEMFILL_INTEL = 0x4204
|
38
|
+
COMMAND_MEMCPY_INTEL = 0x4205
|
39
|
+
COMMAND_MIGRATEMEM_INTEL = 0x4206
|
40
|
+
COMMAND_MEMADVISE_INTEL = 0x4207
|
41
|
+
|
42
|
+
class CommandType
|
43
|
+
MEMFILL_INTEL = 0x4204
|
44
|
+
MEMCPY_INTEL = 0x4205
|
45
|
+
MIGRATEMEM_INTEL = 0x4206
|
46
|
+
MEMADVISE_INTEL = 0x4207
|
47
|
+
|
48
|
+
@codes[0x4204] = 'MEMFILL_INTEL'
|
49
|
+
@codes[0x4205] = 'MEMCPY_INTEL'
|
50
|
+
@codes[0x4206] = 'MIGRATEMEM_INTEL'
|
51
|
+
@codes[0x4207] = 'MEMADVISE_INTEL'
|
52
|
+
end
|
53
|
+
|
54
|
+
MEM_ADVICE_TBD0_INTEL = 0x4208
|
55
|
+
MEM_ADVICE_TBD1_INTEL = 0x4209
|
56
|
+
MEM_ADVICE_TBD2_INTEL = 0x420A
|
57
|
+
MEM_ADVICE_TBD3_INTEL = 0x420B
|
58
|
+
MEM_ADVICE_TBD4_INTEL = 0x420C
|
59
|
+
MEM_ADVICE_TBD5_INTEL = 0x420D
|
60
|
+
MEM_ADVICE_TBD6_INTEL = 0x420E
|
61
|
+
MEM_ADVICE_TBD7_INTEL = 0x420F
|
62
|
+
|
63
|
+
[[:cl_bitfield, :cl_mem_properties_intel],
|
64
|
+
[:cl_bitfield, :cl_mem_migration_flags_intel],
|
65
|
+
[:cl_bitfield, :cl_mem_alloc_flags_intel],
|
66
|
+
[:cl_uint, :cl_mem_info_intel],
|
67
|
+
[:cl_uint, :cl_mem_advice_intel],
|
68
|
+
[:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
|
69
|
+
[:cl_uint, :cl_unified_shared_memory_type_intel]
|
70
|
+
].each { |o_t, t|
|
71
|
+
typedef o_t, t
|
72
|
+
}
|
73
|
+
end
|
74
|
+
|
75
|
+
if RUBY_VERSION.scan(/\d+/).collect(&:to_i).first >= 2
|
76
|
+
module OpenCLRefinements
|
77
|
+
refine FFI::Pointer do
|
78
|
+
methods_prefix = [:put, :get, :write, :read, :put_array_of, :get_array_of]
|
79
|
+
[[:cl_bitfield, :cl_mem_properties_intel],
|
80
|
+
[:cl_bitfield, :cl_mem_migration_flags_intel],
|
81
|
+
[:cl_bitfield, :cl_mem_alloc_flags_intel],
|
82
|
+
[:cl_uint, :cl_mem_info_intel],
|
83
|
+
[:cl_uint, :cl_mem_advice_intel],
|
84
|
+
[:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
|
85
|
+
[:cl_uint, :cl_unified_shared_memory_type_intel]
|
86
|
+
].each { |orig, add|
|
87
|
+
methods_prefix.each { |meth|
|
88
|
+
alias_method "#{meth}_#{add}".to_sym, "#{meth}_#{orig}".to_sym
|
89
|
+
}
|
90
|
+
}
|
91
|
+
end
|
92
|
+
end
|
93
|
+
using OpenCLRefinements
|
94
|
+
else
|
95
|
+
class FFI::Pointer
|
96
|
+
methods_prefix = [:put, :get, :write, :read, :put_array_of, :get_array_of]
|
97
|
+
[[:cl_bitfield, :cl_mem_properties_intel],
|
98
|
+
[:cl_bitfield, :cl_mem_migration_flags_intel],
|
99
|
+
[:cl_bitfield, :cl_mem_alloc_flags_intel],
|
100
|
+
[:cl_uint, :cl_mem_info_intel],
|
101
|
+
[:cl_uint, :cl_mem_advice_intel],
|
102
|
+
[:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
|
103
|
+
[:cl_uint, :cl_unified_shared_memory_type_intel]
|
104
|
+
].each { |orig, add|
|
105
|
+
methods_prefix.each { |meth|
|
106
|
+
alias_method "#{meth}_#{add}".to_sym, "#{meth}_#{orig}".to_sym
|
107
|
+
}
|
108
|
+
}
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
module OpenCL
|
113
|
+
|
114
|
+
class UnifiedSharedMemoryCapabilitiesINTEL < Bitfield
|
115
|
+
UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
|
116
|
+
UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
|
117
|
+
UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
|
118
|
+
UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)
|
119
|
+
|
120
|
+
def names
|
121
|
+
fs = []
|
122
|
+
%w( UNIFIED_SHARED_MEMORY_ACCESS_INTEL UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL )..each { |f|
|
123
|
+
fs.push(f) if self.include?( self.class.const_get(f) )
|
124
|
+
}
|
125
|
+
return fs
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
module InnerInterface
|
130
|
+
TYPE_CONVERTER[:cl_unified_shared_memory_capabilities_intel] = UnifiedSharedMemoryCapabilitiesINTEL
|
131
|
+
end
|
132
|
+
|
133
|
+
class Mem
|
134
|
+
ALLOC_FLAGS_INTEL = 0x4195
|
135
|
+
|
136
|
+
TYPE_UNKNOWN_INTEL = 0x4196
|
137
|
+
TYPE_HOST_INTEL = 0x4197
|
138
|
+
TYPE_DEVICE_INTEL = 0x4198
|
139
|
+
TYPE_SHARED_INTEL = 0x4199
|
140
|
+
|
141
|
+
ALLOC_WRITE_COMBINED_INTEL = (1 << 0)
|
142
|
+
|
143
|
+
ALLOC_TYPE_INTEL = 0x419A
|
144
|
+
ALLOC_BASE_PTR_INTEL = 0x419B
|
145
|
+
ALLOC_SIZE_INTEL = 0x419C
|
146
|
+
ALLOC_DEVICE_INTEL = 0x419D
|
147
|
+
ALLOC_INFO_TBD1_INTEL = 0x419E
|
148
|
+
ALLOC_INFO_TBD2_INTEL = 0x419F
|
149
|
+
|
150
|
+
class UnifiedSharedMemoryTypeINTEL < EnumInt
|
151
|
+
UNKNOWN_INTEL = 0x4196
|
152
|
+
HOST_INTEL = 0x4197
|
153
|
+
DEVICE_INTEL = 0x4198
|
154
|
+
SHARED_INTEL = 0x4199
|
155
|
+
@codes = {}
|
156
|
+
@codes[0x4196] = 'UNKNOWN_INTEL'
|
157
|
+
@codes[0x4197] = 'HOST_INTEL'
|
158
|
+
@codes[0x4198] = 'DEVICE_INTEL'
|
159
|
+
@codes[0x4199] = 'SHARED_INTEL'
|
160
|
+
end
|
161
|
+
|
162
|
+
class AllocFlagsINTEL < Bitfield
|
163
|
+
DEFAULT_INTEL = 0
|
164
|
+
WRITE_COMBINED_INTEL = (1 << 0)
|
165
|
+
def names
|
166
|
+
fs = []
|
167
|
+
%w( WRITE_COMBINED_INTEL ).each { |f|
|
168
|
+
fs.push(f) if self.include?( self.class.const_get(f) )
|
169
|
+
}
|
170
|
+
return fs
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
class AdviceINTEL < EnumInt
|
175
|
+
TBD0_INTEL = 0x4208
|
176
|
+
TBD1_INTEL = 0x4209
|
177
|
+
TBD2_INTEL = 0x420A
|
178
|
+
TBD3_INTEL = 0x420B
|
179
|
+
TBD4_INTEL = 0x420C
|
180
|
+
TBD5_INTEL = 0x420D
|
181
|
+
TBD6_INTEL = 0x420E
|
182
|
+
TBD7_INTEL = 0x420F
|
183
|
+
@codes = {}
|
184
|
+
@codes[0x4208] = 'TBD0_INTEL'
|
185
|
+
@codes[0x4209] = 'TBD1_INTEL'
|
186
|
+
@codes[0x420A] = 'TBD2_INTEL'
|
187
|
+
@codes[0x420B] = 'TBD3_INTEL'
|
188
|
+
@codes[0x420C] = 'TBD4_INTEL'
|
189
|
+
@codes[0x420D] = 'TBD5_INTEL'
|
190
|
+
@codes[0x420E] = 'TBD6_INTEL'
|
191
|
+
@codes[0x420F] = 'TBD7_INTEL'
|
192
|
+
end
|
193
|
+
|
194
|
+
class MigrationFlagsINTEL < Bitfield
|
195
|
+
HOST_INTEL = (1 << 0)
|
196
|
+
CONTENT_UNDEFINED_INTEL = (1 << 1)
|
197
|
+
# Returns an Array of String representing the different flags set
|
198
|
+
def names
|
199
|
+
fs = []
|
200
|
+
%w( HOST CONTENT_UNDEFINED ).each { |f|
|
201
|
+
fs.push(f) if self.include?( self.class.const_get(f) )
|
202
|
+
}
|
203
|
+
return fs
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
209
|
+
module InnerInterface
|
210
|
+
TYPE_CONVERTER[:cl_unified_shared_memory_type_intel] = Mem::UnifiedSharedMemoryTypeINTEL
|
211
|
+
TYPE_CONVERTER[:cl_mem_alloc_flags_intel] = Mem::AllocFlagsINTEL
|
212
|
+
TYPE_CONVERTER[:cl_mem_advice_intel] = Mem::AdviceINTEL
|
213
|
+
TYPE_CONVERTER[:cl_mem_migration_flags_intel] = Mem::MigrationFlagsINTEL
|
214
|
+
end
|
215
|
+
|
216
|
+
class Device
|
217
|
+
HOST_MEM_CAPABILITIES_INTEL = 0x4190
|
218
|
+
DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
|
219
|
+
SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
|
220
|
+
CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
|
221
|
+
SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194
|
222
|
+
|
223
|
+
module UnifiedSharedMemoryPreviewINTEL
|
224
|
+
extend InnerGenerator
|
225
|
+
|
226
|
+
get_info("Device", :cl_unified_shared_memory_capabilities_intel, "host_mem_capabilities_intel")
|
227
|
+
get_info("Device", :cl_unified_shared_memory_capabilities_intel, "device_mem_capabilities_intel")
|
228
|
+
get_info("Device", :cl_unified_shared_memory_capabilities_intel, "device_shared_mem_capabilities_intel")
|
229
|
+
get_info("Device", :cl_unified_shared_memory_capabilities_intel, "cross_device_mem_capabilities_intel")
|
230
|
+
get_info("Device", :cl_unified_shared_memory_capabilities_intel, "shared_system_mem_capabilities_intel")
|
231
|
+
end
|
232
|
+
register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
|
233
|
+
end
|
234
|
+
|
235
|
+
class USMPointer < Pointer
|
236
|
+
|
237
|
+
def initialize(address, context)
|
238
|
+
super(address)
|
239
|
+
@context = context
|
240
|
+
end
|
241
|
+
|
242
|
+
def inspect
|
243
|
+
return "#<#{self.class.name}: 0x#{address.to_s(16)} (#{size})>"
|
244
|
+
end
|
245
|
+
|
246
|
+
def slice(offset, size)
|
247
|
+
res = super(offset, size)
|
248
|
+
self.class.new(res, @context) # the context is held in @context (set in initialize)
|
249
|
+
end
|
250
|
+
|
251
|
+
def +( offset )
|
252
|
+
self.slice(offset, self.size - offset)
|
253
|
+
end
|
254
|
+
|
255
|
+
def free
|
256
|
+
@context.mem_free_intel(alloc_base_ptr_intel)
|
257
|
+
end
|
258
|
+
|
259
|
+
def alloc_type_intel
|
260
|
+
@context.mem_alloc_type_intel(self)
|
261
|
+
end
|
262
|
+
|
263
|
+
def alloc_flags_intel
|
264
|
+
@context.mem_alloc_flags_intel(self)
|
265
|
+
end
|
266
|
+
|
267
|
+
def alloc_base_ptr_intel
|
268
|
+
@context.mem_alloc_base_ptr_intel(self)
|
269
|
+
end
|
270
|
+
|
271
|
+
def alloc_size_intel
|
272
|
+
@context.mem_alloc_size_intel(self)
|
273
|
+
end
|
274
|
+
|
275
|
+
def alloc_device_intel
|
276
|
+
@context.mem_alloc_device_intel(self) # use @context like the other alloc_*_intel queries
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
class Context
|
281
|
+
module UnifiedSharedMemoryPreviewINTEL
|
282
|
+
extend InnerGenerator
|
283
|
+
|
284
|
+
def clGetMemAllocInfoINTEL
|
285
|
+
return @_clGetMemAllocInfoINTEL if @_clGetMemAllocInfoINTEL
|
286
|
+
@_clGetMemAllocInfoINTEL = platform.get_extension_function("clGetMemAllocInfoINTEL", :cl_int, [Context, :pointer, :cl_mem_info_intel, :size_t, :pointer, :pointer])
|
287
|
+
error_check(OpenCL::INVALID_OPERATION) unless @_clGetMemAllocInfoINTEL
|
288
|
+
return @_clGetMemAllocInfoINTEL
|
289
|
+
end
|
290
|
+
|
291
|
+
def clHostMemAllocINTEL
|
292
|
+
return @_clHostMemAllocINTEL if @_clHostMemAllocINTEL
|
293
|
+
@_clHostMemAllocINTEL = platform.get_extension_function("clHostMemAllocINTEL", :pointer, [Context, :pointer, :size_t, :cl_uint, :pointer])
|
294
|
+
error_check(OpenCL::INVALID_OPERATION) unless @_clHostMemAllocINTEL
|
295
|
+
return @_clHostMemAllocINTEL
|
296
|
+
end
|
297
|
+
|
298
|
+
def clDeviceMemAllocINTEL
|
299
|
+
return @_clDeviceMemAllocINTEL if @_clDeviceMemAllocINTEL
|
300
|
+
@_clDeviceMemAllocINTEL = platform.get_extension_function("clDeviceMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
|
301
|
+
error_check(OpenCL::INVALID_OPERATION) unless @_clDeviceMemAllocINTEL
|
302
|
+
return @_clDeviceMemAllocINTEL
|
303
|
+
end
|
304
|
+
|
305
|
+
def clSharedMemAllocINTEL
|
306
|
+
return @_clSharedMemAllocINTEL if @_clSharedMemAllocINTEL
|
307
|
+
@_clSharedMemAllocINTEL = platform.get_extension_function("clSharedMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
|
308
|
+
error_check(OpenCL::INVALID_OPERATION) unless @_clSharedMemAllocINTEL
|
309
|
+
return @_clSharedMemAllocINTEL
|
310
|
+
end
|
311
|
+
|
312
|
+
def clMemFreeINTEL
|
313
|
+
return @_clMemFreeINTEL if @_clMemFreeINTEL
|
314
|
+
@_clMemFreeINTEL = platform.get_extension_function("clMemFreeINTEL", :cl_int, [Context, :pointer])
|
315
|
+
error_check(OpenCL::INVALID_OPERATION) unless @_clMemFreeINTEL
|
316
|
+
return @_clMemFreeINTEL
|
317
|
+
end
|
318
|
+
|
319
|
+
def get_mem_properties_intel(properties)
|
320
|
+
return nil unless properties
|
321
|
+
properties = [properties].flatten
|
322
|
+
props = MemoryPointer::new(:cl_mem_properties_intel, properties.length + 1)
|
323
|
+
properties.each_with_index { |e, i|
|
324
|
+
props[i].write_cl_mem_properties_intel(e)
|
325
|
+
}
|
326
|
+
props[properties.length].write_cl_mem_properties_intel(0)
|
327
|
+
return props
|
328
|
+
end
|
329
|
+
|
330
|
+
private :get_mem_properties_intel
|
331
|
+
|
332
|
+
def host_mem_alloc_intel(size, options = {})
|
333
|
+
properties = get_mem_properties_intel(options[:properties])
|
334
|
+
alignment = 0
|
335
|
+
alignment = options[:alignment] if options[:alignment]
|
336
|
+
error = MemoryPointer::new( :cl_int )
|
337
|
+
ptr = clHostMemAllocINTEL.call(self, properties, size, alignment, error)
|
338
|
+
error_check(error.read_cl_int)
|
339
|
+
return USMPointer::new(ptr.slice(0, size), self)
|
340
|
+
end
|
341
|
+
|
342
|
+
# Allocates +size+ bytes through clDeviceMemAllocINTEL for +device+ on this
# context.
#
# Recognized +options+ keys:
# * :properties - value(s) forwarded to get_mem_properties_intel
# * :alignment  - alignment passed to the allocator (0 when absent)
#
# The status code written by the call is passed to error_check. Returns a
# USMPointer wrapping the first +size+ bytes of the returned pointer.
def device_mem_alloc_intel(device, size, options = {})
  props = get_mem_properties_intel(options[:properties])
  align = options[:alignment] || 0
  status = MemoryPointer::new( :cl_int )
  raw = clDeviceMemAllocINTEL.call(self, device, props, size, align, status)
  error_check(status.read_cl_int)
  USMPointer::new(raw.slice(0, size), self)
end
|
351
|
+
|
352
|
+
# Allocates +size+ bytes through clSharedMemAllocINTEL for +device+ on this
# context.
#
# Recognized +options+ keys:
# * :properties - value(s) forwarded to get_mem_properties_intel
# * :alignment  - alignment passed to the allocator (0 when absent)
#
# The status code written by the call is passed to error_check. Returns a
# USMPointer wrapping the first +size+ bytes of the returned pointer.
def shared_mem_alloc_intel(device, size, options = {})
  props = get_mem_properties_intel(options[:properties])
  align = options[:alignment] || 0
  status = MemoryPointer::new( :cl_int )
  raw = clSharedMemAllocINTEL.call(self, device, props, size, align, status)
  error_check(status.read_cl_int)
  USMPointer::new(raw.slice(0, size), self)
end
|
361
|
+
|
362
|
+
# Releases +ptr+ through clMemFreeINTEL on this context and passes the
# returned status to error_check. Returns self.
def mem_free_intel(ptr)
  status = clMemFreeINTEL.call(self, ptr)
  error_check(status)
  self
end
|
367
|
+
|
368
|
+
# Queries OpenCL::Mem::ALLOC_TYPE_INTEL for +ptr+ via clGetMemAllocInfoINTEL
# and returns the value wrapped in OpenCL::Mem::UnifiedSharedMemoryTypeINTEL.
# The call status is passed to error_check.
def mem_alloc_type_intel(ptr)
  result = MemoryPointer::new(:cl_unified_shared_memory_type_intel)
  status = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_TYPE_INTEL, result.size, result, nil)
  error_check(status)
  raw_type = result.read_cl_unified_shared_memory_type_intel
  OpenCL::Mem::UnifiedSharedMemoryTypeINTEL::new(raw_type)
end
|
374
|
+
|
375
|
+
# Queries OpenCL::Mem::ALLOC_FLAGS_INTEL for +ptr+ via clGetMemAllocInfoINTEL
# and returns the value wrapped in OpenCL::Mem::AllocFlagsINTEL.
# The call status is passed to error_check.
def mem_alloc_flags_intel(ptr)
  result = MemoryPointer::new(:cl_mem_alloc_flags_intel)
  status = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_FLAGS_INTEL, result.size, result, nil)
  error_check(status)
  raw_flags = result.read_cl_mem_alloc_flags_intel
  OpenCL::Mem::AllocFlagsINTEL::new(raw_flags)
end
|
381
|
+
|
382
|
+
# Queries OpenCL::Mem::ALLOC_BASE_PTR_INTEL for +ptr+ via
# clGetMemAllocInfoINTEL and returns the pointer read back from the result
# buffer. The call status is passed to error_check.
def mem_alloc_base_ptr_intel(ptr)
  result = MemoryPointer::new(:pointer)
  status = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_BASE_PTR_INTEL, result.size, result, nil)
  error_check(status)
  result.read_pointer
end
|
388
|
+
|
389
|
+
# Queries OpenCL::Mem::ALLOC_SIZE_INTEL for +ptr+ via clGetMemAllocInfoINTEL
# and returns the size_t value read back from the result buffer.
# The call status is passed to error_check.
def mem_alloc_size_intel(ptr)
  result = MemoryPointer::new(:size_t)
  status = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_SIZE_INTEL, result.size, result, nil)
  error_check(status)
  result.read_size_t
end
|
395
|
+
|
396
|
+
# Queries OpenCL::Mem::ALLOC_DEVICE_INTEL for +ptr+ via clGetMemAllocInfoINTEL
# and returns a Device built from the pointer read back from the result
# buffer. The call status is passed to error_check.
#
# The extension function is obtained from this object's own
# clGetMemAllocInfoINTEL accessor, exactly like the other mem_alloc_*_intel
# queries; it is not looked up on the OpenCL module.
def mem_alloc_device_intel(ptr)
  ptr_res = MemoryPointer::new( Device )
  error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_DEVICE_INTEL, Device.size, ptr_res, nil)
  error_check(error)
  return Device::new(ptr_res.read_pointer)
end
|
402
|
+
|
403
|
+
end
|
404
|
+
# Registers the UnifiedSharedMemoryPreviewINTEL module under the
# :cl_intel_unified_shared_memory_preview key. The string argument is a Ruby
# expression testing whether the platform lists that extension; presumably
# register_extension evaluates it to decide when the module applies (confirm
# against its definition).
register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
|
405
|
+
end
|
406
|
+
|
407
|
+
class Kernel
  # Parameter names handed to clSetKernelExecInfo by the setters below.
  EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
  EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
  EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
  EXEC_INFO_USM_PTRS_INTEL = 0x4203

  # Kernel-side helpers for the cl_intel_unified_shared_memory_preview extension
  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # Returns the clSetKernelArgMemPointerINTEL extension function, resolved
    # through the kernel's context platform on first use and cached.
    # error_check is called with OpenCL::INVALID_OPERATION when the lookup
    # yields nothing.
    def clSetKernelArgMemPointerINTEL
      return @_clSetKernelArgMemPointerINTEL if @_clSetKernelArgMemPointerINTEL
      # The parameter types are passed as a single Array, like every other
      # get_extension_function call in this file.
      @_clSetKernelArgMemPointerINTEL = context.platform.get_extension_function("clSetKernelArgMemPointerINTEL", :cl_int, [Kernel, :cl_uint, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clSetKernelArgMemPointerINTEL
      return @_clSetKernelArgMemPointerINTEL
    end

    # Binds +usm_pointer+ to the kernel argument at +index+ through
    # clSetKernelArgMemPointerINTEL. Returns self.
    def set_arg_mem_pointer_intel(index, usm_pointer)
      error = clSetKernelArgMemPointerINTEL.call(self, index, usm_pointer)
      error_check(error)
      return self
    end

    # Passes one pointer or a (possibly nested) Array of pointers to
    # clSetKernelExecInfo under EXEC_INFO_USM_PTRS_INTEL. Returns self.
    def set_usm_ptrs_intel( ptrs )
      pointers = [ptrs].flatten
      pt = MemoryPointer::new( :pointer, pointers.length )
      pointers.each_with_index { |p, i|
        pt[i].write_pointer(p)
      }
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_USM_PTRS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Writes +flag+ as a cl_bool and passes it to clSetKernelExecInfo under
    # EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL. Returns self.
    def set_indirect_host_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Writes +flag+ as a cl_bool and passes it to clSetKernelExecInfo under
    # EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL. Returns self.
    def set_indirect_device_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Writes +flag+ as a cl_bool and passes it to clSetKernelExecInfo under
    # EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL (the constant defined on Kernel
    # for shared allocations). Returns self.
    def set_shared_device_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets every kernel argument from +args+ and enqueues the kernel on
    # +command_queue+ with +global_work_size+.
    #
    # +args+ must hold exactly num_args values, optionally followed by one
    # extra trailing element that is forwarded as the options of
    # enqueue_ndrange_kernel; any other count triggers
    # error_check(INVALID_KERNEL_ARGS).
    #
    # SVMPointer arguments are set with set_arg_svm_pointer when the platform
    # version is at least 2.0, USMPointer arguments with
    # set_arg_mem_pointer_intel, and everything else with set_arg.
    def enqueue_with_args(command_queue, global_work_size, *args)
      n = self.num_args
      error_check(INVALID_KERNEL_ARGS) if args.length < n
      error_check(INVALID_KERNEL_ARGS) if args.length > n + 1
      if args.length == n + 1
        options = args.last
      else
        options = {}
      end
      n.times { |i|
        if args[i].class == SVMPointer and self.context.platform.version_number >= 2.0 then
          self.set_arg_svm_pointer(i, args[i])
        elsif args[i].class == USMPointer then
          self.set_arg_mem_pointer_intel(i, args[i])
        else
          self.set_arg(i, args[i])
        end
      }
      command_queue.enqueue_ndrange_kernel(self, global_work_size, options)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
|
488
|
+
|
489
|
+
class Kernel
  class Arg
    # Argument setter aware of USM pointers
    # (cl_intel_unified_shared_memory_preview)
    module UnifiedSharedMemoryPreviewINTEL
      # Sets this kernel argument to +value+.
      #
      # An SVMPointer is set with OpenCL.set_kernel_arg_svm_pointer when the
      # platform version is at least 2.0, a USMPointer with the kernel's
      # set_arg_mem_pointer_intel, and anything else with
      # OpenCL.set_kernel_arg, which also receives the optional +size+.
      def set(value, size = nil)
        if value.class == SVMPointer and @kernel.context.platform.version_number >= 2.0 then
          OpenCL.set_kernel_arg_svm_pointer(@kernel, @index, value)
        elsif value.class == USMPointer then
          # Dispatch on the value being set; this method has no argument list
          # or index variable of its own.
          @kernel.set_arg_mem_pointer_intel(@index, value)
        else
          OpenCL.set_kernel_arg(@kernel, @index, value, size)
        end
      end
    end
    register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
  end
end
|
505
|
+
|
506
|
+
class CommandQueue
  # Command-queue helpers for the cl_intel_unified_shared_memory_preview
  # extension
  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # Returns the clEnqueueMemFillINTEL extension function, resolved through
    # the platform on first use and cached. error_check is called with
    # OpenCL::INVALID_OPERATION when the lookup yields nothing.
    def clEnqueueMemFillINTEL
      return @_clEnqueueMemFillINTEL if @_clEnqueueMemFillINTEL
      @_clEnqueueMemFillINTEL = platform.get_extension_function("clEnqueueMemFillINTEL", :cl_int, [CommandQueue, :pointer, :pointer, :size_t, :size_t, :cl_uint, :pointer, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMemFillINTEL
      return @_clEnqueueMemFillINTEL
    end

    # Returns the clEnqueueMemcpyINTEL extension function, resolved through
    # the platform on first use and cached. error_check is called with
    # OpenCL::INVALID_OPERATION when the lookup yields nothing.
    def clEnqueueMemcpyINTEL
      return @_clEnqueueMemcpyINTEL if @_clEnqueueMemcpyINTEL
      @_clEnqueueMemcpyINTEL = platform.get_extension_function("clEnqueueMemcpyINTEL", :cl_int, [CommandQueue, :cl_bool, :pointer, :pointer, :size_t, :cl_uint, :pointer, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMemcpyINTEL
      return @_clEnqueueMemcpyINTEL
    end

    # Returns the clEnqueueMigrateMemINTEL extension function, resolved
    # through the platform on first use and cached. error_check is called
    # with OpenCL::INVALID_OPERATION when the lookup yields nothing.
    def clEnqueueMigrateMemINTEL
      return @_clEnqueueMigrateMemINTEL if @_clEnqueueMigrateMemINTEL
      @_clEnqueueMigrateMemINTEL = platform.get_extension_function("clEnqueueMigrateMemINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_migration_flags_intel, :cl_uint, :pointer, :pointer])
      # Validate and return the migrate entry point itself, not the memcpy one.
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMigrateMemINTEL
      return @_clEnqueueMigrateMemINTEL
    end

    # Returns the clEnqueueMemAdviseINTEL extension function, resolved
    # through the platform on first use and cached. error_check is called
    # with OpenCL::INVALID_OPERATION when the lookup yields nothing.
    def clEnqueueMemAdviseINTEL
      return @_clEnqueueMemAdviseINTEL if @_clEnqueueMemAdviseINTEL
      @_clEnqueueMemAdviseINTEL = platform.get_extension_function("clEnqueueMemAdviseINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_advice_intel, :cl_uint, :pointer, :pointer])
      error_check(OpenCL::INVALID_OPERATION) unless @_clEnqueueMemAdviseINTEL
      return @_clEnqueueMemAdviseINTEL
    end

    # Enqueues a fill of +usm_ptr+ with +pattern+ and returns the Event
    # created for the command.
    #
    # Recognized +options+ keys (besides whatever get_event_wait_list reads):
    # * :pattern_size - overrides pattern.size
    # * :size         - overrides usm_ptr.size
    def enqueue_mem_fill_intel(usm_ptr, pattern, options = {})
      num_events, events = get_event_wait_list( options )
      pattern_size = pattern.size
      pattern_size = options[:pattern_size] if options[:pattern_size]
      size = usm_ptr.size
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      error = clEnqueueMemFillINTEL.call(self, usm_ptr, pattern, pattern_size, size, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues a copy from +src_ptr+ to +dst_ptr+ and returns the Event
    # created for the command.
    #
    # Recognized +options+ keys (besides whatever get_event_wait_list reads):
    # * :blocking, :blocking_copy - when either is truthy the call is made
    #   with TRUE as the blocking argument, otherwise FALSE
    # * :size - overrides the default, the smaller of the two pointer sizes
    def enqueue_memcpy_intel(dst_ptr, src_ptr, options = {})
      num_events, events = get_event_wait_list( options )
      blocking = FALSE
      blocking = TRUE if options[:blocking] or options[:blocking_copy]
      size = [dst_ptr.size, src_ptr.size].min
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      error = clEnqueueMemcpyINTEL.call(self, blocking, dst_ptr, src_ptr, size, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues a migration of +usm_ptr+ and returns the Event created for the
    # command. The migration flags come from get_flags( options ).
    #
    # Recognized +options+ keys (besides whatever get_event_wait_list and
    # get_flags read):
    # * :size - overrides usm_ptr.size
    def enqueue_migrate_mem_intel(usm_ptr, options = {})
      num_events, events = get_event_wait_list( options )
      flags = get_flags( options )
      size = usm_ptr.size
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      error = clEnqueueMigrateMemINTEL.call(self, usm_ptr, size, flags, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues the memory +advice+ for +usm_ptr+ and returns the Event
    # created for the command.
    #
    # Recognized +options+ keys (besides whatever get_event_wait_list reads):
    # * :size - overrides usm_ptr.size
    def enqueue_mem_advise_intel(usm_ptr, advice, options = {})
      num_events, events = get_event_wait_list( options )
      size = usm_ptr.size
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      # clEnqueueMemAdviseINTEL is a zero-argument accessor returning the
      # extension function, so it has to be invoked through .call.
      error = clEnqueueMemAdviseINTEL.call(self, usm_ptr, size, advice, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "device.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
|
586
|
+
end
|