opencl_ruby_ffi 1.3.6 → 1.3.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,7 +5,7 @@ module OpenCL
5
5
  class SVMPointer < Pointer
6
6
 
7
7
  # create a new SVMPointer from its address and the context it pertains to
8
- def initialize( address, context, size, base = nil )
8
+ def initialize( address, context, base = nil )
9
9
  super( address )
10
10
  @context = context
11
11
  if base then
@@ -13,16 +13,20 @@ module OpenCL
13
13
  else
14
14
  @base = address
15
15
  end
16
- @size = size
17
16
  end
18
17
 
19
18
  def inspect
20
- return "#<#{self.class.name}: #{@size}>"
19
+ return "#<#{self.class.name}: 0x#{address.to_s(16)} (#{size})>"
20
+ end
21
+
22
+ def slice(offset, size)
23
+ res = super(offset, size)
24
+ return slef.class.new( res, @context, @base )
21
25
  end
22
26
 
23
27
  # creates a new SVMPointer relative to an existing one from an offset
24
28
  def +( offset )
25
- return SVMPointer::new( self.address + offset, @context, @size, @base )
29
+ self.slice(offset, self.size - offset)
26
30
  end
27
31
 
28
32
  # frees the parent memory region associated to this SVMPointer
@@ -51,7 +55,7 @@ module OpenCL
51
55
  alignment = options[:alignment] if options[:alignment]
52
56
  ptr = clSVMAlloc( context, flags, size, alignment )
53
57
  error_check(MEM_OBJECT_ALLOCATION_FAILURE) if ptr.null?
54
- return SVMPointer::new( ptr, context, size )
58
+ return SVMPointer::new( ptr.slice(0, size), context )
55
59
  end
56
60
 
57
61
  # Frees an SVMPointer
@@ -112,11 +116,11 @@ module OpenCL
112
116
  # * +command_queue+ - CommandQueue used to execute the write command
113
117
  # * +dst_ptr+ - the Pointer (or convertible to Pointer using to_ptr) or SVMPointer to be written to
114
118
  # * +src_ptr+ - the Pointer (or convertible to Pointer using to_ptr) or SVMPointer to be read from
115
- # * +size+ - the size of data to copy
116
119
  # * +options+ - a hash containing named options
117
120
  #
118
121
  # ==== Options
119
122
  #
123
+ # * +:size+ - the size of data to copy
120
124
  # * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
121
125
  # * +:blocking_copy+ - if provided indicates if the command blocks until the copy finishes
122
126
  # * +:blocking+ - if provided indicates if the command blocks until the copy finishes
@@ -124,10 +128,12 @@ module OpenCL
124
128
  # ==== Returns
125
129
  #
126
130
  # the Event associated with the command
127
- def self.enqueue_svm_memcpy(command_queue, dst_ptr, src_ptr, size, options = {})
131
+ def self.enqueue_svm_memcpy(command_queue, dst_ptr, src_ptr, options = {})
128
132
  error_check(INVALID_OPERATION) if command_queue.context.platform.version_number < 2.0
129
133
  blocking = FALSE
130
134
  blocking = TRUE if options[:blocking] or options[:blocking_copy]
135
+ size = [dst_ptr.size, src_ptr.size].min
136
+ size = options[:size] if options[:size]
131
137
  num_events, events = get_event_wait_list( options )
132
138
  event = MemoryPointer::new( Event )
133
139
  error = clEnqueueSVMMemcpy(command_queue, blocking, dst_ptr, src_ptr, size, num_events, events, event)
@@ -142,39 +148,43 @@ module OpenCL
142
148
  # * +command_queue+ - CommandQueue used to execute the write command
143
149
  # * +svm_ptr+ - the SVMPointer to the area to fill
144
150
  # * +pattern+ - the Pointer (or convertible to Pointer using to_ptr) to the memory area where the pattern is stored
145
- # * +size+ - the size of the area to fill
146
151
  #
147
152
  # ==== Options
148
153
  #
154
+ # * +:size+ - the size of the area to fill
149
155
  # * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
150
156
  # * +:pattern_size+ - if provided indicates the size of the pattern, else the maximum pattern data is used
151
157
  #
152
158
  # ==== Returns
153
159
  #
154
160
  # the Event associated with the command
155
- def self.enqueue_svm_memfill(command_queue, svm_ptr, pattern, size, options = {})
161
+ def self.enqueue_svm_memfill(command_queue, svm_ptr, pattern, options = {})
156
162
  error_check(INVALID_OPERATION) if command_queue.context.platform.version_number < 2.0
157
163
  num_events, events = get_event_wait_list( options )
158
164
  pattern_size = pattern.size
159
165
  pattern_size = options[:pattern_size] if options[:pattern_size]
166
+ size = svm_ptr.size
167
+ size = options[:size] if options[:size]
160
168
  event = MemoryPointer::new( Event )
161
169
  error = clEnqueueSVMMemFill(command_queue, svm_ptr, pattern, pattern_size, size, num_events, events, event)
162
170
  error_check(error)
163
171
  return Event::new(event.read_pointer, false)
164
172
  end
165
173
 
174
+ singleton_class.send(:alias_method, :enqueue_svm_mem_fill, :enqueue_svm_memfill)
175
+
166
176
  # Enqueues a command to map an Image into host memory
167
177
  #
168
178
  # ==== Attributes
169
179
  #
170
180
  # * +command_queue+ - CommandQueue used to execute the map command
171
181
  # * +svm_ptr+ - the SVMPointer to the area to map
172
- # * +size+ - the size of the region to map
173
182
  # * +map_flags+ - a single or an Array of :cl_map_flags flags
174
183
  # * +options+ - a hash containing named options
175
184
  #
176
185
  # ==== Options
177
186
  #
187
+ # * +:size+ - the size of the region to map
178
188
  # * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
179
189
  # * +:blocking_map+ - if provided indicates if the command blocks until the region is mapped
180
190
  # * +:blocking+ - if provided indicates if the command blocks until the region is mapped
@@ -182,11 +192,13 @@ module OpenCL
182
192
  # ==== Returns
183
193
  #
184
194
  # the Event associated with the command
185
- def self.enqueue_svm_map( command_queue, svm_ptr, size, map_flags, options = {} )
195
+ def self.enqueue_svm_map( command_queue, svm_ptr, map_flags, options = {} )
186
196
  error_check(INVALID_OPERATION) if command_queue.context.platform.version_number < 2.0
187
197
  blocking = FALSE
188
198
  blocking = TRUE if options[:blocking] or options[:blocking_map]
189
199
  flags = get_flags( {:flags => map_flags} )
200
+ size = svm_ptr.size
201
+ size = options[:size] if options[:size]
190
202
  num_events, events = get_event_wait_list( options )
191
203
  event = MemoryPointer::new( Event )
192
204
  error = clEnqueueSVMMap( command_queue, blocking, flags, svm_ptr, size, num_events, events, event )
@@ -251,7 +263,7 @@ module OpenCL
251
263
  sizes_p[i].write_size_t(sizes[i])
252
264
  }
253
265
  event = MemoryPointer::new( Event )
254
- error = clEnqueueSVMMigrateMem( command_queue, num_svm_pointers, svn_ptrs_p, sizes_p, flags, num_events, events, event )
266
+ error = clEnqueueSVMMigrateMem( command_queue, num_svm_pointers, svn_ptrs_p, sizes_p, flags, num_events, events, event )
255
267
  error_check( error )
256
268
  return Event::new( event.read_pointer, false )
257
269
  end
@@ -30,7 +30,7 @@ module OpenCL
30
30
  prop_size += 2 if options[:mip_filter_mode]
31
31
  prop_size += 2 if options[:lod_min]
32
32
  prop_size += 2 if options[:lod_max]
33
- properties = MemoryPointer::new( :cl_sampler_info )
33
+ properties = MemoryPointer::new( :cl_sampler_properties, prop_size )
34
34
  properties[0].write_cl_sampler_info( Sampler::NORMALIZED_COORDS )
35
35
  properties[1].write_cl_bool( normalized_coords )
36
36
  properties[2].write_cl_sampler_info( Sampler::ADDRESSING_MODE )
@@ -71,10 +71,12 @@ module OpenCL
71
71
 
72
72
  # Returns the context associated with the Sampler
73
73
  def context
74
- ptr = MemoryPointer::new( Context )
75
- error = OpenCL.clGetSamplerInfo(self, CONTEXT, Context.size, ptr, nil)
76
- error_check(error)
77
- return Context::new( ptr.read_pointer )
74
+ @_context ||= begin
75
+ ptr = MemoryPointer::new( Context )
76
+ error = OpenCL.clGetSamplerInfo(self, CONTEXT, Context.size, ptr, nil)
77
+ error_check(error)
78
+ Context::new( ptr.read_pointer )
79
+ end
78
80
  end
79
81
 
80
82
  get_info("Sampler", :cl_uint, "reference_count")
@@ -9,3 +9,4 @@ require "opencl_ruby_ffi/ext/device_fission.rb"
9
9
  require "opencl_ruby_ffi/khr/sub_groups.rb"
10
10
  require "opencl_ruby_ffi/khr/priority_hints.rb"
11
11
  require "opencl_ruby_ffi/khr/throttle_hints.rb"
12
+ require "opencl_ruby_ffi/khr/device_uuid.rb"
@@ -0,0 +1,38 @@
1
+ using OpenCLRefinements if RUBY_VERSION.scan(/\d+/).collect(&:to_i).first >= 2
2
+ module OpenCL
3
+
4
+ CONTEXT_KERNEL_PROFILING_MODES_COUNT_INTEL = 0x407A
5
+ CONTEXT_KERNEL_PROFILING_MODE_INFO_INTEL = 0x407B
6
+ KERNEL_IL_SYMBOLS_INTEL = 0x407C
7
+ KERNEL_BINARY_PROGRAM_INTEL = 0x407D
8
+
9
+ class Kernel
10
+ IL_SYMBOLS_INTEL = 0x407C
11
+ BINARY_PROGRAM_INTEL = 0x407D
12
+ module KernelProfilingINTEL
13
+ def binary_program_intel(device = program.devices.first)
14
+ ptr_bin = nil
15
+ begin
16
+ ptr = MemoryPointer::new( :size_t )
17
+ error = OpenCL.clGetKernelWorkGroupInfo(self, device, BINARY_PROGRAM_INTEL, 0, nil, ptr)
18
+ error_check(error)
19
+ bin_size = ptr.read_size_t
20
+ ptr_bin = MemoryPointer::new(bin_size)
21
+ error = OpenCL.clGetKernelWorkGroupInfo(self, device, BINARY_PROGRAM_INTEL, bin_size, ptr_bin, nil)
22
+ error_check(error)
23
+ rescue OpenCL::Error::INVALID_VALUE
24
+ ptr = MemoryPointer::new( :size_t )
25
+ error = OpenCL.clGetKernelInfo(self, BINARY_PROGRAM_INTEL, 0, nil, ptr)
26
+ error_check(error)
27
+ bin_size = ptr.read_size_t
28
+ ptr_bin = MemoryPointer::new(bin_size)
29
+ error = OpenCL.clGetKernelInfo(self, BINARY_PROGRAM_INTEL, bin_size, ptr_bin, nil)
30
+ error_check(error)
31
+ end
32
+ return ptr_bin.read_bytes(bin_size)
33
+ end
34
+ end
35
+ register_extension(:cl_intel_kernel_profiling, KernelProfilingINTEL, "true")
36
+ end
37
+
38
+ end
@@ -0,0 +1,613 @@
1
+ module OpenCL
2
+ DEVICE_HOST_MEM_CAPABILITIES_INTEL = 0x4190
3
+ DEVICE_DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
4
+ DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
5
+ DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
6
+ DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194
7
+
8
+ UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
9
+ UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
10
+ UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
11
+ UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)
12
+
13
+ MEM_ALLOC_FLAGS_INTEL = 0x4195
14
+
15
+ MEM_ALLOC_WRITE_COMBINED_INTEL = (1 << 0)
16
+
17
+ MEM_TYPE_UNKNOWN_INTEL = 0x4196
18
+ MEM_TYPE_HOST_INTEL = 0x4197
19
+ MEM_TYPE_DEVICE_INTEL = 0x4198
20
+ MEM_TYPE_SHARED_INTEL = 0x4199
21
+
22
+ MEM_ALLOC_TYPE_INTEL = 0x419A
23
+ MEM_ALLOC_BASE_PTR_INTEL = 0x419B
24
+ MEM_ALLOC_SIZE_INTEL = 0x419C
25
+ MEM_ALLOC_DEVICE_INTEL = 0x419D
26
+ MEM_ALLOC_INFO_TBD1_INTEL = 0x419E
27
+ MEM_ALLOC_INFO_TBD2_INTEL = 0x419F
28
+
29
+ KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
30
+ KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
31
+ KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
32
+ KERNEL_EXEC_INFO_USM_PTRS_INTEL = 0x4203
33
+
34
+ MIGRATE_MEM_OBJECT_HOST_INTEL = (1 << 0)
35
+ MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED_INTEL = (1 << 1)
36
+
37
+ COMMAND_MEMFILL_INTEL = 0x4204
38
+ COMMAND_MEMCPY_INTEL = 0x4205
39
+ COMMAND_MIGRATEMEM_INTEL = 0x4206
40
+ COMMAND_MEMADVISE_INTEL = 0x4207
41
+
42
+ class CommandType
43
+ MEMFILL_INTEL = 0x4204
44
+ MEMCPY_INTEL = 0x4205
45
+ MIGRATEMEM_INTEL = 0x4206
46
+ MEMADVISE_INTEL = 0x4207
47
+
48
+ @codes[0x4204] = 'MEMFILL_INTEL'
49
+ @codes[0x4205] = 'MEMCPY_INTEL'
50
+ @codes[0x4206] = 'MIGRATEMEM_INTEL'
51
+ @codes[0x4207] = 'MEMADVISE_INTEL'
52
+ end
53
+
54
+ MEM_ADVICE_TBD0_INTEL = 0x4208
55
+ MEM_ADVICE_TBD1_INTEL = 0x4209
56
+ MEM_ADVICE_TBD2_INTEL = 0x420A
57
+ MEM_ADVICE_TBD3_INTEL = 0x420B
58
+ MEM_ADVICE_TBD4_INTEL = 0x420C
59
+ MEM_ADVICE_TBD5_INTEL = 0x420D
60
+ MEM_ADVICE_TBD6_INTEL = 0x420E
61
+ MEM_ADVICE_TBD7_INTEL = 0x420F
62
+
63
+ [[:cl_bitfield, :cl_mem_properties_intel],
64
+ [:cl_bitfield, :cl_mem_migration_flags_intel],
65
+ [:cl_bitfield, :cl_mem_alloc_flags_intel],
66
+ [:cl_uint, :cl_mem_info_intel],
67
+ [:cl_uint, :cl_mem_advice_intel],
68
+ [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
69
+ [:cl_uint, :cl_unified_shared_memory_type_intel]
70
+ ].each { |o_t, t|
71
+ typedef o_t, t
72
+ }
73
+ end
74
+
75
+ if RUBY_VERSION.scan(/\d+/).collect(&:to_i).first >= 2
76
+ module OpenCLRefinements
77
+ refine FFI::Pointer do
78
+ methods_prefix = [:put, :get, :write, :read, :put_array_of, :get_array_of]
79
+ [[:cl_bitfield, :cl_mem_properties_intel],
80
+ [:cl_bitfield, :cl_mem_migration_flags_intel],
81
+ [:cl_bitfield, :cl_mem_alloc_flags_intel],
82
+ [:cl_uint, :cl_mem_info_intel],
83
+ [:cl_uint, :cl_mem_advice_intel],
84
+ [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
85
+ [:cl_uint, :cl_unified_shared_memory_type_intel]
86
+ ].each { |orig, add|
87
+ methods_prefix.each { |meth|
88
+ alias_method "#{meth}_#{add}".to_sym, "#{meth}_#{orig}".to_sym
89
+ }
90
+ }
91
+ end
92
+ end
93
+ using OpenCLRefinements
94
+ else
95
+ class FFI::Pointer
96
+ methods_prefix = [:put, :get, :write, :read, :put_array_of, :get_array_of]
97
+ [[:cl_bitfield, :cl_mem_properties_intel],
98
+ [:cl_bitfield, :cl_mem_migration_flags_intel],
99
+ [:cl_bitfield, :cl_mem_alloc_flags_intel],
100
+ [:cl_uint, :cl_mem_info_intel],
101
+ [:cl_uint, :cl_mem_advice_intel],
102
+ [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
103
+ [:cl_uint, :cl_unified_shared_memory_type_intel]
104
+ ].each { |orig, add|
105
+ methods_prefix.each { |meth|
106
+ alias_method "#{meth}_#{add}".to_sym, "#{meth}_#{orig}".to_sym
107
+ }
108
+ }
109
+ end
110
+ end
111
+
112
+ module OpenCL
113
+
114
+ class UnifiedSharedMemoryCapabilitiesINTEL < Bitfield
115
+ UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
116
+ UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
117
+ UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
118
+ UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)
119
+
120
+ def names
121
+ fs = []
122
+ %w( UNIFIED_SHARED_MEMORY_ACCESS_INTEL UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL )..each { |f|
123
+ fs.push(f) if self.include?( self.class.const_get(f) )
124
+ }
125
+ return fs
126
+ end
127
+ end
128
+
129
+ module InnerInterface
130
+ TYPE_CONVERTER[:cl_unified_shared_memory_capabilities_intel] = UnifiedSharedMemoryCapabilitiesINTEL
131
+ end
132
+
133
+ class Mem
134
+ ALLOC_FLAGS_INTEL = 0x4195
135
+
136
+ TYPE_UNKNOWN_INTEL = 0x4196
137
+ TYPE_HOST_INTEL = 0x4197
138
+ TYPE_DEVICE_INTEL = 0x4198
139
+ TYPE_SHARED_INTEL = 0x4199
140
+
141
+ ALLOC_WRITE_COMBINED_INTEL = (1 << 0)
142
+
143
+ ALLOC_TYPE_INTEL = 0x419A
144
+ ALLOC_BASE_PTR_INTEL = 0x419B
145
+ ALLOC_SIZE_INTEL = 0x419C
146
+ ALLOC_DEVICE_INTEL = 0x419D
147
+ ALLOC_INFO_TBD1_INTEL = 0x419E
148
+ ALLOC_INFO_TBD2_INTEL = 0x419F
149
+
150
+ class UnifiedSharedMemoryTypeINTEL < EnumInt
151
+ UNKNOWN_INTEL = 0x4196
152
+ HOST_INTEL = 0x4197
153
+ DEVICE_INTEL = 0x4198
154
+ SHARED_INTEL = 0x4199
155
+ @codes = {}
156
+ @codes[0x4196] = 'UNKNOWN_INTEL'
157
+ @codes[0x4197] = 'HOST_INTEL'
158
+ @codes[0x4198] = 'DEVICE_INTEL'
159
+ @codes[0x4199] = 'SHARED_INTEL'
160
+ end
161
+
162
+ class AllocFlagsINTEL < Bitfield
163
+ DEFAULT_INTEL = 0
164
+ WRITE_COMBINED_INTEL = (1 << 0)
165
+ def names
166
+ fs = []
167
+ %w( WRITE_COMBINED_INTEL ).each { |f|
168
+ fs.push(f) if self.include?( self.class.const_get(f) )
169
+ }
170
+ return fs
171
+ end
172
+ end
173
+
174
+ class AdviceINTEL < EnumInt
175
+ TBD0_INTEL = 0x4208
176
+ TBD1_INTEL = 0x4209
177
+ TBD2_INTEL = 0x420A
178
+ TBD3_INTEL = 0x420B
179
+ TBD4_INTEL = 0x420C
180
+ TBD5_INTEL = 0x420D
181
+ TBD6_INTEL = 0x420E
182
+ TBD7_INTEL = 0x420F
183
+ @codes = {}
184
+ @codes[0x4208] = 'TBD0_INTEL'
185
+ @codes[0x4209] = 'TBD1_INTEL'
186
+ @codes[0x420A] = 'TBD2_INTEL'
187
+ @codes[0x420B] = 'TBD3_INTEL'
188
+ @codes[0x420C] = 'TBD4_INTEL'
189
+ @codes[0x420D] = 'TBD5_INTEL'
190
+ @codes[0x420E] = 'TBD6_INTEL'
191
+ @codes[0x420F] = 'TBD7_INTEL'
192
+ end
193
+
194
+ class MigrationFlagsINTEL < Bitfield
195
+ HOST_INTEL = (1 << 0)
196
+ CONTENT_UNDEFINED_INTEL = (1 << 1)
197
+ # Returns an Array of String representing the different flags set
198
+ def names
199
+ fs = []
200
+ %w( HOST CONTENT_UNDEFINED ).each { |f|
201
+ fs.push(f) if self.include?( self.class.const_get(f) )
202
+ }
203
+ return fs
204
+ end
205
+ end
206
+
207
+ end
208
+
209
+ module InnerInterface
210
+ TYPE_CONVERTER[:cl_unified_shared_memory_type_intel] = Mem::UnifiedSharedMemoryTypeINTEL
211
+ TYPE_CONVERTER[:cl_mem_alloc_flags_intel] = Mem::AllocFlagsINTEL
212
+ TYPE_CONVERTER[:cl_mem_advice_intel] = Mem::AdviceINTEL
213
+ TYPE_CONVERTER[:cl_mem_migration_flags_intel] = Mem::MigrationFlagsINTEL
214
+ end
215
+
216
+ class Device
217
+ HOST_MEM_CAPABILITIES_INTEL = 0x4190
218
+ DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
219
+ SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
220
+ CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
221
+ SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194
222
+
223
+ module UnifiedSharedMemoryPreviewINTEL
224
+ extend InnerGenerator
225
+
226
+ get_info("Device", :cl_unified_shared_memory_capabilities_intel, "host_mem_capabilities_intel")
227
+ get_info("Device", :cl_unified_shared_memory_capabilities_intel, "device_mem_capabilities_intel")
228
+ get_info("Device", :cl_unified_shared_memory_capabilities_intel, "device_shared_mem_capabilities_intel")
229
+ get_info("Device", :cl_unified_shared_memory_capabilities_intel, "cross_device_mem_capabilities_intel")
230
+ get_info("Device", :cl_unified_shared_memory_capabilities_intel, "shared_system_mem_capabilities_intel")
231
+
232
+ def clGetDeviceGlobalVariablePointerINTEL
233
+ @_clGetDeviceGlobalVariablePointerINTEL ||= begin
234
+ p = platform.get_extension_function("clGetDeviceGlobalVariablePointerINTEL", :cl_int, [Device, Program, :string, :pointer, :pointer])
235
+ error_check(OpenCL::INVALID_OPERATION) unless p
236
+ p
237
+ end
238
+ end
239
+
240
+ def get_global_variable_pointer_intel(program, name)
241
+ pSize = MemoryPointer::new(:size_t)
242
+ pAddr = MemoryPointer::new(:pointer)
243
+ error = clGetDeviceGlobalVariablePointerINTEL.call(self, program, name, pSize, pAddr)
244
+ error_check(error)
245
+ return USMPointer::new(pAddr.read_pointer.slice(0, pSize.read_size_t), self)
246
+ end
247
+
248
+ end
249
+ register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
250
+ end
251
+
252
+ class USMPointer < Pointer
253
+
254
+ def initialize(address, context)
255
+ super(address)
256
+ @context = context
257
+ end
258
+
259
+ def inspect
260
+ return "#<#{self.class.name}: 0x#{address.to_s(16)} (#{size})>"
261
+ end
262
+
263
+ def slice(offset, size)
264
+ res = super(offset, size)
265
+ self.class.new(res, context)
266
+ end
267
+
268
+ def +( offset )
269
+ self.slice(offset, self.size - offset)
270
+ end
271
+
272
+ def free
273
+ @context.mem_free_intel(alloc_base_ptr_intel)
274
+ end
275
+
276
+ def alloc_type_intel
277
+ @context.mem_alloc_type_intel(self)
278
+ end
279
+
280
+ def alloc_flags_intel
281
+ @context.mem_alloc_flags_intel(self)
282
+ end
283
+
284
+ def alloc_base_ptr_intel
285
+ @context.mem_alloc_base_ptr_intel(self)
286
+ end
287
+
288
+ def alloc_size_intel
289
+ @context.mem_alloc_size_intel(self)
290
+ end
291
+
292
+ def alloc_device_intel
293
+ context.mem_alloc_device_intel(self)
294
+ end
295
+ end
296
+
297
+ class Context
298
+ module UnifiedSharedMemoryPreviewINTEL
299
+ extend InnerGenerator
300
+
301
+ def clGetMemAllocInfoINTEL
302
+ @_clGetMemAllocInfoINTEL ||= begin
303
+ p = platform.get_extension_function("clGetMemAllocInfoINTEL", :cl_int, [Context, :pointer, :cl_mem_info_intel, :size_t, :pointer, :pointer])
304
+ error_check(OpenCL::INVALID_OPERATION) unless p
305
+ p
306
+ end
307
+ end
308
+
309
+ def clHostMemAllocINTEL
310
+ @_clHostMemAllocINTEL ||= begin
311
+ p = platform.get_extension_function("clHostMemAllocINTEL", :pointer, [Context, :pointer, :size_t, :cl_uint, :pointer])
312
+ error_check(OpenCL::INVALID_OPERATION) unless p
313
+ p
314
+ end
315
+ end
316
+
317
+ def clDeviceMemAllocINTEL
318
+ @_clDeviceMemAllocINTEL ||= begin
319
+ p = platform.get_extension_function("clDeviceMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
320
+ error_check(OpenCL::INVALID_OPERATION) unless p
321
+ p
322
+ end
323
+ end
324
+
325
+ def clSharedMemAllocINTEL
326
+ @_clSharedMemAllocINTEL ||= begin
327
+ p = platform.get_extension_function("clSharedMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
328
+ error_check(OpenCL::INVALID_OPERATION) unless p
329
+ p
330
+ end
331
+ end
332
+
333
+ def clMemFreeINTEL
334
+ return @_clMemFreeINTEL ||= begin
335
+ p = platform.get_extension_function("clMemFreeINTEL", :cl_int, [Context, :pointer])
336
+ error_check(OpenCL::INVALID_OPERATION) unless p
337
+ p
338
+ end
339
+ end
340
+
341
+ def get_mem_properties_intel(properties)
342
+ return nil unless properties
343
+ properties = [properties].flatten
344
+ props = MemoryPointer::new(:cl_mem_properties_intel, properties.length + 1)
345
+ properties.each_with_index { |e, i|
346
+ props[i].write_cl_mem_properties_intel(e)
347
+ }
348
+ props[properties.length].write_cl_mem_properties_intel(0)
349
+ return props
350
+ end
351
+
352
+ private :get_mem_properties_intel
353
+
354
+ def host_mem_alloc_intel(size, options = {})
355
+ properties = get_mem_properties_intel(options[:properties])
356
+ alignment = 0
357
+ alignment = options[:alignment] if options[:alignment]
358
+ error = MemoryPointer::new( :cl_int )
359
+ ptr = clHostMemAllocINTEL.call(self, properties, size, alignment, error)
360
+ error_check(error.read_cl_int)
361
+ return USMPointer::new(ptr.slice(0, size), self)
362
+ end
363
+
364
+ def device_mem_alloc_intel(device, size, options = {})
365
+ properties = get_mem_properties_intel(options[:properties])
366
+ alignment = 0
367
+ alignment = options[:alignment] if options[:alignment]
368
+ error = MemoryPointer::new( :cl_int )
369
+ ptr = clDeviceMemAllocINTEL.call(self, device, properties, size, alignment, error)
370
+ error_check(error.read_cl_int)
371
+ return USMPointer::new(ptr.slice(0, size), self)
372
+ end
373
+
374
+ def shared_mem_alloc_intel(device, size, options = {})
375
+ properties = get_mem_properties_intel(options[:properties])
376
+ alignment = 0
377
+ alignment = options[:alignment] if options[:alignment]
378
+ error = MemoryPointer::new( :cl_int )
379
+ ptr = clSharedMemAllocINTEL.call(self, device, properties, size, alignment, error)
380
+ error_check(error.read_cl_int)
381
+ return USMPointer::new(ptr.slice(0, size), self)
382
+ end
383
+
384
+ def mem_free_intel(ptr)
385
+ error = clMemFreeINTEL.call(self, ptr)
386
+ error_check(error)
387
+ return self
388
+ end
389
+
390
+ def mem_alloc_type_intel(ptr)
391
+ ptr_res = MemoryPointer::new(:cl_unified_shared_memory_type_intel)
392
+ error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_TYPE_INTEL, ptr_res.size, ptr_res, nil)
393
+ error_check(error)
394
+ return OpenCL::Mem::UnifiedSharedMemoryTypeINTEL::new(ptr_res.read_cl_unified_shared_memory_type_intel)
395
+ end
396
+
397
+ def mem_alloc_flags_intel(ptr)
398
+ ptr_res = MemoryPointer::new(:cl_mem_alloc_flags_intel)
399
+ error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_FLAGS_INTEL, ptr_res.size, ptr_res, nil)
400
+ error_check(error)
401
+ return OpenCL::Mem::AllocFlagsINTEL::new(ptr_res.read_cl_mem_alloc_flags_intel)
402
+ end
403
+
404
+ def mem_alloc_base_ptr_intel(ptr)
405
+ ptr_res = MemoryPointer::new(:pointer)
406
+ error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_BASE_PTR_INTEL, ptr_res.size, ptr_res, nil)
407
+ error_check(error)
408
+ return ptr_res.read_pointer
409
+ end
410
+
411
+ def mem_alloc_size_intel(ptr)
412
+ ptr_res = MemoryPointer::new(:size_t)
413
+ error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_SIZE_INTEL, ptr_res.size, ptr_res, nil)
414
+ error_check(error)
415
+ return ptr_res.read_size_t
416
+ end
417
+
418
+ def mem_alloc_device_intel(ptr) # queries ALLOC_DEVICE_INTEL for ptr and wraps the result in a Device
419
+ ptr_res = MemoryPointer::new( Device )
420
+ error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_DEVICE_INTEL, Device.size, ptr_res, nil) # loader is an instance method of this module
421
+ error_check(error)
422
+ return Device::new(ptr_res.read_pointer)
423
+ end
424
+
425
+ end
426
+ register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
427
+ end
428
+
429
+ class Kernel
430
+ EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
431
+ EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
432
+ EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
433
+ EXEC_INFO_USM_PTRS_INTEL = 0x4203
434
+
435
+ module UnifiedSharedMemoryPreviewINTEL
436
+ extend InnerGenerator
437
+
438
+ def clSetKernelArgMemPointerINTEL
439
+ @_clSetKernelArgMemPointerINTEL ||= begin
440
+ p = context.platform.get_extension_function("clSetKernelArgMemPointerINTEL", :cl_int, Kernel, :cl_uint, :pointer)
441
+ error_check(OpenCL::INVALID_OPERATION) unless p
442
+ p
443
+ end
444
+ end
445
+
446
+ def set_arg_mem_pointer_intel(index, usm_pointer)
447
+ error = clSetKernelArgMemPointerINTEL.call(self, index, usm_pointer)
448
+ error_check(error)
449
+ return self
450
+ end
451
+
452
+ def set_usm_ptrs_intel( ptrs )
453
+ pointers = [ptrs].flatten
454
+ pt = MemoryPointer::new( :pointer, pointers.length )
455
+ pointers.each_with_index { |p, i|
456
+ pt[i].write_pointer(p)
457
+ }
458
+ error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_USM_PTRS_INTEL, pt.size, pt)
459
+ error_check(error)
460
+ self
461
+ end
462
+
463
+ def set_indirect_host_access_intel( flag )
464
+ pt = MemoryPointer::new( :cl_bool )
465
+ pt.write_cl_bool( flag )
466
+ error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, pt.size, pt)
467
+ error_check(error)
468
+ self
469
+ end
470
+
471
+ def set_indirect_device_access_intel( flag )
472
+ pt = MemoryPointer::new( :cl_bool )
473
+ pt.write_cl_bool( flag )
474
+ error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, pt.size, pt)
475
+ error_check(error)
476
+ self
477
+ end
478
+
479
+ def set_shared_device_access_intel( flag )
480
+ pt = MemoryPointer::new( :cl_bool )
481
+ pt.write_cl_bool( flag )
482
+ error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_SHARED_DEVICE_ACCESS_INTEL, pt.size, pt)
483
+ error_check(error)
484
+ self
485
+ end
486
+
487
+ def enqueue_with_args(command_queue, global_work_size, *args)
488
+ n = self.num_args
489
+ error_check(INVALID_KERNEL_ARGS) if args.length < n
490
+ error_check(INVALID_KERNEL_ARGS) if args.length > n + 1
491
+ if args.length == n + 1
492
+ options = args.last
493
+ else
494
+ options = {}
495
+ end
496
+ n.times { |i|
497
+ if args[i].class == SVMPointer and self.context.platform.version_number >= 2.0 then
498
+ self.set_arg_svm_pointer(i, args[i])
499
+ elsif args[i].class == USMPointer then
500
+ self.set_arg_mem_pointer_intel(i, args[i])
501
+ else
502
+ self.set_arg(i, args[i])
503
+ end
504
+ }
505
+ command_queue.enqueue_ndrange_kernel(self, global_work_size, options)
506
+ end
507
+
508
+ end
509
+ register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
510
+ end
511
+
512
+ class Kernel
513
+ class Arg
514
+ module UnifiedSharedMemoryPreviewINTEL
515
+ def set(value, size = nil)
516
+ if value.class == SVMPointer and @kernel.context.platform.version_number >= 2.0 then
517
+ OpenCL.set_kernel_arg_svm_pointer(@kernel, @index, value)
518
+ elsif args[i].class == USMPointer then
519
+ @kernel.set_arg_mem_pointer_intel(@index, value)
520
+ else
521
+ OpenCL.set_kernel_arg(@kernel, @index, value, size)
522
+ end
523
+ end
524
+ end
525
+ register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
526
+ end
527
+ end
528
+
529
+ class CommandQueue
530
+ module UnifiedSharedMemoryPreviewINTEL
531
+ extend InnerGenerator
532
+
533
+ def clEnqueueMemFillINTEL
534
+ @_clEnqueueMemFillINTEL ||= begin
535
+ p = platform.get_extension_function("clEnqueueMemFillINTEL", :cl_int, [CommandQueue, :pointer, :pointer, :size_t, :size_t, :cl_uint, :pointer, :pointer])
536
+ error_check(OpenCL::INVALID_OPERATION) unless p
537
+ p
538
+ end
539
+ end
540
+
541
+ def clEnqueueMemcpyINTEL
542
+ @_clEnqueueMemcpyINTEL ||= begin
543
+ p = platform.get_extension_function("clEnqueueMemcpyINTEL", :cl_int, [CommandQueue, :cl_bool, :pointer, :pointer, :size_t, :cl_uint, :pointer, :pointer])
544
+ error_check(OpenCL::INVALID_OPERATION) unless p
545
+ p
546
+ end
547
+ end
548
+
549
+ def clEnqueueMigrateMemINTEL
550
+ @_clEnqueueMigrateMemINTEL ||= begin
551
+ p = platform.get_extension_function("clEnqueueMigrateMemINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_migration_flags_intel, :cl_uint, :pointer, :pointer])
552
+ error_check(OpenCL::INVALID_OPERATION) unless p
553
+ p
554
+ end
555
+ end
556
+
557
+ def clEnqueueMemAdviseINTEL
558
+ @_clEnqueueMemAdviseINTEL ||= begin
559
+ p = platform.get_extension_function("clEnqueueMemAdviseINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_advice_intel, :cl_uint, :pointer, :pointer])
560
+ error_check(OpenCL::INVALID_OPERATION) unless p
561
+ p
562
+ end
563
+ end
564
+
565
+ def enqueue_mem_fill_intel(usm_ptr, pattern, options = {})
566
+ num_events, events = get_event_wait_list( options )
567
+ pattern_size = pattern.size
568
+ pattern_size = options[:pattern_size] if options[:pattern_size]
569
+ size = usm_ptr.size
570
+ size = options[:size] if options[:size]
571
+ event = MemoryPointer::new( Event )
572
+ error = clEnqueueMemFillINTEL.call(self, usm_ptr, pattern, pattern_size, size, num_events, events, event)
573
+ error_check(error)
574
+ return Event::new(event.read_pointer, false)
575
+ end
576
+
577
+ def enqueue_memcpy_intel(dst_ptr, src_ptr, options = {})
578
+ num_events, events = get_event_wait_list( options )
579
+ blocking = FALSE
580
+ blocking = TRUE if options[:blocking] or options[:blocking_copy]
581
+ size = [dst_ptr.size, src_ptr.size].min
582
+ size = options[:size] if options[:size]
583
+ event = MemoryPointer::new( Event )
584
+ error = clEnqueueMemcpyINTEL.call(self, blocking, dst_ptr, src_ptr, size, num_events, events, event)
585
+ error_check(error)
586
+ return Event::new(event.read_pointer, false)
587
+ end
588
+
589
+ def enqueue_migrate_mem_intel(usm_ptr, options = {})
590
+ num_events, events = get_event_wait_list( options )
591
+ flags = get_flags( options )
592
+ size = usm_ptr.size
593
+ size = options[:size] if options[:size]
594
+ event = MemoryPointer::new( Event )
595
+ error = clEnqueueMigrateMemINTEL.call(self, usm_ptr, size, flags, num_events, events, event)
596
+ error_check(error)
597
+ return Event::new(event.read_pointer, false)
598
+ end
599
+
600
+ def enqueue_mem_advise_intel(usm_ptr, advice, options = {}) # returns the Event associated with the command
601
+ num_events, events = get_event_wait_list( options )
602
+ size = usm_ptr.size
603
+ size = options[:size] if options[:size] # :size overrides the size of usm_ptr
604
+ event = MemoryPointer::new( Event )
605
+ error = clEnqueueMemAdviseINTEL.call(self, usm_ptr, size, advice, num_events, events, event) # the loader takes no arguments; invoke the function it returns
606
+ error_check(error)
607
+ return Event::new(event.read_pointer, false)
608
+ end
609
+
610
+ end
611
+ register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "device.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
612
+ end
613
+ end