opencl_ruby_ffi 1.3.6 → 1.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ module OpenCL
5
5
  class SVMPointer < Pointer
6
6
 
7
7
  # create a new SVMPointer from its address and the context it pertains to
8
- def initialize( address, context, size, base = nil )
8
+ def initialize( address, context, base = nil )
9
9
  super( address )
10
10
  @context = context
11
11
  if base then
@@ -13,16 +13,20 @@ module OpenCL
13
13
  else
14
14
  @base = address
15
15
  end
16
- @size = size
17
16
  end
18
17
 
19
18
  def inspect
20
- return "#<#{self.class.name}: #{@size}>"
19
+ return "#<#{self.class.name}: 0x#{address.to_s(16)} (#{size})>"
20
+ end
21
+
22
# Returns a new pointer of the same class for the +size+ bytes found
# +offset+ bytes into this one. The result is built from the parent class'
# slice and is given this pointer's context and base.
def slice(offset, size)
  res = super(offset, size)
  # the receiver was misspelled `slef`, which raised NameError on every call
  return self.class.new( res, @context, @base )
end
22
26
 
23
27
  # creates a new SVMPointer relative to an existing one from an offset
24
28
  def +( offset )
25
- return SVMPointer::new( self.address + offset, @context, @size, @base )
29
+ self.slice(offset, self.size - offset)
26
30
  end
27
31
 
28
32
  # frees the parent memory region associated to this SVMPointer
@@ -51,7 +55,7 @@ module OpenCL
51
55
  alignment = options[:alignment] if options[:alignment]
52
56
  ptr = clSVMAlloc( context, flags, size, alignment )
53
57
  error_check(MEM_OBJECT_ALLOCATION_FAILURE) if ptr.null?
54
- return SVMPointer::new( ptr, context, size )
58
+ return SVMPointer::new( ptr.slice(0, size), context )
55
59
  end
56
60
 
57
61
  # Frees an SVMPointer
@@ -112,11 +116,11 @@ module OpenCL
112
116
  # * +command_queue+ - CommandQueue used to execute the copy command
113
117
  # * +dst_ptr+ - the Pointer (or convertible to Pointer using to_ptr) or SVMPointer to be written to
114
118
  # * +src_ptr+ - the Pointer (or convertible to Pointer using to_ptr) or SVMPointer to be read from
115
- # * +size+ - the size of data to copy
116
119
  # * +options+ - a hash containing named options
117
120
  #
118
121
  # ==== Options
119
122
  #
123
+ # * +:size+ - the size of data to copy
120
124
  # * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
121
125
  # * +:blocking_copy+ - if provided indicates if the command blocks until the copy finishes
122
126
  # * +:blocking+ - if provided indicates if the command blocks until the copy finishes
@@ -124,10 +128,12 @@ module OpenCL
124
128
  # ==== Returns
125
129
  #
126
130
  # the Event associated with the command
127
- def self.enqueue_svm_memcpy(command_queue, dst_ptr, src_ptr, size, options = {})
131
+ def self.enqueue_svm_memcpy(command_queue, dst_ptr, src_ptr, options = {})
128
132
  error_check(INVALID_OPERATION) if command_queue.context.platform.version_number < 2.0
129
133
  blocking = FALSE
130
134
  blocking = TRUE if options[:blocking] or options[:blocking_copy]
135
+ size = [dst_ptr.size, src_ptr.size].min
136
+ size = options[:size] if options[:size]
131
137
  num_events, events = get_event_wait_list( options )
132
138
  event = MemoryPointer::new( Event )
133
139
  error = clEnqueueSVMMemcpy(command_queue, blocking, dst_ptr, src_ptr, size, num_events, events, event)
@@ -142,39 +148,43 @@ module OpenCL
142
148
  # * +command_queue+ - CommandQueue used to execute the fill command
143
149
  # * +svm_ptr+ - the SVMPointer to the area to fill
144
150
  # * +pattern+ - the Pointer (or convertible to Pointer using to_ptr) to the memory area where the pattern is stored
145
- # * +size+ - the size of the area to fill
146
151
  #
147
152
  # ==== Options
148
153
  #
154
+ # * +:size+ - the size of the area to fill
149
155
  # * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
150
156
  # * +:pattern_size+ - if provided indicates the size of the pattern, else the maximum pattern data is used
151
157
  #
152
158
  # ==== Returns
153
159
  #
154
160
  # the Event associated with the command
155
- def self.enqueue_svm_memfill(command_queue, svm_ptr, pattern, size, options = {})
161
def self.enqueue_svm_memfill(command_queue, svm_ptr, pattern, options = {})
  platform_version = command_queue.context.platform.version_number
  error_check(INVALID_OPERATION) if platform_version < 2.0
  num_events, events = get_event_wait_list( options )
  # both sizes default to the size of the object itself unless overridden by an option
  default_pattern_size = pattern.size
  pattern_size = options[:pattern_size] || default_pattern_size
  default_size = svm_ptr.size
  size = options[:size] || default_size
  event = MemoryPointer::new( Event )
  error_check(clEnqueueSVMMemFill(command_queue, svm_ptr, pattern, pattern_size, size, num_events, events, event))
  Event::new(event.read_pointer, false)
end
165
173
 
174
+ singleton_class.send(:alias_method, :enqueue_svm_mem_fill, :enqueue_svm_memfill)
175
+
166
176
  # Enqueues a command to map an SVM region into host memory
167
177
  #
168
178
  # ==== Attributes
169
179
  #
170
180
  # * +command_queue+ - CommandQueue used to execute the map command
171
181
  # * +svm_ptr+ - the SVMPointer to the area to map
172
- # * +size+ - the size of the region to map
173
182
  # * +map_flags+ - a single or an Array of :cl_map_flags flags
174
183
  # * +options+ - a hash containing named options
175
184
  #
176
185
  # ==== Options
177
186
  #
187
+ # * +:size+ - the size of the region to map
178
188
  # * +:event_wait_list+ - if provided, a list of Event to wait upon before executing the command
179
189
  # * +:blocking_map+ - if provided indicates if the command blocks until the region is mapped
180
190
  # * +:blocking+ - if provided indicates if the command blocks until the region is mapped
@@ -182,11 +192,13 @@ module OpenCL
182
192
  # ==== Returns
183
193
  #
184
194
  # the Event associated with the command
185
- def self.enqueue_svm_map( command_queue, svm_ptr, size, map_flags, options = {} )
195
+ def self.enqueue_svm_map( command_queue, svm_ptr, map_flags, options = {} )
186
196
  error_check(INVALID_OPERATION) if command_queue.context.platform.version_number < 2.0
187
197
  blocking = FALSE
188
198
  blocking = TRUE if options[:blocking] or options[:blocking_map]
189
199
  flags = get_flags( {:flags => map_flags} )
200
+ size = svm_ptr.size
201
+ size = options[:size] if options[:size]
190
202
  num_events, events = get_event_wait_list( options )
191
203
  event = MemoryPointer::new( Event )
192
204
  error = clEnqueueSVMMap( command_queue, blocking, flags, svm_ptr, size, num_events, events, event )
@@ -251,7 +263,7 @@ module OpenCL
251
263
  sizes_p[i].write_size_t(sizes[i])
252
264
  }
253
265
  event = MemoryPointer::new( Event )
254
- error = clEnqueueSVMMigrateMem( command_queue, num_svm_pointers, svn_ptrs_p, sizes_p, flags, num_events, events, event )
266
+ error = clEnqueueSVMMigrateMem( command_queue, num_svm_pointers, svn_ptrs_p, sizes_p, flags, num_events, events, event )
255
267
  error_check( error )
256
268
  return Event::new( event.read_pointer, false )
257
269
  end
@@ -30,7 +30,7 @@ module OpenCL
30
30
  prop_size += 2 if options[:mip_filter_mode]
31
31
  prop_size += 2 if options[:lod_min]
32
32
  prop_size += 2 if options[:lod_max]
33
- properties = MemoryPointer::new( :cl_sampler_info )
33
+ properties = MemoryPointer::new( :cl_sampler_properties, prop_size )
34
34
  properties[0].write_cl_sampler_info( Sampler::NORMALIZED_COORDS )
35
35
  properties[1].write_cl_bool( normalized_coords )
36
36
  properties[2].write_cl_sampler_info( Sampler::ADDRESSING_MODE )
@@ -71,10 +71,12 @@ module OpenCL
71
71
 
72
72
  # Returns the context associated with the Sampler
73
73
  def context
74
- ptr = MemoryPointer::new( Context )
75
- error = OpenCL.clGetSamplerInfo(self, CONTEXT, Context.size, ptr, nil)
76
- error_check(error)
77
- return Context::new( ptr.read_pointer )
74
+ @_context ||= begin
75
+ ptr = MemoryPointer::new( Context )
76
+ error = OpenCL.clGetSamplerInfo(self, CONTEXT, Context.size, ptr, nil)
77
+ error_check(error)
78
+ Context::new( ptr.read_pointer )
79
+ end
78
80
  end
79
81
 
80
82
  get_info("Sampler", :cl_uint, "reference_count")
@@ -9,3 +9,4 @@ require "opencl_ruby_ffi/ext/device_fission.rb"
9
9
  require "opencl_ruby_ffi/khr/sub_groups.rb"
10
10
  require "opencl_ruby_ffi/khr/priority_hints.rb"
11
11
  require "opencl_ruby_ffi/khr/throttle_hints.rb"
12
+ require "opencl_ruby_ffi/khr/device_uuid.rb"
@@ -0,0 +1,38 @@
1
+ using OpenCLRefinements if RUBY_VERSION.scan(/\d+/).collect(&:to_i).first >= 2
2
+ module OpenCL
3
+
4
+ CONTEXT_KERNEL_PROFILING_MODES_COUNT_INTEL = 0x407A
5
+ CONTEXT_KERNEL_PROFILING_MODE_INFO_INTEL = 0x407B
6
+ KERNEL_IL_SYMBOLS_INTEL = 0x407C
7
+ KERNEL_BINARY_PROGRAM_INTEL = 0x407D
8
+
9
+ class Kernel
10
+ IL_SYMBOLS_INTEL = 0x407C
11
+ BINARY_PROGRAM_INTEL = 0x407D
12
+ module KernelProfilingINTEL
13
# Reads the BINARY_PROGRAM_INTEL data of this kernel and returns the bytes
# read from the result buffer. The query is first issued through
# clGetKernelWorkGroupInfo for +device+; when that raises
# OpenCL::Error::INVALID_VALUE it is retried through clGetKernelInfo.
def binary_program_intel(device = program.devices.first)
  # two-step query: ask for the size first, then fetch that many bytes
  fetch = lambda { |query|
    size_ptr = MemoryPointer::new( :size_t )
    error_check(query.call(0, nil, size_ptr))
    length = size_ptr.read_size_t
    buffer = MemoryPointer::new(length)
    error_check(query.call(length, buffer, nil))
    [buffer, length]
  }
  ptr_bin, bin_size = begin
    fetch.call(lambda { |sz, value, size_ret|
      OpenCL.clGetKernelWorkGroupInfo(self, device, BINARY_PROGRAM_INTEL, sz, value, size_ret)
    })
  rescue OpenCL::Error::INVALID_VALUE
    fetch.call(lambda { |sz, value, size_ret|
      OpenCL.clGetKernelInfo(self, BINARY_PROGRAM_INTEL, sz, value, size_ret)
    })
  end
  return ptr_bin.read_bytes(bin_size)
end
34
+ end
35
+ register_extension(:cl_intel_kernel_profiling, KernelProfilingINTEL, "true")
36
+ end
37
+
38
+ end
@@ -0,0 +1,613 @@
1
+ module OpenCL
2
+ DEVICE_HOST_MEM_CAPABILITIES_INTEL = 0x4190
3
+ DEVICE_DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
4
+ DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
5
+ DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
6
+ DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194
7
+
8
+ UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
9
+ UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
10
+ UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
11
+ UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)
12
+
13
+ MEM_ALLOC_FLAGS_INTEL = 0x4195
14
+
15
+ MEM_ALLOC_WRITE_COMBINED_INTEL = (1 << 0)
16
+
17
+ MEM_TYPE_UNKNOWN_INTEL = 0x4196
18
+ MEM_TYPE_HOST_INTEL = 0x4197
19
+ MEM_TYPE_DEVICE_INTEL = 0x4198
20
+ MEM_TYPE_SHARED_INTEL = 0x4199
21
+
22
+ MEM_ALLOC_TYPE_INTEL = 0x419A
23
+ MEM_ALLOC_BASE_PTR_INTEL = 0x419B
24
+ MEM_ALLOC_SIZE_INTEL = 0x419C
25
+ MEM_ALLOC_DEVICE_INTEL = 0x419D
26
+ MEM_ALLOC_INFO_TBD1_INTEL = 0x419E
27
+ MEM_ALLOC_INFO_TBD2_INTEL = 0x419F
28
+
29
+ KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
30
+ KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
31
+ KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
32
+ KERNEL_EXEC_INFO_USM_PTRS_INTEL = 0x4203
33
+
34
+ MIGRATE_MEM_OBJECT_HOST_INTEL = (1 << 0)
35
+ MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED_INTEL = (1 << 1)
36
+
37
+ COMMAND_MEMFILL_INTEL = 0x4204
38
+ COMMAND_MEMCPY_INTEL = 0x4205
39
+ COMMAND_MIGRATEMEM_INTEL = 0x4206
40
+ COMMAND_MEMADVISE_INTEL = 0x4207
41
+
42
# Adds the Intel unified shared memory command types and records their names in @codes.
class CommandType
  MEMFILL_INTEL = 0x4204
  MEMCPY_INTEL = 0x4205
  MIGRATEMEM_INTEL = 0x4206
  MEMADVISE_INTEL = 0x4207

  # each code is stored under the name of its constant
  %w( MEMFILL_INTEL MEMCPY_INTEL MIGRATEMEM_INTEL MEMADVISE_INTEL ).each { |code_name|
    @codes[const_get(code_name)] = code_name
  }
end
53
+
54
+ MEM_ADVICE_TBD0_INTEL = 0x4208
55
+ MEM_ADVICE_TBD1_INTEL = 0x4209
56
+ MEM_ADVICE_TBD2_INTEL = 0x420A
57
+ MEM_ADVICE_TBD3_INTEL = 0x420B
58
+ MEM_ADVICE_TBD4_INTEL = 0x420C
59
+ MEM_ADVICE_TBD5_INTEL = 0x420D
60
+ MEM_ADVICE_TBD6_INTEL = 0x420E
61
+ MEM_ADVICE_TBD7_INTEL = 0x420F
62
+
63
# Declare each Intel USM scalar type (key) as a typedef of its underlying OpenCL type (value)
{
  :cl_mem_properties_intel => :cl_bitfield,
  :cl_mem_migration_flags_intel => :cl_bitfield,
  :cl_mem_alloc_flags_intel => :cl_bitfield,
  :cl_mem_info_intel => :cl_uint,
  :cl_mem_advice_intel => :cl_uint,
  :cl_unified_shared_memory_capabilities_intel => :cl_bitfield,
  :cl_unified_shared_memory_type_intel => :cl_uint
}.each { |intel_type, base_type|
  typedef base_type, intel_type
}
73
+ end
74
+
75
# Gives FFI::Pointer accessors for the Intel USM scalar types by aliasing the
# put/get/write/read (and array) accessors of their underlying OpenCL types.
# With a Ruby major version of 2 or more this is done in a refinement,
# otherwise FFI::Pointer is reopened directly.
if RUBY_VERSION.scan(/\d+/).collect(&:to_i).first >= 2
  module OpenCLRefinements
    refine FFI::Pointer do
      accessor_kinds = [:put, :get, :write, :read, :put_array_of, :get_array_of]
      intel_types = [
        [:cl_bitfield, :cl_mem_properties_intel],
        [:cl_bitfield, :cl_mem_migration_flags_intel],
        [:cl_bitfield, :cl_mem_alloc_flags_intel],
        [:cl_uint, :cl_mem_info_intel],
        [:cl_uint, :cl_mem_advice_intel],
        [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
        [:cl_uint, :cl_unified_shared_memory_type_intel]
      ]
      intel_types.each do |orig, add|
        accessor_kinds.each do |meth|
          alias_method :"#{meth}_#{add}", :"#{meth}_#{orig}"
        end
      end
    end
  end
  using OpenCLRefinements
else
  class FFI::Pointer
    accessor_kinds = [:put, :get, :write, :read, :put_array_of, :get_array_of]
    intel_types = [
      [:cl_bitfield, :cl_mem_properties_intel],
      [:cl_bitfield, :cl_mem_migration_flags_intel],
      [:cl_bitfield, :cl_mem_alloc_flags_intel],
      [:cl_uint, :cl_mem_info_intel],
      [:cl_uint, :cl_mem_advice_intel],
      [:cl_bitfield, :cl_unified_shared_memory_capabilities_intel],
      [:cl_uint, :cl_unified_shared_memory_type_intel]
    ]
    intel_types.each do |orig, add|
      accessor_kinds.each do |meth|
        alias_method :"#{meth}_#{add}", :"#{meth}_#{orig}"
      end
    end
  end
end
111
+
112
+ module OpenCL
113
+
114
# Bitfield holding cl_unified_shared_memory_capabilities_intel flags.
class UnifiedSharedMemoryCapabilitiesINTEL < Bitfield
  UNIFIED_SHARED_MEMORY_ACCESS_INTEL = (1 << 0)
  UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL = (1 << 1)
  UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL = (1 << 2)
  UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL = (1 << 3)

  # Returns an Array of String naming the flags that are set.
  def names
    # a stray second dot (`)..each`) used to turn this into a range expression
    # instead of an iteration over the flag names
    flag_names = %w( UNIFIED_SHARED_MEMORY_ACCESS_INTEL UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL )
    return flag_names.select { |f| self.include?( self.class.const_get(f) ) }
  end
end
128
+
129
+ module InnerInterface
130
+ TYPE_CONVERTER[:cl_unified_shared_memory_capabilities_intel] = UnifiedSharedMemoryCapabilitiesINTEL
131
+ end
132
+
133
# Intel unified shared memory additions to Mem: allocation info constants
# and the enum / bitfield classes wrapping the related values.
class Mem
  ALLOC_FLAGS_INTEL = 0x4195

  TYPE_UNKNOWN_INTEL = 0x4196
  TYPE_HOST_INTEL = 0x4197
  TYPE_DEVICE_INTEL = 0x4198
  TYPE_SHARED_INTEL = 0x4199

  ALLOC_WRITE_COMBINED_INTEL = (1 << 0)

  ALLOC_TYPE_INTEL = 0x419A
  ALLOC_BASE_PTR_INTEL = 0x419B
  ALLOC_SIZE_INTEL = 0x419C
  ALLOC_DEVICE_INTEL = 0x419D
  ALLOC_INFO_TBD1_INTEL = 0x419E
  ALLOC_INFO_TBD2_INTEL = 0x419F

  # Enum of the unified shared memory allocation types
  class UnifiedSharedMemoryTypeINTEL < EnumInt
    UNKNOWN_INTEL = 0x4196
    HOST_INTEL = 0x4197
    DEVICE_INTEL = 0x4198
    SHARED_INTEL = 0x4199
    @codes = {}
    @codes[0x4196] = 'UNKNOWN_INTEL'
    @codes[0x4197] = 'HOST_INTEL'
    @codes[0x4198] = 'DEVICE_INTEL'
    @codes[0x4199] = 'SHARED_INTEL'
  end

  # Bitfield of allocation flags
  class AllocFlagsINTEL < Bitfield
    DEFAULT_INTEL = 0
    WRITE_COMBINED_INTEL = (1 << 0)

    # Returns an Array of String representing the different flags set
    def names
      return %w( WRITE_COMBINED_INTEL ).select { |f| self.include?( self.class.const_get(f) ) }
    end
  end

  # Enum of the memory advice values
  class AdviceINTEL < EnumInt
    TBD0_INTEL = 0x4208
    TBD1_INTEL = 0x4209
    TBD2_INTEL = 0x420A
    TBD3_INTEL = 0x420B
    TBD4_INTEL = 0x420C
    TBD5_INTEL = 0x420D
    TBD6_INTEL = 0x420E
    TBD7_INTEL = 0x420F
    @codes = {}
    @codes[0x4208] = 'TBD0_INTEL'
    @codes[0x4209] = 'TBD1_INTEL'
    @codes[0x420A] = 'TBD2_INTEL'
    @codes[0x420B] = 'TBD3_INTEL'
    @codes[0x420C] = 'TBD4_INTEL'
    @codes[0x420D] = 'TBD5_INTEL'
    @codes[0x420E] = 'TBD6_INTEL'
    @codes[0x420F] = 'TBD7_INTEL'
  end

  # Bitfield of migration flags
  class MigrationFlagsINTEL < Bitfield
    HOST_INTEL = (1 << 0)
    CONTENT_UNDEFINED_INTEL = (1 << 1)

    # Returns an Array of String representing the different flags set
    def names
      # the names must match the constants above: const_get on the former
      # "HOST" / "CONTENT_UNDEFINED" found no such constant in this class
      return %w( HOST_INTEL CONTENT_UNDEFINED_INTEL ).select { |f| self.include?( self.class.const_get(f) ) }
    end
  end

end
208
+
209
+ module InnerInterface
210
+ TYPE_CONVERTER[:cl_unified_shared_memory_type_intel] = Mem::UnifiedSharedMemoryTypeINTEL
211
+ TYPE_CONVERTER[:cl_mem_alloc_flags_intel] = Mem::AllocFlagsINTEL
212
+ TYPE_CONVERTER[:cl_mem_advice_intel] = Mem::AdviceINTEL
213
+ TYPE_CONVERTER[:cl_mem_migration_flags_intel] = Mem::MigrationFlagsINTEL
214
+ end
215
+
216
# Intel unified shared memory (preview) additions to Device: capability
# queries and lookup of program global variables.
class Device
  HOST_MEM_CAPABILITIES_INTEL = 0x4190
  DEVICE_MEM_CAPABILITIES_INTEL = 0x4191
  SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4192
  CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL = 0x4193
  SHARED_SYSTEM_MEM_CAPABILITIES_INTEL = 0x4194

  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # NOTE(review): get_info is assumed to resolve the info constant from the
    # upper-cased accessor name, as the other three names suggest - confirm.
    # The two shared-memory accessors are therefore named after the constants
    # that exist in this class (SINGLE_DEVICE_SHARED_... / CROSS_DEVICE_SHARED_...).
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "host_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "device_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "single_device_shared_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "cross_device_shared_mem_capabilities_intel")
    get_info("Device", :cl_unified_shared_memory_capabilities_intel, "shared_system_mem_capabilities_intel")

    # Former accessor name, kept for existing callers
    def device_shared_mem_capabilities_intel
      single_device_shared_mem_capabilities_intel
    end

    # Former accessor name, kept for existing callers
    def cross_device_mem_capabilities_intel
      cross_device_shared_mem_capabilities_intel
    end

    # Returns the clGetDeviceGlobalVariablePointerINTEL extension function,
    # looked up on the platform at first use and cached. INVALID_OPERATION is
    # handed to error_check when the lookup returns nothing.
    def clGetDeviceGlobalVariablePointerINTEL
      @_clGetDeviceGlobalVariablePointerINTEL ||= begin
        p = platform.get_extension_function("clGetDeviceGlobalVariablePointerINTEL", :cl_int, [Device, Program, :string, :pointer, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    # Returns a USMPointer to the global variable +name+ of +program+ on this
    # device, sized with the size reported by the extension function.
    def get_global_variable_pointer_intel(program, name)
      pSize = MemoryPointer::new(:size_t)
      pAddr = MemoryPointer::new(:pointer)
      error = clGetDeviceGlobalVariablePointerINTEL.call(self, program, name, pSize, pAddr)
      error_check(error)
      # NOTE(review): the pointer receives this Device as its context, while
      # USMPointer forwards its mem_*_intel calls to that object - confirm intended
      return USMPointer::new(pAddr.read_pointer.slice(0, pSize.read_size_t), self)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
251
+
252
# Pointer to unified shared memory that remembers the context it was created with.
class USMPointer < Pointer

  # Creates a USMPointer from an address (or pointer) and the context it pertains to
  def initialize(address, context)
    super(address)
    @context = context
  end

  # Class name, hexadecimal address and size of the pointer
  def inspect
    return "#<#{self.class.name}: 0x#{address.to_s(16)} (#{size})>"
  end

  # Returns a new pointer of the same class on the +size+ bytes found at +offset+
  def slice(offset, size)
    res = super(offset, size)
    # the class defines no `context` reader, the instance variable must be used
    self.class.new(res, @context)
  end

  # Returns a new pointer starting +offset+ bytes further and covering the remaining size
  def +( offset )
    self.slice(offset, self.size - offset)
  end

  # Passes the base pointer of the allocation to the context's mem_free_intel
  def free
    @context.mem_free_intel(alloc_base_ptr_intel)
  end

  # Forwards to the context's mem_alloc_type_intel for this pointer
  def alloc_type_intel
    @context.mem_alloc_type_intel(self)
  end

  # Forwards to the context's mem_alloc_flags_intel for this pointer
  def alloc_flags_intel
    @context.mem_alloc_flags_intel(self)
  end

  # Forwards to the context's mem_alloc_base_ptr_intel for this pointer
  def alloc_base_ptr_intel
    @context.mem_alloc_base_ptr_intel(self)
  end

  # Forwards to the context's mem_alloc_size_intel for this pointer
  def alloc_size_intel
    @context.mem_alloc_size_intel(self)
  end

  # Forwards to the context's mem_alloc_device_intel for this pointer
  def alloc_device_intel
    # same fix as in slice: `context` is not a method of this class
    @context.mem_alloc_device_intel(self)
  end
end
296
+
297
# Intel unified shared memory (preview) additions to Context: extension
# function lookup, host/device/shared allocation, release and allocation queries.
class Context
  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # The cl*INTEL accessors below each look their extension function up on the
    # platform at first use and cache it; INVALID_OPERATION is handed to
    # error_check when the lookup returns nothing.

    def clGetMemAllocInfoINTEL
      @_clGetMemAllocInfoINTEL ||= begin
        p = platform.get_extension_function("clGetMemAllocInfoINTEL", :cl_int, [Context, :pointer, :cl_mem_info_intel, :size_t, :pointer, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    def clHostMemAllocINTEL
      @_clHostMemAllocINTEL ||= begin
        p = platform.get_extension_function("clHostMemAllocINTEL", :pointer, [Context, :pointer, :size_t, :cl_uint, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    def clDeviceMemAllocINTEL
      @_clDeviceMemAllocINTEL ||= begin
        p = platform.get_extension_function("clDeviceMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    def clSharedMemAllocINTEL
      @_clSharedMemAllocINTEL ||= begin
        p = platform.get_extension_function("clSharedMemAllocINTEL", :pointer, [Context, Device, :pointer, :size_t, :cl_uint, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    def clMemFreeINTEL
      return @_clMemFreeINTEL ||= begin
        p = platform.get_extension_function("clMemFreeINTEL", :cl_int, [Context, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    # Packs +properties+ (a single value or an Array) into a zero-terminated
    # array of cl_mem_properties_intel. Returns nil when no properties are given.
    def get_mem_properties_intel(properties)
      return nil unless properties
      properties = [properties].flatten
      props = MemoryPointer::new(:cl_mem_properties_intel, properties.length + 1)
      properties.each_with_index { |e, i|
        props[i].write_cl_mem_properties_intel(e)
      }
      props[properties.length].write_cl_mem_properties_intel(0)
      return props
    end

    private :get_mem_properties_intel

    # Allocates +size+ bytes through clHostMemAllocINTEL and returns a USMPointer on them.
    #
    # ==== Options
    #
    # * +:properties+ - allocation properties, see get_mem_properties_intel
    # * +:alignment+ - alignment passed to the allocation function, 0 if not provided
    def host_mem_alloc_intel(size, options = {})
      properties = get_mem_properties_intel(options[:properties])
      alignment = options[:alignment] || 0
      error = MemoryPointer::new( :cl_int )
      ptr = clHostMemAllocINTEL.call(self, properties, size, alignment, error)
      error_check(error.read_cl_int)
      return USMPointer::new(ptr.slice(0, size), self)
    end

    # Same as host_mem_alloc_intel, through clDeviceMemAllocINTEL for +device+
    def device_mem_alloc_intel(device, size, options = {})
      properties = get_mem_properties_intel(options[:properties])
      alignment = options[:alignment] || 0
      error = MemoryPointer::new( :cl_int )
      ptr = clDeviceMemAllocINTEL.call(self, device, properties, size, alignment, error)
      error_check(error.read_cl_int)
      return USMPointer::new(ptr.slice(0, size), self)
    end

    # Same as host_mem_alloc_intel, through clSharedMemAllocINTEL for +device+
    def shared_mem_alloc_intel(device, size, options = {})
      properties = get_mem_properties_intel(options[:properties])
      alignment = options[:alignment] || 0
      error = MemoryPointer::new( :cl_int )
      ptr = clSharedMemAllocINTEL.call(self, device, properties, size, alignment, error)
      error_check(error.read_cl_int)
      return USMPointer::new(ptr.slice(0, size), self)
    end

    # Passes +ptr+ to clMemFreeINTEL and returns the context
    def mem_free_intel(ptr)
      error = clMemFreeINTEL.call(self, ptr)
      error_check(error)
      return self
    end

    # Queries ALLOC_TYPE_INTEL for +ptr+, returned as a Mem::UnifiedSharedMemoryTypeINTEL
    def mem_alloc_type_intel(ptr)
      ptr_res = MemoryPointer::new(:cl_unified_shared_memory_type_intel)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_TYPE_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return OpenCL::Mem::UnifiedSharedMemoryTypeINTEL::new(ptr_res.read_cl_unified_shared_memory_type_intel)
    end

    # Queries ALLOC_FLAGS_INTEL for +ptr+, returned as a Mem::AllocFlagsINTEL
    def mem_alloc_flags_intel(ptr)
      ptr_res = MemoryPointer::new(:cl_mem_alloc_flags_intel)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_FLAGS_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return OpenCL::Mem::AllocFlagsINTEL::new(ptr_res.read_cl_mem_alloc_flags_intel)
    end

    # Queries ALLOC_BASE_PTR_INTEL for +ptr+, returned as a pointer
    def mem_alloc_base_ptr_intel(ptr)
      ptr_res = MemoryPointer::new(:pointer)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_BASE_PTR_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return ptr_res.read_pointer
    end

    # Queries ALLOC_SIZE_INTEL for +ptr+, returned as an integer
    def mem_alloc_size_intel(ptr)
      ptr_res = MemoryPointer::new(:size_t)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_SIZE_INTEL, ptr_res.size, ptr_res, nil)
      error_check(error)
      return ptr_res.read_size_t
    end

    # Queries ALLOC_DEVICE_INTEL for +ptr+, returned as a Device
    def mem_alloc_device_intel(ptr)
      ptr_res = MemoryPointer::new( Device )
      # the extension function accessor is the method defined in this module,
      # not a function of the OpenCL module (it was called as OpenCL.clGetMemAllocInfoINTEL)
      error = clGetMemAllocInfoINTEL.call(self, ptr, OpenCL::Mem::ALLOC_DEVICE_INTEL, Device.size, ptr_res, nil)
      error_check(error)
      return Device::new(ptr_res.read_pointer)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
428
+
429
# Intel unified shared memory (preview) additions to Kernel: USM pointer
# arguments, exec info setters and an enqueue helper aware of USM pointers.
class Kernel
  EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL = 0x4200
  EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = 0x4201
  EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL = 0x4202
  EXEC_INFO_USM_PTRS_INTEL = 0x4203

  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # Returns the clSetKernelArgMemPointerINTEL extension function, looked up
    # at first use and cached. INVALID_OPERATION is handed to error_check when
    # the lookup returns nothing.
    def clSetKernelArgMemPointerINTEL
      @_clSetKernelArgMemPointerINTEL ||= begin
        # argument types are passed as one Array, like every other
        # get_extension_function call in this file (the brackets were missing)
        p = context.platform.get_extension_function("clSetKernelArgMemPointerINTEL", :cl_int, [Kernel, :cl_uint, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    # Sets argument +index+ of the kernel to +usm_pointer+. Returns the kernel.
    def set_arg_mem_pointer_intel(index, usm_pointer)
      error = clSetKernelArgMemPointerINTEL.call(self, index, usm_pointer)
      error_check(error)
      return self
    end

    # Passes +ptrs+ (a single pointer or an Array of pointers) to
    # clSetKernelExecInfo as EXEC_INFO_USM_PTRS_INTEL. Returns the kernel.
    def set_usm_ptrs_intel( ptrs )
      pointers = [ptrs].flatten
      pt = MemoryPointer::new( :pointer, pointers.length )
      pointers.each_with_index { |p, i|
        pt[i].write_pointer(p)
      }
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_USM_PTRS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL to +flag+. Returns the kernel.
    def set_indirect_host_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL to +flag+. Returns the kernel.
    def set_indirect_device_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # Sets EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL to +flag+. Returns the kernel.
    def set_shared_device_access_intel( flag )
      pt = MemoryPointer::new( :cl_bool )
      pt.write_cl_bool( flag )
      # EXEC_INFO_SHARED_DEVICE_ACCESS_INTEL was never defined; the shared access
      # constant of this class is EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL
      error = OpenCL.clSetKernelExecInfo( self, EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, pt.size, pt)
      error_check(error)
      self
    end

    # name consistent with the host and device setters; the original name is kept
    alias_method :set_indirect_shared_access_intel, :set_shared_device_access_intel

    # Sets the kernel arguments from +args+ then enqueues the kernel on
    # +command_queue+ with +global_work_size+. +args+ holds one value per
    # kernel argument, optionally followed by an options Hash that is handed
    # to enqueue_ndrange_kernel. INVALID_KERNEL_ARGS is handed to error_check
    # when the number of values does not fit.
    def enqueue_with_args(command_queue, global_work_size, *args)
      n = self.num_args
      error_check(INVALID_KERNEL_ARGS) if args.length < n
      error_check(INVALID_KERNEL_ARGS) if args.length > n + 1
      if args.length == n + 1
        options = args.last
      else
        options = {}
      end
      n.times { |i|
        # SVM and USM pointers go through their dedicated setters
        if args[i].class == SVMPointer and self.context.platform.version_number >= 2.0 then
          self.set_arg_svm_pointer(i, args[i])
        elsif args[i].class == USMPointer then
          self.set_arg_mem_pointer_intel(i, args[i])
        else
          self.set_arg(i, args[i])
        end
      }
      command_queue.enqueue_ndrange_kernel(self, global_work_size, options)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
511
+
512
class Kernel
  class Arg
    module UnifiedSharedMemoryPreviewINTEL
      # Sets this argument to +value+. SVMPointer values (on platforms with a
      # version number of at least 2.0) and USMPointer values go through their
      # dedicated setters; anything else goes through set_kernel_arg with +size+.
      def set(value, size = nil)
        if value.class == SVMPointer and @kernel.context.platform.version_number >= 2.0 then
          OpenCL.set_kernel_arg_svm_pointer(@kernel, @index, value)
        # the test must be on value: `args` and `i` do not exist in this method
        elsif value.class == USMPointer then
          @kernel.set_arg_mem_pointer_intel(@index, value)
        else
          OpenCL.set_kernel_arg(@kernel, @index, value, size)
        end
      end
    end
    register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "platform.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
  end
end
528
+
529
# Intel unified shared memory (preview) additions to CommandQueue: fill,
# copy, migrate and advise commands on USM pointers.
class CommandQueue
  module UnifiedSharedMemoryPreviewINTEL
    extend InnerGenerator

    # The cl*INTEL accessors below each look their extension function up on the
    # platform at first use and cache it; INVALID_OPERATION is handed to
    # error_check when the lookup returns nothing.

    def clEnqueueMemFillINTEL
      @_clEnqueueMemFillINTEL ||= begin
        p = platform.get_extension_function("clEnqueueMemFillINTEL", :cl_int, [CommandQueue, :pointer, :pointer, :size_t, :size_t, :cl_uint, :pointer, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    def clEnqueueMemcpyINTEL
      @_clEnqueueMemcpyINTEL ||= begin
        p = platform.get_extension_function("clEnqueueMemcpyINTEL", :cl_int, [CommandQueue, :cl_bool, :pointer, :pointer, :size_t, :cl_uint, :pointer, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    def clEnqueueMigrateMemINTEL
      @_clEnqueueMigrateMemINTEL ||= begin
        p = platform.get_extension_function("clEnqueueMigrateMemINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_migration_flags_intel, :cl_uint, :pointer, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    def clEnqueueMemAdviseINTEL
      @_clEnqueueMemAdviseINTEL ||= begin
        p = platform.get_extension_function("clEnqueueMemAdviseINTEL", :cl_int, [CommandQueue, :pointer, :size_t, :cl_mem_advice_intel, :cl_uint, :pointer, :pointer])
        error_check(OpenCL::INVALID_OPERATION) unless p
        p
      end
    end

    # Enqueues a fill of +usm_ptr+ with +pattern+. Returns the Event of the command.
    #
    # ==== Options
    #
    # * +:size+ - the size of the area to fill, the size of +usm_ptr+ if not provided
    # * +:pattern_size+ - the size of the pattern, the size of +pattern+ if not provided
    # * +:event_wait_list+ - handed to get_event_wait_list
    def enqueue_mem_fill_intel(usm_ptr, pattern, options = {})
      num_events, events = get_event_wait_list( options )
      pattern_size = pattern.size
      pattern_size = options[:pattern_size] if options[:pattern_size]
      size = usm_ptr.size
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      error = clEnqueueMemFillINTEL.call(self, usm_ptr, pattern, pattern_size, size, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues a copy from +src_ptr+ to +dst_ptr+. Returns the Event of the command.
    #
    # ==== Options
    #
    # * +:size+ - the size to copy, the smaller of the two pointer sizes if not provided
    # * +:blocking+ or +:blocking_copy+ - if provided the blocking flag is set to TRUE
    # * +:event_wait_list+ - handed to get_event_wait_list
    def enqueue_memcpy_intel(dst_ptr, src_ptr, options = {})
      num_events, events = get_event_wait_list( options )
      blocking = FALSE
      blocking = TRUE if options[:blocking] or options[:blocking_copy]
      size = [dst_ptr.size, src_ptr.size].min
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      error = clEnqueueMemcpyINTEL.call(self, blocking, dst_ptr, src_ptr, size, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues a migration of +usm_ptr+ with the flags obtained from
    # get_flags( options ). Returns the Event of the command.
    #
    # ==== Options
    #
    # * +:size+ - the size to migrate, the size of +usm_ptr+ if not provided
    # * +:event_wait_list+ - handed to get_event_wait_list
    def enqueue_migrate_mem_intel(usm_ptr, options = {})
      num_events, events = get_event_wait_list( options )
      flags = get_flags( options )
      size = usm_ptr.size
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      error = clEnqueueMigrateMemINTEL.call(self, usm_ptr, size, flags, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

    # Enqueues +advice+ for +usm_ptr+. Returns the Event of the command.
    #
    # ==== Options
    #
    # * +:size+ - the size concerned, the size of +usm_ptr+ if not provided
    # * +:event_wait_list+ - handed to get_event_wait_list
    def enqueue_mem_advise_intel(usm_ptr, advice, options = {})
      num_events, events = get_event_wait_list( options )
      size = usm_ptr.size
      size = options[:size] if options[:size]
      event = MemoryPointer::new( Event )
      # the accessor takes no argument and returns the function: it has to be
      # invoked with .call like the three other commands (it was called directly)
      error = clEnqueueMemAdviseINTEL.call(self, usm_ptr, size, advice, num_events, events, event)
      error_check(error)
      return Event::new(event.read_pointer, false)
    end

  end
  register_extension( :cl_intel_unified_shared_memory_preview, UnifiedSharedMemoryPreviewINTEL, "device.extensions.include?(\"cl_intel_unified_shared_memory_preview\")" )
end
613
+ end