tensor_stream-opencl 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +44 -2
- data/lib/tensor_stream/opencl/array_ops.rb +24 -3
- data/lib/tensor_stream/opencl/nn_ops.rb +33 -0
- data/lib/tensor_stream/opencl/opencl_evaluator.rb +2 -0
- data/lib/tensor_stream/opencl/version.rb +1 -1
- data/samples/rnn.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 03fcb3bb50485dd601bf17b58f216209c86fb20aeb6c0b61b23144b5d644efaa
|
4
|
+
data.tar.gz: 7b96f90b902ff747b74575be13015e52cfda0f4104273e14eca5bee90fc1a405
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2916b8a053754bfd58594cef0680d79d12d38332d99d28a3cded71028c58b7a35ec3a6480b6b80f486aa2a59b617f89ee1b534bf00f51343e4827df250ff92f4
|
7
|
+
data.tar.gz: 308153886efa111da2251b31f4a6b5d4ad610336681e3d94b1bb9b055cf8a7e97ad9460271a5ae14e738410a77fc75d8669636732909af2ff574f0e907cad44b
|
data/README.md
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
[](https://badge.fury.io/rb/tensor_stream-opencl)
|
2
|
+
|
1
3
|
# TensorStream::Opencl
|
2
4
|
|
3
5
|
This gem provides an OpenCL backend for TensorStream (https://github.com/jedld/tensor_stream). OpenCL is an open standard
|
4
|
-
that allows running compute applications on heterogenous platforms like CPUs and GPUs.
|
6
|
+
that allows running compute applications on heterogenous platforms like CPUs and GPUs. For certain neural network implementations, like deep neural networks GPU acceleration can dramatically speedup computation.
|
5
7
|
|
6
8
|
## Installation
|
7
9
|
|
@@ -37,7 +39,47 @@ Or install it yourself as:
|
|
37
39
|
|
38
40
|
## Usage
|
39
41
|
|
40
|
-
|
42
|
+
If using a Gemfile or a framework like rails, simply including this gem will allow tensor_stream to automatically select opencl devices for use in your computation. Otherwise you can do:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
require 'tensor_stream/opencl'
|
46
|
+
```
|
47
|
+
|
48
|
+
You can check for available OpenCL devices via:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
TensorStream::Evaluator::OpenclEvaluator.query_supported_devices
|
52
|
+
|
53
|
+
TensorStream::Evaluator::OpenclEvaluator.query_supported_devices.map(&:native_device)
|
54
|
+
# => [#<OpenCL::Device: Intel(R) Core(TM) i5-5575R CPU @ 2.80GHz (4294967295)>, #<OpenCL::Device: Intel(R) Iris(TM) Pro Graphics 6200 (16925952)>]
|
55
|
+
```
|
56
|
+
|
57
|
+
## Device placement control
|
58
|
+
|
59
|
+
You can place operations on certain devices using ts.device:
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
require 'tensor_stream/opencl'
|
63
|
+
|
64
|
+
ts = TensorStream
|
65
|
+
# For the first GPU
|
66
|
+
ts.device('/device:GPU:0') do
|
67
|
+
a = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
|
68
|
+
b = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
|
69
|
+
# Compute A^n and B^n and store results in c1
|
70
|
+
c1 << matpow(a, n)
|
71
|
+
c1 << matpow(b, n)
|
72
|
+
end
|
73
|
+
|
74
|
+
# For the second GPU
|
75
|
+
ts.device('/device:GPU:1') do
|
76
|
+
a = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
|
77
|
+
b = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
|
78
|
+
# Compute A^n and B^n and store results in c1
|
79
|
+
c1 << matpow(a, n)
|
80
|
+
c1 << matpow(b, n)
|
81
|
+
end
|
82
|
+
```
|
41
83
|
|
42
84
|
## Development
|
43
85
|
|
@@ -202,12 +202,33 @@ module TensorStream
|
|
202
202
|
output_buffer
|
203
203
|
end
|
204
204
|
|
205
|
+
register_op :squeeze do |context, tensor, inputs|
|
206
|
+
arr = inputs[0]
|
207
|
+
shape = inputs[0].shape.dup
|
208
|
+
axis = !tensor.options[:axis].is_a?(Array) ? [tensor.options[:axis]] : tensor.options[:axis]
|
209
|
+
if !axis.empty?
|
210
|
+
axis.each do |axis|
|
211
|
+
if shape[axis] == 1
|
212
|
+
shape[axis] = nil
|
213
|
+
else
|
214
|
+
raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1"
|
215
|
+
end
|
216
|
+
end
|
217
|
+
else
|
218
|
+
shape = shape.map { |s| s == 1 ? nil : s }
|
219
|
+
end
|
220
|
+
|
221
|
+
OpenCLBuffer.new(name: tensor.name, data_type: tensor.data_type,
|
222
|
+
shape: shape.compact, buffer: arr.buffer,
|
223
|
+
cl_buffer: arr.cl_buffer,
|
224
|
+
op: arr.op)
|
225
|
+
end
|
226
|
+
|
205
227
|
register_op :stack do |_context, tensor, inputs|
|
206
228
|
axis = tensor.options[:axis] || 0
|
207
229
|
shape = inputs[0].shape
|
208
230
|
rank = shape.size + 1
|
209
231
|
elem_size = shape.empty? ? 1 : shape.reduce(:*)
|
210
|
-
|
211
232
|
new_shape = [inputs.size]
|
212
233
|
shape.inject(new_shape) { |ns, s| ns << s }
|
213
234
|
|
@@ -313,8 +334,8 @@ module TensorStream
|
|
313
334
|
end
|
314
335
|
|
315
336
|
register_op :shape_n do |_context, tensor, inputs|
|
316
|
-
shapes = inputs.collect do |input|
|
317
|
-
wrap_opencl(input.shape, name: tensor.name, data_type: tensor.data_type)
|
337
|
+
shapes = inputs.collect.with_index do |input, index|
|
338
|
+
wrap_opencl(input.shape, name: "#{tensor.name}_#{index}", data_type: tensor.data_type)
|
318
339
|
end
|
319
340
|
TensorStream::Evaluator::OutputGroup.new(shapes, shapes.map { tensor.data_type })
|
320
341
|
end
|
@@ -285,6 +285,39 @@ module TensorStream
|
|
285
285
|
output_buffer
|
286
286
|
end
|
287
287
|
|
288
|
+
register_op :sparse_softmax_cross_entropy_with_logits do |context, tensor, inputs|
|
289
|
+
a = inputs[0] # logits
|
290
|
+
labels = read_final_result(complete_eval(inputs[1], context)) # labels
|
291
|
+
labels = last_axis(labels)
|
292
|
+
num_classes = a.shape.last
|
293
|
+
|
294
|
+
labels = labels.map do |l|
|
295
|
+
one_hot = Array.new(num_classes) { 0 }
|
296
|
+
one_hot[l] = 1
|
297
|
+
one_hot
|
298
|
+
end
|
299
|
+
|
300
|
+
b = wrap_opencl(labels, data_type: inputs[0].data_type, name: "#{tensor.name}_label")
|
301
|
+
|
302
|
+
event_wait_list = build_event_wait_list(inputs)
|
303
|
+
dtype = tensor.data_type
|
304
|
+
output_buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
|
305
|
+
output_buffer_backprop = _create_result_buffer(tensor.data_type, a.shape, "#{tensor.name}_2")
|
306
|
+
rank = a.shape.size - 1
|
307
|
+
m, n = a.shape
|
308
|
+
work_group = [m]
|
309
|
+
n = m if n.nil?
|
310
|
+
cl_n = OpenCL::Int1.new(n || 1)
|
311
|
+
|
312
|
+
event = _cl_program("softmax_cross", dtype: dtype).send(:"softmax_cross_#{dtype}", _opencl_queue, work_group, cl_n, a.cl_buffer, b.cl_buffer,
|
313
|
+
output_buffer.cl_buffer, output_buffer_backprop.cl_buffer, event_wait_list: event_wait_list)
|
314
|
+
output_buffer.op = event
|
315
|
+
output_buffer_backprop.op = event
|
316
|
+
|
317
|
+
loss = reduction(context, tensor, output_buffer, rank, :sum)
|
318
|
+
TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
|
319
|
+
end
|
320
|
+
|
288
321
|
register_op :softmax_grad do |_context, tensor, inputs|
|
289
322
|
a, grad = inputs
|
290
323
|
|
@@ -413,9 +413,11 @@ module TensorStream
|
|
413
413
|
end
|
414
414
|
rescue EvaluatorExcecutionException => e
|
415
415
|
_opencl_queue.finish # dump queue
|
416
|
+
puts e.message
|
416
417
|
raise e, "error #{e.message} while evaluating #{tensor.name} : #{tensor.to_math(true, 1)} defined at #{tensor.source}"
|
417
418
|
rescue TensorStreamError => e
|
418
419
|
_opencl_queue.finish # dump queue
|
420
|
+
puts e.message
|
419
421
|
raise e, "error #{e.message} while evaluating #{tensor.name} : #{tensor.to_math(true, 1)} defined at #{tensor.source}"
|
420
422
|
rescue StandardError => e
|
421
423
|
_opencl_queue.finish # dump queue
|
data/samples/rnn.rb
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
require "bundler/setup"
|
9
9
|
require 'tensor_stream'
|
10
10
|
require 'tensor_stream/opencl'
|
11
|
-
require 'pry-byebug'
|
11
|
+
# require 'pry-byebug'
|
12
12
|
|
13
13
|
|
14
14
|
tf = TensorStream
|
@@ -75,7 +75,7 @@ losses = logits_series.zip(labels_series).collect do |logits, labels|
|
|
75
75
|
end
|
76
76
|
total_loss = tf.reduce_mean(losses)
|
77
77
|
|
78
|
-
train_step = TensorStream::Train::AdagradOptimizer.new(0.
|
78
|
+
train_step = TensorStream::Train::AdagradOptimizer.new(0.1).minimize(total_loss)
|
79
79
|
|
80
80
|
puts "#{tf.get_default_graph.nodes.keys.size} nodes created"
|
81
81
|
zeros_state = tf.zeros([batch_size, state_size]).eval
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|