tensor_stream 1.0.4 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +12 -2
- data/Dockerfile +1 -1
- data/USAGE_GUIDE.md +68 -0
- data/lib/tensor_stream.rb +1 -0
- data/lib/tensor_stream/evaluator/base_evaluator.rb +21 -1
- data/lib/tensor_stream/evaluator/evaluator.rb +1 -0
- data/lib/tensor_stream/evaluator/evaluator_utils.rb +20 -0
- data/lib/tensor_stream/evaluator/operation_helpers/array_ops_helper.rb +60 -0
- data/lib/tensor_stream/evaluator/ruby/array_ops.rb +53 -1
- data/lib/tensor_stream/evaluator/ruby/images_ops.rb +26 -0
- data/lib/tensor_stream/evaluator/ruby/math_ops.rb +60 -5
- data/lib/tensor_stream/evaluator/ruby/nn_ops.rb +25 -29
- data/lib/tensor_stream/evaluator/ruby/random_ops.rb +7 -11
- data/lib/tensor_stream/evaluator/ruby/storage_manager.rb +40 -0
- data/lib/tensor_stream/evaluator/ruby/variable_ops.rb +74 -0
- data/lib/tensor_stream/evaluator/ruby_evaluator.rb +31 -77
- data/lib/tensor_stream/generated_stub/ops.rb +256 -166
- data/lib/tensor_stream/generated_stub/stub_file.erb +4 -4
- data/lib/tensor_stream/graph.rb +3 -3
- data/lib/tensor_stream/graph_deserializers/yaml_loader.rb +4 -6
- data/lib/tensor_stream/helpers/infer_shape.rb +1 -7
- data/lib/tensor_stream/helpers/tensor_mixins.rb +10 -1
- data/lib/tensor_stream/images.rb +4 -0
- data/lib/tensor_stream/math/math_ops.rb +22 -0
- data/lib/tensor_stream/math_gradients.rb +15 -1
- data/lib/tensor_stream/nn/embedding_lookup.rb +114 -0
- data/lib/tensor_stream/nn/nn_ops.rb +16 -0
- data/lib/tensor_stream/op_maker.rb +36 -3
- data/lib/tensor_stream/operation.rb +8 -20
- data/lib/tensor_stream/ops.rb +14 -11
- data/lib/tensor_stream/ops/bias_add.rb +16 -0
- data/lib/tensor_stream/ops/equal.rb +4 -0
- data/lib/tensor_stream/ops/greater.rb +4 -0
- data/lib/tensor_stream/ops/greater_equal.rb +4 -0
- data/lib/tensor_stream/ops/less.rb +19 -0
- data/lib/tensor_stream/ops/less_equal.rb +4 -0
- data/lib/tensor_stream/ops/not_equal.rb +19 -0
- data/lib/tensor_stream/ops/rsqrt.rb +11 -0
- data/lib/tensor_stream/ops/strided_slice.rb +24 -0
- data/lib/tensor_stream/ops/sum.rb +4 -2
- data/lib/tensor_stream/ops/top_k.rb +23 -0
- data/lib/tensor_stream/session.rb +6 -12
- data/lib/tensor_stream/tensor.rb +1 -0
- data/lib/tensor_stream/tensor_shape.rb +32 -1
- data/lib/tensor_stream/train/saver.rb +2 -3
- data/lib/tensor_stream/utils.rb +18 -13
- data/lib/tensor_stream/utils/freezer.rb +5 -1
- data/lib/tensor_stream/utils/py_ports.rb +11 -0
- data/lib/tensor_stream/variable.rb +9 -6
- data/lib/tensor_stream/version.rb +1 -1
- data/samples/word_embeddings/word_embedding_1.rb +192 -0
- data/samples/word_embeddings/word_embedding_2.rb +203 -0
- data/tensor_stream.gemspec +7 -2
- metadata +67 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8f7d54f45a96ee2ed86af5916339747701171476e2b1cd6197f4f78d3f7f2eb3
+  data.tar.gz: f8a1c615ebf5f67de35e0e6ac84ac531fc5306b62787ac0d2d952f474eb97bad
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4607a3c117c98f21594bcbf12b98a1e927dab88c2847d4516c4f3dd3502b821e8d4b2b8e17e085c3ed122eec1e339baced66e3822fed2c5c6e3c8e10d0121e08
+  data.tar.gz: dd2f7b6c971a25b90a4404319231de6386aef0b536a1df9eca84a3a881e6e9e2faa903fd7fb72b9a05ba8588ec5ffdd153be0072e6c850960fff9ecaecf7b6bc
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.0.7] - 2019-04-08
+- [NEW] - Support for nn.embedding_lookup
+- [NEW] - l2_normalize, dynamic_partition
+- [NEW OP] - New Ops: rsqrt, top_k, strided_slice
+- [NEW] - Support for ranges in tensors (e.g. t[0...2] via strided slice)
+- [SAMPLES] - Add samples for handling word vectors
+
+## [1.0.5] - 2019-03-20
+- [BUG FIX] - Fix not wrapping a stack op on some arrays. Should fix rnn sample
+
 ## [0.9.10] - 2019-01-02
 - [BUG FIX] - remove pry-byebug include (Thanks @samgooi4189)
 - Update Changelog for 0.9.9
@@ -19,7 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [NEW OP] Convolutional networks - conv2d, conv2d_backprop_filter, conv2d_backprop_input
 - [IMAGE] Exposed image resampling options
 - [BUG FIX] fix argmin, argmax handling of NaN values
-
+
 ## [0.9.5] - 2018-11-05
 - [NEW OP] assert_equal, relu6
 - [TRAINING] learning_rate_decay, dropout
@@ -134,4 +144,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - reworked auto differentiation, fix a number of bugs related to auto differentiation, smaller derivative programs
 - alpha support for saving to pbtext format, added graphml generation
 - significant number of ops added
-- ops that support broadcasting now work better
+- ops that support broadcasting now work better
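The 1.0.7 entry above introduces range indexing on tensors (`t[0...2]`), which is described as lowering to the new strided_slice op. A minimal sketch of that usage follows; the `constant`/`session.run` calls mirror the usage guide elsewhere in this diff, while the indexing form itself is assumed from the changelog note rather than taken from the code:

```ruby
require 'tensor_stream'

ts = TensorStream
sess = ts.session

t = ts.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Range indexing is described as lowering to a strided_slice op,
# so t[0...2] should select the first two rows.
first_two_rows = t[0...2]

sess.run(first_two_rows) # expected: [[1, 2, 3], [4, 5, 6]]
```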
data/Dockerfile
CHANGED
@@ -1,4 +1,4 @@
-FROM circleci/ruby:2.
+FROM circleci/ruby:2.6.1-node-browsers
 RUN sudo apt-get update -q && sudo apt-get install --no-install-recommends -yq alien wget unzip clinfo \
     && sudo apt-get clean && sudo rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 RUN export DEVEL_URL="https://software.intel.com/file/531197/download" \
data/USAGE_GUIDE.md
CHANGED
@@ -223,6 +223,74 @@ vars = graph.get_collection(TensorStream::GraphKeys::GLOBAL_VARIABLES)
 => [Variable(Variable:0 shape: TensorShape([]) data_type: float32)]
 ```
 
+High Performance Computing
+--------------------------
+
+TensorStream has been designed from the ground up to support multiple execution backends.
+
+What this means is you can build your models once and then be able to execute them later on specialized hardware when available like GPUs.
+
+An OpenCL backend is available that you can use for compute intensive tasks like machine learning, especially those that use convolutional networks.
+
+Using OpenCL is as simple as installing the tensor_stream-opencl gem
+
+```
+gem install tensor_stream-opencl
+```
+
+You can then require the library in your programs and it will get used automatically (assuming you also installed OpenCL drivers for your system)
+
+```ruby
+require 'tensor_stream'
+
+# enable OpenCL
+require 'tensor_stream/opencl'
+
+tf = TensorStream
+
+srand(5)
+seed = 5
+tf.set_random_seed(seed)
+
+SHAPES = [32, 32]
+tf = TensorStream
+sess = tf.session
+large_tensor = tf.constant(sess.run(tf.random_uniform([256, 256])))
+
+sum_axis_1 = tf.reduce_sum(large_tensor, 1)
+sess.run(sum_axis_1)
+```
+
+Using OpenCL can improve performance dramatically in scenarios involving large tensors:
+
+```
+Linux 4.15.0-46-generic #49-Ubuntu SMP
+model name : AMD Ryzen 3 1300X Quad-Core Processor
+OpenCL device NVIDIA CUDA GeForce GTX 1060 6GB
+ruby 2.6.2p47 (2019-03-13 revision 67232) [x86_64-linux]
+
+                                user     system      total        real
+pure ruby softmax         :  0.024724   0.000000   0.024724 (  0.024731)
+opencl softmax            :  0.006237   0.003945   0.010182 (  0.009005)
+pure ruby matmul          :  0.679538   0.000000   0.679538 (  0.680048)
+opencl matmul             :  0.003456   0.007965   0.011421 (  0.008568)
+pure ruby sum             :  3.210619   0.000000   3.210619 (  3.210064)
+opencl sum                :  0.002431   0.008030   0.010461 (  0.007522)
+pure ruby sum axis 1      :  3.208789   0.000000   3.208789 (  3.208125)
+opencl sum axis 1         :  0.006075   0.003963   0.010038 (  0.007679)
+pure ruby conv2d_backprop :  3.738167   0.000000   3.738167 (  3.737946)
+opencl conv2d_backprop    :  0.031267   0.003958   0.035225 (  0.030381)
+pure ruby conv2d          :  0.794182   0.000000   0.794182 (  0.794100)
+opencl conv2d             :  0.015865   0.004020   0.019885 (  0.016878)
+```
+
+A quick glance shows not a marginal increase but an order of magnitude performance increase in most operations.
+In fact we are looking at almost a 200x faster compute on operations like matmul and softmax (essential operations in machine learning). This is not a surprise because of the "embarrassingly" parallel nature of machine learning computation. Because of this, GPUs are basically a requirement in most machine learning tasks.
+
+The code containing these benchmarks can be found at:
+
+tensor_stream-opencl/benchmark/benchmark.rb
+
 Limitations
 -----------
 
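The guide points at tensor_stream-opencl/benchmark/benchmark.rb without reproducing it. As a rough sketch only, a comparison like the one tabulated above can be timed with Ruby's stdlib Benchmark module using the same API calls shown in the guide; this is an editor's illustration, not the actual benchmark file:

```ruby
require 'benchmark'
require 'tensor_stream'
# Uncomment to route eligible ops through the OpenCL backend
# (needs the tensor_stream-opencl gem and OpenCL drivers):
# require 'tensor_stream/opencl'

ts = TensorStream
sess = ts.session

large_tensor = ts.constant(sess.run(ts.random_uniform([256, 256])))
sum_axis_1   = ts.reduce_sum(large_tensor, 1)

Benchmark.bm(20) do |x|
  x.report('sum axis 1:') { 100.times { sess.run(sum_axis_1) } }
end
```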
data/lib/tensor_stream.rb
CHANGED
@@ -23,6 +23,7 @@ require "tensor_stream/operation"
 require "tensor_stream/placeholder"
 require "tensor_stream/control_flow"
 require "tensor_stream/dynamic_stitch"
+require "tensor_stream/math/math_ops"
 require "tensor_stream/nn/nn_ops"
 require "tensor_stream/evaluator/evaluator"
 require "tensor_stream/graph_serializers/packer"
data/lib/tensor_stream/evaluator/base_evaluator.rb
CHANGED
@@ -2,11 +2,17 @@ module TensorStream
   # Evaluator base module
   module Evaluator
     class OutputGroup
+      include Enumerable
+
       attr_accessor :outputs, :data_types
       def initialize(outputs = [], data_types = [])
         @outputs = outputs
         @data_types = data_types
       end
+
+      def each
+        @outputs.map { |output| yield output }
+      end
     end
 
     class UnsupportedOp < RuntimeError
@@ -131,7 +137,7 @@ module TensorStream
         time.to_i * (10**9) + time.nsec
       end
 
-      instance_exec(execution_context, tensor, resolved_inputs, &op[:block]).tap do
+      instance_exec(execution_context, tensor, resolved_inputs, &op[:block]).tap do |result|
         if profile_enabled?
           time = Time.now
           end_time = time.to_i * (10**9) + time.nsec
@@ -222,11 +228,25 @@ module TensorStream
 
     def self.register_evaluator(klass, name, index = 0)
       @evaluators ||= {}
+      @storage_managers ||= {}
       @evaluators[name] = {name: name, class: klass, index: index}
+      @storage_managers[klass] = klass.get_storage_manager
     end
 
     def self.default_evaluators
       evaluators.values.sort { |v| v[:index] }.reverse.map { |v| v[:class] }
    end
+
+    def self.clear_storages(graph)
+      @storage_managers.values.each { |manager| manager.clear_variables(graph) }
+    end
+
+    def self.read_variable(graph, name)
+      @storage_managers.values.each do |manager|
+        return manager.read_value(graph, name) if manager.exists?(graph, name)
+      end
+
+      nil
+    end
   end
 end
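With Enumerable mixed in and #each defined over @outputs, an OutputGroup can now be walked with the usual Ruby collection helpers. A small illustration using only the class as defined above:

```ruby
group = TensorStream::Evaluator::OutputGroup.new([[1, 2], [3, 4]], [:int32, :int32])

# Standard Enumerable behaviour, driven by the #each shown in the diff:
group.map(&:size) # => [2, 2]
group.to_a        # => [[1, 2], [3, 4]]
group.first       # => [1, 2]
```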
data/lib/tensor_stream/evaluator/evaluator_utils.rb
ADDED
@@ -0,0 +1,20 @@
+module TensorStream
+  class EvaluatorUtils
+    extend TensorStream::StringHelper
+
+    def self.get_evaluator_classes(evaluators)
+      @evaluator_classes ||= if evaluators.is_a?(Array)
+        if evaluators.empty?
+          TensorStream::Evaluator.default_evaluators
+        else
+          evaluators.collect { |name| Object.const_get("TensorStream::Evaluator::#{camelize(name.to_s)}") }
+        end
+      elsif evaluators.nil?
+        TensorStream::Evaluator.default_evaluators
+      else
+        [Object.const_get("TensorStream::Evaluator::#{camelize(evaluators.to_s)}")]
+      end
+      @evaluator_classes
+    end
+  end
+end
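The resolution logic above camelizes evaluator names into constants under TensorStream::Evaluator and caches the result. A couple of illustrative calls, worked from the code as shown (the RubyEvaluator constant comes from ruby_evaluator.rb elsewhere in this gem):

```ruby
# In a fresh process, a symbol (or string) is camelized and looked up
# as a constant under TensorStream::Evaluator:
TensorStream::EvaluatorUtils.get_evaluator_classes(:ruby_evaluator)
# => [TensorStream::Evaluator::RubyEvaluator]

# nil or an empty array would instead fall back to
# TensorStream::Evaluator.default_evaluators.
#
# Note that @evaluator_classes memoizes the first resolution, so subsequent
# calls return the cached list regardless of the argument.
```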
data/lib/tensor_stream/evaluator/operation_helpers/array_ops_helper.rb
CHANGED
@@ -30,6 +30,16 @@ module TensorStream
       end
     end
 
+    def array_set!(input, value)
+      input.each_with_index do |element, index|
+        if element.is_a?(Array)
+          array_set(element, value)
+        else
+          input[index] = value[index]
+        end
+      end
+    end
+
     def truncate(input, target_shape)
       rank = get_rank(input)
       return input if rank.zero?
@@ -331,5 +341,55 @@ module TensorStream
         value.nil? ? arr : value
       end
     end
+
+    def strided_slice(value, slices = [])
+      current_slice = slices.dup
+      selection = current_slice.shift
+      return value if selection.nil?
+
+      b, e, stride = selection
+
+      b = value.size + b if b < 0
+      e = value.size + e + 1 if e < 0
+
+      indexes = if stride < 0
+        b.downto(e).select.with_index { |elem, index| (index % stride.abs) == 0 }
+      else
+        (b...e).step(stride)
+      end
+
+      indexes.map do |index|
+        strided_slice(value[index], current_slice)
+      end
+    end
+
+    def strided_slice_grad(value, grad, x, slices)
+      current_slice = slices.dup
+      selection = current_slice.shift
+      current_shape = x.shift
+
+      if selection.nil?
+        array_set!(value, grad)
+      end
+
+      b, e, stride = selection
+
+      b = value.size + b if b < 0
+      e = value.size + e + 1 if e < 0
+
+      indexes = if stride < 0
+        b.downto(e).select.with_index { |elem, index| (index % stride.abs) == 0 }
+      else
+        (b...e).step(stride)
+      end
+
+      indexes.each_with_index do |index, grad_index|
+        if (value[index].is_a?(Array))
+          strided_slice_grad(value[index], grad[grad_index], x.dup, current_slice.dup)
+        else
+          value[index] = grad[grad_index]
+        end
+      end
+    end
   end
 end
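Worked by hand from the helper above, a few calls illustrate the [begin, end, stride] convention (a negative end wraps to value.size + e + 1). These are plain-Ruby evaluations of the code as shown, not output captured from the gem:

```ruby
# Each slice is a [begin, end, stride] triple, consumed one dimension at a time.
strided_slice([1, 2, 3, 4, 5], [[0, 4, 2]])
# => [1, 3]            (indexes 0 and 2)

strided_slice([1, 2, 3, 4, 5], [[0, -1, 1]])
# => [1, 2, 3, 4, 5]   (end -1 becomes 5 + (-1) + 1 = 5)

strided_slice([[1, 2], [3, 4], [5, 6]], [[0, 2, 1], [0, 1, 1]])
# => [[1], [3]]        (first two rows, first column)
```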
data/lib/tensor_stream/evaluator/ruby/array_ops.rb
CHANGED
@@ -22,8 +22,9 @@ module TensorStream
         merged
       end
 
-      register_op :gather do |_context,
+      register_op :gather do |_context, tensor, inputs|
         params, indexes = inputs
+        raise "axis !=0 not supported" if tensor.options[:axis] != 0
         gather(params, indexes)
       end
 
@@ -216,7 +217,14 @@ module TensorStream
 
       register_op :range do |_context, _tensor, inputs|
         start, limit, delta = inputs
+
        raise " delta !=0 " if delta.zero?
+
+        if limit.zero?
+          limit = start
+          start = 0
+        end
+
        raise " Requires start <= limit when delta > 0" if (start > limit) && delta > 0
        raise " Requires start >= limit when delta < 0" if (start < limit) && delta < 0
 
@@ -399,6 +407,50 @@ module TensorStream
         end
       end
 
+      register_op :dynamic_partition do |context, tensor, inputs|
+        data, partitions = inputs
+        num_partitions = tensor.options[:num_partitions]
+        output_arr = Array.new(num_partitions) { [] }
+
+        partitions.each_with_index do |part, index|
+          output_arr[part] << data[index]
+        end
+        TensorStream::Evaluator::OutputGroup.new(output_arr, num_partitions.times.map { tensor.data_type })
+      end
+
+      register_op :gather_grad do |context, tensor, inputs|
+        grad, indexes, input_shape = inputs
+        output = Array.new(input_shape.reduce(:*)) { fp_type?(tensor.data_type) ? 0.0 : 0 }
+        indexes.each_with_index.map do |x, index|
+          output[x] += grad[index]
+        end
+        TensorShape.reshape(output, input_shape)
+      end
+
+      register_op :strided_slice do |_context, _tensor, inputs|
+        value, b_index, e_index, stride = inputs
+        slices = b_index.zip(e_index).zip(stride).map do |params|
+          selection, stride = params
+          s, e = selection
+          [s, e, stride]
+        end
+        strided_slice(value, slices)
+      end
+
+      register_op :strided_slice_grad do |_context, tensor, inputs|
+        x, b_index, e_index, stride, grad = inputs
+        slices = b_index.zip(e_index).zip(stride).map do |params|
+          selection, stride = params
+          s, e = selection
+          [s, e, stride]
+        end
+
+        target_val = generate_vector(x, generator: ->() { fp_type?(tensor.data_type) ? 0.0 : 0 })
+
+        strided_slice_grad(target_val, grad, x.dup, slices.dup)
+        target_val
+      end
+
       def merge_dynamic_stitch(merged, indexes, data, context)
         indexes.each_with_index do |ind, m|
           if ind.is_a?(Array)
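The dynamic_partition op above buckets each element of data into the output list named by the matching index in partitions. A hedged usage sketch follows; the public call shape is assumed to mirror TensorFlow's tf.dynamic_partition(data, partitions, num_partitions), while the expected result is worked out from the evaluator code as shown:

```ruby
ts = TensorStream
sess = ts.session

data       = ts.constant([10, 20, 30, 40, 50])
partitions = ts.constant([0, 1, 0, 1, 0])

# Assumed call shape, mirroring TensorFlow's API.
parts = ts.dynamic_partition(data, partitions, 2)

sess.run(parts) # expected: [[10, 30, 50], [20, 40]]
```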
data/lib/tensor_stream/evaluator/ruby/images_ops.rb
CHANGED
@@ -1,5 +1,6 @@
 require "chunky_png"
 
+
 module TensorStream
   module ImagesOps
     def self.included(klass)
@@ -49,6 +50,31 @@ module TensorStream
         TensorShape.reshape(image_data, [image.height, image.width, channels])
       end
 
+      register_op :decode_jpg do |_context, tensor, inputs|
+        require "jpeg"
+
+        content = inputs[0]
+        channels = tensor.options[:channels]
+        channels = 3 if channels.zero?
+
+        image = Jpeg::Image.open_buffer(content)
+        source_channels = image.color_info == :gray ? 1 : 3
+
+        image_data = image.raw_data.map do |pixel|
+          if source_channels == channels
+            pixel
+          elsif source_channels = 1 && channels == 3
+            [pixel, pixel, pixel]
+          elsif source_channels = 3 && channels == 1
+            raise TensorStream::ValueError, "color to grayscale not supported for jpg"
+          end
+        end.flatten
+
+        image_data.map!(&:to_f) if fp_type?(tensor.data_type)
+
+        TensorShape.reshape(image_data, [image.height, image.width, channels])
+      end
+
       register_op :encode_png do |_context, tensor, inputs|
         image_data = inputs[0]
         height, width, channels = shape_eval(image_data)
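In the channel-conversion branches above, `source_channels = 1` and `source_channels = 3` are assignments where comparisons read as the intent. A standalone sketch of the apparent intent (editor's illustration with a hypothetical helper name, not code from the gem):

```ruby
# Hypothetical helper showing the intended per-pixel channel handling
# (note `==` where the released code above uses `=`):
def expand_channels(pixel, source_channels, target_channels)
  if source_channels == target_channels
    pixel
  elsif source_channels == 1 && target_channels == 3
    [pixel, pixel, pixel] # replicate the grayscale value into R, G, B
  elsif source_channels == 3 && target_channels == 1
    raise "color to grayscale not supported for jpg"
  end
end

expand_channels(128, 1, 3) # => [128, 128, 128]
```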
data/lib/tensor_stream/evaluator/ruby/math_ops.rb
CHANGED
@@ -37,6 +37,24 @@ module TensorStream
         end
       end
 
+      register_op :bias_add do |_context, _tensor, inputs|
+        value, bias = inputs
+        arr = value.flatten.each_slice(bias.size).map do |slice|
+          slice.each_with_index.map { |elem, index| elem + bias[index] }
+        end
+        TensorShape.reshape(arr, shape_eval(value))
+      end
+
+      register_op :bias_add_grad do |_context, _tensor, inputs|
+        received_grad = inputs[0]
+        bias_size = shape_eval(received_grad).last
+        grad_sum = Array.new(bias_size) { 0.0 }
+        received_grad.flatten.each_slice(bias_size) do |slice|
+          slice.each_with_index.map { |elem, index| grad_sum[index] += elem }
+        end
+        grad_sum
+      end
+
       register_op :sub, no_eval: true do |context, tensor, inputs|
         a, b = inputs
         call_vector_op(tensor, :sub, a, b, context) { |t, u| t - u }
@@ -111,6 +129,15 @@ module TensorStream
         call_op(inputs[0], context) { |t, _b| Math.sqrt(t) }
       end
 
+      register_op :rsqrt, no_eval: true do |context, _tensor, inputs|
+        call_op(inputs[0], context) { |t, _b| 1 / Math.sqrt(t) }
+      end
+
+      register_op :rsqrt_grad, no_eval: true do |context, tensor, inputs|
+        y, grad = inputs
+        call_vector_op(tensor, :rsqrt_grad, y, grad, context) { |_y, g| 0.5 * g * (_y ** 3) }
+      end
+
       register_op :floor, no_eval: true do |context, _tensor, inputs|
         call_op(inputs[0], context) { |t, _b| t.floor }
       end
@@ -135,6 +162,25 @@ module TensorStream
         call_op(inputs[0], context) { |t, _b| 1 - Math.tanh(t) * Math.tanh(t) }
       end
 
+      register_op :top_k do |context, tensor, inputs|
+        values, k = inputs
+        v_shape = shape_eval(values)
+
+        sorted = tensor.options[:sorted]
+        work_values = TensorShape.reshape(values, [-1, v_shape.last])
+        work_values.map! do |row|
+          last_k = row.map.with_index { |r, index| [r, index] }.sort! { |a,b| a[0] <=> b[0] }.last(k)
+          last_k.reverse! if sorted
+          last_k
+        end
+
+        top_k = work_values.map { |row| row.map { |r| r[0] } }
+        top_indices = work_values.map { |row| row.map { |r| r[1] } }
+        v_shape[-1] = k
+
+        TensorStream::Evaluator::OutputGroup.new([TensorShape.reshape(top_k, v_shape), TensorShape.reshape(top_indices, v_shape)], [tensor.inputs[0].data_type, :int32])
+      end
+
       register_op(%i[argmax arg_max]) do |_context, tensor, inputs|
         axis = inputs[1] || 0
         rank = get_rank(inputs[0])
@@ -241,13 +287,22 @@ module TensorStream
         raise "#{tensor.inputs[0].name} rank must be greater than 1" if rank_a < 2
         raise "#{tensor.inputs[1].name} rank must be greater than 1" if rank_b < 2
 
-        matrix_a = matrix_a.transpose if tensor.options[:transpose_a]
-        matrix_b = matrix_b.transpose if tensor.options[:transpose_b]
-
         # check matrix dimensions
-
+        if rank_a >= 3
+          matrix_a.zip(matrix_b).map do |m_a, m_b|
+            matmul(m_a, m_b, tensor)
+          end
+        else
+          matmul(matrix_a, matrix_b, tensor)
+        end
+      end
+
+      def matmul(m_a, m_b, tensor)
+        m_a = m_a.transpose if tensor.options[:transpose_a]
+        m_b = m_b.transpose if tensor.options[:transpose_b]
+        raise TensorStream::ValueError, "incompatible shape sizes for matrix multiplication (#{m_a[0].size} != #{m_b.size}) #{shape_eval(m_a)} vs #{shape_eval(m_b)}" if m_a[0].size != m_b.size
 
-        (Matrix[*
+        (Matrix[*m_a] * Matrix[*m_b]).to_a
       end
 
       register_op %i[max maximum], noop: true do |context, tensor, inputs|
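The top_k evaluator above returns an OutputGroup pairing each row's k largest values with their indices. A hedged usage sketch follows; exposing the op as ts.top_k with a sorted option is an assumption based on ops/top_k.rb in the file listing, and the expected output is worked out from the evaluator code rather than captured from the gem:

```ruby
ts = TensorStream
sess = ts.session

scores = ts.constant([[1.0, 3.0, 2.0, 4.0]])

# Assumed call shape for the new op; sorted: true reverses the ascending
# sort so the largest value comes first in each row.
values, indices = sess.run(ts.top_k(scores, 2, sorted: true))

values  # expected: [[4.0, 3.0]]
indices # expected: [[3, 1]]
```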