tensor_stream 0.5.1 → 0.6.0
- checksums.yaml +5 -5
- data/CHANGELOG.md +9 -0
- data/benchmark_ryzen_amd.txt +36 -0
- data/lib/tensor_stream/dynamic_stitch.rb +28 -0
- data/lib/tensor_stream/evaluator/base_evaluator.rb +32 -3
- data/lib/tensor_stream/evaluator/opencl/kernels/floor_div.cl +48 -0
- data/lib/tensor_stream/evaluator/opencl/kernels/mod.cl +3 -0
- data/lib/tensor_stream/evaluator/opencl/kernels/squared_difference.cl +53 -0
- data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb +1 -2
- data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb +44 -24
- data/lib/tensor_stream/evaluator/opencl/opencl_template_helper.rb +2 -0
- data/lib/tensor_stream/evaluator/operation_helpers/array_ops_helper.rb +21 -11
- data/lib/tensor_stream/evaluator/ruby_evaluator.rb +165 -48
- data/lib/tensor_stream/graph_serializers/pbtext.rb +8 -0
- data/lib/tensor_stream/helpers/op_helper.rb +41 -4
- data/lib/tensor_stream/math_gradients.rb +64 -64
- data/lib/tensor_stream/nn/nn_ops.rb +6 -2
- data/lib/tensor_stream/operation.rb +17 -3
- data/lib/tensor_stream/ops.rb +47 -0
- data/lib/tensor_stream/session.rb +9 -1
- data/lib/tensor_stream/tensor.rb +15 -0
- data/lib/tensor_stream/utils.rb +5 -1
- data/lib/tensor_stream/version.rb +1 -1
- data/lib/tensor_stream.rb +1 -0
- data/samples/nearest_neighbor.rb +1 -1
- data/test_samples/raw_neural_net_sample.rb +6 -7
- metadata +8 -3
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 6d647cef8f32fa7b3c10460365adfc55ccdd9872e71d453df090986349b615f5
+  data.tar.gz: baa7be71775bc5d39396343b6d4c32943cf3b79d5a2e591c885bd6fc9314883e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d3207ef919464e696d03fe7bbd264ba606565bf09d66796d461b687f047e8b2b969259bcffaf7524677d6b22398ba32025ca8f94c33756cda3a3bb37f535a902
+  data.tar.gz: 735dbd55e54237619bb9c6653818dade6df257fb71a58cc4abd540a3053d51d318d734ba149c3889bd6403a5219166c291534a66528b83e481ba1240e2696e49
data/CHANGELOG.md CHANGED

@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.0] - 2018-07-21
+### Added
+- [NEW OP] fill, floor_div, dynamic_stitch, mod, range, size, squared_difference
+
+### Fixes
+- [General] Some auto-differentiation fixes
+- [softmax_cross_entropy_with_logits_v2] Use numerically stable way of calculating values
+- Other fixes related to shape computation
+
 ## [0.5.1] - 2018-06-27
 ### Added
 - Added support for control_dependencies
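
The quickest way to see the 0.6.0 additions is through the public API. A minimal sketch, assuming the new ops are exposed as top-level TensorStream module functions like the existing ones (signatures not verified against this release):

```ruby
require 'tensor_stream'

ts = TensorStream
a  = ts.constant([[1.0, 2.0], [3.0, 4.0]])
b  = ts.constant([[2.0, 2.0], [2.0, 2.0]])

sess = ts.session
sess.run(ts.squared_difference(a, b)) # => [[1.0, 0.0], [1.0, 4.0]]
sess.run(ts.floor_div(a, b))          # => [[0.0, 1.0], [1.0, 2.0]]
sess.run(ts.size(a))                  # => 4
```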
data/benchmark_ryzen_amd.txt ADDED

@@ -0,0 +1,36 @@
+TensorStream::Evaluator::OpenclEvaluator
+TensorStream::Evaluator::RubyEvaluator
+model name : AMD Ryzen 3 1300X Quad-Core Processor
+OpenCL device AMD Accelerated Parallel Processing Ellesmere
+Rehearsal --------------------------------------------------------------
+pure ruby ooo matmul     :   1.480000   0.000000   1.480000 (  1.486855)
+opencl ooo matmul        :   0.190000   0.130000   0.320000 (  0.332605)
+pure ruby softmax        :   0.280000   0.000000   0.280000 (  0.278398)
+opencl softmax           :   0.040000   0.020000   0.060000 (  0.070980)
+pure ruby matmul         :   0.730000   0.000000   0.730000 (  0.726565)
+opencl matmul            :   0.020000   0.010000   0.030000 (  0.046762)
+pure ruby                :   2.550000   0.000000   2.550000 (  2.544265)
+opencl                   :   0.290000   0.020000   0.310000 (  0.318674)
+pure ruby single function:   0.370000   0.000000   0.370000 (  0.374805)
+opencl singlefunction    :   0.190000   0.050000   0.240000 (  0.239913)
+pure ruby pow float      :   0.090000   0.000000   0.090000 (  0.093488)
+opencl pow float         :   0.100000   0.010000   0.110000 (  0.110532)
+pure ruby pow int        :   0.030000   0.000000   0.030000 (  0.022236)
+opencl pow int           :   0.090000   0.010000   0.100000 (  0.111199)
+----------------------------------------------------- total: 6.700000sec
+
+                                  user     system      total        real
+pure ruby ooo matmul     :   1.460000   0.000000   1.460000 (  1.468597)
+opencl ooo matmul        :   0.040000   0.000000   0.040000 (  0.053625)
+pure ruby softmax        :   0.280000   0.000000   0.280000 (  0.280252)
+opencl softmax           :   0.020000   0.010000   0.030000 (  0.043143)
+pure ruby matmul         :   0.700000   0.000000   0.700000 (  0.703540)
+opencl matmul            :   0.030000   0.000000   0.030000 (  0.037716)
+pure ruby                :   2.540000   0.000000   2.540000 (  2.539661)
+opencl                   :   0.150000   0.000000   0.150000 (  0.164203)
+pure ruby single function:   0.350000   0.000000   0.350000 (  0.351883)
+opencl singlefunction    :   0.090000   0.010000   0.100000 (  0.092359)
+pure ruby pow float      :   0.080000   0.000000   0.080000 (  0.080484)
+opencl pow float         :   0.030000   0.000000   0.030000 (  0.032691)
+pure ruby pow int        :   0.020000   0.000000   0.020000 (  0.019487)
+opencl pow int           :   0.020000   0.000000   0.020000 (  0.026782)
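
The file above is standard Benchmark.bmbm output: a rehearsal pass to warm up, then the measured pass. A minimal sketch of the kind of harness that produces it; the op mix, matrix sizes, and iteration counts here are assumptions, not the gem's actual benchmark script:

```ruby
require 'benchmark'
require 'tensor_stream'

ts   = TensorStream
a    = ts.constant([[1.0, 2.0], [3.0, 4.0]])
sess = ts.session

Benchmark.bmbm do |x|
  # One labelled report per scenario; bmbm prints rehearsal and real passes.
  x.report('pure ruby matmul :') { 100.times { sess.run(ts.matmul(a, a)) } }
end
```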
data/lib/tensor_stream/dynamic_stitch.rb ADDED

@@ -0,0 +1,28 @@
+module TensorStream
+  # Defines a TensorStream controlflow op
+  class DynamicStitch < Operation
+    attr_accessor :ops
+
+    def initialize(flow_type, inputs, ops = nil, options = {})
+      setup_initial_state(options)
+
+      @operation = :"flow_#{flow_type}"
+      @inputs = inputs
+
+      @data_type = Tensor.detect_type(inputs[1])
+      @name = [@graph.get_name_scope, options[:name] || set_name].compact.join('/')
+      @ops = ops
+      @shape = TensorShape.new([inputs.size])
+      @graph.add_node(self)
+    end
+
+    def set_data_type(_passed_data_type)
+      :unknown
+    end
+
+    def run
+      eval
+    end
+  end
+end
+
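
For context, dynamic_stitch follows the semantics of TensorFlow's op of the same name: each indices list scatters the corresponding data list into one interleaved result. A plain-Ruby model of those semantics (illustrative only, not the evaluator's actual implementation):

```ruby
# Stitch data values into the result positions named by the parallel indices.
indices = [[0, 2], [1, 3]]
data    = [[10, 30], [20, 40]]

result = []
indices.each_with_index do |idx_list, i|
  idx_list.each_with_index { |pos, j| result[pos] = data[i][j] }
end
result # => [10, 20, 30, 40]
```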
data/lib/tensor_stream/evaluator/base_evaluator.rb CHANGED

@@ -79,7 +79,6 @@ module TensorStream
         @ops[op.to_sym] = { options: options, block: block }
       end
     else
-
       @ops[opcode.to_sym] = { options: options, block: block }
     end
   end
@@ -87,16 +86,24 @@ module TensorStream
   ##
   # gets all supported ops for this Evaluator class
   def self.ops
-    @ops ||={}
+    @ops ||= {}
   end

   def invoke(tensor, execution_context)
+    return eval_tensor(tensor, execution_context) unless tensor.is_a?(Operation)
+
     if self.class.ops.key?(tensor.operation.to_sym)
       op = self.class.ops[tensor.operation.to_sym]
+
       op_options = op[:options]
       resolved_inputs = tensor.inputs.map do |i|
         next if i.nil?
-
+
+        if i.is_a?(Array)
+          next i.collect { |sub_item| sub_item.is_a?(Tensor) ? invoke(sub_item, execution_context) : sub_item }
+        end
+
+        if !op_options[:noop] && @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
           cache_key = "#{tensor.graph.object_id}_#{i.name}:#{object_id}"
           next @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)

@@ -116,6 +123,28 @@ module TensorStream

   protected

+  def get_broadcast_gradient_args(input_a, input_b)
+    return [[], []] if input_a == input_b
+
+    input_a_args = []
+    input_b_args = []
+
+    input_a = input_b.size.times.map { |i| i < input_a.size ? input_a[i] : nil }.reverse if input_a.size < input_b.size
+    input_b = input_a.size.times.map { |i| i < input_b.size ? input_b[i] : nil }.reverse if input_a.size > input_b.size
+
+    input_a.reverse.zip(input_b.reverse).each_with_index do |item, index|
+      a, b = item
+
+      if a.nil? || b && (a < b)
+        input_a_args << input_b.size - index - 1
+      elsif b.nil? || a && (a > b)
+        input_b_args << input_a.size - index - 1
+      end
+    end
+
+    [input_a_args.reverse, input_b_args.reverse]
+  end
+
   ##
   # converts from a ruby Buffer object to the evaluator's native buffer format
   def convert_from_buffer(tensor, result)
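
The new get_broadcast_gradient_args computes, for a broadcast binary op, the axes over which each operand's gradient must be summed to recover that operand's shape. Hand-traced calls against the method above (assuming it is reachable from an evaluator instance):

```ruby
# [2, 3] vs [3]: b is left-padded to [nil, 3]; axis 0 exists only on a's
# side, so b's gradient is reduced over axis 0 while a needs nothing.
get_broadcast_gradient_args([2, 3], [3])    # => [[], [0]]

# [2, 3] vs [2, 1]: b's size-1 axis 1 was broadcast, so reduce b over axis 1.
get_broadcast_gradient_args([2, 3], [2, 1]) # => [[], [1]]

# Identical shapes short-circuit: no reduction on either side.
get_broadcast_gradient_args([5], [5])       # => [[], []]
```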
data/lib/tensor_stream/evaluator/opencl/kernels/floor_div.cl ADDED

@@ -0,0 +1,48 @@
+% c_dtype = dtype_to_c_type(dtype)
+% fname = 'floor_div'
+% result_t = c_dtype
+// same dimension add floating point op
+__kernel void <%= fname%>_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[globalRow * N + globalCol]);
+}
+
+// 1D + Scalar floating point add op
+__kernel void <%=fname%>_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[0]);
+    } else {
+      C[globalRow * N + globalCol] = (int)(B[0] / A[globalRow * N + globalCol]);
+    }
+}
+
+// 1D + Scalar floating point add op broadcast
+__kernel void <%= fname%>_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op,__global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    int b_m_index = globalRow;
+    int b_n_index = globalCol;
+
+    if ( b_m_index >= M2) {
+      b_m_index = b_m_index % M2;
+    };
+
+    if (b_n_index >= N2) {
+      b_n_index = b_n_index % N2;
+    }
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[b_m_index * N2 + b_n_index]);
+    } else {
+      C[globalRow * N + globalCol] = (int)(B[b_m_index * N2 + b_n_index] / A[globalRow * N + globalCol]);
+    }
+}
data/lib/tensor_stream/evaluator/opencl/kernels/squared_difference.cl ADDED

@@ -0,0 +1,53 @@
+% c_dtype = dtype_to_c_type(dtype)
+// same dimension add floating point op
+__kernel void squared_difference_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+    <%= c_dtype %> x = A[globalRow * N + globalCol];
+    <%= c_dtype %> y = B[globalRow * N + globalCol];
+    C[globalRow * N + globalCol] = (x - y) * (x - y);
+}
+
+// 1D + Scalar floating point add op
+__kernel void squared_difference_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    <%= c_dtype %> x = A[globalRow * N + globalCol];
+    <%= c_dtype %> y = B[0];
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = (x - y) * (x - y);
+    } else {
+      C[globalRow * N + globalCol] = (y - x) * (y - x);
+    }
+}
+
+// 1D + Scalar floating point add op broadcast
+__kernel void squared_difference_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op,__global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    int b_m_index = globalRow;
+    int b_n_index = globalCol;
+
+    if ( b_m_index >= M2) {
+      b_m_index = b_m_index % M2;
+    };
+
+    if (b_n_index >= N2) {
+      b_n_index = b_n_index % N2;
+    }
+
+    <%= c_dtype %> x = A[globalRow * N + globalCol];
+    <%= c_dtype %> y = B[b_m_index * N2 + b_n_index];
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = (x - y) * (x - y);
+    } else {
+      C[globalRow * N + globalCol] = (y - x) * (y - x);
+    }
+}
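
Both kernel files handle broadcasting the same way: the `_b` variants index the second operand, an [M2, N2] buffer, with indices that wrap once the result index runs past it. A small Ruby model of just that index math (illustrative, not part of the gem):

```ruby
# Map result cell (m, n) of an [M, N] output onto a broadcast [m2, n2]
# operand, wrapping indices exactly as the *_b kernels do.
def broadcast_index(m, n, m2, n2)
  [m >= m2 ? m % m2 : m, n >= n2 ? n % n2 : n]
end

broadcast_index(3, 2, 1, 3) # => [0, 2]  row vector tiled down the rows
broadcast_index(3, 2, 4, 1) # => [3, 0]  column vector tiled across columns
```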
data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb CHANGED

@@ -25,8 +25,7 @@ module TensorStream
       op.command_queue.finish
       self.dirty = false
     end
-
-    result = buffer.reshape(*shape.reverse).to_a
+    result = buffer.reshape(*shape.map { |s| s.to_i }.reverse).to_a

     if data_type == :boolean
       result = process_function_op(result, ->(a, _b) { a != 0 })
data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb CHANGED

@@ -109,7 +109,9 @@ module TensorStream
         b
       end
     else
-      return buffer if buffer.nil?
+      return buffer if buffer.nil?
+      return [] if buffer.buffer.nil?
+      return buffer if buffer.buffer.size.zero?
       _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: [buffer.op].compact)
     end
     _opencl_queue.finish
@@ -202,6 +204,7 @@ module TensorStream
       suffix = args.collect { |k,v| "#{k}.#{v}"}.join('.')
       @context[:_cache]["_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"] ||= begin
         filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
+        raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
         source = File.read(filename)
         source = OpenclTemplateHelper.new(source).generate(args)
         # File.write("/tmp/#{kernel}.#{suffix}.cl", source)
@@ -251,13 +254,13 @@ module TensorStream
       execute_func('log', tensor, inputs[0], context)
     end

-    register_op :cond do |context, tensor, inputs|
+    register_op :cond, noop: true do |context, tensor, inputs|
       pred = complete_eval(tensor.options[:pred], context)

       if all_true?(pred.buffer)
-        inputs[0]
+        complete_eval(inputs[0], context)
       else
-        inputs[1]
+        complete_eval(inputs[1], context)
       end
     end

@@ -285,12 +288,20 @@ module TensorStream
       end
     end

-    %i[max add div sub mul pow sigmoid_grad].each do |op|
+    %i[max add div sub mod mul pow sigmoid_grad squared_difference].each do |op|
       register_op op, noop: true do |context, tensor, inputs|
         execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], context)
       end
     end

+    register_op :floor_div, noop: true do |context, tensor, inputs|
+      if fp_type?(tensor.data_type)
+        execute_2_operand_func('floor_div', tensor, inputs[0], inputs[1], context)
+      else
+        execute_2_operand_func('div', tensor, inputs[0], inputs[1], context)
+      end
+    end
+
     register_op :where, noop: true do |context, tensor, inputs|
       pred = tensor.options[:pred]
       execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
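
Note the dispatch: floats go to the dedicated floor_div kernel, while integer inputs reuse the plain div kernel, since integer division in the generated C already truncates. Because the float kernel casts the quotient with `(int)`, it rounds toward zero rather than flooring, so negative operands will differ from a true floor division. A hedged usage sketch (result values assumed, non-negative inputs only):

```ruby
require 'tensor_stream'

ts   = TensorStream
sess = ts.session
sess.run(ts.floor_div(ts.constant(7.0), ts.constant(2.0))) # => 3.0, floor_div kernel
sess.run(ts.floor_div(ts.constant(7),   ts.constant(2)))   # => 3, plain div kernel
```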
@@ -479,11 +490,12 @@ module TensorStream
     end

     register_op :broadcast_gradient_args, buffer: true do |_context, tensor, inputs|
-
+      rx, ry = get_broadcast_gradient_args(inputs[0].buffer.to_a, inputs[1].buffer.to_a)
+      [ wrap_opencl(rx, data_type: :int32, name: "#{tensor.name}"), wrap_opencl(ry, data_type: :int32, name: "#{tensor.name}:1")]
     end

     register_op :shape do |_context, tensor, inputs|
-      wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.
+      wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.data_type)
     end

     register_op :reshape, buffer: true do |_context, _tensor, inputs|
@@ -504,6 +516,10 @@ module TensorStream
       inputs
     end

+    register_op :size do |_context, tensor, inputs|
+      wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
+    end
+
     %i[sum mean].each do |op|
       register_op op, noop: true do |context, tensor, inputs|
         reduction(context, tensor, inputs[0], inputs[1], op.to_sym)
@@ -534,8 +550,9 @@ module TensorStream
     end

     def eval_operation(tensor, child_context)
-
+
       cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
+      return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
       return @context[cache_key] if @context.key?(cache_key)
       # puts tensor.name
       invoke(tensor, child_context).tap do |result|
@@ -559,8 +576,8 @@ module TensorStream
             value: result
           }
         end
-        @context[
-        @context[
+        @context[cache_key] = result
+        @context[:_cache][cache_key] = result if tensor.is_const
       end
     rescue EvaluatorExcecutionException => e
       raise e
@@ -628,6 +645,7 @@ module TensorStream
       a, b = auto_type_cast(a, b, name: "#{tensor.name}/cast_#{a.name}_#{b.data_type}")
       dtype = tensor.data_type
       result_shape = TensorShape.infer_shape(a.shape, b.shape)
+      return _create_result_buffer(dtype, [0], "out_#{tensor.name}") if result_shape == [0]

       output_buffer = _create_result_buffer(tensor.data_type, result_shape, "out_#{tensor.name}")
       a, b, prog, switch_operands = select_program(a, b, op_name)
@@ -799,8 +817,9 @@ module TensorStream
     end

     def _create_result_buffer(data_type, shape, name)
+      return OpenCLBuffer.new(data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
       @context[:_cache][:_cl_buffers]["_result_#{name}_#{shape.join('_')}:#{object_id}"] ||= begin
-        size = shape.empty? ? 1 : shape.reduce(:*)
+        size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
         buffer = allocate_narray_for_type(data_type, size)
         cl_buffer = _opencl_context.create_buffer(buffer.size * buffer.element_size)
         OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
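
Taken together with the `result_shape == [0]` guard in the binary-op path above, this lets ops over zero-element tensors return an empty OpenCLBuffer instead of attempting a zero-sized allocation and kernel launch. A hedged sketch of the behavior this is meant to enable (empty-constant handling not verified against this release):

```ruby
require 'tensor_stream'

ts   = TensorStream
sess = ts.session
# Element-wise ops whose inferred result shape is [0] now short-circuit
# to an empty buffer rather than failing while allocating the result.
sess.run(ts.constant([]) + ts.constant([])) # => []
```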
@@ -840,6 +859,17 @@ module TensorStream
       end
     end

+    def _reduced_shape(input_shape, axes)
+      return [] if axes.nil? # reduce to scalar
+      axes = [ axes ] unless axes.is_a?(Array)
+      return input_shape if axes.empty?
+
+      axes.each do |dimen|
+        input_shape[dimen] = 1
+      end
+      input_shape
+    end
+
     def reduction(child_context, tensor, a, b, func)
       input = complete_eval(a, child_context)
       axis = read_final_result(complete_eval(b, child_context))
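
_reduced_shape mirrors the keepdims convention: reduced axes collapse to size 1 rather than disappearing. Hand-traced calls against the method above:

```ruby
_reduced_shape([2, 3, 4], 1)      # => [2, 1, 4]
_reduced_shape([2, 3, 4], [0, 2]) # => [1, 3, 1]
_reduced_shape([2, 3, 4], nil)    # => []  (full reduction to a scalar)
```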
@@ -853,7 +883,8 @@ module TensorStream

       if axis.is_a?(Array)
         axis.map{ |x| rank - x.abs }.sort.reverse.each do |x|
-
+
+          value = value.send(func, x.to_i)
         end
       else
         value = value.send(func, rank - axis.abs)
@@ -867,7 +898,7 @@ module TensorStream
       end

       if tensor.options[:keepdims]
-        new_shape =
+        new_shape = _reduced_shape(input.shape.dup, axis)
       end

       convert_to_opencl(value.flatten, new_shape, data_type: tensor.data_type, name: tensor.name)
@@ -948,17 +979,6 @@ module TensorStream
       shape.is_a?(Array) ? shape.size : 0
     end

-    def get_broadcast_gradient_args(input_a, input_b)
-      return [] if get_rank(input_b).zero? && get_rank(input_a).zero?
-      return nil if get_rank(input_b).zero?
-      # ruby scalar
-      if get_rank(input_a).zero?
-        _broadcast_gradient_op(input_b, input_a, 0, true)
-      elsif get_rank(input_a) > 0
-        _broadcast_gradient_op(input_a, input_b, 0)
-      end
-    end
-
     def concat_array(values, axis)
       combined_array = values.shift
       axis = get_rank(combined_array) - 1 if axis == -1