tensor_stream 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +9 -0
- data/benchmark_ryzen_amd.txt +36 -0
- data/lib/tensor_stream/dynamic_stitch.rb +28 -0
- data/lib/tensor_stream/evaluator/base_evaluator.rb +32 -3
- data/lib/tensor_stream/evaluator/opencl/kernels/floor_div.cl +48 -0
- data/lib/tensor_stream/evaluator/opencl/kernels/mod.cl +3 -0
- data/lib/tensor_stream/evaluator/opencl/kernels/squared_difference.cl +53 -0
- data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb +1 -2
- data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb +44 -24
- data/lib/tensor_stream/evaluator/opencl/opencl_template_helper.rb +2 -0
- data/lib/tensor_stream/evaluator/operation_helpers/array_ops_helper.rb +21 -11
- data/lib/tensor_stream/evaluator/ruby_evaluator.rb +165 -48
- data/lib/tensor_stream/graph_serializers/pbtext.rb +8 -0
- data/lib/tensor_stream/helpers/op_helper.rb +41 -4
- data/lib/tensor_stream/math_gradients.rb +64 -64
- data/lib/tensor_stream/nn/nn_ops.rb +6 -2
- data/lib/tensor_stream/operation.rb +17 -3
- data/lib/tensor_stream/ops.rb +47 -0
- data/lib/tensor_stream/session.rb +9 -1
- data/lib/tensor_stream/tensor.rb +15 -0
- data/lib/tensor_stream/utils.rb +5 -1
- data/lib/tensor_stream/version.rb +1 -1
- data/lib/tensor_stream.rb +1 -0
- data/samples/nearest_neighbor.rb +1 -1
- data/test_samples/raw_neural_net_sample.rb +6 -7
- metadata +8 -3
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA256:
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 6d647cef8f32fa7b3c10460365adfc55ccdd9872e71d453df090986349b615f5
+  data.tar.gz: baa7be71775bc5d39396343b6d4c32943cf3b79d5a2e591c885bd6fc9314883e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d3207ef919464e696d03fe7bbd264ba606565bf09d66796d461b687f047e8b2b969259bcffaf7524677d6b22398ba32025ca8f94c33756cda3a3bb37f535a902
+  data.tar.gz: 735dbd55e54237619bb9c6653818dade6df257fb71a58cc4abd540a3053d51d318d734ba149c3889bd6403a5219166c291534a66528b83e481ba1240e2696e49
data/CHANGELOG.md
CHANGED

@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.0] - 2018-07-21
+### Added
+- [NEW OP] fill, floor_div, dynamic_stitch, mod, range, size, squared_difference
+
+### Fixes
+- [General] Some auto-differentiation fixes
+- [softmax_cross_entropy_with_logits_v2] Use a numerically stable way of calculating values
+- Other fixes related to shape computation
+
 
 ## [0.5.1] - 2018-06-27
 ### Added
 - Added support for control_dependencies
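For orientation, a minimal sketch of how the new ops can be exercised, assuming they follow the same top-level TensorStream API conventions as the existing ops (the exact signatures live in data/lib/tensor_stream/ops.rb):

    require 'tensor_stream'

    ts = TensorStream
    sess = ts.session

    sess.run(ts.fill([2, 2], 7))                 # => [[7, 7], [7, 7]]
    sess.run(ts.floor_div(7.0, 2.0))             # => 3.0
    sess.run(ts.mod(7, 3))                       # => 1
    sess.run(ts.range(0, 6, 2))                  # => [0, 2, 4]
    sess.run(ts.size(ts.constant([1, 2, 3])))    # => 3
    sess.run(ts.squared_difference(3.0, 5.0))    # => 4.0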
data/benchmark_ryzen_amd.txt
ADDED

@@ -0,0 +1,36 @@
+TensorStream::Evaluator::OpenclEvaluator
+TensorStream::Evaluator::RubyEvaluator
+model name : AMD Ryzen 3 1300X Quad-Core Processor
+OpenCL device AMD Accelerated Parallel Processing Ellesmere
+Rehearsal --------------------------------------------------------------
+pure ruby ooo matmul     :   1.480000   0.000000   1.480000 (  1.486855)
+opencl    ooo matmul     :   0.190000   0.130000   0.320000 (  0.332605)
+pure ruby softmax        :   0.280000   0.000000   0.280000 (  0.278398)
+opencl    softmax        :   0.040000   0.020000   0.060000 (  0.070980)
+pure ruby matmul         :   0.730000   0.000000   0.730000 (  0.726565)
+opencl    matmul         :   0.020000   0.010000   0.030000 (  0.046762)
+pure ruby                :   2.550000   0.000000   2.550000 (  2.544265)
+opencl                   :   0.290000   0.020000   0.310000 (  0.318674)
+pure ruby single function:   0.370000   0.000000   0.370000 (  0.374805)
+opencl    single function:   0.190000   0.050000   0.240000 (  0.239913)
+pure ruby pow float      :   0.090000   0.000000   0.090000 (  0.093488)
+opencl    pow float      :   0.100000   0.010000   0.110000 (  0.110532)
+pure ruby pow int        :   0.030000   0.000000   0.030000 (  0.022236)
+opencl    pow int        :   0.090000   0.010000   0.100000 (  0.111199)
+----------------------------------------------------- total: 6.700000sec
+
+                               user     system      total        real
+pure ruby ooo matmul     :   1.460000   0.000000   1.460000 (  1.468597)
+opencl    ooo matmul     :   0.040000   0.000000   0.040000 (  0.053625)
+pure ruby softmax        :   0.280000   0.000000   0.280000 (  0.280252)
+opencl    softmax        :   0.020000   0.010000   0.030000 (  0.043143)
+pure ruby matmul         :   0.700000   0.000000   0.700000 (  0.703540)
+opencl    matmul         :   0.030000   0.000000   0.030000 (  0.037716)
+pure ruby                :   2.540000   0.000000   2.540000 (  2.539661)
+opencl                   :   0.150000   0.000000   0.150000 (  0.164203)
+pure ruby single function:   0.350000   0.000000   0.350000 (  0.351883)
+opencl    single function:   0.090000   0.010000   0.100000 (  0.092359)
+pure ruby pow float      :   0.080000   0.000000   0.080000 (  0.080484)
+opencl    pow float      :   0.030000   0.000000   0.030000 (  0.032691)
+pure ruby pow int        :   0.020000   0.000000   0.020000 (  0.019487)
+opencl    pow int        :   0.020000   0.000000   0.020000 (  0.026782)
data/lib/tensor_stream/dynamic_stitch.rb
ADDED

@@ -0,0 +1,28 @@
+module TensorStream
+  # Defines a TensorStream controlflow op
+  class DynamicStitch < Operation
+    attr_accessor :ops
+
+    def initialize(flow_type, inputs, ops = nil, options = {})
+      setup_initial_state(options)
+
+      @operation = :"flow_#{flow_type}"
+      @inputs = inputs
+
+      @data_type = Tensor.detect_type(inputs[1])
+      @name = [@graph.get_name_scope, options[:name] || set_name].compact.join('/')
+      @ops = ops
+      @shape = TensorShape.new([inputs.size])
+      @graph.add_node(self)
+    end
+
+    def set_data_type(_passed_data_type)
+      :unknown
+    end
+
+    def run
+      eval
+    end
+  end
+end
+
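A minimal usage sketch, assuming the public wrapper in data/lib/tensor_stream/ops.rb mirrors TensorFlow's dynamic_stitch semantics (interleaving data slices according to the given index lists); the exact signature may differ:

    require 'tensor_stream'

    ts = TensorStream
    indices = [[0, 2], [1, 3]]
    data    = [[10, 30], [20, 40]]
    merged  = ts.dynamic_stitch(indices, data)
    ts.session.run(merged)  # => [10, 20, 30, 40]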
data/lib/tensor_stream/evaluator/base_evaluator.rb
CHANGED

@@ -79,7 +79,6 @@ module TensorStream
         @ops[op.to_sym] = { options: options, block: block }
       end
     else
-
       @ops[opcode.to_sym] = { options: options, block: block }
     end
   end
@@ -87,16 +86,24 @@ module TensorStream
     ##
     # gets all supported ops for this Evaluator class
     def self.ops
-      @ops ||={}
+      @ops ||= {}
     end
 
     def invoke(tensor, execution_context)
+      return eval_tensor(tensor, execution_context) unless tensor.is_a?(Operation)
+
       if self.class.ops.key?(tensor.operation.to_sym)
         op = self.class.ops[tensor.operation.to_sym]
+
         op_options = op[:options]
         resolved_inputs = tensor.inputs.map do |i|
           next if i.nil?
-
+
+          if i.is_a?(Array)
+            next i.collect { |sub_item| sub_item.is_a?(Tensor) ? invoke(sub_item, execution_context) : sub_item }
+          end
+
+          if !op_options[:noop] && @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
            cache_key = "#{tensor.graph.object_id}_#{i.name}:#{object_id}"
            next @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
@@ -116,6 +123,28 @@ module TensorStream
 
     protected
 
+    def get_broadcast_gradient_args(input_a, input_b)
+      return [[], []] if input_a == input_b
+
+      input_a_args = []
+      input_b_args = []
+
+      input_a = input_b.size.times.map { |i| i < input_a.size ? input_a[i] : nil }.reverse if input_a.size < input_b.size
+      input_b = input_a.size.times.map { |i| i < input_b.size ? input_b[i] : nil }.reverse if input_a.size > input_b.size
+
+      input_a.reverse.zip(input_b.reverse).each_with_index do |item, index|
+        a, b = item
+
+        if a.nil? || b && (a < b)
+          input_a_args << input_b.size - index - 1
+        elsif b.nil? || a && (a > b)
+          input_b_args << input_a.size - index - 1
+        end
+      end
+
+      [input_a_args.reverse, input_b_args.reverse]
+    end
+
     ##
     # converts from a ruby Buffer object to the evaluator's native buffer format
     def convert_from_buffer(tensor, result)
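Tracing the shared helper by hand makes its contract concrete: shapes are compared right-aligned, and each returned list names the axes over which the corresponding operand's gradient must be summed after broadcasting. Worked examples (expected return values, derived from the logic above):

    get_broadcast_gradient_args([2, 3], [2, 3])  # => [[], []]   identical shapes, nothing to reduce
    get_broadcast_gradient_args([2, 3], [3])     # => [[], [0]]  b broadcast along axis 0
    get_broadcast_gradient_args([2, 3], [2, 1])  # => [[], [1]]  b broadcast along axis 1
    get_broadcast_gradient_args([3], [2, 3])     # => [[0], []]  a broadcast along axis 0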
data/lib/tensor_stream/evaluator/opencl/kernels/floor_div.cl
ADDED

@@ -0,0 +1,48 @@
+% c_dtype = dtype_to_c_type(dtype)
+% fname = 'floor_div'
+% result_t = c_dtype
+// floor_div, same-dimension operands
+__kernel void <%= fname %>_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[globalRow * N + globalCol]);
+}
+
+// floor_div, 1D + scalar operand
+__kernel void <%= fname %>_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    if (switch_op == 0) {
+        C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[0]);
+    } else {
+        C[globalRow * N + globalCol] = (int)(B[0] / A[globalRow * N + globalCol]);
+    }
+}
+
+// floor_div, 1D + scalar operand with broadcast
+__kernel void <%= fname %>_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    int b_m_index = globalRow;
+    int b_n_index = globalCol;
+
+    if (b_m_index >= M2) {
+        b_m_index = b_m_index % M2;
+    }
+
+    if (b_n_index >= N2) {
+        b_n_index = b_n_index % N2;
+    }
+
+    if (switch_op == 0) {
+        C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[b_m_index * N2 + b_n_index]);
+    } else {
+        C[globalRow * N + globalCol] = (int)(B[b_m_index * N2 + b_n_index] / A[globalRow * N + globalCol]);
+    }
+}
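These .cl files are ERB templates rather than raw OpenCL: the %-lines and <%= %> tags are expanded by OpenclTemplateHelper before compilation. As an illustration, with dtype = :float32 (and assuming dtype_to_c_type maps :float32 to float, consistent with how c_dtype is used here), the first kernel renders to ordinary OpenCL C:

    __kernel void floor_div_float32_float32(const int M, const int N, const int switch_op,
                                            __global const float *A, __global float *B, __global float *C) {
        // Get the index of the current element to be processed
        const int globalRow = get_global_id(0); // Row ID of C (0..M)
        const int globalCol = get_global_id(1); // Col ID of C (0..N)

        C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[globalRow * N + globalCol]);
    }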
data/lib/tensor_stream/evaluator/opencl/kernels/squared_difference.cl
ADDED

@@ -0,0 +1,53 @@
+% c_dtype = dtype_to_c_type(dtype)
+// squared difference, same-dimension operands
+__kernel void squared_difference_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+    <%= c_dtype %> x = A[globalRow * N + globalCol];
+    <%= c_dtype %> y = B[globalRow * N + globalCol];
+    C[globalRow * N + globalCol] = (x - y) * (x - y);
+}
+
+// squared difference, 1D + scalar operand
+__kernel void squared_difference_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    <%= c_dtype %> x = A[globalRow * N + globalCol];
+    <%= c_dtype %> y = B[0];
+
+    if (switch_op == 0) {
+        C[globalRow * N + globalCol] = (x - y) * (x - y);
+    } else {
+        C[globalRow * N + globalCol] = (y - x) * (y - x);
+    }
+}
+
+// squared difference, 1D + scalar operand with broadcast
+__kernel void squared_difference_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    int b_m_index = globalRow;
+    int b_n_index = globalCol;
+
+    if (b_m_index >= M2) {
+        b_m_index = b_m_index % M2;
+    }
+
+    if (b_n_index >= N2) {
+        b_n_index = b_n_index % N2;
+    }
+
+    <%= c_dtype %> x = A[globalRow * N + globalCol];
+    <%= c_dtype %> y = B[b_m_index * N2 + b_n_index];
+
+    if (switch_op == 0) {
+        C[globalRow * N + globalCol] = (x - y) * (x - y);
+    } else {
+        C[globalRow * N + globalCol] = (y - x) * (y - x);
+    }
+}
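Both broadcast kernels stretch the smaller operand by wrapping its row and column indices with modulo arithmetic. The same indexing rule in plain Ruby (illustrative only; broadcast_read is a hypothetical helper, not part of the gem):

    # B has shape [M2, N2], flattened row-major; the output has shape [M, N].
    def broadcast_read(b, m2, n2, row, col)
      b[(row % m2) * n2 + (col % n2)]
    end

    b = [5, 6]  # shape [1, 2], so M2 = 1, N2 = 2
    (0...2).map { |row| (0...2).map { |col| broadcast_read(b, 1, 2, row, col) } }
    # => [[5, 6], [5, 6]]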
data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb
CHANGED

@@ -25,8 +25,7 @@ module TensorStream
       op.command_queue.finish
       self.dirty = false
     end
-
-    result = buffer.reshape(*shape.reverse).to_a
+    result = buffer.reshape(*shape.map { |s| s.to_i }.reverse).to_a
 
     if data_type == :boolean
       result = process_function_op(result, ->(a, _b) { a != 0 })
data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb
CHANGED

@@ -109,7 +109,9 @@ module TensorStream
           b
         end
       else
-        return buffer if buffer.nil?
+        return buffer if buffer.nil?
+        return [] if buffer.buffer.nil?
+        return buffer if buffer.buffer.size.zero?
         _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: [buffer.op].compact)
       end
       _opencl_queue.finish
@@ -202,6 +204,7 @@ module TensorStream
       suffix = args.collect { |k, v| "#{k}.#{v}" }.join('.')
       @context[:_cache]["_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"] ||= begin
         filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
+        raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
         source = File.read(filename)
         source = OpenclTemplateHelper.new(source).generate(args)
         # File.write("/tmp/#{kernel}.#{suffix}.cl", source)
@@ -251,13 +254,13 @@ module TensorStream
       execute_func('log', tensor, inputs[0], context)
     end
 
-    register_op :cond do |context, tensor, inputs|
+    register_op :cond, noop: true do |context, tensor, inputs|
       pred = complete_eval(tensor.options[:pred], context)
 
       if all_true?(pred.buffer)
-        inputs[0]
+        complete_eval(inputs[0], context)
       else
-        inputs[1]
+        complete_eval(inputs[1], context)
       end
     end
 
@@ -285,12 +288,20 @@ module TensorStream
       end
     end
 
-    %i[max add div sub mul pow sigmoid_grad].each do |op|
+    %i[max add div sub mod mul pow sigmoid_grad squared_difference].each do |op|
       register_op op, noop: true do |context, tensor, inputs|
         execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], context)
       end
     end
 
+    register_op :floor_div, noop: true do |context, tensor, inputs|
+      if fp_type?(tensor.data_type)
+        execute_2_operand_func('floor_div', tensor, inputs[0], inputs[1], context)
+      else
+        execute_2_operand_func('div', tensor, inputs[0], inputs[1], context)
+      end
+    end
+
     register_op :where, noop: true do |context, tensor, inputs|
       pred = tensor.options[:pred]
       execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
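The dispatch above routes floating-point tensors through the dedicated floor_div kernel, while integer tensors reuse the plain div kernel, whose integer division already discards the fractional part. A sketch of the observable behavior (assuming the ts.floor_div wrapper from the public API; results shown for non-negative operands):

    ts = TensorStream
    sess = ts.session
    sess.run(ts.floor_div(ts.constant(7.0), ts.constant(2.0)))  # floor_div kernel => 3.0
    sess.run(ts.floor_div(ts.constant(7), ts.constant(2)))      # div kernel       => 3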
@@ -479,11 +490,12 @@ module TensorStream
     end
 
     register_op :broadcast_gradient_args, buffer: true do |_context, tensor, inputs|
-
+      rx, ry = get_broadcast_gradient_args(inputs[0].buffer.to_a, inputs[1].buffer.to_a)
+      [wrap_opencl(rx, data_type: :int32, name: "#{tensor.name}"), wrap_opencl(ry, data_type: :int32, name: "#{tensor.name}:1")]
     end
 
     register_op :shape do |_context, tensor, inputs|
-      wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.
+      wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.data_type)
     end
 
     register_op :reshape, buffer: true do |_context, _tensor, inputs|
@@ -504,6 +516,10 @@ module TensorStream
       inputs
     end
 
+    register_op :size do |_context, tensor, inputs|
+      wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
+    end
+
     %i[sum mean].each do |op|
       register_op op, noop: true do |context, tensor, inputs|
         reduction(context, tensor, inputs[0], inputs[1], op.to_sym)
@@ -534,8 +550,9 @@ module TensorStream
     end
 
     def eval_operation(tensor, child_context)
-
+
       cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
+      return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
       return @context[cache_key] if @context.key?(cache_key)
       # puts tensor.name
       invoke(tensor, child_context).tap do |result|
@@ -559,8 +576,8 @@ module TensorStream
           value: result
         }
       end
-      @context[
-      @context[
+      @context[cache_key] = result
+      @context[:_cache][cache_key] = result if tensor.is_const
     end
   rescue EvaluatorExcecutionException => e
     raise e
@@ -628,6 +645,7 @@ module TensorStream
       a, b = auto_type_cast(a, b, name: "#{tensor.name}/cast_#{a.name}_#{b.data_type}")
       dtype = tensor.data_type
       result_shape = TensorShape.infer_shape(a.shape, b.shape)
+      return _create_result_buffer(dtype, [0], "out_#{tensor.name}") if result_shape == [0]
 
       output_buffer = _create_result_buffer(tensor.data_type, result_shape, "out_#{tensor.name}")
       a, b, prog, switch_operands = select_program(a, b, op_name)
@@ -799,8 +817,9 @@ module TensorStream
     end
 
     def _create_result_buffer(data_type, shape, name)
+      return OpenCLBuffer.new(data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
       @context[:_cache][:_cl_buffers]["_result_#{name}_#{shape.join('_')}:#{object_id}"] ||= begin
-        size = shape.empty? ? 1 : shape.reduce(:*)
+        size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
         buffer = allocate_narray_for_type(data_type, size)
         cl_buffer = _opencl_context.create_buffer(buffer.size * buffer.element_size)
         OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
@@ -840,6 +859,17 @@ module TensorStream
       end
     end
 
+    def _reduced_shape(input_shape, axes)
+      return [] if axes.nil? # reduce to scalar
+      axes = [axes] unless axes.is_a?(Array)
+      return input_shape if axes.empty?
+
+      axes.each do |dimen|
+        input_shape[dimen] = 1
+      end
+      input_shape
+    end
+
     def reduction(child_context, tensor, a, b, func)
       input = complete_eval(a, child_context)
       axis = read_final_result(complete_eval(b, child_context))
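_reduced_shape keeps each reduced dimension as size 1, which is what the keepdims handling further down in reduction needs. Worked examples, traced from the logic above:

    _reduced_shape([2, 3, 4], 1)       # => [2, 1, 4]
    _reduced_shape([2, 3, 4], [0, 2])  # => [1, 3, 1]
    _reduced_shape([2, 3, 4], nil)     # => []         reduce to scalar
    _reduced_shape([2, 3, 4], [])      # => [2, 3, 4]  nothing to reduce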
@@ -853,7 +883,8 @@ module TensorStream
 
       if axis.is_a?(Array)
         axis.map { |x| rank - x.abs }.sort.reverse.each do |x|
-
+
+          value = value.send(func, x.to_i)
         end
       else
         value = value.send(func, rank - axis.abs)
@@ -867,7 +898,7 @@ module TensorStream
       end
 
       if tensor.options[:keepdims]
-        new_shape =
+        new_shape = _reduced_shape(input.shape.dup, axis)
       end
 
       convert_to_opencl(value.flatten, new_shape, data_type: tensor.data_type, name: tensor.name)
@@ -948,17 +979,6 @@ module TensorStream
       shape.is_a?(Array) ? shape.size : 0
     end
 
-    def get_broadcast_gradient_args(input_a, input_b)
-      return [] if get_rank(input_b).zero? && get_rank(input_a).zero?
-      return nil if get_rank(input_b).zero?
-      # ruby scalar
-      if get_rank(input_a).zero?
-        _broadcast_gradient_op(input_b, input_a, 0, true)
-      elsif get_rank(input_a) > 0
-        _broadcast_gradient_op(input_a, input_b, 0)
-      end
-    end
-
     def concat_array(values, axis)
       combined_array = values.shift
       axis = get_rank(combined_array) - 1 if axis == -1