tensor_stream 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +32 -1
- data/benchmark/benchmark.rb +7 -6
- data/benchmark_intel.txt +33 -18
- data/benchmark_nvidia.txt +35 -32
- data/lib/tensor_stream/evaluator/base_evaluator.rb +6 -1
- data/lib/tensor_stream/evaluator/opencl/kernels/ceil.cl +8 -0
- data/lib/tensor_stream/evaluator/opencl/kernels/floor.cl +8 -0
- data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb +7 -0
- data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb +36 -35
- data/lib/tensor_stream/evaluator/ruby_evaluator.rb +12 -0
- data/lib/tensor_stream/graph.rb +15 -2
- data/lib/tensor_stream/helpers/op_helper.rb +6 -1
- data/lib/tensor_stream/math_gradients.rb +3 -0
- data/lib/tensor_stream/operation.rb +10 -2
- data/lib/tensor_stream/ops.rb +14 -0
- data/lib/tensor_stream/session.rb +0 -3
- data/lib/tensor_stream/tensor.rb +21 -21
- data/lib/tensor_stream/utils.rb +9 -0
- data/lib/tensor_stream/variable.rb +6 -5
- data/lib/tensor_stream/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f14dd6388d5cdd10827cebde01a9cbca0686b653
|
4
|
+
data.tar.gz: d2ccba35defe6474a21bd75fcb09f8d49ce42e79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 244026aae6ce13d8e932deada3c169b5320df517eb5dd7db5ea8c06c1cdedc9c9829d7f149261602f502c284ffe65ae831845016d9425250b0ad9d7d66fc6a0e
|
7
|
+
data.tar.gz: 91811c88a464604f5ca1e776f86d0342dc316ee016d920d40d8a228e2978f6a275783c5613a97cf21af1ba9256c951ad3db777b66fae20e5d1f8f9659f170301
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
|
5
5
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
6
6
|
|
7
|
+
## [0.5.1] - 2018-06-27
|
8
|
+
### Added
|
9
|
+
- Added support for control_dependencies
|
10
|
+
- [NEW OP] floor, ceil
|
11
|
+
|
12
|
+
### Fixes
|
13
|
+
- Fixed variable value assignment sometimes not working
|
14
|
+
- variable assignment now checks for data types properly
|
15
|
+
|
7
16
|
## [0.5.0] - 2018-06-25
|
8
17
|
### Added
|
9
18
|
- [OpenCL] boolean types now use short by default
|
data/README.md
CHANGED
@@ -214,10 +214,41 @@ sess.run(....) # do stuff
|
|
214
214
|
|
215
215
|
```
|
216
216
|
|
217
|
-
You can manually place operations using ts.device
|
217
|
+
You can manually place operations using ts.device, e.g.:
|
218
218
|
|
219
219
|
```ruby
|
220
|
+
ts = TensorStream
|
221
|
+
# Creates a graph. Places the constants on the first OpenCL CPU device.
|
220
222
|
|
223
|
+
a, b = ts.device('/cpu:0') do
|
224
|
+
a = ts.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape: [2, 3], name: 'a')
|
225
|
+
b = ts.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape: [3, 2], name: 'b')
|
226
|
+
[a, b]
|
227
|
+
end
|
228
|
+
|
229
|
+
c = ts.device('/device:GPU:0') do
|
230
|
+
ts.matmul(a, b)
|
231
|
+
end
|
232
|
+
|
233
|
+
# Creates a session with log_device_placement set to True.
|
234
|
+
sess = ts.session(log_device_placement: true)
|
235
|
+
# Runs the op.
|
236
|
+
print(sess.run(c))
|
237
|
+
|
238
|
+
# a : apple:0
|
239
|
+
# b : apple:0
|
240
|
+
# a_1 : apple:0
|
241
|
+
# b_1 : apple:0
|
242
|
+
# matmul:0 : apple:1
|
243
|
+
# [[22.0, 28.0], [49.0, 64.0]] => nil
|
244
|
+
```
|
245
|
+
|
246
|
+
To force the Ruby evaluator even when the OpenCL evaluator is loaded, you can use:
|
247
|
+
|
248
|
+
```ruby
|
249
|
+
ts.device('/ts:ruby:cpu') do
|
250
|
+
# put ops here
|
251
|
+
end
|
221
252
|
```
|
222
253
|
|
223
254
|
Note that the OpenCL evaluator provides a speedup if you are using large tensors; tensors that only use scalars, like the linear regression sample, will actually be slower.
|
data/benchmark/benchmark.rb
CHANGED
@@ -24,7 +24,10 @@ seed = 5
|
|
24
24
|
tf.set_random_seed(seed)
|
25
25
|
|
26
26
|
SHAPES = [32, 32]
|
27
|
-
|
27
|
+
|
28
|
+
sess = tf.session(:ruby_evaluator)
|
29
|
+
|
30
|
+
a = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
28
31
|
a_int = tf.constant([
|
29
32
|
[1, 2, 3, 4, 4, 1, 4, 8, 3, 4, 1, 1],
|
30
33
|
[2, 2, 3, 4, 4, 1, 1, 1, 1, 4, 1, 1],
|
@@ -40,11 +43,11 @@ a_int = tf.constant([
|
|
40
43
|
[4, 2, 3, 4, 0, 1, 1, 0, 0, 2, 1, 2],
|
41
44
|
])
|
42
45
|
|
43
|
-
b = tf.random_uniform(SHAPES)
|
46
|
+
b = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
44
47
|
|
45
|
-
c = tf.random_uniform(SHAPES)
|
48
|
+
c = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
46
49
|
|
47
|
-
d = tf.random_uniform(SHAPES)
|
50
|
+
d = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
48
51
|
|
49
52
|
p = tf.placeholder('float')
|
50
53
|
q = tf.placeholder('float')
|
@@ -59,13 +62,11 @@ softmax = tf.nn.softmax(a)
|
|
59
62
|
|
60
63
|
puts TensorStream::Evaluator.default_evaluators
|
61
64
|
|
62
|
-
sess = tf.session(:ruby_evaluator)
|
63
65
|
sess2 = tf.session
|
64
66
|
|
65
67
|
puts `cat /proc/cpuinfo | grep "model name" | head -1`
|
66
68
|
device = TensorStream::Evaluator::OpenclEvaluator.default_device.native_device
|
67
69
|
puts "OpenCL device #{device.platform.to_s} #{device.name}"
|
68
|
-
|
69
70
|
Benchmark.bmbm do |x|
|
70
71
|
x.report("pure ruby ooo matmul :") { 100.times do sess.run(out_of_order) end }
|
71
72
|
x.report("opencl ooo matmul :") { 100.times do sess2.run(out_of_order) end }
|
data/benchmark_intel.txt
CHANGED
@@ -1,21 +1,36 @@
|
|
1
|
-
|
1
|
+
TensorStream::Evaluator::OpenclEvaluator
|
2
|
+
TensorStream::Evaluator::RubyEvaluator
|
3
|
+
model name : Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz
|
4
|
+
OpenCL device Intel Gen OCL Driver Intel(R) HD Graphics Skylake ULT GT2
|
2
5
|
Rehearsal --------------------------------------------------------------
|
3
|
-
pure ruby
|
4
|
-
opencl
|
5
|
-
pure ruby
|
6
|
-
opencl
|
7
|
-
pure ruby
|
8
|
-
opencl
|
9
|
-
pure ruby
|
10
|
-
opencl
|
11
|
-
|
6
|
+
pure ruby ooo matmul : 1.800000 0.000000 1.800000 ( 1.803752)
|
7
|
+
opencl ooo matmul : 0.520000 0.050000 0.570000 ( 0.630992)
|
8
|
+
pure ruby softmax : 0.300000 0.000000 0.300000 ( 0.303185)
|
9
|
+
opencl softmax : 0.180000 0.010000 0.190000 ( 0.200246)
|
10
|
+
pure ruby matmul : 0.860000 0.010000 0.870000 ( 0.869387)
|
11
|
+
opencl matmul : 0.260000 0.020000 0.280000 ( 0.335164)
|
12
|
+
pure ruby : 2.960000 0.020000 2.980000 ( 2.980800)
|
13
|
+
opencl : 1.050000 0.090000 1.140000 ( 1.258354)
|
14
|
+
pure ruby single function: 0.460000 0.000000 0.460000 ( 0.464543)
|
15
|
+
opencl singlefunction: 0.570000 0.020000 0.590000 ( 0.590300)
|
16
|
+
pure ruby pow float: 0.120000 0.000000 0.120000 ( 0.123025)
|
17
|
+
opencl pow float: 0.290000 0.010000 0.300000 ( 0.316175)
|
18
|
+
pure ruby pow int: 0.020000 0.000000 0.020000 ( 0.021570)
|
19
|
+
opencl pow int: 0.180000 0.000000 0.180000 ( 0.194088)
|
20
|
+
----------------------------------------------------- total: 9.800000sec
|
12
21
|
|
13
22
|
user system total real
|
14
|
-
pure ruby
|
15
|
-
opencl
|
16
|
-
pure ruby
|
17
|
-
opencl
|
18
|
-
pure ruby
|
19
|
-
opencl
|
20
|
-
pure ruby
|
21
|
-
opencl
|
23
|
+
pure ruby ooo matmul : 1.860000 0.000000 1.860000 ( 1.866387)
|
24
|
+
opencl ooo matmul : 0.410000 0.040000 0.450000 ( 0.505565)
|
25
|
+
pure ruby softmax : 0.300000 0.000000 0.300000 ( 0.298407)
|
26
|
+
opencl softmax : 0.120000 0.000000 0.120000 ( 0.128033)
|
27
|
+
pure ruby matmul : 0.830000 0.000000 0.830000 ( 0.836471)
|
28
|
+
opencl matmul : 0.240000 0.010000 0.250000 ( 0.269629)
|
29
|
+
pure ruby : 2.950000 0.000000 2.950000 ( 2.947306)
|
30
|
+
opencl : 0.930000 0.100000 1.030000 ( 1.205344)
|
31
|
+
pure ruby single function: 0.650000 0.000000 0.650000 ( 0.642834)
|
32
|
+
opencl singlefunction: 0.840000 0.040000 0.880000 ( 1.097814)
|
33
|
+
pure ruby pow float: 0.140000 0.000000 0.140000 ( 0.140097)
|
34
|
+
opencl pow float: 0.190000 0.010000 0.200000 ( 0.269772)
|
35
|
+
pure ruby pow int: 0.030000 0.000000 0.030000 ( 0.030491)
|
36
|
+
opencl pow int: 0.040000 0.010000 0.050000 ( 0.084335)
|
data/benchmark_nvidia.txt
CHANGED
@@ -1,33 +1,36 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
1
|
+
TensorStream::Evaluator::OpenclEvaluator
|
2
|
+
TensorStream::Evaluator::RubyEvaluator
|
3
|
+
model name : Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz
|
4
|
+
OpenCL device NVIDIA CUDA GeForce GTX 950M
|
5
|
+
Rehearsal --------------------------------------------------------------
|
6
|
+
pure ruby ooo matmul : 1.670000 0.010000 1.680000 ( 1.682059)
|
7
|
+
opencl ooo matmul : 0.100000 0.100000 0.200000 ( 0.220002)
|
8
|
+
pure ruby softmax : 0.380000 0.010000 0.390000 ( 0.377827)
|
9
|
+
opencl softmax : 0.040000 0.000000 0.040000 ( 0.040750)
|
10
|
+
pure ruby matmul : 1.000000 0.010000 1.010000 ( 1.013795)
|
11
|
+
opencl matmul : 0.040000 0.000000 0.040000 ( 0.032285)
|
12
|
+
pure ruby : 3.460000 0.010000 3.470000 ( 3.486048)
|
13
|
+
opencl : 0.320000 0.020000 0.340000 ( 0.326977)
|
14
|
+
pure ruby single function: 0.460000 0.000000 0.460000 ( 0.460433)
|
15
|
+
opencl singlefunction: 0.130000 0.000000 0.130000 ( 0.130273)
|
16
|
+
pure ruby pow float: 0.110000 0.000000 0.110000 ( 0.115466)
|
17
|
+
opencl pow float: 0.040000 0.010000 0.050000 ( 0.030290)
|
18
|
+
pure ruby pow int: 0.020000 0.000000 0.020000 ( 0.023065)
|
19
|
+
opencl pow int: 0.040000 0.010000 0.050000 ( 0.044086)
|
20
|
+
----------------------------------------------------- total: 7.990000sec
|
18
21
|
|
19
|
-
|
20
|
-
pure ruby ooo matmul
|
21
|
-
opencl ooo matmul
|
22
|
-
pure ruby softmax :
|
23
|
-
opencl softmax :
|
24
|
-
pure ruby matmul :
|
25
|
-
opencl matmul :
|
26
|
-
pure ruby :
|
27
|
-
opencl :
|
28
|
-
pure ruby single function:
|
29
|
-
opencl singlefunction:
|
30
|
-
pure ruby pow float:
|
31
|
-
opencl pow float:
|
32
|
-
pure ruby pow int:
|
33
|
-
opencl pow int:
|
22
|
+
user system total real
|
23
|
+
pure ruby ooo matmul : 1.790000 0.000000 1.790000 ( 1.794305)
|
24
|
+
opencl ooo matmul : 0.050000 0.000000 0.050000 ( 0.049030)
|
25
|
+
pure ruby softmax : 0.300000 0.000000 0.300000 ( 0.305664)
|
26
|
+
opencl softmax : 0.030000 0.000000 0.030000 ( 0.021897)
|
27
|
+
pure ruby matmul : 0.810000 0.000000 0.810000 ( 0.805583)
|
28
|
+
opencl matmul : 0.030000 0.000000 0.030000 ( 0.024358)
|
29
|
+
pure ruby : 2.870000 0.010000 2.880000 ( 2.881779)
|
30
|
+
opencl : 0.170000 0.000000 0.170000 ( 0.173036)
|
31
|
+
pure ruby single function: 0.400000 0.000000 0.400000 ( 0.398390)
|
32
|
+
opencl singlefunction: 0.120000 0.000000 0.120000 ( 0.117482)
|
33
|
+
pure ruby pow float: 0.100000 0.000000 0.100000 ( 0.099471)
|
34
|
+
opencl pow float: 0.030000 0.000000 0.030000 ( 0.025039)
|
35
|
+
pure ruby pow int: 0.030000 0.000000 0.030000 ( 0.028251)
|
36
|
+
opencl pow int: 0.040000 0.000000 0.040000 ( 0.031384)
|
@@ -97,8 +97,13 @@ module TensorStream
|
|
97
97
|
resolved_inputs = tensor.inputs.map do |i|
|
98
98
|
next if i.nil?
|
99
99
|
if @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
|
100
|
+
cache_key = "#{tensor.graph.object_id}_#{i.name}:#{object_id}"
|
101
|
+
next @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
|
102
|
+
|
100
103
|
result = @session.delegate_to_evaluator(i, @context, execution_context)
|
101
|
-
convert_from_buffer(i, result)
|
104
|
+
convert_from_buffer(i, result).tap do |buffer|
|
105
|
+
@context[:_cache][cache_key] = buffer if i.is_const
|
106
|
+
end
|
102
107
|
else
|
103
108
|
prepare_input(i, execution_context, op_options)
|
104
109
|
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
|
2
|
+
__kernel void ceil_<%= dtype %>(const int M, const int N, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
|
3
|
+
// Get the index of the current element to be processed
|
4
|
+
const int globalRow = get_global_id(0); // Row ID of C (0..M)
|
5
|
+
const int globalCol = get_global_id(1); // Col ID of C (0..N)
|
6
|
+
|
7
|
+
C[globalRow * N + globalCol] = ceil(A[globalRow * N + globalCol]);
|
8
|
+
}
|
@@ -0,0 +1,8 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
|
2
|
+
__kernel void floor_<%= dtype %>(const int M, const int N, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
|
3
|
+
// Get the index of the current element to be processed
|
4
|
+
const int globalRow = get_global_id(0); // Row ID of C (0..M)
|
5
|
+
const int globalCol = get_global_id(1); // Col ID of C (0..N)
|
6
|
+
|
7
|
+
C[globalRow * N + globalCol] = floor(A[globalRow * N + globalCol]);
|
8
|
+
}
|
@@ -20,7 +20,14 @@ module TensorStream
|
|
20
20
|
return buffer[0]
|
21
21
|
end
|
22
22
|
|
23
|
+
if dirty
|
24
|
+
op.command_queue.enqueue_read_buffer(cl_buffer, buffer, event_wait_list: [op].compact)
|
25
|
+
op.command_queue.finish
|
26
|
+
self.dirty = false
|
27
|
+
end
|
28
|
+
|
23
29
|
result = buffer.reshape(*shape.reverse).to_a
|
30
|
+
|
24
31
|
if data_type == :boolean
|
25
32
|
result = process_function_op(result, ->(a, _b) { a != 0 })
|
26
33
|
end
|
@@ -51,7 +51,7 @@ module TensorStream
|
|
51
51
|
|
52
52
|
def self.fetch_device(query = [])
|
53
53
|
devices = query_devices_with_score
|
54
|
-
platform_devices = devices.select { |d| d[0].platform.to_s.downcase =~ /#{query[0].downcase}/ }
|
54
|
+
platform_devices = devices.select { |d| d[0].platform.to_s.gsub(' ','_').downcase =~ /#{query[0].downcase}/ }
|
55
55
|
opencl_to_device(platform_devices[[query[1].to_i, platform_devices.size - 1].min])
|
56
56
|
end
|
57
57
|
|
@@ -215,9 +215,7 @@ module TensorStream
|
|
215
215
|
|
216
216
|
def _run(tensor, execution_context)
|
217
217
|
return tensor if tensor.is_a?(OpenCLBuffer)
|
218
|
-
if tensor.is_a?(Array) && tensor.size
|
219
|
-
return tensor.map { |t| _run(t, execution_context) }
|
220
|
-
end
|
218
|
+
return tensor.map { |t| _run(t, execution_context) } if tensor.is_a?(Array) && !tensor.size.empty? && tensor[0].is_a?(Tensor)
|
221
219
|
|
222
220
|
tensor = tensor.call if tensor.is_a?(Proc)
|
223
221
|
|
@@ -246,12 +244,11 @@ module TensorStream
|
|
246
244
|
tensor.buffer
|
247
245
|
end
|
248
246
|
|
249
|
-
register_op :
|
250
|
-
execute_func('log', tensor, inputs[0], context)
|
247
|
+
register_op :no_op do |_context, _tensor, _inputs|
|
251
248
|
end
|
252
249
|
|
253
|
-
register_op :
|
254
|
-
execute_func('
|
250
|
+
register_op :log do |context, tensor, inputs|
|
251
|
+
execute_func('log', tensor, inputs[0], context)
|
255
252
|
end
|
256
253
|
|
257
254
|
register_op :cond do |context, tensor, inputs|
|
@@ -347,7 +344,7 @@ module TensorStream
|
|
347
344
|
end
|
348
345
|
end
|
349
346
|
|
350
|
-
%i[sign exp tan cos abs sqrt negate square reciprocal tanh tanh_grad sigmoid log1p round].each do |op|
|
347
|
+
%i[sign exp tan sin cos abs sqrt negate square reciprocal tanh tanh_grad sigmoid log1p round floor ceil].each do |op|
|
351
348
|
register_op op, noop: true do |context, tensor, inputs|
|
352
349
|
execute_func(op.to_s, tensor, inputs[0], context)
|
353
350
|
end
|
@@ -385,30 +382,6 @@ module TensorStream
|
|
385
382
|
output_buffer
|
386
383
|
end
|
387
384
|
|
388
|
-
register_op :truncate do |context, tensor, inputs|
|
389
|
-
a, b = inputs
|
390
|
-
if a.shape.size.zero?
|
391
|
-
a
|
392
|
-
else
|
393
|
-
input_b = read_final_result(b)
|
394
|
-
if a.shape == input_b
|
395
|
-
a
|
396
|
-
else
|
397
|
-
input_a = read_final_result(a)
|
398
|
-
if input_b == []
|
399
|
-
if a.buffer.size == 1
|
400
|
-
a.shape = input_b
|
401
|
-
a
|
402
|
-
else
|
403
|
-
wrap_opencl(a.buffer[0], data_type: a.data_type, name: tensor.name)
|
404
|
-
end
|
405
|
-
else
|
406
|
-
wrap_opencl(truncate(input_a, input_b), data_type: a.data_type, name: tensor.name)
|
407
|
-
end
|
408
|
-
end
|
409
|
-
end
|
410
|
-
end
|
411
|
-
|
412
385
|
register_op :check_numerics, noop: true do |context, tensor, inputs|
|
413
386
|
a = complete_eval(inputs[0], context)
|
414
387
|
name = tensor.options[:name]
|
@@ -433,6 +406,30 @@ module TensorStream
|
|
433
406
|
end
|
434
407
|
end
|
435
408
|
|
409
|
+
register_op :truncate do |_context, tensor, inputs|
|
410
|
+
a, b = inputs
|
411
|
+
if a.shape.size.zero?
|
412
|
+
a
|
413
|
+
else
|
414
|
+
input_b = read_final_result(b)
|
415
|
+
if a.shape == input_b
|
416
|
+
a
|
417
|
+
else
|
418
|
+
input_a = read_final_result(a)
|
419
|
+
if input_b == []
|
420
|
+
if a.buffer.size == 1
|
421
|
+
a.shape = input_b
|
422
|
+
a
|
423
|
+
else
|
424
|
+
wrap_opencl(a.buffer[0], data_type: a.data_type, name: tensor.name)
|
425
|
+
end
|
426
|
+
else
|
427
|
+
wrap_opencl(truncate(input_a, input_b), data_type: a.data_type, name: tensor.name)
|
428
|
+
end
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
432
|
+
|
436
433
|
register_op :print do |context, tensor, inputs|
|
437
434
|
a, b = inputs
|
438
435
|
input_b = complete_eval(b, context)
|
@@ -610,12 +607,16 @@ module TensorStream
|
|
610
607
|
buffer = complete_eval(b, child_context)
|
611
608
|
|
612
609
|
if assign.buffer
|
613
|
-
buffer = type_cast(buffer, assign.data_type, name: "#{tensor.name}/cast_#{tensor.name}_#{tensor.data_type}")
|
610
|
+
# buffer = type_cast(buffer, assign.data_type, name: "#{tensor.name}/cast_#{tensor.name}_#{tensor.data_type}")
|
614
611
|
if assign.buffer.cl_buffer != buffer.cl_buffer
|
615
612
|
assign.buffer.op = _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: [buffer.op, assign.buffer.op])
|
613
|
+
else
|
614
|
+
assign.buffer.op = buffer.op
|
616
615
|
end
|
617
616
|
else
|
618
|
-
|
617
|
+
value = read_final_result(buffer)
|
618
|
+
assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
|
619
|
+
assign.value = value
|
619
620
|
end
|
620
621
|
assign.buffer.dirty = true
|
621
622
|
assign.buffer
|
@@ -96,6 +96,10 @@ module TensorStream
|
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
99
|
+
register_op(:no_op, no_eval: true) do |_context, _tensor, inputs|
|
100
|
+
inputs
|
101
|
+
end
|
102
|
+
|
99
103
|
register_op(:const) do |context, _tensor, inputs|
|
100
104
|
inputs[0]
|
101
105
|
end
|
@@ -232,6 +236,14 @@ module TensorStream
|
|
232
236
|
call_op(:sqrt, inputs[0], context, ->(t, _b) { Math.sqrt(t) })
|
233
237
|
end
|
234
238
|
|
239
|
+
register_op :floor, no_eval: true do |context, _tensor, inputs|
|
240
|
+
call_op(:floor, inputs[0], context, ->(t, _b) { t.floor })
|
241
|
+
end
|
242
|
+
|
243
|
+
register_op :ceil, no_eval: true do |context, _tensor, inputs|
|
244
|
+
call_op(:ceil, inputs[0], context, ->(t, _b) { t.ceil })
|
245
|
+
end
|
246
|
+
|
235
247
|
register_op :square, no_eval: true do |context, tensor, inputs|
|
236
248
|
call_op(:square, inputs[0], context, ->(t, _b) { t * t })
|
237
249
|
end
|
data/lib/tensor_stream/graph.rb
CHANGED
@@ -120,8 +120,15 @@ module TensorStream
|
|
120
120
|
add_node(node)
|
121
121
|
end
|
122
122
|
|
123
|
-
def control_dependencies(
|
124
|
-
|
123
|
+
def control_dependencies(control_inputs = [], &block)
|
124
|
+
Thread.current["ts_graph_#{object_id}"] ||= {}
|
125
|
+
Thread.current["ts_graph_#{object_id}"][:control_dependencies] ||= []
|
126
|
+
Thread.current["ts_graph_#{object_id}"][:control_dependencies] << Operation.new(:no_op, *control_inputs)
|
127
|
+
begin
|
128
|
+
block.call
|
129
|
+
ensure
|
130
|
+
Thread.current["ts_graph_#{object_id}"][:control_dependencies].pop
|
131
|
+
end
|
125
132
|
end
|
126
133
|
|
127
134
|
def enable_eager_execution
|
@@ -178,6 +185,12 @@ module TensorStream
|
|
178
185
|
graph_thread_storage[:current_scope].join('/')
|
179
186
|
end
|
180
187
|
|
188
|
+
def get_dependency_scope
|
189
|
+
graph_thread_storage = Thread.current["ts_graph_#{object_id}"]
|
190
|
+
return nil if graph_thread_storage.nil? || graph_thread_storage[:control_dependencies].nil?
|
191
|
+
graph_thread_storage[:control_dependencies].last
|
192
|
+
end
|
193
|
+
|
181
194
|
def get_device_scope
|
182
195
|
graph_thread_storage = Thread.current["ts_graph_#{object_id}"]
|
183
196
|
return :default if graph_thread_storage.nil? || graph_thread_storage[:default_device].nil?
|
@@ -2,7 +2,12 @@ module TensorStream
|
|
2
2
|
# module that contains helper functions useful for ops
|
3
3
|
module OpHelper
|
4
4
|
def _op(code, t_a, t_b = nil, options = {})
|
5
|
-
Operation.new(code.to_sym, t_a, t_b, options)
|
5
|
+
op = Operation.new(code.to_sym, t_a, t_b, options)
|
6
|
+
if !TensorStream.get_default_graph.get_dependency_scope.nil?
|
7
|
+
i_op(:identity, op, TensorStream.get_default_graph.get_dependency_scope, name: [op.name, 'tuple', 'control_dependency'].join('/'))
|
8
|
+
else
|
9
|
+
op
|
10
|
+
end
|
6
11
|
end
|
7
12
|
|
8
13
|
# same as op but with a marker that it was internally generated
|
@@ -4,7 +4,15 @@ module TensorStream
|
|
4
4
|
attr_accessor :name, :operation, :inputs, :rank, :options
|
5
5
|
attr_reader :outputs
|
6
6
|
|
7
|
-
def initialize(operation,
|
7
|
+
def initialize(operation, *args)
|
8
|
+
options = if args.last.is_a?(Hash)
|
9
|
+
args.pop
|
10
|
+
else
|
11
|
+
{}
|
12
|
+
end
|
13
|
+
|
14
|
+
inputs = args
|
15
|
+
|
8
16
|
setup_initial_state(options)
|
9
17
|
|
10
18
|
@operation = operation
|
@@ -15,7 +23,7 @@ module TensorStream
|
|
15
23
|
|
16
24
|
@options = options
|
17
25
|
|
18
|
-
@inputs =
|
26
|
+
@inputs = inputs.map { |i| options[:preserve_params_type] ? i : TensorStream.convert_to_tensor(i) }
|
19
27
|
@data_type = set_data_type(options[:data_type])
|
20
28
|
@is_const = infer_const
|
21
29
|
@shape = TensorShape.new(infer_shape)
|
data/lib/tensor_stream/ops.rb
CHANGED
@@ -146,6 +146,20 @@ module TensorStream
|
|
146
146
|
_op(:ones, shape, nil, data_type: dtype, name: name)
|
147
147
|
end
|
148
148
|
|
149
|
+
##
|
150
|
+
# Returns element-wise largest integer not greater than x.
|
151
|
+
def floor(input, name: nil)
|
152
|
+
check_allowed_types(input, FLOATING_POINT_TYPES)
|
153
|
+
_op(:floor, input, name: name)
|
154
|
+
end
|
155
|
+
|
156
|
+
##
|
157
|
+
# Returns element-wise smallest integer not less than x.
|
158
|
+
def ceil(input, name: nil)
|
159
|
+
check_allowed_types(input, FLOATING_POINT_TYPES)
|
160
|
+
_op(:ceil, input, name: name)
|
161
|
+
end
|
162
|
+
|
149
163
|
##
|
150
164
|
# Returns the truth value of (x < y) element-wise.
|
151
165
|
# This operation supports broadcasting
|
data/lib/tensor_stream/tensor.rb
CHANGED
@@ -55,80 +55,80 @@ module TensorStream
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def +(other)
|
58
|
-
_a,
|
59
|
-
|
58
|
+
_a, other = TensorStream.check_data_types(self, other)
|
59
|
+
_op(:add, self, other)
|
60
60
|
end
|
61
61
|
|
62
62
|
def [](index)
|
63
|
-
|
63
|
+
_op(:index, self, index)
|
64
64
|
end
|
65
65
|
|
66
66
|
def *(other)
|
67
|
-
TensorStream.check_data_types(self, other)
|
68
|
-
|
67
|
+
_a, other = TensorStream.check_data_types(self, other)
|
68
|
+
_op(:mul, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
69
69
|
end
|
70
70
|
|
71
71
|
def **(other)
|
72
|
-
TensorStream.check_data_types(self, other)
|
73
|
-
|
72
|
+
_a, other = TensorStream.check_data_types(self, other)
|
73
|
+
_op(:pow, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
74
74
|
end
|
75
75
|
|
76
76
|
def /(other)
|
77
|
-
TensorStream.check_data_types(self, other)
|
78
|
-
|
77
|
+
_a, other = TensorStream.check_data_types(self, other)
|
78
|
+
_op(:div, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
79
79
|
end
|
80
80
|
|
81
81
|
def -(other)
|
82
|
-
TensorStream.check_data_types(self, other)
|
83
|
-
|
82
|
+
_a, other = TensorStream.check_data_types(self, other)
|
83
|
+
_op(:sub, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
84
84
|
end
|
85
85
|
|
86
86
|
def -@
|
87
|
-
|
87
|
+
_op(:negate, self, nil)
|
88
88
|
end
|
89
89
|
|
90
90
|
def ==(other)
|
91
|
-
TensorStream.check_data_types(self, other)
|
91
|
+
_a, other = TensorStream.check_data_types(self, other)
|
92
92
|
_op(:equal, self, other)
|
93
93
|
end
|
94
94
|
|
95
95
|
def <(other)
|
96
|
-
TensorStream.check_data_types(self, other)
|
96
|
+
_a, other = TensorStream.check_data_types(self, other)
|
97
97
|
_op(:less, self, other)
|
98
98
|
end
|
99
99
|
|
100
100
|
def !=(other)
|
101
|
-
TensorStream.check_data_types(self, other)
|
101
|
+
_a, other = TensorStream.check_data_types(self, other)
|
102
102
|
_op(:not_equal, self, other)
|
103
103
|
end
|
104
104
|
|
105
105
|
def >(other)
|
106
|
-
TensorStream.check_data_types(self, other)
|
106
|
+
_a, other = TensorStream.check_data_types(self, other)
|
107
107
|
_op(:greater, self, other)
|
108
108
|
end
|
109
109
|
|
110
110
|
def >=(other)
|
111
|
-
TensorStream.check_data_types(self, other)
|
111
|
+
_a, other = TensorStream.check_data_types(self, other)
|
112
112
|
_op(:greater_equal, self, other)
|
113
113
|
end
|
114
114
|
|
115
115
|
def <=(other)
|
116
|
-
TensorStream.check_data_types(self, other)
|
116
|
+
_a, other = TensorStream.check_data_types(self, other)
|
117
117
|
_op(:less_equal, self, other)
|
118
118
|
end
|
119
119
|
|
120
120
|
def and(other)
|
121
|
-
TensorStream.check_data_types(self, other)
|
121
|
+
_a, other = TensorStream.check_data_types(self, other)
|
122
122
|
_op(:logical_and, self, other)
|
123
123
|
end
|
124
124
|
|
125
125
|
def matmul(other)
|
126
|
-
TensorStream.check_data_types(self, other)
|
126
|
+
_a, other = TensorStream.check_data_types(self, other)
|
127
127
|
_op(:matmul, self, other)
|
128
128
|
end
|
129
129
|
|
130
130
|
def dot(other)
|
131
|
-
TensorStream.check_data_types(self, other)
|
131
|
+
_a, other = TensorStream.check_data_types(self, other)
|
132
132
|
_op(:matmul, self, other)
|
133
133
|
end
|
134
134
|
|
data/lib/tensor_stream/utils.rb
CHANGED
@@ -149,6 +149,11 @@ module TensorStream
|
|
149
149
|
Graph.get_default_graph.get_collection(name, options)
|
150
150
|
end
|
151
151
|
|
152
|
+
def assign(ref, value, name: nil)
|
153
|
+
raise "#{ref.name} not a variable" unless ref.is_a?(Variable)
|
154
|
+
ref.assign(value, name: name)
|
155
|
+
end
|
156
|
+
|
152
157
|
def placeholder(dtype, shape: nil, name: nil)
|
153
158
|
TensorStream::Placeholder.new(dtype, nil, shape, name: name)
|
154
159
|
end
|
@@ -169,6 +174,10 @@ module TensorStream
|
|
169
174
|
TensorStream.get_default_graph.random_seed = seed
|
170
175
|
end
|
171
176
|
|
177
|
+
def control_dependencies(control_inputs, &block)
|
178
|
+
TensorStream.get_default_graph.control_dependencies(control_inputs, &block)
|
179
|
+
end
|
180
|
+
|
172
181
|
def convert_to_tensor(value, dtype: nil, name: nil, preferred_dtype: nil)
|
173
182
|
return convert_to_tensor(value.call) if value.is_a?(Proc)
|
174
183
|
|
@@ -32,21 +32,21 @@ module TensorStream
|
|
32
32
|
assign(init_op)
|
33
33
|
end
|
34
34
|
|
35
|
-
def assign(value)
|
36
|
-
|
35
|
+
def assign(value, name: nil)
|
36
|
+
_a, value = TensorStream.check_data_types(self, value)
|
37
|
+
Operation.new(:assign, self, value, name: name)
|
37
38
|
end
|
38
39
|
|
39
40
|
def read_value
|
40
|
-
if buffer
|
41
|
+
if buffer
|
41
42
|
@value = buffer.to_ruby
|
42
|
-
buffer.dirty = false
|
43
43
|
end
|
44
44
|
|
45
45
|
@value
|
46
46
|
end
|
47
47
|
|
48
48
|
def assign_add(value)
|
49
|
-
value =
|
49
|
+
_a, value = TensorStream.check_data_types(self, value)
|
50
50
|
Operation.new(:assign_add, self, value, data_type: data_type)
|
51
51
|
end
|
52
52
|
|
@@ -55,6 +55,7 @@ module TensorStream
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def assign_sub(value)
|
58
|
+
_a, value = TensorStream.check_data_types(self, value)
|
58
59
|
Operation.new(:assign_sub, self, value)
|
59
60
|
end
|
60
61
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Emmanuel Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -248,10 +248,12 @@ files:
|
|
248
248
|
- lib/tensor_stream/evaluator/opencl/kernels/argmax.cl
|
249
249
|
- lib/tensor_stream/evaluator/opencl/kernels/argmin.cl
|
250
250
|
- lib/tensor_stream/evaluator/opencl/kernels/cast.cl
|
251
|
+
- lib/tensor_stream/evaluator/opencl/kernels/ceil.cl
|
251
252
|
- lib/tensor_stream/evaluator/opencl/kernels/cond.cl.erb
|
252
253
|
- lib/tensor_stream/evaluator/opencl/kernels/cos.cl
|
253
254
|
- lib/tensor_stream/evaluator/opencl/kernels/div.cl.erb
|
254
255
|
- lib/tensor_stream/evaluator/opencl/kernels/exp.cl
|
256
|
+
- lib/tensor_stream/evaluator/opencl/kernels/floor.cl
|
255
257
|
- lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
|
256
258
|
- lib/tensor_stream/evaluator/opencl/kernels/log.cl
|
257
259
|
- lib/tensor_stream/evaluator/opencl/kernels/log1p.cl
|
@@ -340,7 +342,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
340
342
|
version: '0'
|
341
343
|
requirements: []
|
342
344
|
rubyforge_project:
|
343
|
-
rubygems_version: 2.6.
|
345
|
+
rubygems_version: 2.6.11
|
344
346
|
signing_key:
|
345
347
|
specification_version: 4
|
346
348
|
summary: A Pure ruby tensorflow implementation
|