tensor_stream 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +32 -1
- data/benchmark/benchmark.rb +7 -6
- data/benchmark_intel.txt +33 -18
- data/benchmark_nvidia.txt +35 -32
- data/lib/tensor_stream/evaluator/base_evaluator.rb +6 -1
- data/lib/tensor_stream/evaluator/opencl/kernels/ceil.cl +8 -0
- data/lib/tensor_stream/evaluator/opencl/kernels/floor.cl +8 -0
- data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb +7 -0
- data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb +36 -35
- data/lib/tensor_stream/evaluator/ruby_evaluator.rb +12 -0
- data/lib/tensor_stream/graph.rb +15 -2
- data/lib/tensor_stream/helpers/op_helper.rb +6 -1
- data/lib/tensor_stream/math_gradients.rb +3 -0
- data/lib/tensor_stream/operation.rb +10 -2
- data/lib/tensor_stream/ops.rb +14 -0
- data/lib/tensor_stream/session.rb +0 -3
- data/lib/tensor_stream/tensor.rb +21 -21
- data/lib/tensor_stream/utils.rb +9 -0
- data/lib/tensor_stream/variable.rb +6 -5
- data/lib/tensor_stream/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f14dd6388d5cdd10827cebde01a9cbca0686b653
|
4
|
+
data.tar.gz: d2ccba35defe6474a21bd75fcb09f8d49ce42e79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 244026aae6ce13d8e932deada3c169b5320df517eb5dd7db5ea8c06c1cdedc9c9829d7f149261602f502c284ffe65ae831845016d9425250b0ad9d7d66fc6a0e
|
7
|
+
data.tar.gz: 91811c88a464604f5ca1e776f86d0342dc316ee016d920d40d8a228e2978f6a275783c5613a97cf21af1ba9256c951ad3db777b66fae20e5d1f8f9659f170301
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
|
5
5
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
6
6
|
|
7
|
+
## [0.5.1] - 2018-06-27
|
8
|
+
### Added
|
9
|
+
- Added support for control_dependencies
|
10
|
+
- [NEW OP] floor, ceil
|
11
|
+
|
12
|
+
### Fixes
|
13
|
+
- fixed assigning a value to a variable sometimes not working
|
14
|
+
- variable assignment now checks for data types properly
|
15
|
+
|
7
16
|
## [0.5.0] - 2018-06-25
|
8
17
|
### Added
|
9
18
|
- [OpenCL] boolean types now use short by default
|
data/README.md
CHANGED
@@ -214,10 +214,41 @@ sess.run(....) # do stuff
|
|
214
214
|
|
215
215
|
```
|
216
216
|
|
217
|
-
You can manually place operations using ts.device
|
217
|
+
You can manually place operations using ts.device, e.g.:
|
218
218
|
|
219
219
|
```ruby
|
220
|
+
ts = TensorStream
|
221
|
+
# Creates a graph. Place these ops on the first OpenCL CPU device
|
220
222
|
|
223
|
+
a, b = ts.device('/cpu:0') do
|
224
|
+
a = ts.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape: [2, 3], name: 'a')
|
225
|
+
b = ts.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape: [3, 2], name: 'b')
|
226
|
+
[a, b]
|
227
|
+
end
|
228
|
+
|
229
|
+
c = ts.device('/device:GPU:0') do
|
230
|
+
ts.matmul(a, b)
|
231
|
+
end
|
232
|
+
|
233
|
+
# Creates a session with log_device_placement set to true.
|
234
|
+
sess = ts.session(log_device_placement: true)
|
235
|
+
# Runs the op.
|
236
|
+
print(sess.run(c))
|
237
|
+
|
238
|
+
# a : apple:0
|
239
|
+
# b : apple:0
|
240
|
+
# a_1 : apple:0
|
241
|
+
# b_1 : apple:0
|
242
|
+
# matmul:0 : apple:1
|
243
|
+
# [[22.0, 28.0], [49.0, 64.0]] => nil
|
244
|
+
```
|
245
|
+
|
246
|
+
To force the ruby evaluator even with the OpenCL evaluator loaded you can use:
|
247
|
+
|
248
|
+
```ruby
|
249
|
+
ts.device('/ts:ruby:cpu') do
|
250
|
+
# put ops here
|
251
|
+
end
|
221
252
|
```
|
222
253
|
|
223
254
|
Note that the OpenCL evaluator provides a speedup if you are using large tensors; graphs whose tensors hold only scalars, like the linear regression sample, will actually be slower.
|
data/benchmark/benchmark.rb
CHANGED
@@ -24,7 +24,10 @@ seed = 5
|
|
24
24
|
tf.set_random_seed(seed)
|
25
25
|
|
26
26
|
SHAPES = [32, 32]
|
27
|
-
|
27
|
+
|
28
|
+
sess = tf.session(:ruby_evaluator)
|
29
|
+
|
30
|
+
a = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
28
31
|
a_int = tf.constant([
|
29
32
|
[1, 2, 3, 4, 4, 1, 4, 8, 3, 4, 1, 1],
|
30
33
|
[2, 2, 3, 4, 4, 1, 1, 1, 1, 4, 1, 1],
|
@@ -40,11 +43,11 @@ a_int = tf.constant([
|
|
40
43
|
[4, 2, 3, 4, 0, 1, 1, 0, 0, 2, 1, 2],
|
41
44
|
])
|
42
45
|
|
43
|
-
b = tf.random_uniform(SHAPES)
|
46
|
+
b = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
44
47
|
|
45
|
-
c = tf.random_uniform(SHAPES)
|
48
|
+
c = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
46
49
|
|
47
|
-
d = tf.random_uniform(SHAPES)
|
50
|
+
d = tf.constant(sess.run(tf.random_uniform(SHAPES)))
|
48
51
|
|
49
52
|
p = tf.placeholder('float')
|
50
53
|
q = tf.placeholder('float')
|
@@ -59,13 +62,11 @@ softmax = tf.nn.softmax(a)
|
|
59
62
|
|
60
63
|
puts TensorStream::Evaluator.default_evaluators
|
61
64
|
|
62
|
-
sess = tf.session(:ruby_evaluator)
|
63
65
|
sess2 = tf.session
|
64
66
|
|
65
67
|
puts `cat /proc/cpuinfo | grep "model name" | head -1`
|
66
68
|
device = TensorStream::Evaluator::OpenclEvaluator.default_device.native_device
|
67
69
|
puts "OpenCL device #{device.platform.to_s} #{device.name}"
|
68
|
-
|
69
70
|
Benchmark.bmbm do |x|
|
70
71
|
x.report("pure ruby ooo matmul :") { 100.times do sess.run(out_of_order) end }
|
71
72
|
x.report("opencl ooo matmul :") { 100.times do sess2.run(out_of_order) end }
|
data/benchmark_intel.txt
CHANGED
@@ -1,21 +1,36 @@
|
|
1
|
-
|
1
|
+
TensorStream::Evaluator::OpenclEvaluator
|
2
|
+
TensorStream::Evaluator::RubyEvaluator
|
3
|
+
model name : Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz
|
4
|
+
OpenCL device Intel Gen OCL Driver Intel(R) HD Graphics Skylake ULT GT2
|
2
5
|
Rehearsal --------------------------------------------------------------
|
3
|
-
pure ruby
|
4
|
-
opencl
|
5
|
-
pure ruby
|
6
|
-
opencl
|
7
|
-
pure ruby
|
8
|
-
opencl
|
9
|
-
pure ruby
|
10
|
-
opencl
|
11
|
-
|
6
|
+
pure ruby ooo matmul : 1.800000 0.000000 1.800000 ( 1.803752)
|
7
|
+
opencl ooo matmul : 0.520000 0.050000 0.570000 ( 0.630992)
|
8
|
+
pure ruby softmax : 0.300000 0.000000 0.300000 ( 0.303185)
|
9
|
+
opencl softmax : 0.180000 0.010000 0.190000 ( 0.200246)
|
10
|
+
pure ruby matmul : 0.860000 0.010000 0.870000 ( 0.869387)
|
11
|
+
opencl matmul : 0.260000 0.020000 0.280000 ( 0.335164)
|
12
|
+
pure ruby : 2.960000 0.020000 2.980000 ( 2.980800)
|
13
|
+
opencl : 1.050000 0.090000 1.140000 ( 1.258354)
|
14
|
+
pure ruby single function: 0.460000 0.000000 0.460000 ( 0.464543)
|
15
|
+
opencl singlefunction: 0.570000 0.020000 0.590000 ( 0.590300)
|
16
|
+
pure ruby pow float: 0.120000 0.000000 0.120000 ( 0.123025)
|
17
|
+
opencl pow float: 0.290000 0.010000 0.300000 ( 0.316175)
|
18
|
+
pure ruby pow int: 0.020000 0.000000 0.020000 ( 0.021570)
|
19
|
+
opencl pow int: 0.180000 0.000000 0.180000 ( 0.194088)
|
20
|
+
----------------------------------------------------- total: 9.800000sec
|
12
21
|
|
13
22
|
user system total real
|
14
|
-
pure ruby
|
15
|
-
opencl
|
16
|
-
pure ruby
|
17
|
-
opencl
|
18
|
-
pure ruby
|
19
|
-
opencl
|
20
|
-
pure ruby
|
21
|
-
opencl
|
23
|
+
pure ruby ooo matmul : 1.860000 0.000000 1.860000 ( 1.866387)
|
24
|
+
opencl ooo matmul : 0.410000 0.040000 0.450000 ( 0.505565)
|
25
|
+
pure ruby softmax : 0.300000 0.000000 0.300000 ( 0.298407)
|
26
|
+
opencl softmax : 0.120000 0.000000 0.120000 ( 0.128033)
|
27
|
+
pure ruby matmul : 0.830000 0.000000 0.830000 ( 0.836471)
|
28
|
+
opencl matmul : 0.240000 0.010000 0.250000 ( 0.269629)
|
29
|
+
pure ruby : 2.950000 0.000000 2.950000 ( 2.947306)
|
30
|
+
opencl : 0.930000 0.100000 1.030000 ( 1.205344)
|
31
|
+
pure ruby single function: 0.650000 0.000000 0.650000 ( 0.642834)
|
32
|
+
opencl singlefunction: 0.840000 0.040000 0.880000 ( 1.097814)
|
33
|
+
pure ruby pow float: 0.140000 0.000000 0.140000 ( 0.140097)
|
34
|
+
opencl pow float: 0.190000 0.010000 0.200000 ( 0.269772)
|
35
|
+
pure ruby pow int: 0.030000 0.000000 0.030000 ( 0.030491)
|
36
|
+
opencl pow int: 0.040000 0.010000 0.050000 ( 0.084335)
|
data/benchmark_nvidia.txt
CHANGED
@@ -1,33 +1,36 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
1
|
+
TensorStream::Evaluator::OpenclEvaluator
|
2
|
+
TensorStream::Evaluator::RubyEvaluator
|
3
|
+
model name : Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz
|
4
|
+
OpenCL device NVIDIA CUDA GeForce GTX 950M
|
5
|
+
Rehearsal --------------------------------------------------------------
|
6
|
+
pure ruby ooo matmul : 1.670000 0.010000 1.680000 ( 1.682059)
|
7
|
+
opencl ooo matmul : 0.100000 0.100000 0.200000 ( 0.220002)
|
8
|
+
pure ruby softmax : 0.380000 0.010000 0.390000 ( 0.377827)
|
9
|
+
opencl softmax : 0.040000 0.000000 0.040000 ( 0.040750)
|
10
|
+
pure ruby matmul : 1.000000 0.010000 1.010000 ( 1.013795)
|
11
|
+
opencl matmul : 0.040000 0.000000 0.040000 ( 0.032285)
|
12
|
+
pure ruby : 3.460000 0.010000 3.470000 ( 3.486048)
|
13
|
+
opencl : 0.320000 0.020000 0.340000 ( 0.326977)
|
14
|
+
pure ruby single function: 0.460000 0.000000 0.460000 ( 0.460433)
|
15
|
+
opencl singlefunction: 0.130000 0.000000 0.130000 ( 0.130273)
|
16
|
+
pure ruby pow float: 0.110000 0.000000 0.110000 ( 0.115466)
|
17
|
+
opencl pow float: 0.040000 0.010000 0.050000 ( 0.030290)
|
18
|
+
pure ruby pow int: 0.020000 0.000000 0.020000 ( 0.023065)
|
19
|
+
opencl pow int: 0.040000 0.010000 0.050000 ( 0.044086)
|
20
|
+
----------------------------------------------------- total: 7.990000sec
|
18
21
|
|
19
|
-
|
20
|
-
pure ruby ooo matmul
|
21
|
-
opencl ooo matmul
|
22
|
-
pure ruby softmax :
|
23
|
-
opencl softmax :
|
24
|
-
pure ruby matmul :
|
25
|
-
opencl matmul :
|
26
|
-
pure ruby :
|
27
|
-
opencl :
|
28
|
-
pure ruby single function:
|
29
|
-
opencl singlefunction:
|
30
|
-
pure ruby pow float:
|
31
|
-
opencl pow float:
|
32
|
-
pure ruby pow int:
|
33
|
-
opencl pow int:
|
22
|
+
user system total real
|
23
|
+
pure ruby ooo matmul : 1.790000 0.000000 1.790000 ( 1.794305)
|
24
|
+
opencl ooo matmul : 0.050000 0.000000 0.050000 ( 0.049030)
|
25
|
+
pure ruby softmax : 0.300000 0.000000 0.300000 ( 0.305664)
|
26
|
+
opencl softmax : 0.030000 0.000000 0.030000 ( 0.021897)
|
27
|
+
pure ruby matmul : 0.810000 0.000000 0.810000 ( 0.805583)
|
28
|
+
opencl matmul : 0.030000 0.000000 0.030000 ( 0.024358)
|
29
|
+
pure ruby : 2.870000 0.010000 2.880000 ( 2.881779)
|
30
|
+
opencl : 0.170000 0.000000 0.170000 ( 0.173036)
|
31
|
+
pure ruby single function: 0.400000 0.000000 0.400000 ( 0.398390)
|
32
|
+
opencl singlefunction: 0.120000 0.000000 0.120000 ( 0.117482)
|
33
|
+
pure ruby pow float: 0.100000 0.000000 0.100000 ( 0.099471)
|
34
|
+
opencl pow float: 0.030000 0.000000 0.030000 ( 0.025039)
|
35
|
+
pure ruby pow int: 0.030000 0.000000 0.030000 ( 0.028251)
|
36
|
+
opencl pow int: 0.040000 0.000000 0.040000 ( 0.031384)
|
@@ -97,8 +97,13 @@ module TensorStream
|
|
97
97
|
resolved_inputs = tensor.inputs.map do |i|
|
98
98
|
next if i.nil?
|
99
99
|
if @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
|
100
|
+
cache_key = "#{tensor.graph.object_id}_#{i.name}:#{object_id}"
|
101
|
+
next @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
|
102
|
+
|
100
103
|
result = @session.delegate_to_evaluator(i, @context, execution_context)
|
101
|
-
convert_from_buffer(i, result)
|
104
|
+
convert_from_buffer(i, result).tap do |buffer|
|
105
|
+
@context[:_cache][cache_key] = buffer if i.is_const
|
106
|
+
end
|
102
107
|
else
|
103
108
|
prepare_input(i, execution_context, op_options)
|
104
109
|
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
|
2
|
+
__kernel void ceil_<%= dtype %>(const int M, const int N, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
|
3
|
+
// Get the index of the current element to be processed
|
4
|
+
const int globalRow = get_global_id(0); // Row ID of C (0..M)
|
5
|
+
const int globalCol = get_global_id(1); // Col ID of C (0..N)
|
6
|
+
|
7
|
+
C[globalRow * N + globalCol] = ceil(A[globalRow * N + globalCol]);
|
8
|
+
}
|
@@ -0,0 +1,8 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
|
2
|
+
__kernel void floor_<%= dtype %>(const int M, const int N, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
|
3
|
+
// Get the index of the current element to be processed
|
4
|
+
const int globalRow = get_global_id(0); // Row ID of C (0..M)
|
5
|
+
const int globalCol = get_global_id(1); // Col ID of C (0..N)
|
6
|
+
|
7
|
+
C[globalRow * N + globalCol] = floor(A[globalRow * N + globalCol]);
|
8
|
+
}
|
@@ -20,7 +20,14 @@ module TensorStream
|
|
20
20
|
return buffer[0]
|
21
21
|
end
|
22
22
|
|
23
|
+
if dirty
|
24
|
+
op.command_queue.enqueue_read_buffer(cl_buffer, buffer, event_wait_list: [op].compact)
|
25
|
+
op.command_queue.finish
|
26
|
+
self.dirty = false
|
27
|
+
end
|
28
|
+
|
23
29
|
result = buffer.reshape(*shape.reverse).to_a
|
30
|
+
|
24
31
|
if data_type == :boolean
|
25
32
|
result = process_function_op(result, ->(a, _b) { a != 0 })
|
26
33
|
end
|
@@ -51,7 +51,7 @@ module TensorStream
|
|
51
51
|
|
52
52
|
def self.fetch_device(query = [])
|
53
53
|
devices = query_devices_with_score
|
54
|
-
platform_devices = devices.select { |d| d[0].platform.to_s.downcase =~ /#{query[0].downcase}/ }
|
54
|
+
platform_devices = devices.select { |d| d[0].platform.to_s.gsub(' ','_').downcase =~ /#{query[0].downcase}/ }
|
55
55
|
opencl_to_device(platform_devices[[query[1].to_i, platform_devices.size - 1].min])
|
56
56
|
end
|
57
57
|
|
@@ -215,9 +215,7 @@ module TensorStream
|
|
215
215
|
|
216
216
|
def _run(tensor, execution_context)
|
217
217
|
return tensor if tensor.is_a?(OpenCLBuffer)
|
218
|
-
if tensor.is_a?(Array) && tensor.size
|
219
|
-
return tensor.map { |t| _run(t, execution_context) }
|
220
|
-
end
|
218
|
+
return tensor.map { |t| _run(t, execution_context) } if tensor.is_a?(Array) && !tensor.size.empty? && tensor[0].is_a?(Tensor)
|
221
219
|
|
222
220
|
tensor = tensor.call if tensor.is_a?(Proc)
|
223
221
|
|
@@ -246,12 +244,11 @@ module TensorStream
|
|
246
244
|
tensor.buffer
|
247
245
|
end
|
248
246
|
|
249
|
-
register_op :
|
250
|
-
execute_func('log', tensor, inputs[0], context)
|
247
|
+
register_op :no_op do |_context, _tensor, _inputs|
|
251
248
|
end
|
252
249
|
|
253
|
-
register_op :
|
254
|
-
execute_func('
|
250
|
+
register_op :log do |context, tensor, inputs|
|
251
|
+
execute_func('log', tensor, inputs[0], context)
|
255
252
|
end
|
256
253
|
|
257
254
|
register_op :cond do |context, tensor, inputs|
|
@@ -347,7 +344,7 @@ module TensorStream
|
|
347
344
|
end
|
348
345
|
end
|
349
346
|
|
350
|
-
%i[sign exp tan cos abs sqrt negate square reciprocal tanh tanh_grad sigmoid log1p round].each do |op|
|
347
|
+
%i[sign exp tan sin cos abs sqrt negate square reciprocal tanh tanh_grad sigmoid log1p round floor ceil].each do |op|
|
351
348
|
register_op op, noop: true do |context, tensor, inputs|
|
352
349
|
execute_func(op.to_s, tensor, inputs[0], context)
|
353
350
|
end
|
@@ -385,30 +382,6 @@ module TensorStream
|
|
385
382
|
output_buffer
|
386
383
|
end
|
387
384
|
|
388
|
-
register_op :truncate do |context, tensor, inputs|
|
389
|
-
a, b = inputs
|
390
|
-
if a.shape.size.zero?
|
391
|
-
a
|
392
|
-
else
|
393
|
-
input_b = read_final_result(b)
|
394
|
-
if a.shape == input_b
|
395
|
-
a
|
396
|
-
else
|
397
|
-
input_a = read_final_result(a)
|
398
|
-
if input_b == []
|
399
|
-
if a.buffer.size == 1
|
400
|
-
a.shape = input_b
|
401
|
-
a
|
402
|
-
else
|
403
|
-
wrap_opencl(a.buffer[0], data_type: a.data_type, name: tensor.name)
|
404
|
-
end
|
405
|
-
else
|
406
|
-
wrap_opencl(truncate(input_a, input_b), data_type: a.data_type, name: tensor.name)
|
407
|
-
end
|
408
|
-
end
|
409
|
-
end
|
410
|
-
end
|
411
|
-
|
412
385
|
register_op :check_numerics, noop: true do |context, tensor, inputs|
|
413
386
|
a = complete_eval(inputs[0], context)
|
414
387
|
name = tensor.options[:name]
|
@@ -433,6 +406,30 @@ module TensorStream
|
|
433
406
|
end
|
434
407
|
end
|
435
408
|
|
409
|
+
register_op :truncate do |_context, tensor, inputs|
|
410
|
+
a, b = inputs
|
411
|
+
if a.shape.size.zero?
|
412
|
+
a
|
413
|
+
else
|
414
|
+
input_b = read_final_result(b)
|
415
|
+
if a.shape == input_b
|
416
|
+
a
|
417
|
+
else
|
418
|
+
input_a = read_final_result(a)
|
419
|
+
if input_b == []
|
420
|
+
if a.buffer.size == 1
|
421
|
+
a.shape = input_b
|
422
|
+
a
|
423
|
+
else
|
424
|
+
wrap_opencl(a.buffer[0], data_type: a.data_type, name: tensor.name)
|
425
|
+
end
|
426
|
+
else
|
427
|
+
wrap_opencl(truncate(input_a, input_b), data_type: a.data_type, name: tensor.name)
|
428
|
+
end
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
432
|
+
|
436
433
|
register_op :print do |context, tensor, inputs|
|
437
434
|
a, b = inputs
|
438
435
|
input_b = complete_eval(b, context)
|
@@ -610,12 +607,16 @@ module TensorStream
|
|
610
607
|
buffer = complete_eval(b, child_context)
|
611
608
|
|
612
609
|
if assign.buffer
|
613
|
-
buffer = type_cast(buffer, assign.data_type, name: "#{tensor.name}/cast_#{tensor.name}_#{tensor.data_type}")
|
610
|
+
# buffer = type_cast(buffer, assign.data_type, name: "#{tensor.name}/cast_#{tensor.name}_#{tensor.data_type}")
|
614
611
|
if assign.buffer.cl_buffer != buffer.cl_buffer
|
615
612
|
assign.buffer.op = _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: [buffer.op, assign.buffer.op])
|
613
|
+
else
|
614
|
+
assign.buffer.op = buffer.op
|
616
615
|
end
|
617
616
|
else
|
618
|
-
|
617
|
+
value = read_final_result(buffer)
|
618
|
+
assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
|
619
|
+
assign.value = value
|
619
620
|
end
|
620
621
|
assign.buffer.dirty = true
|
621
622
|
assign.buffer
|
@@ -96,6 +96,10 @@ module TensorStream
|
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
99
|
+
register_op(:no_op, no_eval: true) do |_context, _tensor, inputs|
|
100
|
+
inputs
|
101
|
+
end
|
102
|
+
|
99
103
|
register_op(:const) do |context, _tensor, inputs|
|
100
104
|
inputs[0]
|
101
105
|
end
|
@@ -232,6 +236,14 @@ module TensorStream
|
|
232
236
|
call_op(:sqrt, inputs[0], context, ->(t, _b) { Math.sqrt(t) })
|
233
237
|
end
|
234
238
|
|
239
|
+
register_op :floor, no_eval: true do |context, _tensor, inputs|
|
240
|
+
call_op(:floor, inputs[0], context, ->(t, _b) { t.floor })
|
241
|
+
end
|
242
|
+
|
243
|
+
register_op :ceil, no_eval: true do |context, _tensor, inputs|
|
244
|
+
call_op(:ceil, inputs[0], context, ->(t, _b) { t.ceil })
|
245
|
+
end
|
246
|
+
|
235
247
|
register_op :square, no_eval: true do |context, tensor, inputs|
|
236
248
|
call_op(:square, inputs[0], context, ->(t, _b) { t * t })
|
237
249
|
end
|
data/lib/tensor_stream/graph.rb
CHANGED
@@ -120,8 +120,15 @@ module TensorStream
|
|
120
120
|
add_node(node)
|
121
121
|
end
|
122
122
|
|
123
|
-
def control_dependencies(
|
124
|
-
|
123
|
+
def control_dependencies(control_inputs = [], &block)
|
124
|
+
Thread.current["ts_graph_#{object_id}"] ||= {}
|
125
|
+
Thread.current["ts_graph_#{object_id}"][:control_dependencies] ||= []
|
126
|
+
Thread.current["ts_graph_#{object_id}"][:control_dependencies] << Operation.new(:no_op, *control_inputs)
|
127
|
+
begin
|
128
|
+
block.call
|
129
|
+
ensure
|
130
|
+
Thread.current["ts_graph_#{object_id}"][:control_dependencies].pop
|
131
|
+
end
|
125
132
|
end
|
126
133
|
|
127
134
|
def enable_eager_execution
|
@@ -178,6 +185,12 @@ module TensorStream
|
|
178
185
|
graph_thread_storage[:current_scope].join('/')
|
179
186
|
end
|
180
187
|
|
188
|
+
def get_dependency_scope
|
189
|
+
graph_thread_storage = Thread.current["ts_graph_#{object_id}"]
|
190
|
+
return nil if graph_thread_storage.nil? || graph_thread_storage[:control_dependencies].nil?
|
191
|
+
graph_thread_storage[:control_dependencies].last
|
192
|
+
end
|
193
|
+
|
181
194
|
def get_device_scope
|
182
195
|
graph_thread_storage = Thread.current["ts_graph_#{object_id}"]
|
183
196
|
return :default if graph_thread_storage.nil? || graph_thread_storage[:default_device].nil?
|
@@ -2,7 +2,12 @@ module TensorStream
|
|
2
2
|
# module that contains helper functions useful for ops
|
3
3
|
module OpHelper
|
4
4
|
def _op(code, t_a, t_b = nil, options = {})
|
5
|
-
Operation.new(code.to_sym, t_a, t_b, options)
|
5
|
+
op = Operation.new(code.to_sym, t_a, t_b, options)
|
6
|
+
if !TensorStream.get_default_graph.get_dependency_scope.nil?
|
7
|
+
i_op(:identity, op, TensorStream.get_default_graph.get_dependency_scope, name: [op.name, 'tuple', 'control_dependency'].join('/'))
|
8
|
+
else
|
9
|
+
op
|
10
|
+
end
|
6
11
|
end
|
7
12
|
|
8
13
|
# same as op but with a marker that it was internal generated
|
@@ -4,7 +4,15 @@ module TensorStream
|
|
4
4
|
attr_accessor :name, :operation, :inputs, :rank, :options
|
5
5
|
attr_reader :outputs
|
6
6
|
|
7
|
-
def initialize(operation,
|
7
|
+
def initialize(operation, *args)
|
8
|
+
options = if args.last.is_a?(Hash)
|
9
|
+
args.pop
|
10
|
+
else
|
11
|
+
{}
|
12
|
+
end
|
13
|
+
|
14
|
+
inputs = args
|
15
|
+
|
8
16
|
setup_initial_state(options)
|
9
17
|
|
10
18
|
@operation = operation
|
@@ -15,7 +23,7 @@ module TensorStream
|
|
15
23
|
|
16
24
|
@options = options
|
17
25
|
|
18
|
-
@inputs =
|
26
|
+
@inputs = inputs.map { |i| options[:preserve_params_type] ? i : TensorStream.convert_to_tensor(i) }
|
19
27
|
@data_type = set_data_type(options[:data_type])
|
20
28
|
@is_const = infer_const
|
21
29
|
@shape = TensorShape.new(infer_shape)
|
data/lib/tensor_stream/ops.rb
CHANGED
@@ -146,6 +146,20 @@ module TensorStream
|
|
146
146
|
_op(:ones, shape, nil, data_type: dtype, name: name)
|
147
147
|
end
|
148
148
|
|
149
|
+
##
|
150
|
+
# Returns element-wise largest integer not greater than x.
|
151
|
+
def floor(input, name: nil)
|
152
|
+
check_allowed_types(input, FLOATING_POINT_TYPES)
|
153
|
+
_op(:floor, input, name: name)
|
154
|
+
end
|
155
|
+
|
156
|
+
##
|
157
|
+
# Returns element-wise smallest integer not less than x.
|
158
|
+
def ceil(input, name: nil)
|
159
|
+
check_allowed_types(input, FLOATING_POINT_TYPES)
|
160
|
+
_op(:ceil, input, name: name)
|
161
|
+
end
|
162
|
+
|
149
163
|
##
|
150
164
|
# Returns the truth value of (x < y) element-wise.
|
151
165
|
# This operation supports broadcasting
|
data/lib/tensor_stream/tensor.rb
CHANGED
@@ -55,80 +55,80 @@ module TensorStream
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def +(other)
|
58
|
-
_a,
|
59
|
-
|
58
|
+
_a, other = TensorStream.check_data_types(self, other)
|
59
|
+
_op(:add, self, other)
|
60
60
|
end
|
61
61
|
|
62
62
|
def [](index)
|
63
|
-
|
63
|
+
_op(:index, self, index)
|
64
64
|
end
|
65
65
|
|
66
66
|
def *(other)
|
67
|
-
TensorStream.check_data_types(self, other)
|
68
|
-
|
67
|
+
_a, other = TensorStream.check_data_types(self, other)
|
68
|
+
_op(:mul, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
69
69
|
end
|
70
70
|
|
71
71
|
def **(other)
|
72
|
-
TensorStream.check_data_types(self, other)
|
73
|
-
|
72
|
+
_a, other = TensorStream.check_data_types(self, other)
|
73
|
+
_op(:pow, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
74
74
|
end
|
75
75
|
|
76
76
|
def /(other)
|
77
|
-
TensorStream.check_data_types(self, other)
|
78
|
-
|
77
|
+
_a, other = TensorStream.check_data_types(self, other)
|
78
|
+
_op(:div, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
79
79
|
end
|
80
80
|
|
81
81
|
def -(other)
|
82
|
-
TensorStream.check_data_types(self, other)
|
83
|
-
|
82
|
+
_a, other = TensorStream.check_data_types(self, other)
|
83
|
+
_op(:sub, self, TensorStream.convert_to_tensor(other, dtype: data_type))
|
84
84
|
end
|
85
85
|
|
86
86
|
def -@
|
87
|
-
|
87
|
+
_op(:negate, self, nil)
|
88
88
|
end
|
89
89
|
|
90
90
|
def ==(other)
|
91
|
-
TensorStream.check_data_types(self, other)
|
91
|
+
_a, other = TensorStream.check_data_types(self, other)
|
92
92
|
_op(:equal, self, other)
|
93
93
|
end
|
94
94
|
|
95
95
|
def <(other)
|
96
|
-
TensorStream.check_data_types(self, other)
|
96
|
+
_a, other = TensorStream.check_data_types(self, other)
|
97
97
|
_op(:less, self, other)
|
98
98
|
end
|
99
99
|
|
100
100
|
def !=(other)
|
101
|
-
TensorStream.check_data_types(self, other)
|
101
|
+
_a, other = TensorStream.check_data_types(self, other)
|
102
102
|
_op(:not_equal, self, other)
|
103
103
|
end
|
104
104
|
|
105
105
|
def >(other)
|
106
|
-
TensorStream.check_data_types(self, other)
|
106
|
+
_a, other = TensorStream.check_data_types(self, other)
|
107
107
|
_op(:greater, self, other)
|
108
108
|
end
|
109
109
|
|
110
110
|
def >=(other)
|
111
|
-
TensorStream.check_data_types(self, other)
|
111
|
+
_a, other = TensorStream.check_data_types(self, other)
|
112
112
|
_op(:greater_equal, self, other)
|
113
113
|
end
|
114
114
|
|
115
115
|
def <=(other)
|
116
|
-
TensorStream.check_data_types(self, other)
|
116
|
+
_a, other = TensorStream.check_data_types(self, other)
|
117
117
|
_op(:less_equal, self, other)
|
118
118
|
end
|
119
119
|
|
120
120
|
def and(other)
|
121
|
-
TensorStream.check_data_types(self, other)
|
121
|
+
_a, other = TensorStream.check_data_types(self, other)
|
122
122
|
_op(:logical_and, self, other)
|
123
123
|
end
|
124
124
|
|
125
125
|
def matmul(other)
|
126
|
-
TensorStream.check_data_types(self, other)
|
126
|
+
_a, other = TensorStream.check_data_types(self, other)
|
127
127
|
_op(:matmul, self, other)
|
128
128
|
end
|
129
129
|
|
130
130
|
def dot(other)
|
131
|
-
TensorStream.check_data_types(self, other)
|
131
|
+
_a, other = TensorStream.check_data_types(self, other)
|
132
132
|
_op(:matmul, self, other)
|
133
133
|
end
|
134
134
|
|
data/lib/tensor_stream/utils.rb
CHANGED
@@ -149,6 +149,11 @@ module TensorStream
|
|
149
149
|
Graph.get_default_graph.get_collection(name, options)
|
150
150
|
end
|
151
151
|
|
152
|
+
def assign(ref, value, name: nil)
|
153
|
+
raise "#{ref.name} not a variable" unless ref.is_a?(Variable)
|
154
|
+
ref.assign(value, name: name)
|
155
|
+
end
|
156
|
+
|
152
157
|
def placeholder(dtype, shape: nil, name: nil)
|
153
158
|
TensorStream::Placeholder.new(dtype, nil, shape, name: name)
|
154
159
|
end
|
@@ -169,6 +174,10 @@ module TensorStream
|
|
169
174
|
TensorStream.get_default_graph.random_seed = seed
|
170
175
|
end
|
171
176
|
|
177
|
+
def control_dependencies(control_inputs, &block)
|
178
|
+
TensorStream.get_default_graph.control_dependencies(control_inputs, &block)
|
179
|
+
end
|
180
|
+
|
172
181
|
def convert_to_tensor(value, dtype: nil, name: nil, preferred_dtype: nil)
|
173
182
|
return convert_to_tensor(value.call) if value.is_a?(Proc)
|
174
183
|
|
@@ -32,21 +32,21 @@ module TensorStream
|
|
32
32
|
assign(init_op)
|
33
33
|
end
|
34
34
|
|
35
|
-
def assign(value)
|
36
|
-
|
35
|
+
def assign(value, name: nil)
|
36
|
+
_a, value = TensorStream.check_data_types(self, value)
|
37
|
+
Operation.new(:assign, self, value, name: name)
|
37
38
|
end
|
38
39
|
|
39
40
|
def read_value
|
40
|
-
if buffer
|
41
|
+
if buffer
|
41
42
|
@value = buffer.to_ruby
|
42
|
-
buffer.dirty = false
|
43
43
|
end
|
44
44
|
|
45
45
|
@value
|
46
46
|
end
|
47
47
|
|
48
48
|
def assign_add(value)
|
49
|
-
value =
|
49
|
+
_a, value = TensorStream.check_data_types(self, value)
|
50
50
|
Operation.new(:assign_add, self, value, data_type: data_type)
|
51
51
|
end
|
52
52
|
|
@@ -55,6 +55,7 @@ module TensorStream
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def assign_sub(value)
|
58
|
+
_a, value = TensorStream.check_data_types(self, value)
|
58
59
|
Operation.new(:assign_sub, self, value)
|
59
60
|
end
|
60
61
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Emmanuel Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -248,10 +248,12 @@ files:
|
|
248
248
|
- lib/tensor_stream/evaluator/opencl/kernels/argmax.cl
|
249
249
|
- lib/tensor_stream/evaluator/opencl/kernels/argmin.cl
|
250
250
|
- lib/tensor_stream/evaluator/opencl/kernels/cast.cl
|
251
|
+
- lib/tensor_stream/evaluator/opencl/kernels/ceil.cl
|
251
252
|
- lib/tensor_stream/evaluator/opencl/kernels/cond.cl.erb
|
252
253
|
- lib/tensor_stream/evaluator/opencl/kernels/cos.cl
|
253
254
|
- lib/tensor_stream/evaluator/opencl/kernels/div.cl.erb
|
254
255
|
- lib/tensor_stream/evaluator/opencl/kernels/exp.cl
|
256
|
+
- lib/tensor_stream/evaluator/opencl/kernels/floor.cl
|
255
257
|
- lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
|
256
258
|
- lib/tensor_stream/evaluator/opencl/kernels/log.cl
|
257
259
|
- lib/tensor_stream/evaluator/opencl/kernels/log1p.cl
|
@@ -340,7 +342,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
340
342
|
version: '0'
|
341
343
|
requirements: []
|
342
344
|
rubyforge_project:
|
343
|
-
rubygems_version: 2.6.
|
345
|
+
rubygems_version: 2.6.11
|
344
346
|
signing_key:
|
345
347
|
specification_version: 4
|
346
348
|
summary: A Pure ruby tensorflow implementation
|