tensor_stream-opencl 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/benchmark/benchmark.rb +63 -37
- data/benchmark_imac2015_iris.txt +80 -0
- data/lib/tensor_stream/opencl.rb +1 -0
- data/lib/tensor_stream/opencl/array_ops.rb +37 -7
- data/lib/tensor_stream/opencl/images_ops.rb +1 -1
- data/lib/tensor_stream/opencl/kernels/gemm.cl +5 -4
- data/lib/tensor_stream/opencl/kernels/random_uniform.cl +7 -0
- data/lib/tensor_stream/opencl/math_ops.rb +13 -11
- data/lib/tensor_stream/opencl/opencl_buffer.rb +85 -3
- data/lib/tensor_stream/opencl/opencl_evaluator.rb +32 -45
- data/lib/tensor_stream/opencl/random_ops.rb +54 -0
- data/lib/tensor_stream/opencl/utils.rb +27 -0
- data/lib/tensor_stream/opencl/version.rb +1 -1
- data/samples/mnist_data_2.3.rb +9 -4
- data/samples/mnist_data_3.0.rb +2 -2
- data/tensor_stream-opencl.gemspec +1 -1
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b433e9e7ab38a517c21b57065e5a43b112640fd7c419fb7baa2f3319128cdacf
|
4
|
+
data.tar.gz: fab7d48513cb0f8481e151d18b088782918cb1539b59586613a00c4d5f5aeed2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04d106f5ee5fac49eba20ff143bb2212a1cafd5140fc04cee20958ffea0c5909d352824948badf16ec5bc8ca2a7b13b4dcf7748eb03cbd6dc8a466c6ae0f5040
|
7
|
+
data.tar.gz: e17171f28641ce3496c0b338b6913c96e10d9fd5ce93b7980dae6edef00e63e5f7c4dcb60ed04fed5271a474b4940d069ebcf6a00bbfd3c4e6eafa2c0c4f26ed
|
data/benchmark/benchmark.rb
CHANGED
@@ -4,6 +4,25 @@ require 'benchmark'
|
|
4
4
|
require 'pry-byebug'
|
5
5
|
require 'awesome_print'
|
6
6
|
require 'tensor_stream/opencl'
|
7
|
+
require 'rbconfig'
|
8
|
+
|
9
|
+
def os
|
10
|
+
@os ||= (
|
11
|
+
host_os = RbConfig::CONFIG['host_os']
|
12
|
+
case host_os
|
13
|
+
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
14
|
+
:windows
|
15
|
+
when /darwin|mac os/
|
16
|
+
:macosx
|
17
|
+
when /linux/
|
18
|
+
:linux
|
19
|
+
when /solaris|bsd/
|
20
|
+
:unix
|
21
|
+
else
|
22
|
+
raise Error::WebDriverError, "unknown os: #{host_os.inspect}"
|
23
|
+
end
|
24
|
+
)
|
25
|
+
end
|
7
26
|
|
8
27
|
def tr(t, places = 1)
|
9
28
|
if t.is_a?(Array)
|
@@ -77,49 +96,56 @@ conv2d_grad = tf.gradients(conv2d, [sample_image, sample_filter])
|
|
77
96
|
|
78
97
|
bias_add = tf.nn.bias_add(large_tensor, large_tensor_bias)
|
79
98
|
bias_add_grad = tf.gradients(bias_add, [large_tensor_bias])
|
99
|
+
dropout = tf.nn.dropout(large_tensor, 0.8)
|
80
100
|
|
81
101
|
puts TensorStream::Evaluator.default_evaluators
|
82
102
|
|
83
103
|
sess2 = tf.session
|
84
104
|
|
85
|
-
|
105
|
+
if os == :macosx
|
106
|
+
puts `sysctl -n machdep.cpu.brand_string`
|
107
|
+
else
|
108
|
+
puts `cat /proc/cpuinfo | grep "model name" | head -1`
|
109
|
+
end
|
86
110
|
device = TensorStream::Evaluator::OpenclEvaluator.default_device.native_device
|
87
111
|
puts "OpenCL device #{device.platform.to_s} #{device.name}"
|
88
112
|
Benchmark.bmbm do |x|
|
89
|
-
x.report("
|
90
|
-
x.report("opencl argmin
|
91
|
-
x.report("
|
92
|
-
x.report("opencl bias_add_grad
|
93
|
-
x.report("
|
94
|
-
x.report("opencl bias_add
|
95
|
-
x.report("
|
96
|
-
x.report("opencl conv2d_backprop
|
97
|
-
x.report("
|
98
|
-
x.report("opencl conv2d
|
99
|
-
x.report("
|
100
|
-
x.report("opencl arr index
|
101
|
-
x.report("
|
102
|
-
x.report("opencl min
|
103
|
-
x.report("
|
104
|
-
x.report("opencl sum
|
105
|
-
x.report("
|
106
|
-
x.report("opencl sum axis 1
|
107
|
-
x.report("
|
108
|
-
x.report("opencl split
|
109
|
-
x.report("
|
110
|
-
x.report("opencl add_n
|
111
|
-
x.report("
|
112
|
-
x.report("opencl ooo matmul
|
113
|
-
x.report("
|
114
|
-
x.report("opencl softmax
|
115
|
-
x.report("
|
116
|
-
x.report("opencl matmul
|
117
|
-
x.report("
|
118
|
-
x.report("opencl
|
119
|
-
x.report("
|
120
|
-
x.report("opencl
|
121
|
-
x.report("
|
122
|
-
x.report("opencl pow float:") { 100.times do sess2.run(pow_f, feed_dict: { p => rand, q => rand }) end }
|
123
|
-
x.report("
|
124
|
-
x.report("opencl pow int:") { 100.times do sess2.run(pow_i, feed_dict: { p => rand, q => rand }) end }
|
113
|
+
x.report("ruby argmin :") { 100.times do sess.run(argmin) end }
|
114
|
+
x.report("opencl argmin :") { 100.times do sess2.run(argmin) end }
|
115
|
+
x.report("ruby bias_add_grad :") { 100.times do sess.run(bias_add_grad) end }
|
116
|
+
x.report("opencl bias_add_grad :") { 100.times do sess2.run(bias_add_grad) end }
|
117
|
+
x.report("ruby bias_add :") { 100.times do sess.run(bias_add) end }
|
118
|
+
x.report("opencl bias_add :") { 100.times do sess2.run(bias_add) end }
|
119
|
+
x.report("ruby conv2d_backprop :") { 100.times do sess.run(conv2d_grad) end }
|
120
|
+
x.report("opencl conv2d_backprop :") { 100.times do sess2.run(conv2d_grad) end }
|
121
|
+
x.report("ruby conv2d :") { 100.times do sess.run(conv2d) end }
|
122
|
+
x.report("opencl conv2d :") { 100.times do sess2.run(conv2d) end }
|
123
|
+
x.report("ruby arr index :") { 100.times do sess.run(index) end }
|
124
|
+
x.report("opencl arr index :") { 100.times do sess2.run(index) end }
|
125
|
+
x.report("ruby min :") { 100.times do sess.run(min) end }
|
126
|
+
x.report("opencl min :") { 100.times do sess2.run(min) end }
|
127
|
+
x.report("ruby sum :") { 100.times do sess.run(sum) end }
|
128
|
+
x.report("opencl sum :") { 100.times do sess2.run(sum) end }
|
129
|
+
x.report("ruby sum axis 1 :") { 100.times do sess.run(sum_axis_1) end }
|
130
|
+
x.report("opencl sum axis 1 :") { 100.times do sess2.run(sum_axis_1) end }
|
131
|
+
x.report("ruby split :") { 100.times do sess.run(split) end }
|
132
|
+
x.report("opencl split :") { 100.times do sess2.run(split) end }
|
133
|
+
x.report("ruby add_n :") { 100.times do sess.run(add_n) end }
|
134
|
+
x.report("opencl add_n :") { 100.times do sess2.run(add_n) end }
|
135
|
+
x.report("ruby ooo matmul :") { 100.times do sess.run(out_of_order) end }
|
136
|
+
x.report("opencl ooo matmul :") { 100.times do sess2.run(out_of_order) end }
|
137
|
+
x.report("ruby softmax :") { 100.times do sess.run(softmax) end }
|
138
|
+
x.report("opencl softmax :") { 100.times do sess2.run(softmax) end }
|
139
|
+
x.report("ruby matmul :") { 100.times do sess.run(matmul) end }
|
140
|
+
x.report("opencl matmul :") { 100.times do sess2.run(matmul) end }
|
141
|
+
x.report("ruby :") { 100.times do sess.run(model, feed_dict: { p => rand, q => rand }) end }
|
142
|
+
x.report("opencl :") { 100.times do sess2.run(model, feed_dict: { p => rand, q => rand }) end }
|
143
|
+
x.report("ruby single function :") { 100.times do sess.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
|
144
|
+
x.report("opencl single function :") { 100.times do sess2.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
|
145
|
+
x.report("ruby pow float :") { 100.times do sess.run(pow_f, feed_dict: { p => rand, q => rand }) end }
|
146
|
+
x.report("opencl pow float :") { 100.times do sess2.run(pow_f, feed_dict: { p => rand, q => rand }) end }
|
147
|
+
x.report("ruby pow int :") { 100.times do sess.run(pow_i, feed_dict: { p => rand, q => rand }) end }
|
148
|
+
x.report("opencl pow int :") { 100.times do sess2.run(pow_i, feed_dict: { p => rand, q => rand }) end }
|
149
|
+
x.report("ruby dropout :") { 100.times do sess.run(dropout) end }
|
150
|
+
x.report("opencl dropout :") { 100.times do sess2.run(dropout) end }
|
125
151
|
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
TensorStream::Evaluator::OpenclEvaluator
|
2
|
+
TensorStream::Evaluator::RubyEvaluator
|
3
|
+
Intel(R) Core(TM) i5-5575R CPU @ 2.80GHz
|
4
|
+
OpenCL device Apple Intel(R) Iris(TM) Pro Graphics 6200
|
5
|
+
Rehearsal ------------------------------------------------------------
|
6
|
+
ruby argmin : 0.940000 0.030000 0.970000 ( 1.197240)
|
7
|
+
opencl argmin : 0.070000 0.020000 0.090000 ( 0.093035)
|
8
|
+
ruby bias_add_grad : 2.390000 0.060000 2.450000 ( 2.558622)
|
9
|
+
opencl bias_add_grad : 0.020000 0.010000 0.030000 ( 0.030563)
|
10
|
+
ruby bias_add : 2.530000 0.070000 2.600000 ( 2.749423)
|
11
|
+
opencl bias_add : 0.150000 0.030000 0.180000 ( 0.191476)
|
12
|
+
ruby conv2d_backprop : 4.020000 0.060000 4.080000 ( 5.306408)
|
13
|
+
opencl conv2d_backprop : 0.040000 0.030000 0.070000 ( 0.077737)
|
14
|
+
ruby conv2d : 0.890000 0.010000 0.900000 ( 0.963062)
|
15
|
+
opencl conv2d : 0.030000 0.010000 0.040000 ( 0.042274)
|
16
|
+
ruby arr index : 0.000000 0.000000 0.000000 ( 0.004072)
|
17
|
+
opencl arr index : 0.010000 0.010000 0.020000 ( 0.023981)
|
18
|
+
ruby min : 3.710000 0.040000 3.750000 ( 4.329215)
|
19
|
+
opencl min : 0.160000 0.030000 0.190000 ( 0.191062)
|
20
|
+
ruby sum : 6.930000 0.080000 7.010000 ( 7.467194)
|
21
|
+
opencl sum : 0.010000 0.010000 0.020000 ( 0.034392)
|
22
|
+
ruby sum axis 1 : 6.920000 0.070000 6.990000 ( 7.412997)
|
23
|
+
opencl sum axis 1 : 0.020000 0.020000 0.040000 ( 0.027614)
|
24
|
+
ruby split : 0.020000 0.000000 0.020000 ( 0.022597)
|
25
|
+
opencl split : 0.060000 0.040000 0.100000 ( 0.099309)
|
26
|
+
ruby add_n : 0.150000 0.000000 0.150000 ( 0.162702)
|
27
|
+
opencl add_n : 0.020000 0.020000 0.040000 ( 0.033757)
|
28
|
+
ruby ooo matmul : 1.670000 0.010000 1.680000 ( 1.738712)
|
29
|
+
opencl ooo matmul : 0.020000 0.010000 0.030000 ( 0.029647)
|
30
|
+
ruby softmax : 0.030000 0.000000 0.030000 ( 0.033050)
|
31
|
+
opencl softmax : 0.020000 0.010000 0.030000 ( 0.030572)
|
32
|
+
ruby matmul : 0.820000 0.010000 0.830000 ( 0.851559)
|
33
|
+
opencl matmul : 0.010000 0.010000 0.020000 ( 0.026167)
|
34
|
+
ruby : 2.860000 0.020000 2.880000 ( 3.033034)
|
35
|
+
opencl : 0.220000 0.070000 0.290000 ( 0.240857)
|
36
|
+
ruby single function : 0.380000 0.000000 0.380000 ( 0.398911)
|
37
|
+
opencl single function : 0.150000 0.050000 0.200000 ( 0.162006)
|
38
|
+
ruby pow float : 0.090000 0.000000 0.090000 ( 0.098400)
|
39
|
+
opencl pow float : 0.020000 0.020000 0.040000 ( 0.033370)
|
40
|
+
ruby pow int : 0.020000 0.000000 0.020000 ( 0.023459)
|
41
|
+
opencl pow int : 0.020000 0.010000 0.030000 ( 0.030894)
|
42
|
+
-------------------------------------------------- total: 36.290000sec
|
43
|
+
|
44
|
+
user system total real
|
45
|
+
ruby argmin : 0.880000 0.010000 0.890000 ( 0.933367)
|
46
|
+
opencl argmin : 0.010000 0.010000 0.020000 ( 0.023140)
|
47
|
+
ruby bias_add_grad : 2.350000 0.050000 2.400000 ( 2.539349)
|
48
|
+
opencl bias_add_grad : 0.010000 0.010000 0.020000 ( 0.024700)
|
49
|
+
ruby bias_add : 2.510000 0.060000 2.570000 ( 2.667330)
|
50
|
+
opencl bias_add : 0.150000 0.020000 0.170000 ( 0.184056)
|
51
|
+
ruby conv2d_backprop : 3.910000 0.040000 3.950000 ( 4.320383)
|
52
|
+
opencl conv2d_backprop : 0.030000 0.020000 0.050000 ( 0.058036)
|
53
|
+
ruby conv2d : 0.910000 0.020000 0.930000 ( 1.120605)
|
54
|
+
opencl conv2d : 0.020000 0.010000 0.030000 ( 0.034972)
|
55
|
+
ruby arr index : 0.000000 0.000000 0.000000 ( 0.004119)
|
56
|
+
opencl arr index : 0.020000 0.010000 0.030000 ( 0.024126)
|
57
|
+
ruby min : 3.670000 0.030000 3.700000 ( 4.024439)
|
58
|
+
opencl min : 0.140000 0.030000 0.170000 ( 0.178683)
|
59
|
+
ruby sum : 6.920000 0.050000 6.970000 ( 7.314338)
|
60
|
+
opencl sum : 0.010000 0.020000 0.030000 ( 0.024655)
|
61
|
+
ruby sum axis 1 : 6.900000 0.050000 6.950000 ( 7.332897)
|
62
|
+
opencl sum axis 1 : 0.020000 0.020000 0.040000 ( 0.026150)
|
63
|
+
ruby split : 0.010000 0.000000 0.010000 ( 0.018866)
|
64
|
+
opencl split : 0.050000 0.040000 0.090000 ( 0.096327)
|
65
|
+
ruby add_n : 0.140000 0.000000 0.140000 ( 0.151006)
|
66
|
+
opencl add_n : 0.020000 0.010000 0.030000 ( 0.025622)
|
67
|
+
ruby ooo matmul : 1.670000 0.010000 1.680000 ( 1.732486)
|
68
|
+
opencl ooo matmul : 0.020000 0.020000 0.040000 ( 0.027051)
|
69
|
+
ruby softmax : 0.030000 0.000000 0.030000 ( 0.032848)
|
70
|
+
opencl softmax : 0.010000 0.010000 0.020000 ( 0.026403)
|
71
|
+
ruby matmul : 0.810000 0.000000 0.810000 ( 0.866297)
|
72
|
+
opencl matmul : 0.020000 0.020000 0.040000 ( 0.026677)
|
73
|
+
ruby : 2.870000 0.020000 2.890000 ( 3.237224)
|
74
|
+
opencl : 0.240000 0.080000 0.320000 ( 0.302463)
|
75
|
+
ruby single function : 0.390000 0.010000 0.400000 ( 0.470700)
|
76
|
+
opencl single function : 0.150000 0.060000 0.210000 ( 0.228528)
|
77
|
+
ruby pow float : 0.090000 0.000000 0.090000 ( 0.113073)
|
78
|
+
opencl pow float : 0.020000 0.010000 0.030000 ( 0.036938)
|
79
|
+
ruby pow int : 0.020000 0.000000 0.020000 ( 0.023728)
|
80
|
+
opencl pow int : 0.020000 0.020000 0.040000 ( 0.031909)
|
data/lib/tensor_stream/opencl.rb
CHANGED
@@ -10,13 +10,13 @@ module TensorStream
|
|
10
10
|
shape = if %i[zeros_like ones_like].include?(tensor.operation)
|
11
11
|
inputs[0].shape
|
12
12
|
elsif !inputs[0].nil?
|
13
|
-
|
13
|
+
complete_eval(inputs[0], context).buffer.to_a
|
14
14
|
else
|
15
15
|
tensor.shape.shape
|
16
16
|
end
|
17
17
|
cache_key = "cons_#{tensor.name}_#{tensor.data_type}_#{shape}"
|
18
18
|
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
19
|
-
buffer = allocate_narray_for_type(tensor.data_type, shape.reduce(:*) || 1)
|
19
|
+
buffer = OpenCLBuffer.allocate_narray_for_type(tensor.data_type, shape.reduce(:*) || 1)
|
20
20
|
if %i[zeros zeros_like].include?(tensor.operation)
|
21
21
|
buffer.fill!(0)
|
22
22
|
else
|
@@ -47,7 +47,7 @@ module TensorStream
|
|
47
47
|
buffer = if cl_buffer
|
48
48
|
cl_buffer.buffer
|
49
49
|
else
|
50
|
-
allocate_narray_for_type(tensor.data_type, narray_size)
|
50
|
+
OpenCLBuffer.allocate_narray_for_type(tensor.data_type, narray_size)
|
51
51
|
end
|
52
52
|
|
53
53
|
buffer.fill!(value.buffer[0])
|
@@ -365,8 +365,8 @@ module TensorStream
|
|
365
365
|
end
|
366
366
|
|
367
367
|
register_op :reshape do |context, tensor, inputs|
|
368
|
-
arr = inputs
|
369
|
-
new_shape =
|
368
|
+
arr, new_shape = inputs
|
369
|
+
new_shape = complete_eval(new_shape, context).buffer.to_a
|
370
370
|
|
371
371
|
shape = if new_shape.size.zero? && arr.buffer.size == 1
|
372
372
|
new_shape
|
@@ -389,9 +389,9 @@ module TensorStream
|
|
389
389
|
res
|
390
390
|
else
|
391
391
|
rank = inputs[0].shape.size
|
392
|
-
perm = inputs[1].nil? ? (0...rank).to_a.reverse : inputs[1].buffer
|
392
|
+
perm = inputs[1].nil? ? (0...rank).to_a.reverse : inputs[1].buffer!
|
393
393
|
new_shape = perm.map { |p| inputs[0].shape[p] }.to_a
|
394
|
-
output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
|
394
|
+
output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name, allocate_host: true)
|
395
395
|
transpose_with_perm(inputs[0].buffer, output_buffer.buffer, inputs[0].shape, new_shape, perm)
|
396
396
|
|
397
397
|
write_op = _opencl_queue.enqueue_write_buffer(output_buffer.cl_buffer, output_buffer.buffer)
|
@@ -442,6 +442,36 @@ module TensorStream
|
|
442
442
|
a
|
443
443
|
end
|
444
444
|
end
|
445
|
+
|
446
|
+
register_op :range do |context, tensor, inputs|
|
447
|
+
start, limit, delta = complete_eval(inputs, context).map { |p| p.buffer.to_a.first }
|
448
|
+
|
449
|
+
if limit.zero?
|
450
|
+
limit = start
|
451
|
+
start = 0
|
452
|
+
end
|
453
|
+
|
454
|
+
raise " delta !=0 " if delta.zero?
|
455
|
+
raise " Requires start <= limit when delta > 0" if (start > limit) && delta > 0
|
456
|
+
raise " Requires start >= limit when delta < 0" if (start < limit) && delta < 0
|
457
|
+
cache_key = "range_#{start}_#{limit}_#{delta}_#{tensor.data_type}"
|
458
|
+
|
459
|
+
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
460
|
+
delta = fp_type?(tensor.options[:output_type]) ? delta.to_f : delta.to_i
|
461
|
+
cur_step = fp_type?(tensor.options[:output_type]) ? start.to_f : start.to_i
|
462
|
+
r = []
|
463
|
+
Kernel.loop do
|
464
|
+
break if start == limit
|
465
|
+
break if (start < limit) && (cur_step >= limit)
|
466
|
+
break if (start > limit) && (cur_step <= limit)
|
467
|
+
|
468
|
+
r << cur_step
|
469
|
+
cur_step += delta
|
470
|
+
end
|
471
|
+
r
|
472
|
+
convert_to_opencl(r, [r.size], data_type: tensor.options[:output_type], name: tensor.name)
|
473
|
+
end
|
474
|
+
end
|
445
475
|
end
|
446
476
|
end
|
447
477
|
end
|
@@ -24,7 +24,7 @@ module TensorStream
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
output_buffer = _create_result_buffer(tensor.data_type, [image.height, image.width, channels], "out_#{tensor.name}")
|
27
|
+
output_buffer = _create_result_buffer(tensor.data_type, [image.height, image.width, channels], "out_#{tensor.name}", allocate_host: true)
|
28
28
|
|
29
29
|
image.grayscale! if channels == 1
|
30
30
|
image.pixels.each_with_index do |pixel, index|
|
@@ -6,8 +6,9 @@ __kernel void gemm_<%= dtype %>(const int M, const int N, const int K,
|
|
6
6
|
__global <%= c_dtype %>* C) {
|
7
7
|
|
8
8
|
// Get the index of the current element to be processed
|
9
|
-
const int
|
10
|
-
const int
|
9
|
+
const int index = get_global_id(0);
|
10
|
+
const int globalRow = get_global_id(1); // Row ID of C (0..M)
|
11
|
+
const int globalCol = get_global_id(2); // Col ID of C (0..N)
|
11
12
|
|
12
13
|
// Compute a single element (loop over K)
|
13
14
|
<%= c_dtype %> acc = 0.0f;
|
@@ -16,9 +17,9 @@ __kernel void gemm_<%= dtype %>(const int M, const int N, const int K,
|
|
16
17
|
int b_index = k*N + globalCol;
|
17
18
|
<% if ta %>a_index = M*k + globalRow;<% end %>
|
18
19
|
<% if tb %>b_index = globalCol*K + k;<% end %>
|
19
|
-
acc += A[a_index] * B[b_index];
|
20
|
+
acc += A[a_index + index * <%= n_a %>] * B[b_index + index * <%= n_b %>];
|
20
21
|
}
|
21
22
|
|
22
23
|
// Store the result
|
23
|
-
C[globalRow*N + globalCol] = acc;
|
24
|
+
C[index * <%= n %> + globalRow*N + globalCol] = acc;
|
24
25
|
}
|
@@ -0,0 +1,7 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
|
2
|
+
__kernel void random_uniform_<%= dtype %>(const int seed_ptr, const float min, const float max, __global const <%= c_dtype %> *rand_table, __global <%= c_dtype %> *C) {
|
3
|
+
// Get the index of the current element to be processed
|
4
|
+
const int id = get_global_id(0);
|
5
|
+
<%= c_dtype %> rand_value = rand_table[ (seed_ptr + id) % <%= tsize %>];
|
6
|
+
C[id] = rand_value * (max - min) + min;
|
7
|
+
}
|
@@ -54,26 +54,28 @@ module TensorStream
|
|
54
54
|
register_op :mat_mul do |_context, tensor, inputs|
|
55
55
|
a, b = inputs
|
56
56
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
k =
|
57
|
+
a_matrix_shape = a.shape.dup
|
58
|
+
b_matrix_shape = b.shape.dup
|
59
|
+
|
60
|
+
k = a_matrix_shape.pop
|
61
|
+
m = a_matrix_shape.pop
|
62
|
+
n = b_matrix_shape.pop
|
63
|
+
v = b_matrix_shape.pop
|
61
64
|
|
62
65
|
if tensor.options[:transpose_a]
|
63
|
-
m =
|
64
|
-
k = a.shape[0]
|
66
|
+
m, k = k, m
|
65
67
|
end
|
66
68
|
|
67
69
|
if tensor.options[:transpose_b]
|
68
|
-
n =
|
69
|
-
v = b.shape[1]
|
70
|
+
n, v = v, n
|
70
71
|
end
|
71
72
|
|
72
|
-
result_shape = [m, n]
|
73
|
+
result_shape = [a_matrix_shape.first, m, n].compact
|
74
|
+
work_group = [a_matrix_shape.first || 1, m, n]
|
73
75
|
|
74
76
|
raise "#{tensor.inputs[0].name} rank must be greater than 1" if a.shape.size < 2
|
75
77
|
raise "#{tensor.inputs[1].name} rank must be greater than 1" if b.shape.size < 2
|
76
|
-
raise "#{tensor.inputs[0].name} unsupported rank" if b.shape.size
|
78
|
+
raise "#{tensor.inputs[0].name} unsupported rank" if b.shape.size > 3 || a.shape.size > 3
|
77
79
|
raise "incompatible shape sizes for matrix multiplication (#{a.shape[1]} != #{b.shape[0]}) #{a.shape} vs #{b.shape}" if k != v
|
78
80
|
|
79
81
|
dtype = tensor.data_type
|
@@ -85,7 +87,7 @@ module TensorStream
|
|
85
87
|
cl_k = OpenCL::Int1.new(k)
|
86
88
|
|
87
89
|
event_wait_list = build_event_wait_list([a, b])
|
88
|
-
output_buffer.op = _cl_program('gemm', ta: !!tensor.options[:transpose_a], tb: !!tensor.options[:transpose_b], dtype: dtype).send(:"gemm_#{dtype}", _opencl_queue,
|
90
|
+
output_buffer.op = _cl_program('gemm', ta: !!tensor.options[:transpose_a], tb: !!tensor.options[:transpose_b], n: m * n, n_a: m * k, n_b: n * v, dtype: dtype).send(:"gemm_#{dtype}", _opencl_queue, work_group, cl_m, cl_n, cl_k, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
|
89
91
|
|
90
92
|
output_buffer
|
91
93
|
end
|
@@ -1,7 +1,48 @@
|
|
1
1
|
module TensorStream
|
2
2
|
# Buffer used by the OpenCL evaluator
|
3
3
|
class OpenCLBuffer < Buffer
|
4
|
+
class LazyBuffer
|
5
|
+
attr_reader :data_type
|
6
|
+
|
7
|
+
def initialize(data_type, size)
|
8
|
+
@data_type = data_type
|
9
|
+
@size = size
|
10
|
+
end
|
11
|
+
|
12
|
+
def size
|
13
|
+
@size
|
14
|
+
end
|
15
|
+
|
16
|
+
def element_size
|
17
|
+
buffer_size_for_type(@data_type)
|
18
|
+
end
|
19
|
+
|
20
|
+
def buffer_size_for_type(data_type)
|
21
|
+
case data_type
|
22
|
+
when :float, :float32, :float16
|
23
|
+
4
|
24
|
+
when :float64
|
25
|
+
8
|
26
|
+
when :int, :int32, :int64, :uint64, :uint32 # NArray does not have 64 bit int types
|
27
|
+
4
|
28
|
+
when :int16, :uint16
|
29
|
+
2
|
30
|
+
when :uint8, :int8
|
31
|
+
1
|
32
|
+
when :boolean
|
33
|
+
1
|
34
|
+
when :string
|
35
|
+
1
|
36
|
+
when :unknown
|
37
|
+
nil
|
38
|
+
else
|
39
|
+
raise "unsupported type #{data_type}"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
4
44
|
include ArrayOpsHelper
|
45
|
+
include TensorStream::CLEventHelpers
|
5
46
|
|
6
47
|
attr_accessor :shape, :buffer, :cl_buffer, :op, :owner
|
7
48
|
|
@@ -24,15 +65,33 @@ module TensorStream
|
|
24
65
|
end
|
25
66
|
|
26
67
|
def inspect
|
27
|
-
"CLBuffer(shape: #{shape || "?"} data_type: #{data_type}, cl_allocated: #{cl_buffer ? cl_buffer.size : 'unallocated'}) -> raw: #{buffer.to_a}"
|
68
|
+
"CLBuffer(name: #{name} shape: #{shape || "?"} data_type: #{data_type}, cl_allocated: #{cl_buffer ? cl_buffer.size : 'unallocated'}) -> raw: #{buffer.to_a}"
|
69
|
+
end
|
70
|
+
|
71
|
+
def buffer!
|
72
|
+
return buffer if buffer.is_a?(NArray)
|
73
|
+
|
74
|
+
@buffer = OpenCLBuffer.allocate_narray_for_type(buffer.data_type, buffer.size) if buffer.is_a?(LazyBuffer)
|
75
|
+
|
76
|
+
command_queue.enqueue_read_buffer(cl_buffer, @buffer, blocking: true, event_wait_list: build_event_wait_list([self]))
|
77
|
+
@buffer
|
78
|
+
end
|
79
|
+
|
80
|
+
def command_queue
|
81
|
+
@command_queue ||= begin
|
82
|
+
first_op = op.is_a?(Array) ? op.first : op
|
83
|
+
first_op.command_queue
|
84
|
+
end
|
28
85
|
end
|
29
86
|
|
30
87
|
def to_ruby
|
88
|
+
buffer! if buffer.is_a?(LazyBuffer)
|
89
|
+
|
31
90
|
return [] if buffer.empty?
|
32
91
|
|
33
92
|
if dirty
|
34
|
-
|
35
|
-
|
93
|
+
command_queue.enqueue_read_buffer(cl_buffer, buffer, event_wait_list: [op].compact)
|
94
|
+
command_queue.finish
|
36
95
|
self.dirty = false
|
37
96
|
end
|
38
97
|
|
@@ -54,5 +113,28 @@ module TensorStream
|
|
54
113
|
def self.nil_buffer(owner, name, data_type)
|
55
114
|
OpenCLBuffer.new(owner, name: name, data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil)
|
56
115
|
end
|
116
|
+
|
117
|
+
def self.allocate_narray_for_type(data_type, narray_size)
|
118
|
+
case data_type
|
119
|
+
when :float, :float32, :float16
|
120
|
+
NArray.sfloat(narray_size)
|
121
|
+
when :float64
|
122
|
+
NArray.float(narray_size)
|
123
|
+
when :int, :int32, :int64, :uint64, :uint32 # NArray does not have 64 bit int types
|
124
|
+
NArray.int(narray_size)
|
125
|
+
when :int16, :uint16
|
126
|
+
NArray.sint(narray_size)
|
127
|
+
when :uint8, :int8
|
128
|
+
NArray.byte(narray_size)
|
129
|
+
when :boolean
|
130
|
+
NArray.byte(narray_size)
|
131
|
+
when :string
|
132
|
+
NArray.byte(narray_size)
|
133
|
+
when :unknown
|
134
|
+
nil
|
135
|
+
else
|
136
|
+
raise "unsupported type #{data_type}"
|
137
|
+
end
|
138
|
+
end
|
57
139
|
end
|
58
140
|
end
|
@@ -13,6 +13,7 @@ require 'tensor_stream/opencl/math_ops'
|
|
13
13
|
require 'tensor_stream/opencl/nn_ops'
|
14
14
|
require 'tensor_stream/opencl/images_ops'
|
15
15
|
require 'tensor_stream/opencl/array_ops'
|
16
|
+
require 'tensor_stream/opencl/random_ops'
|
16
17
|
require 'tensor_stream/helpers/op_helper'
|
17
18
|
|
18
19
|
module TensorStream
|
@@ -49,6 +50,8 @@ module TensorStream
|
|
49
50
|
include TensorStream::OpenCLHelpers::NNOps
|
50
51
|
include TensorStream::OpenCLHelpers::ImagesOps
|
51
52
|
include TensorStream::OpenCLHelpers::ArrayOps
|
53
|
+
include TensorStream::OpenCLHelpers::RandomOps
|
54
|
+
include TensorStream::CLEventHelpers
|
52
55
|
|
53
56
|
def initialize(session, device, thread_pool: nil, log_intermediates: false)
|
54
57
|
super
|
@@ -159,6 +162,9 @@ module TensorStream
|
|
159
162
|
return [] if buffer.buffer.nil?
|
160
163
|
return buffer if buffer.buffer.size.zero?
|
161
164
|
|
165
|
+
# lazy allocate
|
166
|
+
buffer.buffer = OpenCLBuffer.allocate_narray_for_type(buffer.buffer.data_type, buffer.buffer.size) if buffer.buffer.is_a?(OpenCLBuffer::LazyBuffer)
|
167
|
+
|
162
168
|
buffer.op = _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: build_event_wait_list([buffer]))
|
163
169
|
buffer
|
164
170
|
end
|
@@ -167,12 +173,19 @@ module TensorStream
|
|
167
173
|
def complete_eval(tensor, context)
|
168
174
|
return nil if tensor.nil?
|
169
175
|
|
170
|
-
|
171
|
-
|
176
|
+
buffers = if tensor.is_a?(Array)
|
177
|
+
tensor.map { |t|
|
178
|
+
enqueue_buffer_read(t, context)
|
179
|
+
}
|
180
|
+
else
|
181
|
+
[enqueue_buffer_read(tensor, context)]
|
182
|
+
end
|
183
|
+
|
184
|
+
events = build_event_wait_list(buffers)
|
172
185
|
# puts "** wait #{tensor.name} **"
|
173
186
|
OpenCL.wait_for_events(events) unless events.empty?
|
174
187
|
# puts "** done #{tensor.name} **"
|
175
|
-
|
188
|
+
tensor.is_a?(Array) ? buffers : buffers.first
|
176
189
|
end
|
177
190
|
|
178
191
|
def self.query_devices_with_score
|
@@ -355,9 +368,13 @@ module TensorStream
|
|
355
368
|
|
356
369
|
register_op :identity do |_context, tensor, inputs|
|
357
370
|
value = inputs[0]
|
358
|
-
|
359
|
-
|
360
|
-
|
371
|
+
if value.is_a?(OutputGroup)
|
372
|
+
value
|
373
|
+
else
|
374
|
+
buffer = OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type, shape: value.shape, buffer: value.buffer, cl_buffer: value.cl_buffer)
|
375
|
+
buffer.op = build_event_wait_list(inputs)
|
376
|
+
buffer
|
377
|
+
end
|
361
378
|
end
|
362
379
|
|
363
380
|
register_op :assign, noop: true do |context, tensor, inputs|
|
@@ -773,9 +790,9 @@ module TensorStream
|
|
773
790
|
value
|
774
791
|
elsif data_type == :string && shape.empty?
|
775
792
|
cl_buffer_size = value[0].bytesize
|
776
|
-
allocate_narray_for_type(data_type, value[0].bytesize)
|
793
|
+
OpenCLBuffer.allocate_narray_for_type(data_type, value[0].bytesize)
|
777
794
|
else
|
778
|
-
|
795
|
+
OpenCLBuffer.allocate_narray_for_type(data_type, narray_size)
|
779
796
|
end
|
780
797
|
|
781
798
|
return nil if buffer.nil?
|
@@ -818,39 +835,17 @@ module TensorStream
|
|
818
835
|
cl_object
|
819
836
|
end
|
820
837
|
|
821
|
-
def
|
822
|
-
case data_type
|
823
|
-
when :float, :float32, :float16
|
824
|
-
NArray.sfloat(narray_size)
|
825
|
-
when :float64
|
826
|
-
NArray.float(narray_size)
|
827
|
-
when :int, :int32, :int64, :uint64, :uint32 # NArray does not have 64 bit int types
|
828
|
-
NArray.int(narray_size)
|
829
|
-
when :int16, :uint16
|
830
|
-
NArray.sint(narray_size)
|
831
|
-
when :uint8, :int8
|
832
|
-
NArray.byte(narray_size)
|
833
|
-
when :boolean
|
834
|
-
NArray.byte(narray_size)
|
835
|
-
when :string
|
836
|
-
NArray.byte(narray_size)
|
837
|
-
when :unknown
|
838
|
-
nil
|
839
|
-
else
|
840
|
-
raise "unsupported type #{data_type}"
|
841
|
-
end
|
842
|
-
end
|
843
|
-
|
844
|
-
def _create_result_buffer(data_type, shape, name)
|
838
|
+
def _create_result_buffer(data_type, shape, name, allocate_host: false)
|
845
839
|
return OpenCLBuffer.nil_buffer(self, name, data_type) if shape == [0]
|
846
840
|
|
847
841
|
cache_key = "_result_#{name}_#{shape.join('_')}:#{object_id}"
|
848
842
|
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
849
843
|
# puts "create result buffer #{cache_key}"
|
850
844
|
size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
|
851
|
-
|
852
|
-
cl_buffer = _opencl_context.create_buffer(
|
853
|
-
|
845
|
+
lazy_buffer = !allocate_host ? OpenCLBuffer::LazyBuffer.new(data_type, size) : OpenCLBuffer.allocate_narray_for_type(data_type, size)
|
846
|
+
cl_buffer = _opencl_context.create_buffer(size * lazy_buffer.element_size)
|
847
|
+
|
848
|
+
OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: lazy_buffer, cl_buffer: cl_buffer, name: name)
|
854
849
|
end
|
855
850
|
end
|
856
851
|
|
@@ -859,7 +854,7 @@ module TensorStream
|
|
859
854
|
cache_key ="_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
|
860
855
|
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
861
856
|
size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
|
862
|
-
buffer = allocate_narray_for_type(data_type, size)
|
857
|
+
buffer = OpenCLBuffer.allocate_narray_for_type(data_type, size)
|
863
858
|
|
864
859
|
if parent_buffer.cl_buffer.associated_memobject.nil?
|
865
860
|
start = index * buffer.size * buffer.element_size
|
@@ -890,7 +885,7 @@ module TensorStream
|
|
890
885
|
cache_key = "_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
|
891
886
|
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
892
887
|
size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
|
893
|
-
buffer = allocate_narray_for_type(data_type, size)
|
888
|
+
buffer = OpenCLBuffer.allocate_narray_for_type(data_type, size)
|
894
889
|
|
895
890
|
if parent_buffer.cl_buffer.associated_memobject.nil?
|
896
891
|
region = OpenCL::BufferRegion::new(start, region_size_in_bytes)
|
@@ -980,14 +975,6 @@ module TensorStream
|
|
980
975
|
shape.is_a?(Array) ? shape.size : 0
|
981
976
|
end
|
982
977
|
|
983
|
-
def build_event_wait_list(inputs)
|
984
|
-
if inputs.is_a?(Array)
|
985
|
-
inputs.flatten.compact.map(&:op).compact.uniq
|
986
|
-
else
|
987
|
-
inputs.op ? [inputs.op] : []
|
988
|
-
end
|
989
|
-
end
|
990
|
-
|
991
978
|
def resolve_placeholder(placeholder, _execution_context = {})
|
992
979
|
return nil if placeholder.nil?
|
993
980
|
return placeholder unless placeholder.is_a?(Placeholder)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module TensorStream
|
2
|
+
module OpenCLHelpers
|
3
|
+
# Collection of random number generation functions for interfacing with OpenCL kernels
|
4
|
+
module RandomOps
|
5
|
+
RAND_TABLE_SIZE = 1024 * 1024
|
6
|
+
|
7
|
+
def RandomOps.included(klass)
|
8
|
+
klass.class_eval do
|
9
|
+
register_op :random_uniform do |context, tensor, inputs|
|
10
|
+
maxval = tensor.options.fetch(:maxval, 1)
|
11
|
+
minval = tensor.options.fetch(:minval, 0)
|
12
|
+
seed = tensor.options[:seed]
|
13
|
+
|
14
|
+
rand_buffer = @context[:_cache][:_cl_buffers]["_rand"] ||= begin
|
15
|
+
@context[:_cache][:_cl_buffers]["_rand_seed_ptr"] = 0
|
16
|
+
random = _get_randomizer(tensor, seed)
|
17
|
+
rand_table = RAND_TABLE_SIZE.times.map { random.rand }
|
18
|
+
convert_to_opencl(rand_table, [RAND_TABLE_SIZE], data_type: tensor.data_type, name: "rand_#{tensor.data_type}")
|
19
|
+
end
|
20
|
+
@context[:_cache][:_cl_buffers]["_rand_seed_ptr"] ||= 0
|
21
|
+
|
22
|
+
seed_ptr = @context[:_cache][:_cl_buffers]["_rand_seed_ptr"]
|
23
|
+
|
24
|
+
shape = read_final_result(complete_eval(inputs[0], context))
|
25
|
+
shape = shape || tensor.shape.shape
|
26
|
+
workgroup = [shape.reduce(:*) || 1 ]
|
27
|
+
cl_seed_ptr = OpenCL::Int1.new(seed_ptr)
|
28
|
+
cl_min = OpenCL::Float1.new(minval)
|
29
|
+
cl_max = OpenCL::Float1.new(maxval)
|
30
|
+
|
31
|
+
@context[:_cache][:_cl_buffers]["_rand_seed_ptr"] = (seed_ptr + (shape.reduce(:*) || 0) ) % RAND_TABLE_SIZE
|
32
|
+
buffer = _create_result_buffer(tensor.data_type, shape, tensor.name)
|
33
|
+
buffer.op = _cl_program("random_uniform", dtype: tensor.data_type, tsize: RAND_TABLE_SIZE).send(:"random_uniform_#{tensor.data_type}", _opencl_queue, workgroup, cl_seed_ptr, cl_min, cl_max, rand_buffer.cl_buffer, buffer.cl_buffer)
|
34
|
+
buffer
|
35
|
+
end
|
36
|
+
|
37
|
+
def _get_randomizer(tensor, seed)
|
38
|
+
if tensor.graph.random_seed && seed
|
39
|
+
Random.new(tensor.graph.random_seed ^ seed)
|
40
|
+
elsif tensor.graph.random_seed
|
41
|
+
@session.randomizer[tensor.graph.object_id] ||= Random.new(tensor.graph.random_seed)
|
42
|
+
@session.randomizer[tensor.graph.object_id]
|
43
|
+
elsif seed
|
44
|
+
@session.randomizer[tensor.operation] ||= Random.new(seed)
|
45
|
+
@session.randomizer[tensor.operation]
|
46
|
+
else
|
47
|
+
Random.new
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module TensorStream
|
2
|
+
class OpenCLUtil
  ##
  # initializes a OpenCL helper class based on a session
  def initialize(session)
    @session = session
  end

  ##
  # Retrieves OpenCL memory usage
  #
  # Sums +size+ over the distinct +cl_buffer+ objects held by the entries of
  # the last session context's +:_cl_buffers+ cache. A +cl_buffer+ shared by
  # several entries is counted once.
  #
  # Entries that do not respond to +cl_buffer+ (the cache can also hold plain
  # bookkeeping values such as an Integer offset) and entries whose
  # +cl_buffer+ is nil are skipped instead of raising NoMethodError.
  #
  # Returns the summed size; 0 when the cache holds no buffers.
  def get_memory_usage
    cache_entries = @session.last_session_context[:_cache][:_cl_buffers].values

    device_buffers = cache_entries
                     .select { |entry| entry.respond_to?(:cl_buffer) }
                     .map(&:cl_buffer)
                     .compact

    # De-duplicate by identity rather than equality.
    device_buffers.uniq(&:object_id).sum(&:size)
  end
end
|
18
|
+
# Helpers for gathering the +op+ values attached to op inputs.
module CLEventHelpers
  # Builds the list of distinct, non-nil +op+ values carried by the inputs.
  #
  # Going by the method name, the +op+ values are presumably OpenCL events a
  # kernel must wait on -- confirm against the callers.
  #
  # inputs - a single object responding to +op+, a (possibly nested) Array
  #          of such objects, or nil. nil entries are ignored.
  #
  # Returns an Array; empty when there are no inputs or none has an +op+.
  def build_event_wait_list(inputs)
    # Wrapping in an Array lets a lone input, a nested list and nil all go
    # through the same pipeline; previously a nil scalar raised NoMethodError.
    [inputs].flatten.compact.map(&:op).compact.uniq
  end
end
|
27
|
+
end
|
data/samples/mnist_data_2.3.rb
CHANGED
@@ -53,11 +53,16 @@ b5 = tf.variable(tf.zeros([10]))
|
|
53
53
|
|
54
54
|
x_ = tf.reshape(x, [-1, 784])
|
55
55
|
|
56
|
+
|
56
57
|
y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
58
|
+
Y1d = tf.nn.dropout(y1, pkeep)
|
59
|
+
y2 = tf.nn.relu(tf.matmul(Y1d, w2) + b2)
|
60
|
+
Y2d = tf.nn.dropout(y2, pkeep)
|
61
|
+
y3 = tf.nn.relu(tf.matmul(Y2d, w3) + b3)
|
62
|
+
Y3d = tf.nn.dropout(y3, pkeep)
|
63
|
+
y4 = tf.nn.relu(tf.matmul(Y3d, w4) + b4)
|
64
|
+
Y4d = tf.nn.dropout(y4, pkeep)
|
65
|
+
ylogits = tf.matmul(Y4d, w5) + b5
|
61
66
|
|
62
67
|
# model
|
63
68
|
y = tf.nn.softmax(ylogits)
|
data/samples/mnist_data_3.0.rb
CHANGED
@@ -85,8 +85,8 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
|
|
85
85
|
# reshape the output from the third convolution for the fully connected layer
|
86
86
|
yy = tf.reshape(y3, [-1, 7 * 7 * M])
|
87
87
|
y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
|
88
|
-
|
89
|
-
ylogits = tf.matmul(
|
88
|
+
YY4 = tf.nn.dropout(y4, pkeep)
|
89
|
+
ylogits = tf.matmul(YY4, w5) + b5
|
90
90
|
|
91
91
|
# model
|
92
92
|
y = tf.nn.softmax(ylogits, name: 'out')
|
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency "awesome_print"
|
40
40
|
spec.add_development_dependency "mnist-learn"
|
41
41
|
spec.add_development_dependency "simplecov"
|
42
|
-
spec.add_dependency "tensor_stream", "1.0.
|
42
|
+
spec.add_dependency "tensor_stream", "1.0.7"
|
43
43
|
spec.add_dependency "opencl_ruby_ffi"
|
44
44
|
spec.add_dependency "oily_png"
|
45
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -114,14 +114,14 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - '='
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 1.0.
|
117
|
+
version: 1.0.7
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - '='
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 1.0.
|
124
|
+
version: 1.0.7
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: opencl_ruby_ffi
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -167,6 +167,7 @@ files:
|
|
167
167
|
- README.md
|
168
168
|
- Rakefile
|
169
169
|
- benchmark/benchmark.rb
|
170
|
+
- benchmark_imac2015_iris.txt
|
170
171
|
- benchmark_intel.txt
|
171
172
|
- benchmark_ryzen.txt
|
172
173
|
- benchmark_ryzen_nvidia.txt
|
@@ -219,6 +220,7 @@ files:
|
|
219
220
|
- lib/tensor_stream/opencl/kernels/pack.cl
|
220
221
|
- lib/tensor_stream/opencl/kernels/pow.cl
|
221
222
|
- lib/tensor_stream/opencl/kernels/prod.cl
|
223
|
+
- lib/tensor_stream/opencl/kernels/random_uniform.cl
|
222
224
|
- lib/tensor_stream/opencl/kernels/real_div.cl
|
223
225
|
- lib/tensor_stream/opencl/kernels/reciprocal.cl
|
224
226
|
- lib/tensor_stream/opencl/kernels/reduce_axis.cl
|
@@ -250,6 +252,8 @@ files:
|
|
250
252
|
- lib/tensor_stream/opencl/opencl_device.rb
|
251
253
|
- lib/tensor_stream/opencl/opencl_evaluator.rb
|
252
254
|
- lib/tensor_stream/opencl/opencl_template_helper.rb
|
255
|
+
- lib/tensor_stream/opencl/random_ops.rb
|
256
|
+
- lib/tensor_stream/opencl/utils.rb
|
253
257
|
- lib/tensor_stream/opencl/version.rb
|
254
258
|
- samples/build_mnist_model.rb
|
255
259
|
- samples/classify.rb
|