tensor_stream-opencl 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/benchmark/benchmark.rb +63 -37
- data/benchmark_imac2015_iris.txt +80 -0
- data/lib/tensor_stream/opencl.rb +1 -0
- data/lib/tensor_stream/opencl/array_ops.rb +37 -7
- data/lib/tensor_stream/opencl/images_ops.rb +1 -1
- data/lib/tensor_stream/opencl/kernels/gemm.cl +5 -4
- data/lib/tensor_stream/opencl/kernels/random_uniform.cl +7 -0
- data/lib/tensor_stream/opencl/math_ops.rb +13 -11
- data/lib/tensor_stream/opencl/opencl_buffer.rb +85 -3
- data/lib/tensor_stream/opencl/opencl_evaluator.rb +32 -45
- data/lib/tensor_stream/opencl/random_ops.rb +54 -0
- data/lib/tensor_stream/opencl/utils.rb +27 -0
- data/lib/tensor_stream/opencl/version.rb +1 -1
- data/samples/mnist_data_2.3.rb +9 -4
- data/samples/mnist_data_3.0.rb +2 -2
- data/tensor_stream-opencl.gemspec +1 -1
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b433e9e7ab38a517c21b57065e5a43b112640fd7c419fb7baa2f3319128cdacf
|
4
|
+
data.tar.gz: fab7d48513cb0f8481e151d18b088782918cb1539b59586613a00c4d5f5aeed2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04d106f5ee5fac49eba20ff143bb2212a1cafd5140fc04cee20958ffea0c5909d352824948badf16ec5bc8ca2a7b13b4dcf7748eb03cbd6dc8a466c6ae0f5040
|
7
|
+
data.tar.gz: e17171f28641ce3496c0b338b6913c96e10d9fd5ce93b7980dae6edef00e63e5f7c4dcb60ed04fed5271a474b4940d069ebcf6a00bbfd3c4e6eafa2c0c4f26ed
|
data/benchmark/benchmark.rb
CHANGED
@@ -4,6 +4,25 @@ require 'benchmark'
|
|
4
4
|
require 'pry-byebug'
|
5
5
|
require 'awesome_print'
|
6
6
|
require 'tensor_stream/opencl'
|
7
|
+
require 'rbconfig'
|
8
|
+
|
9
|
+
# Detects the host operating system family from RbConfig's 'host_os' entry.
#
# The result is memoized in @os, so the detection runs once per process.
#
# @return [Symbol] :windows, :macosx, :linux or :unix
# @raise [RuntimeError] when host_os matches none of the known patterns
def os
  @os ||= begin
    host_os = RbConfig::CONFIG['host_os']
    case host_os
    when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
      :windows
    when /darwin|mac os/
      :macosx
    when /linux/
      :linux
    when /solaris|bsd/
      :unix
    else
      # This script defines no Error::WebDriverError constant, so referencing it
      # here would surface as a NameError and hide the real message.
      raise "unknown os: #{host_os.inspect}"
    end
  end
end
|
7
26
|
|
8
27
|
def tr(t, places = 1)
|
9
28
|
if t.is_a?(Array)
|
@@ -77,49 +96,56 @@ conv2d_grad = tf.gradients(conv2d, [sample_image, sample_filter])
|
|
77
96
|
|
78
97
|
bias_add = tf.nn.bias_add(large_tensor, large_tensor_bias)
|
79
98
|
bias_add_grad = tf.gradients(bias_add, [large_tensor_bias])
|
99
|
+
dropout = tf.nn.dropout(large_tensor, 0.8)
|
80
100
|
|
81
101
|
puts TensorStream::Evaluator.default_evaluators
|
82
102
|
|
83
103
|
sess2 = tf.session
|
84
104
|
|
85
|
-
|
105
|
+
if os == :macosx
|
106
|
+
puts `sysctl -n machdep.cpu.brand_string`
|
107
|
+
else
|
108
|
+
puts `cat /proc/cpuinfo | grep "model name" | head -1`
|
109
|
+
end
|
86
110
|
device = TensorStream::Evaluator::OpenclEvaluator.default_device.native_device
|
87
111
|
puts "OpenCL device #{device.platform.to_s} #{device.name}"
|
88
112
|
Benchmark.bmbm do |x|
|
89
|
-
x.report("
|
90
|
-
x.report("opencl argmin
|
91
|
-
x.report("
|
92
|
-
x.report("opencl bias_add_grad
|
93
|
-
x.report("
|
94
|
-
x.report("opencl bias_add
|
95
|
-
x.report("
|
96
|
-
x.report("opencl conv2d_backprop
|
97
|
-
x.report("
|
98
|
-
x.report("opencl conv2d
|
99
|
-
x.report("
|
100
|
-
x.report("opencl arr index
|
101
|
-
x.report("
|
102
|
-
x.report("opencl min
|
103
|
-
x.report("
|
104
|
-
x.report("opencl sum
|
105
|
-
x.report("
|
106
|
-
x.report("opencl sum axis 1
|
107
|
-
x.report("
|
108
|
-
x.report("opencl split
|
109
|
-
x.report("
|
110
|
-
x.report("opencl add_n
|
111
|
-
x.report("
|
112
|
-
x.report("opencl ooo matmul
|
113
|
-
x.report("
|
114
|
-
x.report("opencl softmax
|
115
|
-
x.report("
|
116
|
-
x.report("opencl matmul
|
117
|
-
x.report("
|
118
|
-
x.report("opencl
|
119
|
-
x.report("
|
120
|
-
x.report("opencl
|
121
|
-
x.report("
|
122
|
-
x.report("opencl pow float:") { 100.times do sess2.run(pow_f, feed_dict: { p => rand, q => rand }) end }
|
123
|
-
x.report("
|
124
|
-
x.report("opencl pow int:") { 100.times do sess2.run(pow_i, feed_dict: { p => rand, q => rand }) end }
|
113
|
+
x.report("ruby argmin :") { 100.times do sess.run(argmin) end }
|
114
|
+
x.report("opencl argmin :") { 100.times do sess2.run(argmin) end }
|
115
|
+
x.report("ruby bias_add_grad :") { 100.times do sess.run(bias_add_grad) end }
|
116
|
+
x.report("opencl bias_add_grad :") { 100.times do sess2.run(bias_add_grad) end }
|
117
|
+
x.report("ruby bias_add :") { 100.times do sess.run(bias_add) end }
|
118
|
+
x.report("opencl bias_add :") { 100.times do sess2.run(bias_add) end }
|
119
|
+
x.report("ruby conv2d_backprop :") { 100.times do sess.run(conv2d_grad) end }
|
120
|
+
x.report("opencl conv2d_backprop :") { 100.times do sess2.run(conv2d_grad) end }
|
121
|
+
x.report("ruby conv2d :") { 100.times do sess.run(conv2d) end }
|
122
|
+
x.report("opencl conv2d :") { 100.times do sess2.run(conv2d) end }
|
123
|
+
x.report("ruby arr index :") { 100.times do sess.run(index) end }
|
124
|
+
x.report("opencl arr index :") { 100.times do sess2.run(index) end }
|
125
|
+
x.report("ruby min :") { 100.times do sess.run(min) end }
|
126
|
+
x.report("opencl min :") { 100.times do sess2.run(min) end }
|
127
|
+
x.report("ruby sum :") { 100.times do sess.run(sum) end }
|
128
|
+
x.report("opencl sum :") { 100.times do sess2.run(sum) end }
|
129
|
+
x.report("ruby sum axis 1 :") { 100.times do sess.run(sum_axis_1) end }
|
130
|
+
x.report("opencl sum axis 1 :") { 100.times do sess2.run(sum_axis_1) end }
|
131
|
+
x.report("ruby split :") { 100.times do sess.run(split) end }
|
132
|
+
x.report("opencl split :") { 100.times do sess2.run(split) end }
|
133
|
+
x.report("ruby add_n :") { 100.times do sess.run(add_n) end }
|
134
|
+
x.report("opencl add_n :") { 100.times do sess2.run(add_n) end }
|
135
|
+
x.report("ruby ooo matmul :") { 100.times do sess.run(out_of_order) end }
|
136
|
+
x.report("opencl ooo matmul :") { 100.times do sess2.run(out_of_order) end }
|
137
|
+
x.report("ruby softmax :") { 100.times do sess.run(softmax) end }
|
138
|
+
x.report("opencl softmax :") { 100.times do sess2.run(softmax) end }
|
139
|
+
x.report("ruby matmul :") { 100.times do sess.run(matmul) end }
|
140
|
+
x.report("opencl matmul :") { 100.times do sess2.run(matmul) end }
|
141
|
+
x.report("ruby :") { 100.times do sess.run(model, feed_dict: { p => rand, q => rand }) end }
|
142
|
+
x.report("opencl :") { 100.times do sess2.run(model, feed_dict: { p => rand, q => rand }) end }
|
143
|
+
x.report("ruby single function :") { 100.times do sess.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
|
144
|
+
x.report("opencl single function :") { 100.times do sess2.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
|
145
|
+
x.report("ruby pow float :") { 100.times do sess.run(pow_f, feed_dict: { p => rand, q => rand }) end }
|
146
|
+
x.report("opencl pow float :") { 100.times do sess2.run(pow_f, feed_dict: { p => rand, q => rand }) end }
|
147
|
+
x.report("ruby pow int :") { 100.times do sess.run(pow_i, feed_dict: { p => rand, q => rand }) end }
|
148
|
+
x.report("opencl pow int :") { 100.times do sess2.run(pow_i, feed_dict: { p => rand, q => rand }) end }
|
149
|
+
x.report("ruby dropout :") { 100.times do sess.run(dropout) end }
|
150
|
+
x.report("opencl dropout :") { 100.times do sess2.run(dropout) end }
|
125
151
|
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
TensorStream::Evaluator::OpenclEvaluator
|
2
|
+
TensorStream::Evaluator::RubyEvaluator
|
3
|
+
Intel(R) Core(TM) i5-5575R CPU @ 2.80GHz
|
4
|
+
OpenCL device Apple Intel(R) Iris(TM) Pro Graphics 6200
|
5
|
+
Rehearsal ------------------------------------------------------------
|
6
|
+
ruby argmin : 0.940000 0.030000 0.970000 ( 1.197240)
|
7
|
+
opencl argmin : 0.070000 0.020000 0.090000 ( 0.093035)
|
8
|
+
ruby bias_add_grad : 2.390000 0.060000 2.450000 ( 2.558622)
|
9
|
+
opencl bias_add_grad : 0.020000 0.010000 0.030000 ( 0.030563)
|
10
|
+
ruby bias_add : 2.530000 0.070000 2.600000 ( 2.749423)
|
11
|
+
opencl bias_add : 0.150000 0.030000 0.180000 ( 0.191476)
|
12
|
+
ruby conv2d_backprop : 4.020000 0.060000 4.080000 ( 5.306408)
|
13
|
+
opencl conv2d_backprop : 0.040000 0.030000 0.070000 ( 0.077737)
|
14
|
+
ruby conv2d : 0.890000 0.010000 0.900000 ( 0.963062)
|
15
|
+
opencl conv2d : 0.030000 0.010000 0.040000 ( 0.042274)
|
16
|
+
ruby arr index : 0.000000 0.000000 0.000000 ( 0.004072)
|
17
|
+
opencl arr index : 0.010000 0.010000 0.020000 ( 0.023981)
|
18
|
+
ruby min : 3.710000 0.040000 3.750000 ( 4.329215)
|
19
|
+
opencl min : 0.160000 0.030000 0.190000 ( 0.191062)
|
20
|
+
ruby sum : 6.930000 0.080000 7.010000 ( 7.467194)
|
21
|
+
opencl sum : 0.010000 0.010000 0.020000 ( 0.034392)
|
22
|
+
ruby sum axis 1 : 6.920000 0.070000 6.990000 ( 7.412997)
|
23
|
+
opencl sum axis 1 : 0.020000 0.020000 0.040000 ( 0.027614)
|
24
|
+
ruby split : 0.020000 0.000000 0.020000 ( 0.022597)
|
25
|
+
opencl split : 0.060000 0.040000 0.100000 ( 0.099309)
|
26
|
+
ruby add_n : 0.150000 0.000000 0.150000 ( 0.162702)
|
27
|
+
opencl add_n : 0.020000 0.020000 0.040000 ( 0.033757)
|
28
|
+
ruby ooo matmul : 1.670000 0.010000 1.680000 ( 1.738712)
|
29
|
+
opencl ooo matmul : 0.020000 0.010000 0.030000 ( 0.029647)
|
30
|
+
ruby softmax : 0.030000 0.000000 0.030000 ( 0.033050)
|
31
|
+
opencl softmax : 0.020000 0.010000 0.030000 ( 0.030572)
|
32
|
+
ruby matmul : 0.820000 0.010000 0.830000 ( 0.851559)
|
33
|
+
opencl matmul : 0.010000 0.010000 0.020000 ( 0.026167)
|
34
|
+
ruby : 2.860000 0.020000 2.880000 ( 3.033034)
|
35
|
+
opencl : 0.220000 0.070000 0.290000 ( 0.240857)
|
36
|
+
ruby single function : 0.380000 0.000000 0.380000 ( 0.398911)
|
37
|
+
opencl single function : 0.150000 0.050000 0.200000 ( 0.162006)
|
38
|
+
ruby pow float : 0.090000 0.000000 0.090000 ( 0.098400)
|
39
|
+
opencl pow float : 0.020000 0.020000 0.040000 ( 0.033370)
|
40
|
+
ruby pow int : 0.020000 0.000000 0.020000 ( 0.023459)
|
41
|
+
opencl pow int : 0.020000 0.010000 0.030000 ( 0.030894)
|
42
|
+
-------------------------------------------------- total: 36.290000sec
|
43
|
+
|
44
|
+
user system total real
|
45
|
+
ruby argmin : 0.880000 0.010000 0.890000 ( 0.933367)
|
46
|
+
opencl argmin : 0.010000 0.010000 0.020000 ( 0.023140)
|
47
|
+
ruby bias_add_grad : 2.350000 0.050000 2.400000 ( 2.539349)
|
48
|
+
opencl bias_add_grad : 0.010000 0.010000 0.020000 ( 0.024700)
|
49
|
+
ruby bias_add : 2.510000 0.060000 2.570000 ( 2.667330)
|
50
|
+
opencl bias_add : 0.150000 0.020000 0.170000 ( 0.184056)
|
51
|
+
ruby conv2d_backprop : 3.910000 0.040000 3.950000 ( 4.320383)
|
52
|
+
opencl conv2d_backprop : 0.030000 0.020000 0.050000 ( 0.058036)
|
53
|
+
ruby conv2d : 0.910000 0.020000 0.930000 ( 1.120605)
|
54
|
+
opencl conv2d : 0.020000 0.010000 0.030000 ( 0.034972)
|
55
|
+
ruby arr index : 0.000000 0.000000 0.000000 ( 0.004119)
|
56
|
+
opencl arr index : 0.020000 0.010000 0.030000 ( 0.024126)
|
57
|
+
ruby min : 3.670000 0.030000 3.700000 ( 4.024439)
|
58
|
+
opencl min : 0.140000 0.030000 0.170000 ( 0.178683)
|
59
|
+
ruby sum : 6.920000 0.050000 6.970000 ( 7.314338)
|
60
|
+
opencl sum : 0.010000 0.020000 0.030000 ( 0.024655)
|
61
|
+
ruby sum axis 1 : 6.900000 0.050000 6.950000 ( 7.332897)
|
62
|
+
opencl sum axis 1 : 0.020000 0.020000 0.040000 ( 0.026150)
|
63
|
+
ruby split : 0.010000 0.000000 0.010000 ( 0.018866)
|
64
|
+
opencl split : 0.050000 0.040000 0.090000 ( 0.096327)
|
65
|
+
ruby add_n : 0.140000 0.000000 0.140000 ( 0.151006)
|
66
|
+
opencl add_n : 0.020000 0.010000 0.030000 ( 0.025622)
|
67
|
+
ruby ooo matmul : 1.670000 0.010000 1.680000 ( 1.732486)
|
68
|
+
opencl ooo matmul : 0.020000 0.020000 0.040000 ( 0.027051)
|
69
|
+
ruby softmax : 0.030000 0.000000 0.030000 ( 0.032848)
|
70
|
+
opencl softmax : 0.010000 0.010000 0.020000 ( 0.026403)
|
71
|
+
ruby matmul : 0.810000 0.000000 0.810000 ( 0.866297)
|
72
|
+
opencl matmul : 0.020000 0.020000 0.040000 ( 0.026677)
|
73
|
+
ruby : 2.870000 0.020000 2.890000 ( 3.237224)
|
74
|
+
opencl : 0.240000 0.080000 0.320000 ( 0.302463)
|
75
|
+
ruby single function : 0.390000 0.010000 0.400000 ( 0.470700)
|
76
|
+
opencl single function : 0.150000 0.060000 0.210000 ( 0.228528)
|
77
|
+
ruby pow float : 0.090000 0.000000 0.090000 ( 0.113073)
|
78
|
+
opencl pow float : 0.020000 0.010000 0.030000 ( 0.036938)
|
79
|
+
ruby pow int : 0.020000 0.000000 0.020000 ( 0.023728)
|
80
|
+
opencl pow int : 0.020000 0.020000 0.040000 ( 0.031909)
|
data/lib/tensor_stream/opencl.rb
CHANGED
@@ -10,13 +10,13 @@ module TensorStream
|
|
10
10
|
shape = if %i[zeros_like ones_like].include?(tensor.operation)
|
11
11
|
inputs[0].shape
|
12
12
|
elsif !inputs[0].nil?
|
13
|
-
|
13
|
+
complete_eval(inputs[0], context).buffer.to_a
|
14
14
|
else
|
15
15
|
tensor.shape.shape
|
16
16
|
end
|
17
17
|
cache_key = "cons_#{tensor.name}_#{tensor.data_type}_#{shape}"
|
18
18
|
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
19
|
-
buffer = allocate_narray_for_type(tensor.data_type, shape.reduce(:*) || 1)
|
19
|
+
buffer = OpenCLBuffer.allocate_narray_for_type(tensor.data_type, shape.reduce(:*) || 1)
|
20
20
|
if %i[zeros zeros_like].include?(tensor.operation)
|
21
21
|
buffer.fill!(0)
|
22
22
|
else
|
@@ -47,7 +47,7 @@ module TensorStream
|
|
47
47
|
buffer = if cl_buffer
|
48
48
|
cl_buffer.buffer
|
49
49
|
else
|
50
|
-
allocate_narray_for_type(tensor.data_type, narray_size)
|
50
|
+
OpenCLBuffer.allocate_narray_for_type(tensor.data_type, narray_size)
|
51
51
|
end
|
52
52
|
|
53
53
|
buffer.fill!(value.buffer[0])
|
@@ -365,8 +365,8 @@ module TensorStream
|
|
365
365
|
end
|
366
366
|
|
367
367
|
register_op :reshape do |context, tensor, inputs|
|
368
|
-
arr = inputs
|
369
|
-
new_shape =
|
368
|
+
arr, new_shape = inputs
|
369
|
+
new_shape = complete_eval(new_shape, context).buffer.to_a
|
370
370
|
|
371
371
|
shape = if new_shape.size.zero? && arr.buffer.size == 1
|
372
372
|
new_shape
|
@@ -389,9 +389,9 @@ module TensorStream
|
|
389
389
|
res
|
390
390
|
else
|
391
391
|
rank = inputs[0].shape.size
|
392
|
-
perm = inputs[1].nil? ? (0...rank).to_a.reverse : inputs[1].buffer
|
392
|
+
perm = inputs[1].nil? ? (0...rank).to_a.reverse : inputs[1].buffer!
|
393
393
|
new_shape = perm.map { |p| inputs[0].shape[p] }.to_a
|
394
|
-
output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
|
394
|
+
output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name, allocate_host: true)
|
395
395
|
transpose_with_perm(inputs[0].buffer, output_buffer.buffer, inputs[0].shape, new_shape, perm)
|
396
396
|
|
397
397
|
write_op = _opencl_queue.enqueue_write_buffer(output_buffer.cl_buffer, output_buffer.buffer)
|
@@ -442,6 +442,36 @@ module TensorStream
|
|
442
442
|
a
|
443
443
|
end
|
444
444
|
end
|
445
|
+
|
446
|
+
# Builds the sequence start, start + delta, ... (stopping before limit) on the
# host and uploads it with convert_to_opencl. The resulting buffer is cached
# under a key made of start, limit, delta and the tensor's data type.
#
# Raises RuntimeError when delta is zero (or truncates to zero for a non
# floating point output type) or when delta points away from limit.
register_op :range do |context, tensor, inputs|
  start, limit, delta = complete_eval(inputs, context).map { |p| p.buffer.to_a.first }

  # A zero limit is treated as "only one bound given": the first value becomes
  # the limit and the sequence starts at 0.
  # NOTE(review): this also rewrites an explicit descending range such as
  # (5, 0, -1) into (0, 5, -1), which then fails the direction check below.
  if limit.zero?
    limit = start
    start = 0
  end

  raise " delta !=0 " if delta.zero?
  raise " Requires start <= limit when delta > 0" if (start > limit) && delta > 0
  raise " Requires start >= limit when delta < 0" if (start < limit) && delta < 0

  floating = fp_type?(tensor.options[:output_type])
  step = floating ? delta.to_f : delta.to_i
  # A fractional delta (e.g. 0.5) truncated for an integer output becomes 0;
  # the generation loop would then never reach the limit.
  raise " delta !=0 " if step.zero? && start != limit

  cache_key = "range_#{start}_#{limit}_#{delta}_#{tensor.data_type}"

  @context[:_cache][:_cl_buffers][cache_key] ||= begin
    cur_step = floating ? start.to_f : start.to_i
    r = []
    unless start == limit
      ascending = start < limit
      while ascending ? cur_step < limit : cur_step > limit
        r << cur_step
        cur_step += step
      end
    end
    convert_to_opencl(r, [r.size], data_type: tensor.options[:output_type], name: tensor.name)
  end
end
|
445
475
|
end
|
446
476
|
end
|
447
477
|
end
|
@@ -24,7 +24,7 @@ module TensorStream
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
output_buffer = _create_result_buffer(tensor.data_type, [image.height, image.width, channels], "out_#{tensor.name}")
|
27
|
+
output_buffer = _create_result_buffer(tensor.data_type, [image.height, image.width, channels], "out_#{tensor.name}", allocate_host: true)
|
28
28
|
|
29
29
|
image.grayscale! if channels == 1
|
30
30
|
image.pixels.each_with_index do |pixel, index|
|
@@ -6,8 +6,9 @@ __kernel void gemm_<%= dtype %>(const int M, const int N, const int K,
|
|
6
6
|
__global <%= c_dtype %>* C) {
|
7
7
|
|
8
8
|
// Get the index of the current element to be processed
|
9
|
-
const int
|
10
|
-
const int
|
9
|
+
const int index = get_global_id(0);
|
10
|
+
const int globalRow = get_global_id(1); // Row ID of C (0..M)
|
11
|
+
const int globalCol = get_global_id(2); // Col ID of C (0..N)
|
11
12
|
|
12
13
|
// Compute a single element (loop over K)
|
13
14
|
<%= c_dtype %> acc = 0.0f;
|
@@ -16,9 +17,9 @@ __kernel void gemm_<%= dtype %>(const int M, const int N, const int K,
|
|
16
17
|
int b_index = k*N + globalCol;
|
17
18
|
<% if ta %>a_index = M*k + globalRow;<% end %>
|
18
19
|
<% if tb %>b_index = globalCol*K + k;<% end %>
|
19
|
-
acc += A[a_index] * B[b_index];
|
20
|
+
acc += A[a_index + index * <%= n_a %>] * B[b_index + index * <%= n_b %>];
|
20
21
|
}
|
21
22
|
|
22
23
|
// Store the result
|
23
|
-
C[globalRow*N + globalCol] = acc;
|
24
|
+
C[index * <%= n %> + globalRow*N + globalCol] = acc;
|
24
25
|
}
|
@@ -0,0 +1,7 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
// One work item per output element: picks the entry of rand_table at
// (seed_ptr + global id), wrapping at the table size, and rescales it
// with value * (max - min) + min.
__kernel void random_uniform_<%= dtype %>(const int seed_ptr, const float min, const float max, __global const <%= c_dtype %> *rand_table, __global <%= c_dtype %> *C) {
    const int gid = get_global_id(0);
    const int table_index = (seed_ptr + gid) % <%= tsize %>;
    const <%= c_dtype %> sample = rand_table[table_index];
    C[gid] = sample * (max - min) + min;
}
|
@@ -54,26 +54,28 @@ module TensorStream
|
|
54
54
|
register_op :mat_mul do |_context, tensor, inputs|
|
55
55
|
a, b = inputs
|
56
56
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
k =
|
57
|
+
a_matrix_shape = a.shape.dup
|
58
|
+
b_matrix_shape = b.shape.dup
|
59
|
+
|
60
|
+
k = a_matrix_shape.pop
|
61
|
+
m = a_matrix_shape.pop
|
62
|
+
n = b_matrix_shape.pop
|
63
|
+
v = b_matrix_shape.pop
|
61
64
|
|
62
65
|
if tensor.options[:transpose_a]
|
63
|
-
m =
|
64
|
-
k = a.shape[0]
|
66
|
+
m, k = k, m
|
65
67
|
end
|
66
68
|
|
67
69
|
if tensor.options[:transpose_b]
|
68
|
-
n =
|
69
|
-
v = b.shape[1]
|
70
|
+
n, v = v, n
|
70
71
|
end
|
71
72
|
|
72
|
-
result_shape = [m, n]
|
73
|
+
result_shape = [a_matrix_shape.first, m, n].compact
|
74
|
+
work_group = [a_matrix_shape.first || 1, m, n]
|
73
75
|
|
74
76
|
raise "#{tensor.inputs[0].name} rank must be greater than 1" if a.shape.size < 2
|
75
77
|
raise "#{tensor.inputs[1].name} rank must be greater than 1" if b.shape.size < 2
|
76
|
-
raise "#{tensor.inputs[0].name} unsupported rank" if b.shape.size
|
78
|
+
raise "#{tensor.inputs[0].name} unsupported rank" if b.shape.size > 3 || a.shape.size > 3
|
77
79
|
raise "incompatible shape sizes for matrix multiplication (#{a.shape[1]} != #{b.shape[0]}) #{a.shape} vs #{b.shape}" if k != v
|
78
80
|
|
79
81
|
dtype = tensor.data_type
|
@@ -85,7 +87,7 @@ module TensorStream
|
|
85
87
|
cl_k = OpenCL::Int1.new(k)
|
86
88
|
|
87
89
|
event_wait_list = build_event_wait_list([a, b])
|
88
|
-
output_buffer.op = _cl_program('gemm', ta: !!tensor.options[:transpose_a], tb: !!tensor.options[:transpose_b], dtype: dtype).send(:"gemm_#{dtype}", _opencl_queue,
|
90
|
+
output_buffer.op = _cl_program('gemm', ta: !!tensor.options[:transpose_a], tb: !!tensor.options[:transpose_b], n: m * n, n_a: m * k, n_b: n * v, dtype: dtype).send(:"gemm_#{dtype}", _opencl_queue, work_group, cl_m, cl_n, cl_k, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
|
89
91
|
|
90
92
|
output_buffer
|
91
93
|
end
|
@@ -1,7 +1,48 @@
|
|
1
1
|
module TensorStream
|
2
2
|
# Buffer used by the OpenCL evaluator
|
3
3
|
class OpenCLBuffer < Buffer
|
4
|
+
# Stand-in for a host buffer that has not been allocated yet: it only records
# the element type and the element count.
class LazyBuffer
  # Bytes per element for each supported data type. 64 bit integer types map
  # to 4 bytes because NArray does not have 64 bit int types.
  ELEMENT_SIZES = {
    float: 4, float32: 4, float16: 4,
    float64: 8,
    int: 4, int32: 4, int64: 4, uint64: 4, uint32: 4,
    int16: 2, uint16: 2,
    uint8: 1, int8: 1,
    boolean: 1,
    string: 1,
    unknown: nil
  }.freeze

  attr_reader :data_type, :size

  # @param data_type [Symbol] element type of the future buffer
  # @param size [Integer] number of elements
  def initialize(data_type, size)
    @data_type = data_type
    @size = size
  end

  # @return [Integer, nil] bytes per element of this buffer's data type
  def element_size
    buffer_size_for_type(@data_type)
  end

  # @param data_type [Symbol]
  # @return [Integer, nil] bytes per element; nil for :unknown
  # @raise [RuntimeError] for a data type that is not in the table
  def buffer_size_for_type(data_type)
    ELEMENT_SIZES.fetch(data_type) { raise "unsupported type #{data_type}" }
  end
end
|
43
|
+
|
4
44
|
include ArrayOpsHelper
|
45
|
+
include TensorStream::CLEventHelpers
|
5
46
|
|
6
47
|
attr_accessor :shape, :buffer, :cl_buffer, :op, :owner
|
7
48
|
|
@@ -24,15 +65,33 @@ module TensorStream
|
|
24
65
|
end
|
25
66
|
|
26
67
|
# Human readable summary: name, shape, data type, size of the CL allocation
# and the raw host-side contents.
#
# A LazyBuffer has no host data (and no #to_a), so it is reported with a
# marker instead of raising NoMethodError.
def inspect
  raw = buffer.is_a?(LazyBuffer) ? '<lazy: host copy not allocated>' : buffer.to_a
  "CLBuffer(name: #{name} shape: #{shape || "?"} data_type: #{data_type}, cl_allocated: #{cl_buffer ? cl_buffer.size : 'unallocated'}) -> raw: #{raw}"
end
|
70
|
+
|
71
|
+
# Returns the host-side buffer, reading it from the device when needed.
#
# An existing NArray is returned untouched. Otherwise a LazyBuffer is first
# replaced by a newly allocated NArray of the same data type and size, then
# cl_buffer is read into @buffer with a blocking read that waits on this
# buffer's own pending op(s).
def buffer!
  host = buffer
  return host if host.is_a?(NArray)

  if host.is_a?(LazyBuffer)
    @buffer = OpenCLBuffer.allocate_narray_for_type(host.data_type, host.size)
  end

  command_queue.enqueue_read_buffer(
    cl_buffer,
    @buffer,
    blocking: true,
    event_wait_list: build_event_wait_list([self])
  )
  @buffer
end
|
79
|
+
|
80
|
+
# Memoized command queue, taken from this buffer's op (from the first op when
# op is an Array).
def command_queue
  @command_queue ||= (op.is_a?(Array) ? op.first : op).command_queue
end
|
29
86
|
|
30
87
|
def to_ruby
|
88
|
+
buffer! if buffer.is_a?(LazyBuffer)
|
89
|
+
|
31
90
|
return [] if buffer.empty?
|
32
91
|
|
33
92
|
if dirty
|
34
|
-
|
35
|
-
|
93
|
+
command_queue.enqueue_read_buffer(cl_buffer, buffer, event_wait_list: [op].compact)
|
94
|
+
command_queue.finish
|
36
95
|
self.dirty = false
|
37
96
|
end
|
38
97
|
|
@@ -54,5 +113,28 @@ module TensorStream
|
|
54
113
|
def self.nil_buffer(owner, name, data_type)
|
55
114
|
OpenCLBuffer.new(owner, name: name, data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil)
|
56
115
|
end
|
116
|
+
|
117
|
+
# Allocates a one-dimensional NArray of narray_size elements whose element
# type corresponds to data_type.
#
# Returns nil for :unknown and raises RuntimeError for any data type that is
# not listed.
def self.allocate_narray_for_type(data_type, narray_size)
  constructor =
    case data_type
    when :float, :float32, :float16 then :sfloat
    when :float64 then :float
    when :int, :int32, :int64, :uint64, :uint32 then :int # NArray does not have 64 bit int types
    when :int16, :uint16 then :sint
    when :uint8, :int8 then :byte
    when :boolean then :byte
    when :string then :byte
    when :unknown then return nil
    else raise "unsupported type #{data_type}"
    end

  NArray.public_send(constructor, narray_size)
end
|
57
139
|
end
|
58
140
|
end
|
@@ -13,6 +13,7 @@ require 'tensor_stream/opencl/math_ops'
|
|
13
13
|
require 'tensor_stream/opencl/nn_ops'
|
14
14
|
require 'tensor_stream/opencl/images_ops'
|
15
15
|
require 'tensor_stream/opencl/array_ops'
|
16
|
+
require 'tensor_stream/opencl/random_ops'
|
16
17
|
require 'tensor_stream/helpers/op_helper'
|
17
18
|
|
18
19
|
module TensorStream
|
@@ -49,6 +50,8 @@ module TensorStream
|
|
49
50
|
include TensorStream::OpenCLHelpers::NNOps
|
50
51
|
include TensorStream::OpenCLHelpers::ImagesOps
|
51
52
|
include TensorStream::OpenCLHelpers::ArrayOps
|
53
|
+
include TensorStream::OpenCLHelpers::RandomOps
|
54
|
+
include TensorStream::CLEventHelpers
|
52
55
|
|
53
56
|
def initialize(session, device, thread_pool: nil, log_intermediates: false)
|
54
57
|
super
|
@@ -159,6 +162,9 @@ module TensorStream
|
|
159
162
|
return [] if buffer.buffer.nil?
|
160
163
|
return buffer if buffer.buffer.size.zero?
|
161
164
|
|
165
|
+
# lazy allocate
|
166
|
+
buffer.buffer = OpenCLBuffer.allocate_narray_for_type(buffer.buffer.data_type, buffer.buffer.size) if buffer.buffer.is_a?(OpenCLBuffer::LazyBuffer)
|
167
|
+
|
162
168
|
buffer.op = _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: build_event_wait_list([buffer]))
|
163
169
|
buffer
|
164
170
|
end
|
@@ -167,12 +173,19 @@ module TensorStream
|
|
167
173
|
# Enqueues a buffer read for the tensor (or for every tensor of an Array),
# waits for the resulting events and returns the buffer(s).
#
# Returns nil for a nil tensor, an Array of buffers for an Array argument and
# a single buffer otherwise.
def complete_eval(tensor, context)
  return nil if tensor.nil?

  many = tensor.is_a?(Array)
  buffers = (many ? tensor : [tensor]).map { |t| enqueue_buffer_read(t, context) }

  pending = build_event_wait_list(buffers)
  OpenCL.wait_for_events(pending) unless pending.empty?

  many ? buffers : buffers.first
end
|
177
190
|
|
178
191
|
def self.query_devices_with_score
|
@@ -355,9 +368,13 @@ module TensorStream
|
|
355
368
|
|
356
369
|
# Passes an OutputGroup through unchanged. Any other input is wrapped in a new
# OpenCLBuffer named after the tensor that shares the input's host and CL
# buffers; its op is the wait list built from all inputs.
register_op :identity do |_context, tensor, inputs|
  value = inputs[0]
  next value if value.is_a?(OutputGroup)

  OpenCLBuffer.new(
    self,
    name: tensor.name,
    data_type: tensor.data_type,
    shape: value.shape,
    buffer: value.buffer,
    cl_buffer: value.cl_buffer
  ).tap { |wrapped| wrapped.op = build_event_wait_list(inputs) }
end
|
362
379
|
|
363
380
|
register_op :assign, noop: true do |context, tensor, inputs|
|
@@ -773,9 +790,9 @@ module TensorStream
|
|
773
790
|
value
|
774
791
|
elsif data_type == :string && shape.empty?
|
775
792
|
cl_buffer_size = value[0].bytesize
|
776
|
-
allocate_narray_for_type(data_type, value[0].bytesize)
|
793
|
+
OpenCLBuffer.allocate_narray_for_type(data_type, value[0].bytesize)
|
777
794
|
else
|
778
|
-
|
795
|
+
OpenCLBuffer.allocate_narray_for_type(data_type, narray_size)
|
779
796
|
end
|
780
797
|
|
781
798
|
return nil if buffer.nil?
|
@@ -818,39 +835,17 @@ module TensorStream
|
|
818
835
|
cl_object
|
819
836
|
end
|
820
837
|
|
821
|
-
def
|
822
|
-
case data_type
|
823
|
-
when :float, :float32, :float16
|
824
|
-
NArray.sfloat(narray_size)
|
825
|
-
when :float64
|
826
|
-
NArray.float(narray_size)
|
827
|
-
when :int, :int32, :int64, :uint64, :uint32 # NArray does not have 64 bit int types
|
828
|
-
NArray.int(narray_size)
|
829
|
-
when :int16, :uint16
|
830
|
-
NArray.sint(narray_size)
|
831
|
-
when :uint8, :int8
|
832
|
-
NArray.byte(narray_size)
|
833
|
-
when :boolean
|
834
|
-
NArray.byte(narray_size)
|
835
|
-
when :string
|
836
|
-
NArray.byte(narray_size)
|
837
|
-
when :unknown
|
838
|
-
nil
|
839
|
-
else
|
840
|
-
raise "unsupported type #{data_type}"
|
841
|
-
end
|
842
|
-
end
|
843
|
-
|
844
|
-
def _create_result_buffer(data_type, shape, name)
|
838
|
+
# Returns the (cached) output buffer for an op result.
#
# A shape of [0] yields OpenCLBuffer.nil_buffer. Otherwise the buffer is
# cached under name, shape and this evaluator's object_id. The CL buffer is
# sized as element count * element size; the host side is a LazyBuffer unless
# allocate_host is true, in which case an NArray is allocated right away.
def _create_result_buffer(data_type, shape, name, allocate_host: false)
  return OpenCLBuffer.nil_buffer(self, name, data_type) if shape == [0]

  cache_key = "_result_#{name}_#{shape.join('_')}:#{object_id}"
  @context[:_cache][:_cl_buffers][cache_key] ||= begin
    # shape == [0] has already returned above, so only the scalar case maps to 1
    element_count = shape.empty? ? 1 : shape.reduce(:*)
    host_buffer =
      if allocate_host
        OpenCLBuffer.allocate_narray_for_type(data_type, element_count)
      else
        OpenCLBuffer::LazyBuffer.new(data_type, element_count)
      end
    device_buffer = _opencl_context.create_buffer(element_count * host_buffer.element_size)

    OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: host_buffer, cl_buffer: device_buffer, name: name)
  end
end
|
856
851
|
|
@@ -859,7 +854,7 @@ module TensorStream
|
|
859
854
|
cache_key ="_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
|
860
855
|
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
861
856
|
size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
|
862
|
-
buffer = allocate_narray_for_type(data_type, size)
|
857
|
+
buffer = OpenCLBuffer.allocate_narray_for_type(data_type, size)
|
863
858
|
|
864
859
|
if parent_buffer.cl_buffer.associated_memobject.nil?
|
865
860
|
start = index * buffer.size * buffer.element_size
|
@@ -890,7 +885,7 @@ module TensorStream
|
|
890
885
|
cache_key = "_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
|
891
886
|
@context[:_cache][:_cl_buffers][cache_key] ||= begin
|
892
887
|
size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
|
893
|
-
buffer = allocate_narray_for_type(data_type, size)
|
888
|
+
buffer = OpenCLBuffer.allocate_narray_for_type(data_type, size)
|
894
889
|
|
895
890
|
if parent_buffer.cl_buffer.associated_memobject.nil?
|
896
891
|
region = OpenCL::BufferRegion::new(start, region_size_in_bytes)
|
@@ -980,14 +975,6 @@ module TensorStream
|
|
980
975
|
shape.is_a?(Array) ? shape.size : 0
|
981
976
|
end
|
982
977
|
|
983
|
-
def build_event_wait_list(inputs)
|
984
|
-
if inputs.is_a?(Array)
|
985
|
-
inputs.flatten.compact.map(&:op).compact.uniq
|
986
|
-
else
|
987
|
-
inputs.op ? [inputs.op] : []
|
988
|
-
end
|
989
|
-
end
|
990
|
-
|
991
978
|
def resolve_placeholder(placeholder, _execution_context = {})
|
992
979
|
return nil if placeholder.nil?
|
993
980
|
return placeholder unless placeholder.is_a?(Placeholder)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module TensorStream
  module OpenCLHelpers
    # Random number ops for the OpenCL evaluator.
    #
    # random_uniform does not generate numbers on the device: a table of
    # RAND_TABLE_SIZE host-generated values is uploaded once per data type and
    # every call copies a window of it, starting at a pointer that advances by
    # the number of elements produced.
    module RandomOps
      RAND_TABLE_SIZE = 1024 * 1024

      def RandomOps.included(klass)
        klass.class_eval do
          register_op :random_uniform do |context, tensor, inputs|
            maxval = tensor.options.fetch(:maxval, 1)
            minval = tensor.options.fetch(:minval, 0)
            seed = tensor.options[:seed]

            cl_cache = @context[:_cache][:_cl_buffers]

            # The kernel is compiled per data type and reads the table as that
            # type, so each data type needs its own table.
            rand_buffer = cl_cache["_rand_#{tensor.data_type}"] ||= begin
              random = _get_randomizer(tensor, seed)
              rand_table = Array.new(RAND_TABLE_SIZE) { random.rand }
              convert_to_opencl(rand_table, [RAND_TABLE_SIZE], data_type: tensor.data_type, name: "rand_#{tensor.data_type}")
            end

            # The pointer is an Integer stored in the same cache as the CL
            # buffers, so code iterating that cache sees a non-buffer entry.
            seed_ptr = cl_cache["_rand_seed_ptr"] ||= 0

            shape = read_final_result(complete_eval(inputs[0], context)) || tensor.shape.shape
            # A scalar (empty shape) still produces one value.
            element_count = shape.reduce(:*) || 1
            workgroup = [element_count]
            cl_seed_ptr = OpenCL::Int1.new(seed_ptr)
            cl_min = OpenCL::Float1.new(minval)
            cl_max = OpenCL::Float1.new(maxval)

            # Advance by what was actually consumed; advancing by 0 for scalars
            # would hand out the same table entry on every call.
            cl_cache["_rand_seed_ptr"] = (seed_ptr + element_count) % RAND_TABLE_SIZE
            buffer = _create_result_buffer(tensor.data_type, shape, tensor.name)
            buffer.op = _cl_program("random_uniform", dtype: tensor.data_type, tsize: RAND_TABLE_SIZE).send(:"random_uniform_#{tensor.data_type}", _opencl_queue, workgroup, cl_seed_ptr, cl_min, cl_max, rand_buffer.cl_buffer, buffer.cl_buffer)
            buffer
          end

          # Picks the Random instance used to fill the table.
          #
          # - graph seed and op seed: a new generator seeded with their XOR
          # - graph seed only: one generator per graph, kept in @session.randomizer
          # - op seed only: one generator per operation, kept in @session.randomizer
          # - no seed: a new, unseeded generator
          def _get_randomizer(tensor, seed)
            graph_seed = tensor.graph.random_seed

            if graph_seed && seed
              Random.new(graph_seed ^ seed)
            elsif graph_seed
              @session.randomizer[tensor.graph.object_id] ||= Random.new(graph_seed)
            elsif seed
              @session.randomizer[tensor.operation] ||= Random.new(seed)
            else
              Random.new
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module TensorStream
|
2
|
+
require 'set'

##
# Reporting helper for the OpenCL state kept by a session.
class OpenCLUtil
  ##
  # initializes an OpenCL helper class based on a session
  def initialize(session)
    @session = session
  end

  ##
  # Retrieves OpenCL memory usage
  #
  # Sums +size+ over the distinct +cl_buffer+ objects found in the
  # +_cl_buffers+ cache of the session's last context. A +cl_buffer+ that is
  # referenced by several cache entries is only counted once.
  #
  # The cache does not only hold buffers: the random ops keep their integer
  # table cursor ("_rand_seed_ptr") in the same hash. Entries that do not
  # expose a +cl_buffer+, or whose +cl_buffer+ is nil, are skipped instead
  # of raising NoMethodError.
  #
  # Returns the accumulated size (0 for an empty cache).
  def get_memory_usage
    counted = Set.new
    cache = @session.last_session_context[:_cache][:_cl_buffers]

    cache.sum do |_key, entry|
      next 0 unless entry.respond_to?(:cl_buffer)

      cl_buffer = entry.cl_buffer
      next 0 if cl_buffer.nil?

      # Set#add? returns nil when the object id was already recorded.
      counted.add?(cl_buffer.object_id) ? cl_buffer.size : 0
    end
  end
end
|
18
|
+
##
# Helpers for collecting the pending operations of input buffers.
module CLEventHelpers
  ##
  # Builds the list of pending operations a new kernel call has to wait for.
  #
  # +inputs+ is either a single object responding to +op+, an arbitrarily
  # nested Array of such objects (nil entries are ignored), or nil.
  #
  # Returns an Array with the distinct non-nil +op+ values of the inputs;
  # empty when there is nothing to wait for.
  def build_event_wait_list(inputs)
    # The Array form already tolerates nil entries; treat a bare nil the same
    # way instead of failing with NoMethodError.
    return [] if inputs.nil?
    return inputs.flatten.compact.map(&:op).compact.uniq if inputs.is_a?(Array)

    pending = inputs.op
    pending ? [pending] : []
  end
end
|
27
|
+
end
|
data/samples/mnist_data_2.3.rb
CHANGED
@@ -53,11 +53,16 @@ b5 = tf.variable(tf.zeros([10]))
|
|
53
53
|
|
54
54
|
x_ = tf.reshape(x, [-1, 784])
|
55
55
|
|
56
|
+
|
56
57
|
y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
58
|
+
Y1d = tf.nn.dropout(y1, pkeep)
|
59
|
+
y2 = tf.nn.relu(tf.matmul(Y1d, w2) + b2)
|
60
|
+
Y2d = tf.nn.dropout(y2, pkeep)
|
61
|
+
y3 = tf.nn.relu(tf.matmul(Y2d, w3) + b3)
|
62
|
+
Y3d = tf.nn.dropout(y3, pkeep)
|
63
|
+
y4 = tf.nn.relu(tf.matmul(Y3d, w4) + b4)
|
64
|
+
Y4d = tf.nn.dropout(y4, pkeep)
|
65
|
+
ylogits = tf.matmul(Y4d, w5) + b5
|
61
66
|
|
62
67
|
# model
|
63
68
|
y = tf.nn.softmax(ylogits)
|
data/samples/mnist_data_3.0.rb
CHANGED
@@ -85,8 +85,8 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
|
|
85
85
|
# reshape the output from the third convolution for the fully connected layer
|
86
86
|
yy = tf.reshape(y3, [-1, 7 * 7 * M])
|
87
87
|
y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
|
88
|
-
|
89
|
-
ylogits = tf.matmul(
|
88
|
+
YY4 = tf.nn.dropout(y4, pkeep)
|
89
|
+
ylogits = tf.matmul(YY4, w5) + b5
|
90
90
|
|
91
91
|
# model
|
92
92
|
y = tf.nn.softmax(ylogits, name: 'out')
|
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency "awesome_print"
|
40
40
|
spec.add_development_dependency "mnist-learn"
|
41
41
|
spec.add_development_dependency "simplecov"
|
42
|
-
spec.add_dependency "tensor_stream", "1.0.
|
42
|
+
spec.add_dependency "tensor_stream", "1.0.7"
|
43
43
|
spec.add_dependency "opencl_ruby_ffi"
|
44
44
|
spec.add_dependency "oily_png"
|
45
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -114,14 +114,14 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - '='
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 1.0.
|
117
|
+
version: 1.0.7
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - '='
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 1.0.
|
124
|
+
version: 1.0.7
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: opencl_ruby_ffi
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -167,6 +167,7 @@ files:
|
|
167
167
|
- README.md
|
168
168
|
- Rakefile
|
169
169
|
- benchmark/benchmark.rb
|
170
|
+
- benchmark_imac2015_iris.txt
|
170
171
|
- benchmark_intel.txt
|
171
172
|
- benchmark_ryzen.txt
|
172
173
|
- benchmark_ryzen_nvidia.txt
|
@@ -219,6 +220,7 @@ files:
|
|
219
220
|
- lib/tensor_stream/opencl/kernels/pack.cl
|
220
221
|
- lib/tensor_stream/opencl/kernels/pow.cl
|
221
222
|
- lib/tensor_stream/opencl/kernels/prod.cl
|
223
|
+
- lib/tensor_stream/opencl/kernels/random_uniform.cl
|
222
224
|
- lib/tensor_stream/opencl/kernels/real_div.cl
|
223
225
|
- lib/tensor_stream/opencl/kernels/reciprocal.cl
|
224
226
|
- lib/tensor_stream/opencl/kernels/reduce_axis.cl
|
@@ -250,6 +252,8 @@ files:
|
|
250
252
|
- lib/tensor_stream/opencl/opencl_device.rb
|
251
253
|
- lib/tensor_stream/opencl/opencl_evaluator.rb
|
252
254
|
- lib/tensor_stream/opencl/opencl_template_helper.rb
|
255
|
+
- lib/tensor_stream/opencl/random_ops.rb
|
256
|
+
- lib/tensor_stream/opencl/utils.rb
|
253
257
|
- lib/tensor_stream/opencl/version.rb
|
254
258
|
- samples/build_mnist_model.rb
|
255
259
|
- samples/classify.rb
|