tensor_stream-opencl 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 466d846646b2b58659b7068260e84b1a76b64b053c0c8c2dc9c60d7b817041c1
|
4
|
+
data.tar.gz: 2a440d5cd6d809dbd661576e38277edb9a738b453d21c27f3048bfcaca9f4852
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d25b928b3b3b9ca3658f14d42e898a067b356eb0f806356e5a093687d652d47b151c34c73394d0bd33048cf6dc68e918e6427287f07d5e90d9741b056460ab2f
|
7
|
+
data.tar.gz: 5fda92ffbc2c3063e6bcedf8ce00c024bab356425f9d723988d755e43e62b7ff283c2087e1ec455f3680e718d8256b914220b2a8e3bcd60edce7287225ec1a61
|
@@ -0,0 +1,21 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
|
2
|
+
// Adadelta update applied element-wise across same-dimension buffers
|
3
|
+
__kernel void apply_adadelta_<%= dtype %>(const int M, const int N,
|
4
|
+
__global const <%= c_dtype %> *lr,
|
5
|
+
__global const <%= c_dtype %> *rho,
|
6
|
+
__global const <%= c_dtype %> *epsilon,
|
7
|
+
__global const <%= c_dtype %> *grad,
|
8
|
+
__global <%= c_dtype %> *output,
|
9
|
+
__global <%= c_dtype %> *acc,
|
10
|
+
__global <%= c_dtype %> *acc_update
|
11
|
+
) {
|
12
|
+
// Get the index of the current element to be processed
|
13
|
+
const int globalRow = get_global_id(0); // Row ID of C (0..M)
|
14
|
+
const int globalCol = get_global_id(1); // Col ID of C (0..N)
|
15
|
+
const int index = globalRow * N + globalCol;
|
16
|
+
|
17
|
+
acc[index] = acc[index] * rho[0] + (grad[index] * grad[index]) * ((<%= c_dtype %>)1 - rho[0]);
|
18
|
+
const <%= c_dtype %> update = sqrt(acc_update[index] + epsilon[0]) * rsqrt(acc[index] + epsilon[0]) * grad[index];
|
19
|
+
output[index] -= update * lr[0];
|
20
|
+
acc_update[index] = acc_update[index] * rho[0] + update * update * ((<%= c_dtype %>)1 - rho[0]);
|
21
|
+
}
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'pry-byebug'
|
1
2
|
module TensorStream
|
2
3
|
module OpenCLHelpers
|
3
4
|
# Collection of math functions for interfacing with OpenCL kernels
|
@@ -54,6 +55,39 @@ module TensorStream
|
|
54
55
|
end
|
55
56
|
|
56
57
|
register_op :apply_adadelta do |context, tensor, inputs|
|
58
|
+
_target_var, _accum, _accum_update, lr, rho, epsilon, grad = inputs
|
59
|
+
assign = tensor.inputs[0] || tensor
|
60
|
+
assign_acc = tensor.inputs[1]
|
61
|
+
assign_acc_update = tensor.inputs[2]
|
62
|
+
|
63
|
+
# mark variable buffers as dirty
|
64
|
+
assign.buffer.dirty = true # force buffer copy when variable is read externally
|
65
|
+
assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
|
66
|
+
assign_acc_update.buffer.dirty = true # force buffer copy when variable is read externally
|
67
|
+
|
68
|
+
output_buffer = assign.buffer
|
69
|
+
|
70
|
+
m, n = output_buffer.shape
|
71
|
+
work_group = [m || 1, n || 1]
|
72
|
+
cl_m = OpenCL::Int1.new(m || 1)
|
73
|
+
cl_n = OpenCL::Int1.new(n || 1)
|
74
|
+
|
75
|
+
event_wait_list = build_event_wait_list(inputs)
|
76
|
+
method_call = :"apply_adadelta_#{output_buffer.data_type}"
|
77
|
+
event = _cl_program('apply_adadelta', dtype: output_buffer.data_type)
|
78
|
+
.send(method_call, _opencl_queue, work_group, cl_m, cl_n,
|
79
|
+
lr.cl_buffer,
|
80
|
+
rho.cl_buffer,
|
81
|
+
epsilon.cl_buffer,
|
82
|
+
grad.cl_buffer,
|
83
|
+
assign.buffer.cl_buffer,
|
84
|
+
assign_acc.buffer.cl_buffer,
|
85
|
+
assign_acc_update.buffer.cl_buffer,
|
86
|
+
event_wait_list: event_wait_list)
|
87
|
+
output_buffer.op = event
|
88
|
+
assign_acc.buffer.op = event
|
89
|
+
assign_acc_update.buffer.op = event
|
90
|
+
output_buffer
|
57
91
|
end
|
58
92
|
|
59
93
|
# Adam optimization algorithm
|
@@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.add_development_dependency "bundler", "~> 1.16"
|
36
36
|
spec.add_development_dependency "rake", "~> 10.0"
|
37
37
|
spec.add_development_dependency "rspec", "~> 3.0"
|
38
|
+
spec.add_development_dependency "pry-byebug"
|
38
39
|
spec.add_dependency "tensor_stream"
|
39
40
|
spec.add_dependency "opencl_ruby_ffi"
|
40
41
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pry-byebug
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: tensor_stream
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -104,6 +118,7 @@ files:
|
|
104
118
|
- lib/tensor_stream/opencl/kernels/abs.cl
|
105
119
|
- lib/tensor_stream/opencl/kernels/acos.cl
|
106
120
|
- lib/tensor_stream/opencl/kernels/add.cl
|
121
|
+
- lib/tensor_stream/opencl/kernels/apply_adadelta.cl
|
107
122
|
- lib/tensor_stream/opencl/kernels/apply_adam.cl
|
108
123
|
- lib/tensor_stream/opencl/kernels/apply_gradient.cl
|
109
124
|
- lib/tensor_stream/opencl/kernels/apply_momentum.cl
|
@@ -178,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
193
|
version: '0'
|
179
194
|
requirements: []
|
180
195
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.
|
196
|
+
rubygems_version: 2.7.7
|
182
197
|
signing_key:
|
183
198
|
specification_version: 4
|
184
199
|
summary: OpenCL evaluator for tensor_stream
|