tensor_stream-opencl 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 466d846646b2b58659b7068260e84b1a76b64b053c0c8c2dc9c60d7b817041c1
|
4
|
+
data.tar.gz: 2a440d5cd6d809dbd661576e38277edb9a738b453d21c27f3048bfcaca9f4852
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d25b928b3b3b9ca3658f14d42e898a067b356eb0f806356e5a093687d652d47b151c34c73394d0bd33048cf6dc68e918e6427287f07d5e90d9741b056460ab2f
|
7
|
+
data.tar.gz: 5fda92ffbc2c3063e6bcedf8ce00c024bab356425f9d723988d755e43e62b7ff283c2087e1ec455f3680e718d8256b914220b2a8e3bcd60edce7287225ec1a61
|
@@ -0,0 +1,21 @@
|
|
1
|
+
% c_dtype = dtype_to_c_type(dtype)
|
2
|
+
// same dimension add floating point op
|
3
|
+
__kernel void apply_adadelta_<%= dtype %>(const int M, const int N,
|
4
|
+
__global const <%= c_dtype %> *lr,
|
5
|
+
__global const <%= c_dtype %> *rho,
|
6
|
+
__global const <%= c_dtype %> *epsilon,
|
7
|
+
__global const <%= c_dtype %> *grad,
|
8
|
+
__global <%= c_dtype %> *output,
|
9
|
+
__global <%= c_dtype %> *acc,
|
10
|
+
__global <%= c_dtype %> *acc_update
|
11
|
+
) {
|
12
|
+
// Get the index of the current element to be processed
|
13
|
+
const int globalRow = get_global_id(0); // Row ID of C (0..M)
|
14
|
+
const int globalCol = get_global_id(1); // Col ID of C (0..N)
|
15
|
+
const int index = globalRow * N + globalCol;
|
16
|
+
|
17
|
+
acc[index] = acc[index] * rho[0] + (grad[index] * grad[index]) * ((<%= c_dtype %>)1 - rho[0]);
|
18
|
+
const <%= c_dtype %> update = sqrt(acc_update[index] + epsilon[0]) * rsqrt(acc[index] + epsilon[0]) * grad[index];
|
19
|
+
output[index] -= update * lr[0];
|
20
|
+
acc_update[index] = acc_update[index] * rho[0] + update * update * ((<%= c_dtype %>)1 - rho[0]);
|
21
|
+
}
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'pry-byebug'
|
1
2
|
module TensorStream
|
2
3
|
module OpenCLHelpers
|
3
4
|
# Collection of math functions for interfacing with OpenCL kernels
|
@@ -54,6 +55,39 @@ module TensorStream
|
|
54
55
|
end
|
55
56
|
|
56
57
|
register_op :apply_adadelta do |context, tensor, inputs|
|
58
|
+
_target_var, _accum, _accum_update, lr, rho, epsilon, grad = inputs
|
59
|
+
assign = tensor.inputs[0] || tensor
|
60
|
+
assign_acc = tensor.inputs[1]
|
61
|
+
assign_acc_update = tensor.inputs[2]
|
62
|
+
|
63
|
+
# mark variable buffers as dirty
|
64
|
+
assign.buffer.dirty = true # force buffer copy when variable is read externally
|
65
|
+
assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
|
66
|
+
assign_acc_update.buffer.dirty = true # force buffer copy when variable is read externally
|
67
|
+
|
68
|
+
output_buffer = assign.buffer
|
69
|
+
|
70
|
+
m, n = output_buffer.shape
|
71
|
+
work_group = [m || 1, n || 1]
|
72
|
+
cl_m = OpenCL::Int1.new(m || 1)
|
73
|
+
cl_n = OpenCL::Int1.new(n || 1)
|
74
|
+
|
75
|
+
event_wait_list = build_event_wait_list(inputs)
|
76
|
+
method_call = :"apply_adadelta_#{output_buffer.data_type}"
|
77
|
+
event = _cl_program('apply_adadelta', dtype: output_buffer.data_type)
|
78
|
+
.send(method_call, _opencl_queue, work_group, cl_m, cl_n,
|
79
|
+
lr.cl_buffer,
|
80
|
+
rho.cl_buffer,
|
81
|
+
epsilon.cl_buffer,
|
82
|
+
grad.cl_buffer,
|
83
|
+
assign.buffer.cl_buffer,
|
84
|
+
assign_acc.buffer.cl_buffer,
|
85
|
+
assign_acc_update.buffer.cl_buffer,
|
86
|
+
event_wait_list: event_wait_list)
|
87
|
+
output_buffer.op = event
|
88
|
+
assign_acc.buffer.op = event
|
89
|
+
assign_acc_update.buffer.op = event
|
90
|
+
output_buffer
|
57
91
|
end
|
58
92
|
|
59
93
|
# Adam optimization algorithm
|
@@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.add_development_dependency "bundler", "~> 1.16"
|
36
36
|
spec.add_development_dependency "rake", "~> 10.0"
|
37
37
|
spec.add_development_dependency "rspec", "~> 3.0"
|
38
|
+
spec.add_development_dependency "pry-byebug"
|
38
39
|
spec.add_dependency "tensor_stream"
|
39
40
|
spec.add_dependency "opencl_ruby_ffi"
|
40
41
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pry-byebug
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: tensor_stream
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -104,6 +118,7 @@ files:
|
|
104
118
|
- lib/tensor_stream/opencl/kernels/abs.cl
|
105
119
|
- lib/tensor_stream/opencl/kernels/acos.cl
|
106
120
|
- lib/tensor_stream/opencl/kernels/add.cl
|
121
|
+
- lib/tensor_stream/opencl/kernels/apply_adadelta.cl
|
107
122
|
- lib/tensor_stream/opencl/kernels/apply_adam.cl
|
108
123
|
- lib/tensor_stream/opencl/kernels/apply_gradient.cl
|
109
124
|
- lib/tensor_stream/opencl/kernels/apply_momentum.cl
|
@@ -178,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
193
|
version: '0'
|
179
194
|
requirements: []
|
180
195
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.
|
196
|
+
rubygems_version: 2.7.7
|
182
197
|
signing_key:
|
183
198
|
specification_version: 4
|
184
199
|
summary: OpenCL evaluator for tensor_stream
|