tensor_stream-opencl 0.1.0
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +51 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/tensor_stream/opencl.rb +7 -0
- data/lib/tensor_stream/opencl/kernels/_bool_operand.cl +45 -0
- data/lib/tensor_stream/opencl/kernels/_operand.cl +45 -0
- data/lib/tensor_stream/opencl/kernels/abs.cl +20 -0
- data/lib/tensor_stream/opencl/kernels/acos.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/add.cl +3 -0
- data/lib/tensor_stream/opencl/kernels/apply_adam.cl +23 -0
- data/lib/tensor_stream/opencl/kernels/apply_gradient.cl +9 -0
- data/lib/tensor_stream/opencl/kernels/apply_momentum.cl +16 -0
- data/lib/tensor_stream/opencl/kernels/argmax.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/argmin.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/asin.cl +9 -0
- data/lib/tensor_stream/opencl/kernels/cast.cl +10 -0
- data/lib/tensor_stream/opencl/kernels/ceil.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/cond.cl.erb +6 -0
- data/lib/tensor_stream/opencl/kernels/cos.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/div.cl.erb +3 -0
- data/lib/tensor_stream/opencl/kernels/exp.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/floor.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/floor_div.cl +48 -0
- data/lib/tensor_stream/opencl/kernels/floor_mod.cl +3 -0
- data/lib/tensor_stream/opencl/kernels/gemm.cl +32 -0
- data/lib/tensor_stream/opencl/kernels/log.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/log1p.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/log_softmax.cl +26 -0
- data/lib/tensor_stream/opencl/kernels/max.cl +46 -0
- data/lib/tensor_stream/opencl/kernels/min.cl +46 -0
- data/lib/tensor_stream/opencl/kernels/mod.cl +3 -0
- data/lib/tensor_stream/opencl/kernels/mul.cl +3 -0
- data/lib/tensor_stream/opencl/kernels/negate.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/pack.cl +24 -0
- data/lib/tensor_stream/opencl/kernels/pow.cl +46 -0
- data/lib/tensor_stream/opencl/kernels/real_div.cl +3 -0
- data/lib/tensor_stream/opencl/kernels/reciprocal.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/round.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/sigmoid.cl +9 -0
- data/lib/tensor_stream/opencl/kernels/sigmoid_grad.cl +55 -0
- data/lib/tensor_stream/opencl/kernels/sign.cl +21 -0
- data/lib/tensor_stream/opencl/kernels/sin.cl +9 -0
- data/lib/tensor_stream/opencl/kernels/softmax.cl +26 -0
- data/lib/tensor_stream/opencl/kernels/softmax_cross.cl +32 -0
- data/lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl +28 -0
- data/lib/tensor_stream/opencl/kernels/softmax_grad.cl +46 -0
- data/lib/tensor_stream/opencl/kernels/sqrt.cl +9 -0
- data/lib/tensor_stream/opencl/kernels/square.cl +9 -0
- data/lib/tensor_stream/opencl/kernels/squared_difference.cl +53 -0
- data/lib/tensor_stream/opencl/kernels/sub.cl +3 -0
- data/lib/tensor_stream/opencl/kernels/tan.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/tanh.cl +8 -0
- data/lib/tensor_stream/opencl/kernels/tanh_grad.cl +7 -0
- data/lib/tensor_stream/opencl/kernels/where.cl +8 -0
- data/lib/tensor_stream/opencl/math_ops.rb +133 -0
- data/lib/tensor_stream/opencl/nn_ops.rb +191 -0
- data/lib/tensor_stream/opencl/opencl_buffer.rb +35 -0
- data/lib/tensor_stream/opencl/opencl_device.rb +5 -0
- data/lib/tensor_stream/opencl/opencl_evaluator.rb +933 -0
- data/lib/tensor_stream/opencl/opencl_template_helper.rb +99 -0
- data/lib/tensor_stream/opencl/version.rb +5 -0
- data/tensor_stream-opencl.gemspec +40 -0
- metadata +185 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 0d1536e4ed46b18be21e6c0a12a6c06a854d6695
+  data.tar.gz: 458e8fcf33d992c3e0ee3660bf632ef5bab49b8e
+SHA512:
+  metadata.gz: 9841051b9a4ef0809eb243158aeb1bfa19e8efeb81f30e99795a019e86b08ff65b02b0bd0c64e4d31350bc496a6371298e945d77f52456439cfd419b02fef2ce
+  data.tar.gz: 29d8efc7a82aac8ecd959a46d04e02c96efeca311ef9340fd74be6bbe1b89a62d8789a2cc867c1d62c2860dffcf03c203bd825ebe159fc0be4db3ec457eb2ceb
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,74 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of experience,
+nationality, personal appearance, race, religion, or sexual identity and
+orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at joseph.dayo@gmail.com. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at [http://contributor-covenant.org/version/1/4][version]
+
+[homepage]: http://contributor-covenant.org
+[version]: http://contributor-covenant.org/version/1/4/
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,51 @@
+PATH
+  remote: .
+  specs:
+    tensor_stream-opencl (0.1.0)
+      opencl_ruby_ffi
+      tensor_stream
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    concurrent-ruby (1.0.5)
+    deep_merge (1.2.1)
+    diff-lcs (1.3)
+    ffi (1.9.25)
+    narray (0.6.1.2)
+    narray_ffi (1.4.3)
+      ffi (~> 1.9, >= 1.9.3)
+      narray (~> 0.6, >= 0.6.0.8)
+    opencl_ruby_ffi (1.3.4)
+      ffi (~> 1.9, >= 1.9.3)
+      narray (~> 0.6, >= 0.6.0.8)
+      narray_ffi (~> 1.0, >= 1.0.0)
+    rake (10.5.0)
+    rspec (3.8.0)
+      rspec-core (~> 3.8.0)
+      rspec-expectations (~> 3.8.0)
+      rspec-mocks (~> 3.8.0)
+    rspec-core (3.8.0)
+      rspec-support (~> 3.8.0)
+    rspec-expectations (3.8.1)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.8.0)
+    rspec-mocks (3.8.0)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.8.0)
+    rspec-support (3.8.0)
+    tensor_stream (0.8.1)
+      concurrent-ruby
+      deep_merge
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  bundler (~> 1.16)
+  rake (~> 10.0)
+  rspec (~> 3.0)
+  tensor_stream-opencl!
+
+BUNDLED WITH
+   1.16.2
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2018 Joseph Dayo
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,58 @@
+# TensorStream::Opencl
+
+This gem provides an OpenCL backend for TensorStream (https://github.com/jedld/tensor_stream). OpenCL is an open standard
+that allows running compute applications on heterogeneous platforms such as CPUs and GPUs.
+
+## Installation
+
+Make sure OpenCL device drivers are installed on your system. You may refer to the following links:
+
+### Nvidia
+
+https://developer.nvidia.com/opencl
+
+### AMD
+
+https://support.amd.com/en-us/kb-articles/Pages/OpenCL2-Driver.aspx
+
+### Intel
+
+https://software.intel.com/en-us/articles/opencl-drivers
+
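To confirm that the installed drivers are visible from Ruby, a quick sanity check with the opencl_ruby_ffi gem (a dependency of this gem) might look like the sketch below; it assumes opencl_ruby_ffi's `OpenCL.platforms` / `devices` API:

```ruby
require 'opencl_ruby_ffi'

# Print every OpenCL platform and device the installed drivers expose.
OpenCL.platforms.each do |platform|
  puts platform.name
  platform.devices.each do |device|
    puts "  #{device.name}"
  end
end
```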
+Add this line to your application's Gemfile:
+
+```ruby
+gem 'tensor_stream-opencl'
+```
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install tensor_stream-opencl
+
+## Usage
+
+Simply requiring this gem allows tensor_stream to automatically select OpenCL devices for use in your computations.
+
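A minimal end-to-end sketch of what that looks like, assuming tensor_stream's TensorFlow-style `constant` / `matmul` / `session` API (requiring `tensor_stream/opencl` is what registers the OpenCL evaluator):

```ruby
require 'tensor_stream'
require 'tensor_stream/opencl' # loading the gem registers the OpenCL evaluator

ts = TensorStream

a = ts.constant([[1.0, 2.0], [3.0, 4.0]])
b = ts.constant([[5.0, 6.0], [7.0, 8.0]])
c = ts.matmul(a, b)

sess = ts.session
p sess.run(c) # evaluated on an OpenCL device when one is available
```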
+## Development
+
+After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+
+To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+
+## Contributing
+
+Bug reports and pull requests are welcome on GitHub at https://github.com/jedld/tensor_stream-opencl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
+
+## License
+
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
+
+## Code of Conduct
+
+Everyone interacting in the TensorStream::Opencl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/tensor_stream-opencl/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+
+require "bundler/setup"
+require "tensor_stream/opencl"
+
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+
+# (If you use this, don't forget to add pry to your Gemfile!)
+# require "pry"
+# Pry.start
+
+require "irb"
+IRB.start(__FILE__)
data/bin/setup
ADDED
data/lib/tensor_stream/opencl/kernels/_bool_operand.cl
ADDED
@@ -0,0 +1,45 @@
+// same dimension add floating point op
+__kernel void <%= fname%>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= a_dtype %> *A, __global <%= b_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <%= op %> B[globalRow * N + globalCol] ? 1 : 0;
+}
+
+// 1D + Scalar floating point add op
+__kernel void <%=fname%>_c_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= a_dtype %> *A, __global <%= b_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <%= op %> B[0] ? 1 : 0;
+    } else {
+      C[globalRow * N + globalCol] = B[0] <%= op %> A[globalRow * N + globalCol] ? 1 : 0;
+    }
+}
+
+// 1D + Scalar floating point add op broadcast
+__kernel void <%= fname%>_b_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= a_dtype %> *A, __global <%= b_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    int b_m_index = globalRow;
+    int b_n_index = globalCol;
+
+    if (b_m_index >= M2) {
+      b_m_index = b_m_index % M2;
+    };
+
+    if (b_n_index >= N2) {
+      b_n_index = b_n_index % N2;
+    }
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <%= op %> B[b_m_index * N2 + b_n_index] ? 1 : 0;
+    } else {
+      C[globalRow * N + globalCol] = B[b_m_index * N2 + b_n_index] <%= op %> A[globalRow * N + globalCol] ? 1 : 0;
+    }
+}
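The `_b_` (broadcast) variants above wrap B's row and column indices modulo B's dimensions so a smaller tensor can be combined element-wise with a larger one. A plain-Ruby sketch of the same indexing, purely for illustration (the method name `broadcast_op` is hypothetical):

```ruby
# CPU sketch of the broadcast indexing used by the *_b_* kernels.
def broadcast_op(a, b, m, n, m2, n2)
  Array.new(m) do |row|
    Array.new(n) do |col|
      b_row = row >= m2 ? row % m2 : row   # wrap B's row index, as the kernel does
      b_col = col >= n2 ? col % n2 : col   # wrap B's column index
      yield a[row][col], b[b_row][b_col]
    end
  end
end

a = [[1, 2, 3], [4, 5, 6]]
b = [[10, 20, 30]]                          # 1x3 broadcast against 2x3
p broadcast_op(a, b, 2, 3, 1, 3) { |x, y| x + y }
# => [[11, 22, 33], [14, 25, 36]]
```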
data/lib/tensor_stream/opencl/kernels/_operand.cl
ADDED
@@ -0,0 +1,45 @@
+// same dimension add floating point op
+__kernel void <%= fname%>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <%= op %> B[globalRow * N + globalCol];
+}
+
+// 1D + Scalar floating point add op
+__kernel void <%=fname%>_c_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <%= op %> B[0];
+    } else {
+      C[globalRow * N + globalCol] = B[0] <%= op %> A[globalRow * N + globalCol];
+    }
+}
+
+// 1D + Scalar floating point add op broadcast
+__kernel void <%= fname%>_b_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    int b_m_index = globalRow;
+    int b_n_index = globalCol;
+
+    if (b_m_index >= M2) {
+      b_m_index = b_m_index % M2;
+    };
+
+    if (b_n_index >= N2) {
+      b_n_index = b_n_index % N2;
+    }
+
+    if (switch_op == 0) {
+      C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <%= op %> B[b_m_index * N2 + b_n_index];
+    } else {
+      C[globalRow * N + globalCol] = B[b_m_index * N2 + b_n_index] <%= op %> A[globalRow * N + globalCol];
+    }
+}
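The `<%= %>` placeholders in the two operand templates are filled in with ERB before the kernel source is compiled (the per-op `.cl` / `.cl.erb` stubs and `opencl_template_helper.rb` supply the real values). A minimal, self-contained sketch of that rendering, using hypothetical substitution values:

```ruby
require "erb"

# Hypothetical values purely for illustration; the actual names and dtype
# suffixes are supplied by the evaluator / opencl_template_helper.rb.
fname    = "add"   # kernel family name
dtype    = "fp"    # dtype suffix appended to the kernel name (assumed)
c_dtype  = "float" # OpenCL C type the dtype maps to
result_t = "float" # output buffer element type
op       = "+"     # operator spliced into the expression

# Assumes the working directory is the gem root.
template = File.read("lib/tensor_stream/opencl/kernels/_operand.cl")
puts ERB.new(template).result(binding)
# => prints concrete kernels add_fp, add_c_fp and add_b_fp operating on float buffers
```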
data/lib/tensor_stream/opencl/kernels/abs.cl
ADDED
@@ -0,0 +1,20 @@
+% c_dtype = dtype_to_c_type(dtype)
+% if TensorStream::Ops::FLOATING_POINT_TYPES.include?(dtype)
+__kernel void abs_<%= dtype%>(const int M, const int N, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    C[globalRow * N + globalCol] = fabs(A[globalRow * N + globalCol]);
+}
+% else
+% %w[int int32].each do |dt|
+__kernel void abs_<%= dt %>(const int M, const int N, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    C[globalRow * N + globalCol] = fabs((float)A[globalRow * N + globalCol]);
+}
+% end
+%end
data/lib/tensor_stream/opencl/kernels/acos.cl
ADDED
@@ -0,0 +1,8 @@
+% c_dtype = dtype_to_c_type(dtype)
+__kernel void acos_<%= dtype %>(const int M, const int N, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+    C[globalRow * N + globalCol] = acos(A[globalRow * N + globalCol]);
+}
data/lib/tensor_stream/opencl/kernels/apply_adam.cl
ADDED
@@ -0,0 +1,23 @@
+% c_dtype = dtype_to_c_type(dtype)
+// same dimension add floating point op
+__kernel void apply_adam_<%= dtype %>(const int M, const int N,
+                         __global const <%= c_dtype %> *grad,
+                         __global const <%= c_dtype %> *learning_rate,
+                         __global const <%= c_dtype %> *beta1_power,
+                         __global const <%= c_dtype %> *beta2_power,
+                         __global const <%= c_dtype %> *beta1,
+                         __global const <%= c_dtype %> *beta2,
+                         __global const <%= c_dtype %> *epsilon,
+                         __global <%= c_dtype %> *momentum,
+                         __global <%= c_dtype %> *output, __global <%= c_dtype %> *v) {
+    // Get the index of the current element to be processed
+    const int globalRow = get_global_id(0); // Row ID of C (0..M)
+    const int globalCol = get_global_id(1); // Col ID of C (0..N)
+    const int index = globalRow * N + globalCol;
+
+    <%= c_dtype %> alpha = learning_rate[0] * sqrt(1.0 - beta2_power[0]) / (1.0 - beta1_power[0]);
+
+    momentum[index] += (grad[index] - momentum[index]) * (1.0 - beta1[0]);
+    v[index] += (grad[index] * grad[index] - v[index]) * (1.0 - beta2[0]);
+    output[index] -= (momentum[index] * alpha) / ( sqrt(v[index]) + epsilon[0] );
+}
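Reading the update off the kernel body above, apply_adam performs a standard Adam step per element, with the beta1_power / beta2_power inputs supplying the bias-correction terms:

```latex
\begin{aligned}
m &\leftarrow m + (1 - \beta_1)\,(g - m) \\
v &\leftarrow v + (1 - \beta_2)\,(g^2 - v) \\
\alpha &= \eta \cdot \frac{\sqrt{1 - \beta_2^{\,t}}}{1 - \beta_1^{\,t}} \\
\theta &\leftarrow \theta - \frac{\alpha\, m}{\sqrt{v} + \epsilon}
\end{aligned}
```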