cumo 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +15 -0
- data/.rubocop_todo.yml +1272 -0
- data/3rd_party/mkmf-cu/Gemfile +2 -0
- data/3rd_party/mkmf-cu/Rakefile +2 -1
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
- data/CHANGELOG.md +69 -0
- data/Gemfile +6 -1
- data/README.md +2 -10
- data/Rakefile +8 -11
- data/bench/broadcast_fp32.rb +28 -26
- data/bench/cumo_bench.rb +18 -16
- data/bench/numo_bench.rb +18 -16
- data/bench/reduction_fp32.rb +14 -12
- data/bin/console +1 -0
- data/cumo.gemspec +5 -8
- data/ext/cumo/cuda/cudnn.c +2 -2
- data/ext/cumo/cumo.c +7 -3
- data/ext/cumo/depend.erb +15 -13
- data/ext/cumo/extconf.rb +32 -46
- data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +13 -1
- data/ext/cumo/include/cumo/template.h +2 -4
- data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
- data/ext/cumo/include/cumo/types/float_macro.h +2 -2
- data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +3 -3
- data/ext/cumo/narray/data.c +23 -2
- data/ext/cumo/narray/gen/cogen.rb +8 -7
- data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
- data/ext/cumo/narray/gen/def/bit.rb +3 -1
- data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/int16.rb +2 -0
- data/ext/cumo/narray/gen/def/int32.rb +2 -0
- data/ext/cumo/narray/gen/def/int64.rb +2 -0
- data/ext/cumo/narray/gen/def/int8.rb +2 -0
- data/ext/cumo/narray/gen/def/robject.rb +2 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/uint16.rb +2 -0
- data/ext/cumo/narray/gen/def/uint32.rb +2 -0
- data/ext/cumo/narray/gen/def/uint64.rb +2 -0
- data/ext/cumo/narray/gen/def/uint8.rb +2 -0
- data/ext/cumo/narray/gen/erbln.rb +9 -7
- data/ext/cumo/narray/gen/erbpp2.rb +26 -24
- data/ext/cumo/narray/gen/narray_def.rb +13 -11
- data/ext/cumo/narray/gen/spec.rb +58 -55
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
- data/ext/cumo/narray/gen/tmpl/at.c +34 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
- data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
- data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
- data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
- data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
- data/ext/cumo/narray/index.c +243 -39
- data/ext/cumo/narray/index_kernel.cu +84 -0
- data/ext/cumo/narray/narray.c +38 -1
- data/ext/cumo/narray/ndloop.c +1 -1
- data/ext/cumo/narray/struct.c +1 -1
- data/lib/cumo/cuda/compile_error.rb +1 -1
- data/lib/cumo/cuda/compiler.rb +23 -22
- data/lib/cumo/cuda/cudnn.rb +1 -1
- data/lib/cumo/cuda/device.rb +1 -1
- data/lib/cumo/cuda/link_state.rb +2 -2
- data/lib/cumo/cuda/module.rb +1 -2
- data/lib/cumo/cuda/nvrtc_program.rb +3 -2
- data/lib/cumo/cuda.rb +2 -0
- data/lib/cumo/linalg.rb +2 -0
- data/lib/cumo/narray/extra.rb +137 -185
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo.rb +3 -1
- data/test/bit_test.rb +157 -0
- data/test/cuda/compiler_test.rb +69 -0
- data/test/cuda/device_test.rb +30 -0
- data/test/cuda/memory_pool_test.rb +45 -0
- data/test/cuda/nvrtc_test.rb +51 -0
- data/test/cuda/runtime_test.rb +28 -0
- data/test/cudnn_test.rb +498 -0
- data/test/cumo_test.rb +27 -0
- data/test/narray_test.rb +745 -0
- data/test/ractor_test.rb +52 -0
- data/test/test_helper.rb +31 -0
- metadata +31 -54
- data/.travis.yml +0 -5
- data/numo-narray-version +0 -1
data/ext/cumo/narray/struct.c
CHANGED
|
@@ -812,7 +812,7 @@ nst_s_add_type(int argc, VALUE *argv, VALUE mod)
|
|
|
812
812
|
|
|
813
813
|
#define NST_TYPEDEF(tpname,tpclass) \
|
|
814
814
|
static VALUE \
|
|
815
|
-
nst_s_##tpname(
|
|
815
|
+
nst_s_##tpname(int argc, VALUE *argv, VALUE mod) \
|
|
816
816
|
{ nstruct_add_type(tpclass,argc,argv,mod); \
|
|
817
817
|
return Qnil; \
|
|
818
818
|
}
|
data/lib/cumo/cuda/compiler.rb
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'tmpdir'
|
|
2
4
|
require 'tempfile'
|
|
3
5
|
require 'fileutils'
|
|
4
6
|
require 'digest/md5'
|
|
5
|
-
require_relative '../cuda'
|
|
6
7
|
|
|
7
8
|
module Cumo::CUDA
|
|
8
9
|
class Compiler
|
|
9
10
|
VALID_KERNEL_NAME = /\A[a-zA-Z_][a-zA-Z_0-9]*\z/
|
|
10
11
|
DEFAULT_CACHE_DIR = File.expand_path('~/.cumo/kernel_cache')
|
|
11
|
-
|
|
12
|
+
|
|
12
13
|
@@empty_file_preprocess_cache ||= {}
|
|
13
|
-
|
|
14
|
+
|
|
14
15
|
def self.valid_kernel_name?(name)
|
|
15
16
|
VALID_KERNEL_NAME.match?(name)
|
|
16
17
|
end
|
|
17
|
-
|
|
18
|
+
|
|
18
19
|
def compile_using_nvrtc(source, options: [], arch: nil)
|
|
19
20
|
arch ||= get_arch
|
|
20
21
|
options += ["-arch=#{arch}"]
|
|
21
|
-
|
|
22
|
+
|
|
22
23
|
Dir.mktmpdir do |root_dir|
|
|
23
24
|
path = File.join(root_dir, 'kern')
|
|
24
25
|
cu_path = "#{path}.cu"
|
|
25
|
-
|
|
26
|
+
|
|
26
27
|
File.open(cu_path, 'w') do |cu_file|
|
|
27
28
|
cu_file.write(source)
|
|
28
29
|
end
|
|
29
|
-
|
|
30
|
+
|
|
30
31
|
prog = NVRTCProgram.new(source, name: cu_path)
|
|
31
32
|
begin
|
|
32
33
|
ptx = prog.compile(options: options)
|
|
@@ -41,12 +42,12 @@ module Cumo::CUDA
|
|
|
41
42
|
return ptx
|
|
42
43
|
end
|
|
43
44
|
end
|
|
44
|
-
|
|
45
|
+
|
|
45
46
|
def compile_with_cache(source, options: [], arch: nil, cache_dir: nil, extra_source: nil)
|
|
46
47
|
# NVRTC does not use extra_source. extra_source is used for cache key.
|
|
47
48
|
cache_dir ||= get_cache_dir
|
|
48
49
|
arch ||= get_arch
|
|
49
|
-
|
|
50
|
+
|
|
50
51
|
options += ['-ftz=true']
|
|
51
52
|
|
|
52
53
|
env = [arch, options, get_nvrtc_version]
|
|
@@ -57,15 +58,15 @@ module Cumo::CUDA
|
|
|
57
58
|
@@empty_file_preprocess_cache[env] = base
|
|
58
59
|
end
|
|
59
60
|
key_src = "#{env} #{base} #{source} #{extra_source}"
|
|
60
|
-
|
|
61
|
+
|
|
61
62
|
key_src.encode!('utf-8')
|
|
62
63
|
digest = Digest::MD5.hexdigest(key_src)
|
|
63
64
|
name = "#{digest}_2.cubin"
|
|
64
|
-
|
|
65
|
+
|
|
65
66
|
unless Dir.exist?(cache_dir)
|
|
66
67
|
FileUtils.mkdir_p(cache_dir)
|
|
67
68
|
end
|
|
68
|
-
|
|
69
|
+
|
|
69
70
|
# TODO(sonots): thread-safe?
|
|
70
71
|
path = File.join(cache_dir, name)
|
|
71
72
|
cubin = load_cache(path)
|
|
@@ -74,7 +75,7 @@ module Cumo::CUDA
|
|
|
74
75
|
mod.load(cubin)
|
|
75
76
|
return mod
|
|
76
77
|
end
|
|
77
|
-
|
|
78
|
+
|
|
78
79
|
ptx = compile_using_nvrtc(source, options: options, arch: arch)
|
|
79
80
|
cubin = nil
|
|
80
81
|
cubin_hash = nil
|
|
@@ -85,7 +86,7 @@ module Cumo::CUDA
|
|
|
85
86
|
end
|
|
86
87
|
|
|
87
88
|
save_cache(path, cubin_hash, cubin)
|
|
88
|
-
|
|
89
|
+
|
|
89
90
|
# Save .cu source file along with .cubin
|
|
90
91
|
if get_bool_env_variable('CUMO_CACHE_SAVE_CUDA_SOURCE', false)
|
|
91
92
|
File.open("#{path}.cu", 'w') do |f|
|
|
@@ -97,7 +98,7 @@ module Cumo::CUDA
|
|
|
97
98
|
mod.load(cubin)
|
|
98
99
|
return mod
|
|
99
100
|
end
|
|
100
|
-
|
|
101
|
+
|
|
101
102
|
private
|
|
102
103
|
|
|
103
104
|
def save_cache(path, cubin_hash, cubin)
|
|
@@ -105,7 +106,7 @@ module Cumo::CUDA
|
|
|
105
106
|
tf.write(cubin_hash)
|
|
106
107
|
tf.write(cubin)
|
|
107
108
|
temp_path = tf.path
|
|
108
|
-
|
|
109
|
+
FileUtils.mv(temp_path, path)
|
|
109
110
|
end
|
|
110
111
|
|
|
111
112
|
def load_cache(path)
|
|
@@ -121,29 +122,29 @@ module Cumo::CUDA
|
|
|
121
122
|
end
|
|
122
123
|
nil
|
|
123
124
|
end
|
|
124
|
-
|
|
125
|
+
|
|
125
126
|
def get_cache_dir
|
|
126
127
|
ENV.fetch('CUMO_CACHE_DIR', DEFAULT_CACHE_DIR)
|
|
127
128
|
end
|
|
128
|
-
|
|
129
|
+
|
|
129
130
|
def get_nvrtc_version
|
|
130
131
|
@@nvrtc_version ||= NVRTC.nvrtcVersion
|
|
131
132
|
end
|
|
132
|
-
|
|
133
|
+
|
|
133
134
|
def get_arch
|
|
134
135
|
cc = Device.new.compute_capability
|
|
135
136
|
"compute_#{cc}"
|
|
136
137
|
end
|
|
137
|
-
|
|
138
|
+
|
|
138
139
|
def get_bool_env_variable(name, default)
|
|
139
140
|
val = ENV[name]
|
|
140
141
|
return default if val.nil? or val.size == 0
|
|
141
142
|
Integer(val) == 1 rescue false
|
|
142
143
|
end
|
|
143
|
-
|
|
144
|
+
|
|
144
145
|
def preprocess(source, options, arch)
|
|
145
146
|
options += ["-arch=#{arch}"]
|
|
146
|
-
|
|
147
|
+
|
|
147
148
|
prog = NVRTCProgram.new(source, name: '')
|
|
148
149
|
begin
|
|
149
150
|
result = prog.compile(options: options)
|
data/lib/cumo/cuda/cudnn.rb
CHANGED
data/lib/cumo/cuda/device.rb
CHANGED
data/lib/cumo/cuda/link_state.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Cumo::CUDA
|
|
4
4
|
# CUDA link state.
|
|
@@ -25,7 +25,7 @@ module Cumo::CUDA
|
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
def complete
|
|
28
|
-
|
|
28
|
+
Driver.cuLinkComplete(@ptr)
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
end
|
data/lib/cumo/cuda/module.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
require_relative 'compile_error'
|
|
3
4
|
|
|
4
5
|
module Cumo::CUDA
|
|
@@ -18,7 +19,7 @@ module Cumo::CUDA
|
|
|
18
19
|
begin
|
|
19
20
|
NVRTC.nvrtcCompileProgram(@ptr, options)
|
|
20
21
|
return NVRTC.nvrtcGetPTX(@ptr)
|
|
21
|
-
rescue NVRTCError
|
|
22
|
+
rescue NVRTCError
|
|
22
23
|
log = NVRTC.nvrtcGetProgramLog(@ptr)
|
|
23
24
|
raise CompileError.new(log, @src, @name, options)
|
|
24
25
|
end
|
data/lib/cumo/cuda.rb
CHANGED