cumo 0.4.3 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +15 -0
- data/.rubocop_todo.yml +1252 -0
- data/3rd_party/mkmf-cu/Gemfile +2 -0
- data/3rd_party/mkmf-cu/Rakefile +2 -1
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
- data/CHANGELOG.md +85 -0
- data/Dockerfile +34 -0
- data/Gemfile +6 -1
- data/README.md +2 -10
- data/Rakefile +8 -11
- data/bench/broadcast_fp32.rb +28 -26
- data/bench/cumo_bench.rb +18 -16
- data/bench/numo_bench.rb +18 -16
- data/bench/reduction_fp32.rb +14 -12
- data/bin/console +1 -0
- data/cumo.gemspec +6 -9
- data/docker-build.sh +4 -0
- data/docker-launch.sh +4 -0
- data/docs/src-tree.md +1 -1
- data/ext/cumo/cuda/cudnn.c +2 -2
- data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
- data/ext/cumo/cuda/driver.c +8 -0
- data/ext/cumo/cumo.c +7 -3
- data/ext/cumo/depend.erb +15 -13
- data/ext/cumo/extconf.rb +33 -47
- data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +13 -1
- data/ext/cumo/include/cumo/template.h +2 -4
- data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/float_macro.h +2 -2
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +8 -6
- data/ext/cumo/narray/data.c +48 -28
- data/ext/cumo/narray/gen/cogen.rb +8 -7
- data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
- data/ext/cumo/narray/gen/def/bit.rb +3 -1
- data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/int16.rb +2 -0
- data/ext/cumo/narray/gen/def/int32.rb +2 -0
- data/ext/cumo/narray/gen/def/int64.rb +2 -0
- data/ext/cumo/narray/gen/def/int8.rb +2 -0
- data/ext/cumo/narray/gen/def/robject.rb +2 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/uint16.rb +2 -0
- data/ext/cumo/narray/gen/def/uint32.rb +2 -0
- data/ext/cumo/narray/gen/def/uint64.rb +2 -0
- data/ext/cumo/narray/gen/def/uint8.rb +2 -0
- data/ext/cumo/narray/gen/erbln.rb +9 -7
- data/ext/cumo/narray/gen/erbpp2.rb +26 -24
- data/ext/cumo/narray/gen/narray_def.rb +13 -11
- data/ext/cumo/narray/gen/spec.rb +58 -55
- data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
- data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
- data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
- data/ext/cumo/narray/gen/tmpl/at.c +34 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
- data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
- data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
- data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
- data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
- data/ext/cumo/narray/gen/tmpl/each.c +4 -2
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
- data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
- data/ext/cumo/narray/gen/tmpl/median.c +2 -2
- data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
- data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
- data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
- data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
- data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
- data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
- data/ext/cumo/narray/index.c +244 -40
- data/ext/cumo/narray/index_kernel.cu +84 -0
- data/ext/cumo/narray/narray.c +57 -19
- data/ext/cumo/narray/ndloop.c +1 -1
- data/ext/cumo/narray/struct.c +1 -1
- data/lib/cumo/cuda/compile_error.rb +1 -1
- data/lib/cumo/cuda/compiler.rb +23 -22
- data/lib/cumo/cuda/cudnn.rb +1 -1
- data/lib/cumo/cuda/device.rb +1 -1
- data/lib/cumo/cuda/link_state.rb +2 -2
- data/lib/cumo/cuda/module.rb +1 -2
- data/lib/cumo/cuda/nvrtc_program.rb +3 -2
- data/lib/cumo/cuda.rb +2 -0
- data/lib/cumo/linalg.rb +2 -0
- data/lib/cumo/narray/extra.rb +297 -341
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo.rb +3 -1
- data/test/bit_test.rb +157 -0
- data/test/cuda/compiler_test.rb +69 -0
- data/test/cuda/device_test.rb +31 -0
- data/test/cuda/memory_pool_test.rb +45 -0
- data/test/cuda/nvrtc_test.rb +51 -0
- data/test/cuda/runtime_test.rb +28 -0
- data/test/cudnn_test.rb +498 -0
- data/test/cumo_test.rb +27 -0
- data/test/narray_test.rb +745 -0
- data/test/ractor_test.rb +52 -0
- data/test/test_helper.rb +31 -0
- metadata +34 -54
- data/.travis.yml +0 -5
- data/numo-narray-version +0 -1
data/ext/cumo/narray/narray.c
CHANGED
|
@@ -324,19 +324,19 @@ cumo_na_setup(VALUE self, int ndim, size_t *shape)
|
|
|
324
324
|
|
|
325
325
|
@example
|
|
326
326
|
i = Cumo::Int64.new([2,4,3])
|
|
327
|
-
|
|
327
|
+
# => Cumo::Int64#shape=[2,4,3](empty)
|
|
328
328
|
|
|
329
329
|
f = Cumo::DFloat.new(3,4)
|
|
330
|
-
|
|
330
|
+
# => Cumo::DFloat#shape=[3,4](empty)
|
|
331
331
|
|
|
332
332
|
f.fill(2)
|
|
333
|
-
|
|
333
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
334
334
|
# [[2, 2, 2, 2],
|
|
335
335
|
# [2, 2, 2, 2],
|
|
336
336
|
# [2, 2, 2, 2]]
|
|
337
337
|
|
|
338
338
|
x = Cumo::NArray.new(5)
|
|
339
|
-
|
|
339
|
+
# => in `new': allocator undefined for Cumo::NArray (TypeError)
|
|
340
340
|
# from t.rb:9:in `<main>'
|
|
341
341
|
|
|
342
342
|
*/
|
|
@@ -420,10 +420,10 @@ cumo_na_initialize_copy(VALUE self, VALUE orig)
|
|
|
420
420
|
* but for typed NArray subclasses, e.g., DFloat, Int64.
|
|
421
421
|
* @example
|
|
422
422
|
* a = Cumo::DFloat.zeros(3,5)
|
|
423
|
-
* => Cumo::DFloat#shape=[3,5]
|
|
424
|
-
* [[0, 0, 0, 0, 0],
|
|
425
|
-
*
|
|
426
|
-
*
|
|
423
|
+
* # => Cumo::DFloat#shape=[3,5]
|
|
424
|
+
* # [[0, 0, 0, 0, 0],
|
|
425
|
+
* # [0, 0, 0, 0, 0],
|
|
426
|
+
* # [0, 0, 0, 0, 0]]
|
|
427
427
|
*/
|
|
428
428
|
static VALUE
|
|
429
429
|
cumo_na_s_zeros(int argc, VALUE *argv, VALUE klass)
|
|
@@ -471,8 +471,8 @@ cumo_na_s_ones(int argc, VALUE *argv, VALUE klass)
|
|
|
471
471
|
|
|
472
472
|
@example
|
|
473
473
|
a = Cumo::DFloat.linspace(-5,5,7)
|
|
474
|
-
=> Cumo::DFloat#shape=[7]
|
|
475
|
-
[-5, -3.33333, -1.66667, 0, 1.66667, 3.33333, 5]
|
|
474
|
+
# => Cumo::DFloat#shape=[7]
|
|
475
|
+
# [-5, -3.33333, -1.66667, 0, 1.66667, 3.33333, 5]
|
|
476
476
|
*/
|
|
477
477
|
static VALUE
|
|
478
478
|
cumo_na_s_linspace(int argc, VALUE *argv, VALUE klass)
|
|
@@ -510,11 +510,12 @@ cumo_na_s_linspace(int argc, VALUE *argv, VALUE klass)
|
|
|
510
510
|
|
|
511
511
|
@example
|
|
512
512
|
Cumo::DFloat.logspace(4,0,5,2)
|
|
513
|
-
=> Cumo::DFloat#shape=[5]
|
|
514
|
-
|
|
513
|
+
# => Cumo::DFloat#shape=[5]
|
|
514
|
+
# [16, 8, 4, 2, 1]
|
|
515
|
+
|
|
515
516
|
Cumo::DComplex.logspace(0,1i*Math::PI,5,Math::E)
|
|
516
|
-
=> Cumo::DComplex#shape=[5]
|
|
517
|
-
|
|
517
|
+
# => Cumo::DComplex#shape=[5]
|
|
518
|
+
# [1+4.44659e-323i, 0.707107+0.707107i, 6.12323e-17+1i, -0.707107+0.707107i, ...]
|
|
518
519
|
*/
|
|
519
520
|
static VALUE
|
|
520
521
|
cumo_na_s_logspace(int argc, VALUE *argv, VALUE klass)
|
|
@@ -548,10 +549,10 @@ cumo_na_s_logspace(int argc, VALUE *argv, VALUE klass)
|
|
|
548
549
|
@return [Cumo::NArray] created NArray.
|
|
549
550
|
@example
|
|
550
551
|
a = Cumo::DFloat.eye(3)
|
|
551
|
-
=> Cumo::DFloat#shape=[3,3]
|
|
552
|
-
[[1, 0, 0],
|
|
553
|
-
|
|
554
|
-
|
|
552
|
+
# => Cumo::DFloat#shape=[3,3]
|
|
553
|
+
# [[1, 0, 0],
|
|
554
|
+
# [0, 1, 0],
|
|
555
|
+
# [0, 0, 1]]
|
|
555
556
|
*/
|
|
556
557
|
static VALUE
|
|
557
558
|
cumo_na_s_eye(int argc, VALUE *argv, VALUE klass)
|
|
@@ -889,6 +890,39 @@ cumo_na_check_contiguous(VALUE self)
|
|
|
889
890
|
return Qfalse;
|
|
890
891
|
}
|
|
891
892
|
|
|
893
|
+
VALUE
|
|
894
|
+
cumo_na_check_fortran_contiguous(VALUE self)
|
|
895
|
+
{
|
|
896
|
+
int i;
|
|
897
|
+
ssize_t st0;
|
|
898
|
+
cumo_narray_t *na;
|
|
899
|
+
|
|
900
|
+
switch(CUMO_RNARRAY_TYPE(self)) {
|
|
901
|
+
case CUMO_NARRAY_DATA_T:
|
|
902
|
+
case CUMO_NARRAY_FILEMAP_T:
|
|
903
|
+
return Qfalse;
|
|
904
|
+
case CUMO_NARRAY_VIEW_T:
|
|
905
|
+
CumoGetNArray(self,na);
|
|
906
|
+
|
|
907
|
+
// not contiguous if it has index
|
|
908
|
+
for (i=0; i < CUMO_NA_NDIM(na); i++) {
|
|
909
|
+
if (CUMO_NA_IS_INDEX_AT(na,i))
|
|
910
|
+
return Qfalse;
|
|
911
|
+
}
|
|
912
|
+
|
|
913
|
+
// check f-contiguous
|
|
914
|
+
st0 = cumo_na_element_stride(self); // elmsz
|
|
915
|
+
for (i=0; i < CUMO_NA_NDIM(na); i++) {
|
|
916
|
+
if (CUMO_NA_SHAPE(na)[i] == 1)
|
|
917
|
+
continue;
|
|
918
|
+
if (CUMO_NA_STRIDE_AT(na, i) != st0)
|
|
919
|
+
return Qfalse;
|
|
920
|
+
st0 *= CUMO_NA_SHAPE(na)[i];
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
return Qtrue;
|
|
924
|
+
}
|
|
925
|
+
|
|
892
926
|
VALUE
|
|
893
927
|
cumo_na_as_contiguous_array(VALUE a)
|
|
894
928
|
{
|
|
@@ -1388,7 +1422,7 @@ static VALUE cumo_na_inplace( VALUE self );
|
|
|
1388
1422
|
/*
|
|
1389
1423
|
Load marshal data.
|
|
1390
1424
|
@overload marshal_load(data)
|
|
1391
|
-
@
|
|
1425
|
+
@param data [Array] Array containing marshal data.
|
|
1392
1426
|
@return [nil]
|
|
1393
1427
|
*/
|
|
1394
1428
|
static VALUE
|
|
@@ -1833,6 +1867,9 @@ cumo_na_equal(VALUE self, volatile VALUE other)
|
|
|
1833
1867
|
return Qfalse;
|
|
1834
1868
|
}
|
|
1835
1869
|
}
|
|
1870
|
+
if (na1->size == 0) {
|
|
1871
|
+
return Qtrue;
|
|
1872
|
+
}
|
|
1836
1873
|
vbool = rb_funcall(self, cumo_id_eq, 1, other);
|
|
1837
1874
|
return (rb_funcall(vbool, cumo_id_count_false_cpu, 0)==INT2FIX(0)) ? Qtrue : Qfalse;
|
|
1838
1875
|
}
|
|
@@ -1929,6 +1966,7 @@ Init_cumo_narray()
|
|
|
1929
1966
|
rb_define_method(cNArray, "debug_info", cumo_na_debug_info, 0);
|
|
1930
1967
|
|
|
1931
1968
|
rb_define_method(cNArray, "contiguous?", cumo_na_check_contiguous, 0);
|
|
1969
|
+
rb_define_method(cNArray, "fortran_contiguous?", cumo_na_check_fortran_contiguous, 0);
|
|
1932
1970
|
|
|
1933
1971
|
rb_define_method(cNArray, "view", cumo_na_make_view, 0);
|
|
1934
1972
|
rb_define_method(cNArray, "expand_dims", cumo_na_expand_dims, 1);
|
data/ext/cumo/narray/ndloop.c
CHANGED
|
@@ -56,7 +56,7 @@ typedef struct CUMO_NA_MD_LOOP {
|
|
|
56
56
|
// [2,3,4], 111b for sum(), 010b for sum(axis: 1), 110b for sum(axis: [1,2])
|
|
57
57
|
VALUE loop_opt;
|
|
58
58
|
cumo_ndfunc_t *ndfunc;
|
|
59
|
-
void (*loop_func)();
|
|
59
|
+
void (*loop_func)(cumo_ndfunc_t *, struct CUMO_NA_MD_LOOP *);
|
|
60
60
|
} cumo_na_md_loop_t;
|
|
61
61
|
|
|
62
62
|
#define LARG(lp,iarg) ((lp)->user.args[iarg])
|
data/ext/cumo/narray/struct.c
CHANGED
|
@@ -812,7 +812,7 @@ nst_s_add_type(int argc, VALUE *argv, VALUE mod)
|
|
|
812
812
|
|
|
813
813
|
#define NST_TYPEDEF(tpname,tpclass) \
|
|
814
814
|
static VALUE \
|
|
815
|
-
nst_s_##tpname(
|
|
815
|
+
nst_s_##tpname(int argc, VALUE *argv, VALUE mod) \
|
|
816
816
|
{ nstruct_add_type(tpclass,argc,argv,mod); \
|
|
817
817
|
return Qnil; \
|
|
818
818
|
}
|
data/lib/cumo/cuda/compiler.rb
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'tmpdir'
|
|
2
4
|
require 'tempfile'
|
|
3
5
|
require 'fileutils'
|
|
4
6
|
require 'digest/md5'
|
|
5
|
-
require_relative '../cuda'
|
|
6
7
|
|
|
7
8
|
module Cumo::CUDA
|
|
8
9
|
class Compiler
|
|
9
10
|
VALID_KERNEL_NAME = /\A[a-zA-Z_][a-zA-Z_0-9]*\z/
|
|
10
11
|
DEFAULT_CACHE_DIR = File.expand_path('~/.cumo/kernel_cache')
|
|
11
|
-
|
|
12
|
+
|
|
12
13
|
@@empty_file_preprocess_cache ||= {}
|
|
13
|
-
|
|
14
|
+
|
|
14
15
|
def self.valid_kernel_name?(name)
|
|
15
16
|
VALID_KERNEL_NAME.match?(name)
|
|
16
17
|
end
|
|
17
|
-
|
|
18
|
+
|
|
18
19
|
def compile_using_nvrtc(source, options: [], arch: nil)
|
|
19
20
|
arch ||= get_arch
|
|
20
21
|
options += ["-arch=#{arch}"]
|
|
21
|
-
|
|
22
|
+
|
|
22
23
|
Dir.mktmpdir do |root_dir|
|
|
23
24
|
path = File.join(root_dir, 'kern')
|
|
24
25
|
cu_path = "#{path}.cu"
|
|
25
|
-
|
|
26
|
+
|
|
26
27
|
File.open(cu_path, 'w') do |cu_file|
|
|
27
28
|
cu_file.write(source)
|
|
28
29
|
end
|
|
29
|
-
|
|
30
|
+
|
|
30
31
|
prog = NVRTCProgram.new(source, name: cu_path)
|
|
31
32
|
begin
|
|
32
33
|
ptx = prog.compile(options: options)
|
|
@@ -41,12 +42,12 @@ module Cumo::CUDA
|
|
|
41
42
|
return ptx
|
|
42
43
|
end
|
|
43
44
|
end
|
|
44
|
-
|
|
45
|
+
|
|
45
46
|
def compile_with_cache(source, options: [], arch: nil, cache_dir: nil, extra_source: nil)
|
|
46
47
|
# NVRTC does not use extra_source. extra_source is used for cache key.
|
|
47
48
|
cache_dir ||= get_cache_dir
|
|
48
49
|
arch ||= get_arch
|
|
49
|
-
|
|
50
|
+
|
|
50
51
|
options += ['-ftz=true']
|
|
51
52
|
|
|
52
53
|
env = [arch, options, get_nvrtc_version]
|
|
@@ -57,15 +58,15 @@ module Cumo::CUDA
|
|
|
57
58
|
@@empty_file_preprocess_cache[env] = base
|
|
58
59
|
end
|
|
59
60
|
key_src = "#{env} #{base} #{source} #{extra_source}"
|
|
60
|
-
|
|
61
|
+
|
|
61
62
|
key_src.encode!('utf-8')
|
|
62
63
|
digest = Digest::MD5.hexdigest(key_src)
|
|
63
64
|
name = "#{digest}_2.cubin"
|
|
64
|
-
|
|
65
|
+
|
|
65
66
|
unless Dir.exist?(cache_dir)
|
|
66
67
|
FileUtils.mkdir_p(cache_dir)
|
|
67
68
|
end
|
|
68
|
-
|
|
69
|
+
|
|
69
70
|
# TODO(sonots): thread-safe?
|
|
70
71
|
path = File.join(cache_dir, name)
|
|
71
72
|
cubin = load_cache(path)
|
|
@@ -74,7 +75,7 @@ module Cumo::CUDA
|
|
|
74
75
|
mod.load(cubin)
|
|
75
76
|
return mod
|
|
76
77
|
end
|
|
77
|
-
|
|
78
|
+
|
|
78
79
|
ptx = compile_using_nvrtc(source, options: options, arch: arch)
|
|
79
80
|
cubin = nil
|
|
80
81
|
cubin_hash = nil
|
|
@@ -85,7 +86,7 @@ module Cumo::CUDA
|
|
|
85
86
|
end
|
|
86
87
|
|
|
87
88
|
save_cache(path, cubin_hash, cubin)
|
|
88
|
-
|
|
89
|
+
|
|
89
90
|
# Save .cu source file along with .cubin
|
|
90
91
|
if get_bool_env_variable('CUMO_CACHE_SAVE_CUDA_SOURCE', false)
|
|
91
92
|
File.open("#{path}.cu", 'w') do |f|
|
|
@@ -97,7 +98,7 @@ module Cumo::CUDA
|
|
|
97
98
|
mod.load(cubin)
|
|
98
99
|
return mod
|
|
99
100
|
end
|
|
100
|
-
|
|
101
|
+
|
|
101
102
|
private
|
|
102
103
|
|
|
103
104
|
def save_cache(path, cubin_hash, cubin)
|
|
@@ -105,7 +106,7 @@ module Cumo::CUDA
|
|
|
105
106
|
tf.write(cubin_hash)
|
|
106
107
|
tf.write(cubin)
|
|
107
108
|
temp_path = tf.path
|
|
108
|
-
|
|
109
|
+
FileUtils.mv(temp_path, path)
|
|
109
110
|
end
|
|
110
111
|
|
|
111
112
|
def load_cache(path)
|
|
@@ -121,29 +122,29 @@ module Cumo::CUDA
|
|
|
121
122
|
end
|
|
122
123
|
nil
|
|
123
124
|
end
|
|
124
|
-
|
|
125
|
+
|
|
125
126
|
def get_cache_dir
|
|
126
127
|
ENV.fetch('CUMO_CACHE_DIR', DEFAULT_CACHE_DIR)
|
|
127
128
|
end
|
|
128
|
-
|
|
129
|
+
|
|
129
130
|
def get_nvrtc_version
|
|
130
131
|
@@nvrtc_version ||= NVRTC.nvrtcVersion
|
|
131
132
|
end
|
|
132
|
-
|
|
133
|
+
|
|
133
134
|
def get_arch
|
|
134
135
|
cc = Device.new.compute_capability
|
|
135
136
|
"compute_#{cc}"
|
|
136
137
|
end
|
|
137
|
-
|
|
138
|
+
|
|
138
139
|
def get_bool_env_variable(name, default)
|
|
139
140
|
val = ENV[name]
|
|
140
141
|
return default if val.nil? or val.size == 0
|
|
141
142
|
Integer(val) == 1 rescue false
|
|
142
143
|
end
|
|
143
|
-
|
|
144
|
+
|
|
144
145
|
def preprocess(source, options, arch)
|
|
145
146
|
options += ["-arch=#{arch}"]
|
|
146
|
-
|
|
147
|
+
|
|
147
148
|
prog = NVRTCProgram.new(source, name: '')
|
|
148
149
|
begin
|
|
149
150
|
result = prog.compile(options: options)
|
data/lib/cumo/cuda/cudnn.rb
CHANGED
data/lib/cumo/cuda/device.rb
CHANGED
data/lib/cumo/cuda/link_state.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Cumo::CUDA
|
|
4
4
|
# CUDA link state.
|
|
@@ -25,7 +25,7 @@ module Cumo::CUDA
|
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
def complete
|
|
28
|
-
|
|
28
|
+
Driver.cuLinkComplete(@ptr)
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
end
|
data/lib/cumo/cuda/module.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
require_relative 'compile_error'
|
|
3
4
|
|
|
4
5
|
module Cumo::CUDA
|
|
@@ -18,7 +19,7 @@ module Cumo::CUDA
|
|
|
18
19
|
begin
|
|
19
20
|
NVRTC.nvrtcCompileProgram(@ptr, options)
|
|
20
21
|
return NVRTC.nvrtcGetPTX(@ptr)
|
|
21
|
-
rescue NVRTCError
|
|
22
|
+
rescue NVRTCError
|
|
22
23
|
log = NVRTC.nvrtcGetProgramLog(@ptr)
|
|
23
24
|
raise CompileError.new(log, @src, @name, options)
|
|
24
25
|
end
|
data/lib/cumo/cuda.rb
CHANGED