cumo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <ruby/thread.h>
|
|
3
|
+
#include <assert.h>
|
|
4
|
+
#include <nvrtc.h>
|
|
5
|
+
#include "cumo/cuda/nvrtc.h"
|
|
6
|
+
|
|
7
|
+
VALUE cumo_cuda_eNVRTCError;
|
|
8
|
+
VALUE cumo_cuda_mNVRTC;
|
|
9
|
+
#define eNVRTCError cumo_cuda_eNVRTCError
|
|
10
|
+
#define mNVRTC cumo_cuda_mNVRTC
|
|
11
|
+
|
|
12
|
+
static void
|
|
13
|
+
check_status(nvrtcResult status)
|
|
14
|
+
{
|
|
15
|
+
if (status != 0) {
|
|
16
|
+
rb_raise(cumo_cuda_eNVRTCError, "%s (error=%d)", nvrtcGetErrorString(status), status);
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
static VALUE
|
|
21
|
+
rb_nvrtcVersion(VALUE self)
|
|
22
|
+
{
|
|
23
|
+
int _major, _minor;
|
|
24
|
+
nvrtcResult status;
|
|
25
|
+
VALUE major, minor;
|
|
26
|
+
|
|
27
|
+
status = nvrtcVersion(&_major, &_minor);
|
|
28
|
+
|
|
29
|
+
check_status(status);
|
|
30
|
+
major = INT2NUM(_major);
|
|
31
|
+
minor = INT2NUM(_minor);
|
|
32
|
+
return rb_ary_new3(2, major, minor);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
struct nvrtcCreateProgramParam {
|
|
36
|
+
nvrtcProgram *prog;
|
|
37
|
+
const char* src;
|
|
38
|
+
const char *name;
|
|
39
|
+
int numHeaders;
|
|
40
|
+
const char** headers;
|
|
41
|
+
const char** includeNames;
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
static void*
|
|
45
|
+
nvrtcCreateProgram_without_gvl_cb(void *param)
|
|
46
|
+
{
|
|
47
|
+
struct nvrtcCreateProgramParam *p = param;
|
|
48
|
+
nvrtcResult status;
|
|
49
|
+
status = nvrtcCreateProgram(p->prog, p->src, p->name, p->numHeaders, p->headers, p->includeNames);
|
|
50
|
+
return (void *)status;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
static VALUE
|
|
54
|
+
rb_nvrtcCreateProgram(
|
|
55
|
+
VALUE self,
|
|
56
|
+
VALUE src,
|
|
57
|
+
VALUE name,
|
|
58
|
+
VALUE headers,
|
|
59
|
+
VALUE includeNames)
|
|
60
|
+
{
|
|
61
|
+
nvrtcResult status;
|
|
62
|
+
nvrtcProgram _prog;
|
|
63
|
+
const char* _src = StringValueCStr(src);
|
|
64
|
+
const char* _name = StringValueCStr(name);
|
|
65
|
+
int _numHeaders = RARRAY_LEN(headers);
|
|
66
|
+
const char** _headers = (const char **)malloc(_numHeaders * sizeof(char *));
|
|
67
|
+
const char** _includeNames = (const char **)malloc(_numHeaders * sizeof(char *));
|
|
68
|
+
int i;
|
|
69
|
+
for (i = 0; i < _numHeaders; i++) {
|
|
70
|
+
VALUE header = RARRAY_PTR(headers)[i];
|
|
71
|
+
_headers[i] = StringValueCStr(header);
|
|
72
|
+
}
|
|
73
|
+
for (i = 0; i < _numHeaders; i++) {
|
|
74
|
+
VALUE include_name = RARRAY_PTR(includeNames)[i];
|
|
75
|
+
_includeNames[i] = StringValueCStr(include_name);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
{
|
|
79
|
+
struct nvrtcCreateProgramParam param = {&_prog, _src, _name, _numHeaders, _headers, _includeNames};
|
|
80
|
+
status = (nvrtcResult)rb_thread_call_without_gvl(nvrtcCreateProgram_without_gvl_cb, ¶m, NULL, NULL);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
free(_headers);
|
|
84
|
+
free(_includeNames);
|
|
85
|
+
check_status(status);
|
|
86
|
+
return SIZET2NUM((size_t)_prog);
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
struct nvrtcDestroyProgramParam {
|
|
90
|
+
nvrtcProgram *prog;
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
static void*
|
|
94
|
+
nvrtcDestroyProgram_without_gvl_cb(void *param)
|
|
95
|
+
{
|
|
96
|
+
struct nvrtcDestroyProgramParam *p = param;
|
|
97
|
+
nvrtcResult status;
|
|
98
|
+
status = nvrtcDestroyProgram(p->prog);
|
|
99
|
+
return (void *)status;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
static VALUE
|
|
103
|
+
rb_nvrtcDestroyProgram(VALUE self, VALUE prog)
|
|
104
|
+
{
|
|
105
|
+
nvrtcResult status;
|
|
106
|
+
nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
|
|
107
|
+
|
|
108
|
+
struct nvrtcDestroyProgramParam param = {&_prog};
|
|
109
|
+
status = (nvrtcResult)rb_thread_call_without_gvl(nvrtcDestroyProgram_without_gvl_cb, ¶m, NULL, NULL);
|
|
110
|
+
|
|
111
|
+
check_status(status);
|
|
112
|
+
return Qnil;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
struct nvrtcCompileProgramParam {
|
|
116
|
+
nvrtcProgram prog;
|
|
117
|
+
int numOptions;
|
|
118
|
+
const char** options;
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
static void*
|
|
122
|
+
nvrtcCompileProgram_without_gvl_cb(void *param)
|
|
123
|
+
{
|
|
124
|
+
struct nvrtcCompileProgramParam *p = param;
|
|
125
|
+
nvrtcResult status;
|
|
126
|
+
status = nvrtcCompileProgram(p->prog, p->numOptions, p->options);
|
|
127
|
+
return (void *)status;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
static VALUE
|
|
131
|
+
rb_nvrtcCompileProgram(VALUE self, VALUE prog, VALUE options)
|
|
132
|
+
{
|
|
133
|
+
nvrtcResult status;
|
|
134
|
+
nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
|
|
135
|
+
int _numOptions = RARRAY_LEN(options);
|
|
136
|
+
const char** _options = (const char **)malloc(_numOptions * sizeof(char *));
|
|
137
|
+
int i;
|
|
138
|
+
for (i = 0; i < _numOptions; i++) {
|
|
139
|
+
VALUE option = RARRAY_PTR(options)[i];
|
|
140
|
+
_options[i] = StringValueCStr(option);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
{
|
|
144
|
+
struct nvrtcCompileProgramParam param = {_prog, _numOptions, _options};
|
|
145
|
+
status = (nvrtcResult)rb_thread_call_without_gvl(nvrtcCompileProgram_without_gvl_cb, ¶m, NULL, NULL);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
free(_options);
|
|
149
|
+
check_status(status);
|
|
150
|
+
return Qnil;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
static VALUE
|
|
154
|
+
rb_nvrtcGetPTX(VALUE self, VALUE prog)
|
|
155
|
+
{
|
|
156
|
+
nvrtcResult status;
|
|
157
|
+
nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
|
|
158
|
+
size_t _ptxSizeRet;
|
|
159
|
+
char *_ptx;
|
|
160
|
+
VALUE ptx;
|
|
161
|
+
|
|
162
|
+
status = nvrtcGetPTXSize(_prog, &_ptxSizeRet);
|
|
163
|
+
check_status(status);
|
|
164
|
+
|
|
165
|
+
ptx = rb_str_new(NULL, _ptxSizeRet);
|
|
166
|
+
_ptx = RSTRING_PTR(ptx);
|
|
167
|
+
status = nvrtcGetPTX(_prog, _ptx);
|
|
168
|
+
check_status(status);
|
|
169
|
+
|
|
170
|
+
return ptx;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
static VALUE
|
|
174
|
+
rb_nvrtcGetProgramLog(VALUE self, VALUE prog)
|
|
175
|
+
{
|
|
176
|
+
nvrtcResult status;
|
|
177
|
+
nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
|
|
178
|
+
size_t _logSizeRet;
|
|
179
|
+
char *_log;
|
|
180
|
+
VALUE log;
|
|
181
|
+
|
|
182
|
+
status = nvrtcGetProgramLogSize(_prog, &_logSizeRet);
|
|
183
|
+
check_status(status);
|
|
184
|
+
|
|
185
|
+
log = rb_str_new(NULL, _logSizeRet);
|
|
186
|
+
_log = RSTRING_PTR(log);
|
|
187
|
+
status = nvrtcGetProgramLog(_prog, _log);
|
|
188
|
+
check_status(status);
|
|
189
|
+
|
|
190
|
+
return log;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
void
|
|
194
|
+
Init_cumo_cuda_nvrtc()
|
|
195
|
+
{
|
|
196
|
+
VALUE mCumo = rb_define_module("Cumo");
|
|
197
|
+
VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
|
|
198
|
+
mNVRTC = rb_define_module_under(mCUDA, "NVRTC");
|
|
199
|
+
eNVRTCError = rb_define_class_under(mCUDA, "NVRTCError", rb_eStandardError);
|
|
200
|
+
|
|
201
|
+
rb_define_singleton_method(mNVRTC, "nvrtcVersion", rb_nvrtcVersion, 0);
|
|
202
|
+
rb_define_singleton_method(mNVRTC, "nvrtcCreateProgram", rb_nvrtcCreateProgram, 4);
|
|
203
|
+
rb_define_singleton_method(mNVRTC, "nvrtcDestroyProgram", rb_nvrtcDestroyProgram, 1);
|
|
204
|
+
rb_define_singleton_method(mNVRTC, "nvrtcCompileProgram", rb_nvrtcCompileProgram, 2);
|
|
205
|
+
rb_define_singleton_method(mNVRTC, "nvrtcGetPTX", rb_nvrtcGetPTX, 1);
|
|
206
|
+
rb_define_singleton_method(mNVRTC, "nvrtcGetProgramLog", rb_nvrtcGetProgramLog, 1);
|
|
207
|
+
}
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <assert.h>
|
|
3
|
+
#include <cuda_runtime.h>
|
|
4
|
+
#include "cumo/cuda/runtime.h"
|
|
5
|
+
|
|
6
|
+
VALUE cumo_cuda_eRuntimeError;
|
|
7
|
+
VALUE cumo_cuda_mRuntime;
|
|
8
|
+
#define eRuntimeError cumo_cuda_eRuntimeError
|
|
9
|
+
#define mRuntime cumo_cuda_mRuntime
|
|
10
|
+
|
|
11
|
+
#define check_status(status) (cumo_cuda_runtime_check_status((status)))
|
|
12
|
+
|
|
13
|
+
///////////////////////////////////////////
|
|
14
|
+
// Version Management
|
|
15
|
+
///////////////////////////////////////////
|
|
16
|
+
|
|
17
|
+
/*
|
|
18
|
+
Returns the CUDA driver version.
|
|
19
|
+
|
|
20
|
+
@return [Integer] Returns the CUDA driver version.
|
|
21
|
+
@see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION_1g8a06ee14a0551606b7c780084d5564ab
|
|
22
|
+
*/
|
|
23
|
+
static VALUE
|
|
24
|
+
rb_cudaDriverGetVersion(VALUE self)
|
|
25
|
+
{
|
|
26
|
+
int _version;
|
|
27
|
+
cudaError_t status;
|
|
28
|
+
|
|
29
|
+
status = cudaDriverGetVersion(&_version);
|
|
30
|
+
|
|
31
|
+
check_status(status);
|
|
32
|
+
return INT2NUM(_version);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/*
|
|
36
|
+
Returns the CUDA Runtime version.
|
|
37
|
+
|
|
38
|
+
@return [Integer] Returns the CUDA Runtime version.
|
|
39
|
+
@see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION_1g0e3952c7802fd730432180f1f4a6cdc6
|
|
40
|
+
*/
|
|
41
|
+
static VALUE
|
|
42
|
+
rb_cudaRuntimeGetVersion(VALUE self)
|
|
43
|
+
{
|
|
44
|
+
int _version;
|
|
45
|
+
cudaError_t status;
|
|
46
|
+
|
|
47
|
+
status = cudaRuntimeGetVersion(&_version);
|
|
48
|
+
|
|
49
|
+
check_status(status);
|
|
50
|
+
return INT2NUM(_version);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/////////////////////////////////////////
|
|
54
|
+
// Device and context operations
|
|
55
|
+
/////////////////////////////////////////
|
|
56
|
+
|
|
57
|
+
/*
|
|
58
|
+
Returns which device is currently being used.
|
|
59
|
+
|
|
60
|
+
@return [Integer] Returns the device on which the active host thread executes the device code.
|
|
61
|
+
@raise [Cumo::CUDA::RuntimeError]
|
|
62
|
+
@see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g80861db2ce7c29b6e8055af8ae01bc78
|
|
63
|
+
*/
|
|
64
|
+
static VALUE
|
|
65
|
+
rb_cudaGetDevice(VALUE self)
|
|
66
|
+
{
|
|
67
|
+
int _device;
|
|
68
|
+
cudaError_t status;
|
|
69
|
+
|
|
70
|
+
status = cudaGetDevice(&_device);
|
|
71
|
+
|
|
72
|
+
check_status(status);
|
|
73
|
+
return INT2NUM(_device);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/*
|
|
77
|
+
Returns information about the device.
|
|
78
|
+
|
|
79
|
+
@param [Integer] attrib Device attribute to query
|
|
80
|
+
@param [Integer] device Device number to query
|
|
81
|
+
@return [Integer] Returned device attribute value
|
|
82
|
+
@raise [Cumo::CUDA::RuntimeError]
|
|
83
|
+
@see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1gb22e8256592b836df9a9cc36c9db7151
|
|
84
|
+
*/
|
|
85
|
+
static VALUE
|
|
86
|
+
rb_cudaDeviceGetAttributes(VALUE self, VALUE attrib, VALUE device)
|
|
87
|
+
{
|
|
88
|
+
int _attrib = NUM2INT(attrib);
|
|
89
|
+
int _device = NUM2INT(device);
|
|
90
|
+
int _ret;
|
|
91
|
+
cudaError_t status;
|
|
92
|
+
|
|
93
|
+
status = cudaDeviceGetAttribute(&_ret, _attrib, _device);
|
|
94
|
+
|
|
95
|
+
check_status(status);
|
|
96
|
+
return INT2NUM(_ret);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/*
|
|
100
|
+
Returns the number of compute-capable devices.
|
|
101
|
+
|
|
102
|
+
@return [Integer] Returns the number of devices with compute capability greater or equal to 2.0
|
|
103
|
+
@raise [Cumo::CUDA::RuntimeError]
|
|
104
|
+
@see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g18808e54893cfcaafefeab31a73cc55f
|
|
105
|
+
*/
|
|
106
|
+
static VALUE
|
|
107
|
+
rb_cudaGetDeviceCount(VALUE self)
|
|
108
|
+
{
|
|
109
|
+
int _count;
|
|
110
|
+
cudaError_t status;
|
|
111
|
+
|
|
112
|
+
status = cudaGetDeviceCount(&_count);
|
|
113
|
+
|
|
114
|
+
check_status(status);
|
|
115
|
+
return INT2NUM(_count);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/*
|
|
119
|
+
Set device to be used for GPU executions.
|
|
120
|
+
|
|
121
|
+
@param [Integer] device Device on which the active host thread should execute the device code.
|
|
122
|
+
@raise [Cumo::CUDA::RuntimeError]
|
|
123
|
+
@see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g159587909ffa0791bbe4b40187a4c6bb
|
|
124
|
+
*/
|
|
125
|
+
static VALUE
|
|
126
|
+
rb_cudaSetDevice(VALUE self, VALUE device)
|
|
127
|
+
{
|
|
128
|
+
int _device = NUM2INT(device);
|
|
129
|
+
cudaError_t status;
|
|
130
|
+
|
|
131
|
+
status = cudaSetDevice(_device);
|
|
132
|
+
|
|
133
|
+
check_status(status);
|
|
134
|
+
return Qnil;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/*
|
|
138
|
+
Wait for compute device to finish.
|
|
139
|
+
|
|
140
|
+
@raise [Cumo::CUDA::RuntimeError]
|
|
141
|
+
@see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g10e20b05a95f638a4071a655503df25d
|
|
142
|
+
*/
|
|
143
|
+
static VALUE
|
|
144
|
+
rb_cudaDeviceSynchronize(VALUE self)
|
|
145
|
+
{
|
|
146
|
+
cudaError_t status;
|
|
147
|
+
status = cudaDeviceSynchronize();
|
|
148
|
+
check_status(status);
|
|
149
|
+
return Qnil;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
void
|
|
153
|
+
Init_cumo_cuda_runtime()
|
|
154
|
+
{
|
|
155
|
+
VALUE mCumo = rb_define_module("Cumo");
|
|
156
|
+
VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
|
|
157
|
+
mRuntime = rb_define_module_under(mCUDA, "Runtime");
|
|
158
|
+
eRuntimeError = rb_define_class_under(mCUDA, "RuntimeError", rb_eStandardError);
|
|
159
|
+
|
|
160
|
+
rb_define_singleton_method(mRuntime, "cudaDriverGetVersion", rb_cudaDriverGetVersion, 0);
|
|
161
|
+
rb_define_singleton_method(mRuntime, "cudaRuntimeGetVersion", rb_cudaRuntimeGetVersion, 0);
|
|
162
|
+
rb_define_singleton_method(mRuntime, "cudaGetDevice", rb_cudaGetDevice, 0);
|
|
163
|
+
rb_define_singleton_method(mRuntime, "cudaDeviceGetAttributes", rb_cudaDeviceGetAttributes, 2);
|
|
164
|
+
rb_define_singleton_method(mRuntime, "cudaGetDeviceCount", rb_cudaGetDeviceCount, 0);
|
|
165
|
+
rb_define_singleton_method(mRuntime, "cudaSetDevice", rb_cudaSetDevice, 1);
|
|
166
|
+
rb_define_singleton_method(mRuntime, "cudaDeviceSynchronize", rb_cudaDeviceSynchronize, 0);
|
|
167
|
+
}
|
data/ext/cumo/cumo.c
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#define CUMO_C
|
|
2
|
+
#include <ruby.h>
|
|
3
|
+
#include <assert.h>
|
|
4
|
+
#include <stdlib.h>
|
|
5
|
+
#include "cumo.h"
|
|
6
|
+
#include "cumo/narray.h"
|
|
7
|
+
|
|
8
|
+
void Init_cumo();
|
|
9
|
+
void Init_cumo_narray();
|
|
10
|
+
void Init_cumo_nary_data();
|
|
11
|
+
void Init_cumo_nary_ndloop();
|
|
12
|
+
void Init_cumo_nary_step();
|
|
13
|
+
void Init_cumo_nary_index();
|
|
14
|
+
void Init_cumo_bit();
|
|
15
|
+
void Init_cumo_int8();
|
|
16
|
+
void Init_cumo_int16();
|
|
17
|
+
void Init_cumo_int32();
|
|
18
|
+
void Init_cumo_int64();
|
|
19
|
+
void Init_cumo_uint8();
|
|
20
|
+
void Init_cumo_uint16();
|
|
21
|
+
void Init_cumo_uint32();
|
|
22
|
+
void Init_cumo_uint64();
|
|
23
|
+
void Init_cumo_sfloat();
|
|
24
|
+
void Init_cumo_scomplex();
|
|
25
|
+
void Init_cumo_dfloat();
|
|
26
|
+
void Init_cumo_dcomplex();
|
|
27
|
+
void Init_cumo_robject();
|
|
28
|
+
void Init_cumo_nary_math();
|
|
29
|
+
void Init_cumo_nary_rand();
|
|
30
|
+
void Init_cumo_nary_array();
|
|
31
|
+
void Init_cumo_nary_struct();
|
|
32
|
+
void Init_cumo_cuda_driver();
|
|
33
|
+
void Init_cumo_cuda_memory_pool();
|
|
34
|
+
void Init_cumo_cuda_runtime();
|
|
35
|
+
void Init_cumo_cuda_nvrtc();
|
|
36
|
+
|
|
37
|
+
/*
 * Intentionally empty function.
 * NOTE(review): presumably exists as a stable symbol to set a debugger
 * breakpoint on - confirm against run.gdb.
 */
void
cumo_debug_breakpoint(void)
{
    return;
}
|
|
42
|
+
|
|
43
|
+
/* File-local flag: true while Numo NArray compatible mode is enabled. */
static bool cumo_compatible_mode_enabled;

/* C-level accessor returning the current value of the compatible-mode flag. */
bool
cumo_compatible_mode_enabled_p()
{
    const bool enabled = cumo_compatible_mode_enabled;

    return enabled;
}
|
|
49
|
+
|
|
50
|
+
/*
|
|
51
|
+
Enable Numo NArray compatible mode.
|
|
52
|
+
|
|
53
|
+
Cumo returns 0-dimensional NArray instead of ruby numeric object
|
|
54
|
+
for some methods such as `extract`, and `[]` not to synchronize
|
|
55
|
+
between CPU and GPU for performance as default.
|
|
56
|
+
|
|
57
|
+
Enabling the compatible mode makes Cumo behave as Numo. But, please
|
|
58
|
+
note that it makes Cumo slow.
|
|
59
|
+
|
|
60
|
+
@return [Boolean] Returns previous state (true if enabled)
|
|
61
|
+
*/
|
|
62
|
+
static VALUE
|
|
63
|
+
rb_enable_compatible_mode(VALUE self)
|
|
64
|
+
{
|
|
65
|
+
VALUE ret = (cumo_compatible_mode_enabled ? Qtrue : Qfalse);
|
|
66
|
+
cumo_compatible_mode_enabled = true;
|
|
67
|
+
return ret;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/*
|
|
71
|
+
Disable Numo NArray compatible mode.
|
|
72
|
+
|
|
73
|
+
@return [Boolean] Returns previous state (true if enabled)
|
|
74
|
+
*/
|
|
75
|
+
static VALUE
|
|
76
|
+
rb_disable_compatible_mode(VALUE self)
|
|
77
|
+
{
|
|
78
|
+
VALUE ret = (cumo_compatible_mode_enabled ? Qtrue : Qfalse);
|
|
79
|
+
cumo_compatible_mode_enabled = false;
|
|
80
|
+
return ret;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/*
|
|
84
|
+
Returns whether Numo NArray compatible mode is enabled or not.
|
|
85
|
+
|
|
86
|
+
@return [Boolean] Returns the state (true if enabled)
|
|
87
|
+
*/
|
|
88
|
+
static VALUE
|
|
89
|
+
rb_compatible_mode_enabled_p(VALUE self)
|
|
90
|
+
{
|
|
91
|
+
return (cumo_compatible_mode_enabled ? Qtrue : Qfalse);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/* initialization of Cumo Module */
|
|
95
|
+
void
|
|
96
|
+
Init_cumo()
|
|
97
|
+
{
|
|
98
|
+
const char* env;
|
|
99
|
+
VALUE mCumo;
|
|
100
|
+
|
|
101
|
+
mCumo = rb_define_module("Cumo");
|
|
102
|
+
|
|
103
|
+
rb_define_const(mCumo, "VERSION", rb_str_new2(CUMO_VERSION));
|
|
104
|
+
|
|
105
|
+
rb_define_singleton_method(mCumo, "enable_compatible_mode", RUBY_METHOD_FUNC(rb_enable_compatible_mode), 0);
|
|
106
|
+
rb_define_singleton_method(mCumo, "disable_compatible_mode", RUBY_METHOD_FUNC(rb_disable_compatible_mode), 0);
|
|
107
|
+
rb_define_singleton_method(mCumo, "compatible_mode_enabled?", RUBY_METHOD_FUNC(rb_compatible_mode_enabled_p), 0);
|
|
108
|
+
|
|
109
|
+
// default is false
|
|
110
|
+
env = getenv("CUMO_COMPATIBLE_MODE");
|
|
111
|
+
cumo_compatible_mode_enabled = (env != NULL && strcmp(env, "OFF") != 0 && strcmp(env, "0") != 0 && strcmp(env, "NO") != 0);
|
|
112
|
+
|
|
113
|
+
Init_cumo_narray();
|
|
114
|
+
|
|
115
|
+
Init_cumo_nary_step();
|
|
116
|
+
Init_cumo_nary_index();
|
|
117
|
+
|
|
118
|
+
Init_cumo_nary_data();
|
|
119
|
+
Init_cumo_nary_ndloop();
|
|
120
|
+
|
|
121
|
+
Init_cumo_dcomplex();
|
|
122
|
+
Init_cumo_dfloat();
|
|
123
|
+
Init_cumo_scomplex();
|
|
124
|
+
Init_cumo_sfloat();
|
|
125
|
+
|
|
126
|
+
Init_cumo_int64();
|
|
127
|
+
Init_cumo_uint64();
|
|
128
|
+
Init_cumo_int32();
|
|
129
|
+
Init_cumo_uint32();
|
|
130
|
+
Init_cumo_int16();
|
|
131
|
+
Init_cumo_uint16();
|
|
132
|
+
Init_cumo_int8();
|
|
133
|
+
Init_cumo_uint8();
|
|
134
|
+
|
|
135
|
+
Init_cumo_bit();
|
|
136
|
+
Init_cumo_robject();
|
|
137
|
+
|
|
138
|
+
Init_cumo_nary_math();
|
|
139
|
+
|
|
140
|
+
Init_cumo_nary_rand();
|
|
141
|
+
Init_cumo_nary_array();
|
|
142
|
+
Init_cumo_nary_struct();
|
|
143
|
+
|
|
144
|
+
Init_cumo_cuda_driver();
|
|
145
|
+
Init_cumo_cuda_memory_pool();
|
|
146
|
+
Init_cumo_cuda_runtime();
|
|
147
|
+
Init_cumo_cuda_nvrtc();
|
|
148
|
+
}
|