cumo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <ruby/thread.h>
|
|
3
|
+
#include <cuda.h>
|
|
4
|
+
#include <cuda_runtime.h>
|
|
5
|
+
#include "cumo/cuda/driver.h"
|
|
6
|
+
|
|
7
|
+
VALUE cumo_cuda_eDriverError;
|
|
8
|
+
VALUE cumo_cuda_mDriver;
|
|
9
|
+
#define eDriverError cumo_cuda_eDriverError
|
|
10
|
+
#define mDriver cumo_cuda_mDriver
|
|
11
|
+
|
|
12
|
+
static void
|
|
13
|
+
check_status(CUresult status)
|
|
14
|
+
{
|
|
15
|
+
if (status != 0) {
|
|
16
|
+
const char *errname = NULL;
|
|
17
|
+
const char *errstring = NULL;
|
|
18
|
+
cuGetErrorName(status, &errname);
|
|
19
|
+
cuGetErrorString(status, &errstring);
|
|
20
|
+
rb_raise(cumo_cuda_eDriverError, "%s %s (error=%d)", errname, errstring, status);
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
///////////////////////////////////////////////
|
|
25
|
+
// Context Management
|
|
26
|
+
//////////////////////////////////////////////
|
|
27
|
+
|
|
28
|
+
static VALUE
|
|
29
|
+
rb_cuCtxCreate(VALUE self, VALUE flags, VALUE dev)
|
|
30
|
+
{
|
|
31
|
+
unsigned int _flags = NUM2INT(flags);
|
|
32
|
+
CUdevice _dev = (CUdevice)NUM2INT(dev);
|
|
33
|
+
CUcontext _pctx;
|
|
34
|
+
CUresult status;
|
|
35
|
+
|
|
36
|
+
status = cuCtxCreate(&_pctx, _flags, _dev);
|
|
37
|
+
|
|
38
|
+
check_status(status);
|
|
39
|
+
return SIZET2NUM((size_t)_pctx);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
static VALUE
|
|
43
|
+
rb_cuCtxGetCurrent(VALUE self)
|
|
44
|
+
{
|
|
45
|
+
CUcontext ctx;
|
|
46
|
+
CUresult status;
|
|
47
|
+
|
|
48
|
+
status = cuCtxGetCurrent(&ctx);
|
|
49
|
+
check_status(status);
|
|
50
|
+
|
|
51
|
+
return SIZET2NUM((size_t)ctx);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
///////////////////////////////////////////////
|
|
55
|
+
// Device Management
|
|
56
|
+
//////////////////////////////////////////////
|
|
57
|
+
|
|
58
|
+
static VALUE
|
|
59
|
+
rb_cuDeviceGet(VALUE self, VALUE ordinal)
|
|
60
|
+
{
|
|
61
|
+
int _ordinal = NUM2INT(ordinal);
|
|
62
|
+
CUdevice _device;
|
|
63
|
+
CUresult status;
|
|
64
|
+
|
|
65
|
+
status = cuDeviceGet(&_device, _ordinal);
|
|
66
|
+
|
|
67
|
+
check_status(status);
|
|
68
|
+
return INT2NUM(_device);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
///////////////////////////////////////////////
|
|
72
|
+
// Module Load and Kernel Execution
|
|
73
|
+
//////////////////////////////////////////////
|
|
74
|
+
|
|
75
|
+
struct cuLinkAddDataParam {
|
|
76
|
+
CUlinkState state;
|
|
77
|
+
CUjitInputType type;
|
|
78
|
+
void* data;
|
|
79
|
+
size_t size;
|
|
80
|
+
const char* name;
|
|
81
|
+
unsigned int numOptions;
|
|
82
|
+
CUjit_option* options;
|
|
83
|
+
void ** optionValues;
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
static void *
|
|
87
|
+
cuLinkAddData_without_gvl_cb(void *param)
|
|
88
|
+
{
|
|
89
|
+
struct cuLinkAddDataParam *p = param;
|
|
90
|
+
CUresult status;
|
|
91
|
+
status = cuLinkAddData(p->state, p->type, p->data, p->size, p->name, p->numOptions, p->options, p->optionValues);
|
|
92
|
+
return (void *)status;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// TODO(sonots): Support options.
|
|
96
|
+
static VALUE
|
|
97
|
+
rb_cuLinkAddData(VALUE self, VALUE state, VALUE type, VALUE data, VALUE name)
|
|
98
|
+
{
|
|
99
|
+
CUlinkState _state = (CUlinkState)NUM2SIZET(state);
|
|
100
|
+
CUjitInputType _type = (CUjitInputType)NUM2INT(type);
|
|
101
|
+
void* _data = (void *)RSTRING_PTR(data);
|
|
102
|
+
size_t _size = RSTRING_LEN(data);
|
|
103
|
+
const char* _name = RSTRING_PTR(data);
|
|
104
|
+
CUresult status;
|
|
105
|
+
|
|
106
|
+
struct cuLinkAddDataParam param = {_state, _type, _data, _size, _name, 0, (CUjit_option*)0, (void**)0};
|
|
107
|
+
status = (CUresult)rb_thread_call_without_gvl(cuLinkAddData_without_gvl_cb, ¶m, NULL, NULL);
|
|
108
|
+
//status = cuLinkAddData(_state, _type, _data, _size, _name, 0, (CUjit_option*)0, (void**)0);
|
|
109
|
+
|
|
110
|
+
check_status(status);
|
|
111
|
+
return Qnil;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
struct cuLinkAddFileParam {
|
|
115
|
+
CUlinkState state;
|
|
116
|
+
CUjitInputType type;
|
|
117
|
+
const char* path;
|
|
118
|
+
unsigned int numOptions;
|
|
119
|
+
CUjit_option* options;
|
|
120
|
+
void ** optionValues;
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
static void *
|
|
124
|
+
cuLinkAddFile_without_gvl_cb(void *param)
|
|
125
|
+
{
|
|
126
|
+
struct cuLinkAddFileParam *p = param;
|
|
127
|
+
CUresult status;
|
|
128
|
+
status = cuLinkAddFile(p->state, p->type, p->path, p->numOptions, p->options, p->optionValues);
|
|
129
|
+
return (void *)status;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// TODO(sonots): Support options.
|
|
133
|
+
static VALUE
|
|
134
|
+
rb_cuLinkAddFile(VALUE self, VALUE state, VALUE type, VALUE path)
|
|
135
|
+
{
|
|
136
|
+
CUlinkState _state = (CUlinkState)NUM2SIZET(state);
|
|
137
|
+
CUjitInputType _type = (CUjitInputType)NUM2INT(type);
|
|
138
|
+
const char* _path = RSTRING_PTR(path);
|
|
139
|
+
CUresult status;
|
|
140
|
+
|
|
141
|
+
struct cuLinkAddFileParam param = {_state, _type, _path, 0, (CUjit_option*)0, (void **)0};
|
|
142
|
+
status = (CUresult)rb_thread_call_without_gvl(cuLinkAddFile_without_gvl_cb, ¶m, NULL, NULL);
|
|
143
|
+
//status = cuLinkAddFile(_state, _type, _path, 0, (CUjit_option*)0, (void **)0);
|
|
144
|
+
|
|
145
|
+
check_status(status);
|
|
146
|
+
return Qnil;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
struct cuLinkCompleteParam {
|
|
150
|
+
CUlinkState state;
|
|
151
|
+
void** cubinOut;
|
|
152
|
+
size_t* sizeOut;
|
|
153
|
+
};
|
|
154
|
+
|
|
155
|
+
static void *
|
|
156
|
+
cuLinkComplete_without_gvl_cb(void *param)
|
|
157
|
+
{
|
|
158
|
+
struct cuLinkCompleteParam *p = param;
|
|
159
|
+
CUresult status;
|
|
160
|
+
status = cuLinkComplete(p->state, p->cubinOut, p->sizeOut);
|
|
161
|
+
return (void *)status;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
static VALUE
|
|
165
|
+
rb_cuLinkComplete(VALUE self, VALUE state)
|
|
166
|
+
{
|
|
167
|
+
CUlinkState _state = (CUlinkState)NUM2SIZET(state);
|
|
168
|
+
void* _cubinOut;
|
|
169
|
+
size_t _sizeOut;
|
|
170
|
+
CUresult status;
|
|
171
|
+
|
|
172
|
+
struct cuLinkCompleteParam param = {_state, &_cubinOut, &_sizeOut};
|
|
173
|
+
status = (CUresult)rb_thread_call_without_gvl(cuLinkComplete_without_gvl_cb, ¶m, NULL, NULL);
|
|
174
|
+
//status = cuLinkComplete(_state, &_cubinOut, &_sizeOut);
|
|
175
|
+
|
|
176
|
+
check_status(status);
|
|
177
|
+
return rb_str_new((char *)_cubinOut, _sizeOut);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
struct cuLinkCreateParam {
|
|
181
|
+
unsigned int numOptions;
|
|
182
|
+
CUjit_option* options;
|
|
183
|
+
void** optionValues;
|
|
184
|
+
CUlinkState* state;
|
|
185
|
+
};
|
|
186
|
+
|
|
187
|
+
static void *
|
|
188
|
+
cuLinkCreate_without_gvl_cb(void *param)
|
|
189
|
+
{
|
|
190
|
+
struct cuLinkCreateParam *p = param;
|
|
191
|
+
CUresult status;
|
|
192
|
+
status = cuLinkCreate(p->numOptions, p->options, p->optionValues, p->state);
|
|
193
|
+
return (void *)status;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// TODO(sonots): Support options.
|
|
197
|
+
static VALUE
|
|
198
|
+
rb_cuLinkCreate(VALUE self)
|
|
199
|
+
{
|
|
200
|
+
CUlinkState state;
|
|
201
|
+
CUresult status;
|
|
202
|
+
|
|
203
|
+
struct cuLinkCreateParam param = {0, (CUjit_option*)0, (void**)0, &state};
|
|
204
|
+
status = (CUresult)rb_thread_call_without_gvl(cuLinkCreate_without_gvl_cb, ¶m, NULL, NULL);
|
|
205
|
+
//status = cuLinkCreate(0, (CUjit_option*)0, (void**)0, &state);
|
|
206
|
+
|
|
207
|
+
check_status(status);
|
|
208
|
+
return SIZET2NUM((size_t)state);
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
struct cuLinkDestroyParam {
|
|
212
|
+
CUlinkState state;
|
|
213
|
+
};
|
|
214
|
+
|
|
215
|
+
static void *
|
|
216
|
+
cuLinkDestroy_without_gvl_cb(void *param)
|
|
217
|
+
{
|
|
218
|
+
struct cuLinkDestroyParam *p = param;
|
|
219
|
+
CUresult status;
|
|
220
|
+
status = cuLinkDestroy(p->state);
|
|
221
|
+
return (void *)status;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
static VALUE
|
|
225
|
+
rb_cuLinkDestroy(VALUE self, VALUE state)
|
|
226
|
+
{
|
|
227
|
+
CUlinkState _state = (CUlinkState)NUM2SIZET(state);
|
|
228
|
+
CUresult status;
|
|
229
|
+
|
|
230
|
+
struct cuLinkDestroyParam param = {_state};
|
|
231
|
+
status = (CUresult)rb_thread_call_without_gvl(cuLinkDestroy_without_gvl_cb, ¶m, NULL, NULL);
|
|
232
|
+
//status = cuLinkDestroy(_state);
|
|
233
|
+
|
|
234
|
+
check_status(status);
|
|
235
|
+
return Qnil;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
struct cuModuleGetFunctionParam {
|
|
239
|
+
CUfunction* hfunc;
|
|
240
|
+
CUmodule hmod;
|
|
241
|
+
const char* name;
|
|
242
|
+
};
|
|
243
|
+
|
|
244
|
+
static void *
|
|
245
|
+
cuModuleGetFunction_without_gvl_cb(void *param)
|
|
246
|
+
{
|
|
247
|
+
struct cuModuleGetFunctionParam *p = param;
|
|
248
|
+
CUresult status;
|
|
249
|
+
status = cuModuleGetFunction(p->hfunc, p->hmod, p->name);
|
|
250
|
+
return (void *)status;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
static VALUE
|
|
254
|
+
rb_cuModuleGetFunction(VALUE self, VALUE hmod, VALUE name)
|
|
255
|
+
{
|
|
256
|
+
CUfunction _hfunc;
|
|
257
|
+
CUmodule _hmod = (CUmodule)NUM2SIZET(hmod);
|
|
258
|
+
const char* _name = RSTRING_PTR(name);
|
|
259
|
+
CUresult status;
|
|
260
|
+
|
|
261
|
+
struct cuModuleGetFunctionParam param = {&_hfunc, _hmod, _name};
|
|
262
|
+
status = (CUresult)rb_thread_call_without_gvl(cuModuleGetFunction_without_gvl_cb, ¶m, NULL, NULL);
|
|
263
|
+
//status = cuModuleGetFunction(&_hfunc, _hmod, _name);
|
|
264
|
+
|
|
265
|
+
check_status(status);
|
|
266
|
+
return SIZET2NUM((size_t)_hfunc);
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
struct cuModuleGetGlobalParam {
|
|
270
|
+
CUdeviceptr* dptr;
|
|
271
|
+
size_t* bytes;
|
|
272
|
+
CUmodule hmod;
|
|
273
|
+
const char* name;
|
|
274
|
+
};
|
|
275
|
+
|
|
276
|
+
static void *
|
|
277
|
+
cuModuleGetGlobal_without_gvl_cb(void *param)
|
|
278
|
+
{
|
|
279
|
+
struct cuModuleGetGlobalParam *p = param;
|
|
280
|
+
CUresult status;
|
|
281
|
+
status = cuModuleGetGlobal(p->dptr, p->bytes, p->hmod, p->name);
|
|
282
|
+
return (void *)status;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
static VALUE
|
|
286
|
+
rb_cuModuleGetGlobal(VALUE self, VALUE hmod, VALUE name)
|
|
287
|
+
{
|
|
288
|
+
CUdeviceptr _dptr;
|
|
289
|
+
size_t _bytes;
|
|
290
|
+
CUmodule _hmod = (CUmodule)NUM2SIZET(hmod);
|
|
291
|
+
const char* _name = RSTRING_PTR(name);
|
|
292
|
+
CUresult status;
|
|
293
|
+
|
|
294
|
+
struct cuModuleGetGlobalParam param = {&_dptr, &_bytes, _hmod, _name};
|
|
295
|
+
status = (CUresult)rb_thread_call_without_gvl(cuModuleGetGlobal_without_gvl_cb, ¶m, NULL, NULL);
|
|
296
|
+
//status = cuModuleGetGlobal(&_dptr, &_bytes, _hmod, _name);
|
|
297
|
+
|
|
298
|
+
check_status(status);
|
|
299
|
+
return rb_str_new((char *)_dptr, _bytes);
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
struct cuModuleLoadParam {
|
|
303
|
+
CUmodule* module;
|
|
304
|
+
const char* fname;
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
static void *
|
|
308
|
+
cuModuleLoad_without_gvl_cb(void *param)
|
|
309
|
+
{
|
|
310
|
+
struct cuModuleLoadParam *p = param;
|
|
311
|
+
CUresult status;
|
|
312
|
+
status = cuModuleLoad(p->module, p->fname);
|
|
313
|
+
return (void *)status;
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
static VALUE
|
|
317
|
+
rb_cuModuleLoad(VALUE self, VALUE fname)
|
|
318
|
+
{
|
|
319
|
+
CUmodule _module;
|
|
320
|
+
const char* _fname = RSTRING_PTR(fname);
|
|
321
|
+
CUresult status;
|
|
322
|
+
|
|
323
|
+
struct cuModuleLoadParam param = {&_module, _fname};
|
|
324
|
+
status = (CUresult)rb_thread_call_without_gvl(cuModuleLoad_without_gvl_cb, ¶m, NULL, NULL);
|
|
325
|
+
//status = cuModuleLoad(&_module, _fname);
|
|
326
|
+
|
|
327
|
+
check_status(status);
|
|
328
|
+
return SIZET2NUM((size_t)_module);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
struct cuModuleLoadDataParam {
|
|
332
|
+
CUmodule* module;
|
|
333
|
+
const void* image;
|
|
334
|
+
};
|
|
335
|
+
|
|
336
|
+
static void *
|
|
337
|
+
cuModuleLoadData_without_gvl_cb(void *param)
|
|
338
|
+
{
|
|
339
|
+
struct cuModuleLoadDataParam *p = param;
|
|
340
|
+
CUresult status;
|
|
341
|
+
status = cuModuleLoadData(p->module, p->image);
|
|
342
|
+
return (void *)status;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
static VALUE
|
|
346
|
+
rb_cuModuleLoadData(VALUE self, VALUE image)
|
|
347
|
+
{
|
|
348
|
+
CUmodule _module;
|
|
349
|
+
const void* _image = (void*)RSTRING_PTR(image);
|
|
350
|
+
CUresult status;
|
|
351
|
+
|
|
352
|
+
struct cuModuleLoadDataParam param = {&_module, _image};
|
|
353
|
+
status = (CUresult)rb_thread_call_without_gvl(cuModuleLoadData_without_gvl_cb, ¶m, NULL, NULL);
|
|
354
|
+
//status = cuModuleLoadData(&_module, _image);
|
|
355
|
+
|
|
356
|
+
check_status(status);
|
|
357
|
+
return SIZET2NUM((size_t)_module);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
struct cuModuleUnloadParam {
|
|
361
|
+
CUmodule hmod;
|
|
362
|
+
};
|
|
363
|
+
|
|
364
|
+
static void *
|
|
365
|
+
cuModuleUnload_without_gvl_cb(void *param)
|
|
366
|
+
{
|
|
367
|
+
struct cuModuleUnloadParam *p = param;
|
|
368
|
+
CUresult status;
|
|
369
|
+
status = cuModuleUnload(p->hmod);
|
|
370
|
+
return (void *)status;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
static VALUE
|
|
374
|
+
rb_cuModuleUnload(VALUE self, VALUE hmod)
|
|
375
|
+
{
|
|
376
|
+
CUmodule _hmod = (CUmodule)NUM2SIZET(hmod);
|
|
377
|
+
CUresult status;
|
|
378
|
+
|
|
379
|
+
struct cuModuleUnloadParam param = {_hmod};
|
|
380
|
+
status = (CUresult)rb_thread_call_without_gvl(cuModuleUnload_without_gvl_cb, ¶m, NULL, NULL);
|
|
381
|
+
//status = cuModuleUnload(_hmod);
|
|
382
|
+
|
|
383
|
+
check_status(status);
|
|
384
|
+
return Qnil;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
void
|
|
388
|
+
Init_cumo_cuda_driver()
|
|
389
|
+
{
|
|
390
|
+
VALUE mCumo = rb_define_module("Cumo");
|
|
391
|
+
VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
|
|
392
|
+
mDriver = rb_define_module_under(mCUDA, "Driver");
|
|
393
|
+
eDriverError = rb_define_class_under(mCUDA, "DriverError", rb_eStandardError);
|
|
394
|
+
|
|
395
|
+
rb_define_singleton_method(mDriver, "cuCtxGetCurrent", rb_cuCtxGetCurrent, 0);
|
|
396
|
+
rb_define_singleton_method(mDriver, "cuLinkAddData", rb_cuLinkAddData, 4);
|
|
397
|
+
rb_define_singleton_method(mDriver, "cuLinkAddFile", rb_cuLinkAddFile, 3);
|
|
398
|
+
rb_define_singleton_method(mDriver, "cuLinkComplete", rb_cuLinkComplete, 1);
|
|
399
|
+
rb_define_singleton_method(mDriver, "cuLinkCreate", rb_cuLinkCreate, 0);
|
|
400
|
+
rb_define_singleton_method(mDriver, "cuLinkDestroy", rb_cuLinkDestroy, 1);
|
|
401
|
+
rb_define_singleton_method(mDriver, "cuModuleGetFunction", rb_cuModuleGetFunction, 2);
|
|
402
|
+
rb_define_singleton_method(mDriver, "cuModuleGetGlobal", rb_cuModuleGetGlobal, 2);
|
|
403
|
+
rb_define_singleton_method(mDriver, "cuModuleLoad", rb_cuModuleLoad, 1);
|
|
404
|
+
rb_define_singleton_method(mDriver, "cuModuleLoadData", rb_cuModuleLoadData, 1);
|
|
405
|
+
rb_define_singleton_method(mDriver, "cuModuleUnload", rb_cuModuleUnload, 1);
|
|
406
|
+
|
|
407
|
+
rb_define_singleton_method(mDriver, "cuDeviceGet", rb_cuDeviceGet, 1);
|
|
408
|
+
rb_define_singleton_method(mDriver, "cuCtxCreate", rb_cuCtxCreate, 2);
|
|
409
|
+
|
|
410
|
+
rb_define_const(mDriver, "CU_JIT_INPUT_CUBIN", INT2NUM(CU_JIT_INPUT_CUBIN));
|
|
411
|
+
rb_define_const(mDriver, "CU_JIT_INPUT_FATBINARY", INT2NUM(CU_JIT_INPUT_FATBINARY));
|
|
412
|
+
rb_define_const(mDriver, "CU_JIT_INPUT_LIBRARY", INT2NUM(CU_JIT_INPUT_LIBRARY));
|
|
413
|
+
rb_define_const(mDriver, "CU_JIT_INPUT_OBJECT", INT2NUM(CU_JIT_INPUT_OBJECT));
|
|
414
|
+
rb_define_const(mDriver, "CU_JIT_INPUT_PTX", INT2NUM(CU_JIT_INPUT_PTX));
|
|
415
|
+
|
|
416
|
+
CUdevice cuDevice;
|
|
417
|
+
CUcontext context;
|
|
418
|
+
cuInit(0);
|
|
419
|
+
cuDeviceGet(&cuDevice, 0);
|
|
420
|
+
cuCtxCreate(&context, 0, cuDevice);
|
|
421
|
+
}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <cuda_runtime.h>
|
|
3
|
+
#include "memory_pool_impl.hpp"
|
|
4
|
+
#include "cumo/cuda/memory_pool.h"
|
|
5
|
+
#include "cumo/cuda/runtime.h"
|
|
6
|
+
|
|
7
|
+
#include <cstdlib>
|
|
8
|
+
#include <string>
|
|
9
|
+
|
|
10
|
+
#if defined(__cplusplus)
|
|
11
|
+
extern "C" {
|
|
12
|
+
#if 0
|
|
13
|
+
} /* satisfy cc-mode */
|
|
14
|
+
#endif
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
static cumo::internal::MemoryPool pool{};
|
|
18
|
+
static bool memory_pool_enabled;
|
|
19
|
+
|
|
20
|
+
VALUE cumo_cuda_eOutOfMemoryError;
|
|
21
|
+
|
|
22
|
+
char*
|
|
23
|
+
cumo_cuda_runtime_malloc(size_t size)
|
|
24
|
+
{
|
|
25
|
+
if (memory_pool_enabled) {
|
|
26
|
+
try {
|
|
27
|
+
// TODO(sonots): Get current CUDA stream and pass it
|
|
28
|
+
return reinterpret_cast<char*>(pool.Malloc(size));
|
|
29
|
+
} catch (const cumo::internal::CUDARuntimeError& e) {
|
|
30
|
+
cumo_cuda_runtime_check_status(e.status());
|
|
31
|
+
} catch (const cumo::internal::OutOfMemoryError& e) {
|
|
32
|
+
rb_raise(cumo_cuda_eOutOfMemoryError, "%s", e.what());
|
|
33
|
+
}
|
|
34
|
+
} else {
|
|
35
|
+
void *ptr = 0;
|
|
36
|
+
cumo_cuda_runtime_check_status(cudaMallocManaged(&ptr, size, cudaMemAttachGlobal));
|
|
37
|
+
return reinterpret_cast<char*>(ptr);
|
|
38
|
+
}
|
|
39
|
+
return 0; // should not reach here
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
void
|
|
43
|
+
cumo_cuda_runtime_free(char *ptr)
|
|
44
|
+
{
|
|
45
|
+
if (memory_pool_enabled) {
|
|
46
|
+
try {
|
|
47
|
+
// TODO(sonots): Get current CUDA stream and pass it
|
|
48
|
+
pool.Free(reinterpret_cast<intptr_t>(ptr));
|
|
49
|
+
} catch (const cumo::internal::CUDARuntimeError& e) {
|
|
50
|
+
cumo_cuda_runtime_check_status(e.status());
|
|
51
|
+
}
|
|
52
|
+
} else {
|
|
53
|
+
cumo_cuda_runtime_check_status(cudaFree((void*)ptr));
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/*
|
|
58
|
+
Enable memory pool.
|
|
59
|
+
|
|
60
|
+
@return [Boolean] Returns previous state (true if enabled)
|
|
61
|
+
*/
|
|
62
|
+
static VALUE
|
|
63
|
+
rb_memory_pool_enable(VALUE self)
|
|
64
|
+
{
|
|
65
|
+
VALUE ret = (memory_pool_enabled ? Qtrue : Qfalse);
|
|
66
|
+
memory_pool_enabled = true;
|
|
67
|
+
return ret;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/*
|
|
71
|
+
Disable memory pool.
|
|
72
|
+
|
|
73
|
+
@return [Boolean] Returns previous state (true if enabled)
|
|
74
|
+
*/
|
|
75
|
+
static VALUE
|
|
76
|
+
rb_memory_pool_disable(VALUE self)
|
|
77
|
+
{
|
|
78
|
+
VALUE ret = (memory_pool_enabled ? Qtrue : Qfalse);
|
|
79
|
+
memory_pool_enabled = false;
|
|
80
|
+
return ret;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/*
|
|
84
|
+
Returns whether memory pool is enabled or not.
|
|
85
|
+
|
|
86
|
+
@return [Boolean] Returns the state (true if enabled)
|
|
87
|
+
*/
|
|
88
|
+
static VALUE
|
|
89
|
+
rb_memory_pool_enabled_p(VALUE self)
|
|
90
|
+
{
|
|
91
|
+
return (memory_pool_enabled ? Qtrue : Qfalse);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/*
|
|
95
|
+
Free all **non-split** chunks in all arenas.
|
|
96
|
+
*/
|
|
97
|
+
static VALUE
|
|
98
|
+
rb_memory_pool_free_all_blocks(int argc, VALUE* argv, VALUE self)
|
|
99
|
+
{
|
|
100
|
+
try {
|
|
101
|
+
if (argc < 1) {
|
|
102
|
+
pool.FreeAllBlocks();
|
|
103
|
+
} else {
|
|
104
|
+
// TODO(sonots): FIX if we create a Stream object
|
|
105
|
+
cudaStream_t stream_ptr = (cudaStream_t)NUM2SIZET(argv[0]);
|
|
106
|
+
pool.FreeAllBlocks(stream_ptr);
|
|
107
|
+
}
|
|
108
|
+
} catch (const cumo::internal::CUDARuntimeError& e) {
|
|
109
|
+
cumo_cuda_runtime_check_status(e.status());
|
|
110
|
+
}
|
|
111
|
+
return Qnil;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/*
|
|
115
|
+
Count the total number of free blocks.
|
|
116
|
+
|
|
117
|
+
@return [Integer] The total number of free blocks.
|
|
118
|
+
*/
|
|
119
|
+
static VALUE
|
|
120
|
+
rb_memory_pool_n_free_blocks(VALUE self)
|
|
121
|
+
{
|
|
122
|
+
return SIZET2NUM(pool.GetNumFreeBlocks());
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/*
|
|
126
|
+
Get the total number of bytes used.
|
|
127
|
+
|
|
128
|
+
@return [Integer] The total number of bytes used.
|
|
129
|
+
*/
|
|
130
|
+
static VALUE
|
|
131
|
+
rb_memory_pool_used_bytes(VALUE self)
|
|
132
|
+
{
|
|
133
|
+
return SIZET2NUM(pool.GetUsedBytes());
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/*
|
|
137
|
+
Get the total number of bytes acquired but not used in the pool.
|
|
138
|
+
|
|
139
|
+
@return [Integer] The total number of bytes acquired but not used in the pool.
|
|
140
|
+
*/
|
|
141
|
+
static VALUE
|
|
142
|
+
rb_memory_pool_free_bytes(VALUE self)
|
|
143
|
+
{
|
|
144
|
+
return SIZET2NUM(pool.GetFreeBytes());
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/*
|
|
148
|
+
Get the total number of bytes acquired in the pool.
|
|
149
|
+
|
|
150
|
+
@return [Integer] The total number of bytes acquired in the pool.
|
|
151
|
+
*/
|
|
152
|
+
static VALUE
|
|
153
|
+
rb_memory_pool_total_bytes(VALUE self)
|
|
154
|
+
{
|
|
155
|
+
return SIZET2NUM(pool.GetTotalBytes());
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
void
|
|
159
|
+
Init_cumo_cuda_memory_pool()
|
|
160
|
+
{
|
|
161
|
+
VALUE mCumo = rb_define_module("Cumo");
|
|
162
|
+
VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
|
|
163
|
+
VALUE mMemoryPool = rb_define_module_under(mCUDA, "MemoryPool");
|
|
164
|
+
cumo_cuda_eOutOfMemoryError = rb_define_class_under(mCUDA, "OutOfMemoryError", rb_eStandardError);
|
|
165
|
+
|
|
166
|
+
rb_define_singleton_method(mMemoryPool, "enable", RUBY_METHOD_FUNC(rb_memory_pool_enable), 0);
|
|
167
|
+
rb_define_singleton_method(mMemoryPool, "disable", RUBY_METHOD_FUNC(rb_memory_pool_disable), 0);
|
|
168
|
+
rb_define_singleton_method(mMemoryPool, "enabled?", RUBY_METHOD_FUNC(rb_memory_pool_enabled_p), 0);
|
|
169
|
+
rb_define_singleton_method(mMemoryPool, "free_all_blocks", RUBY_METHOD_FUNC(rb_memory_pool_free_all_blocks), -1);
|
|
170
|
+
rb_define_singleton_method(mMemoryPool, "n_free_blocks", RUBY_METHOD_FUNC(rb_memory_pool_n_free_blocks), 0);
|
|
171
|
+
rb_define_singleton_method(mMemoryPool, "used_bytes", RUBY_METHOD_FUNC(rb_memory_pool_used_bytes), 0);
|
|
172
|
+
rb_define_singleton_method(mMemoryPool, "free_bytes", RUBY_METHOD_FUNC(rb_memory_pool_free_bytes), 0);
|
|
173
|
+
rb_define_singleton_method(mMemoryPool, "total_bytes", RUBY_METHOD_FUNC(rb_memory_pool_total_bytes), 0);
|
|
174
|
+
|
|
175
|
+
// default is true
|
|
176
|
+
const char* env = std::getenv("CUMO_MEMORY_POOL");
|
|
177
|
+
memory_pool_enabled = env == nullptr || (std::string(env) != "OFF" && std::string(env) != "0" && std::string(env) != "NO");
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
#if defined(__cplusplus)
|
|
181
|
+
#if 0
|
|
182
|
+
{ /* satisfy cc-mode */
|
|
183
|
+
#endif
|
|
184
|
+
} /* extern "C" { */
|
|
185
|
+
#endif
|