cumo 0.4.3 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +15 -0
- data/.rubocop_todo.yml +1252 -0
- data/3rd_party/mkmf-cu/Gemfile +2 -0
- data/3rd_party/mkmf-cu/Rakefile +2 -1
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
- data/CHANGELOG.md +85 -0
- data/Dockerfile +34 -0
- data/Gemfile +6 -1
- data/README.md +2 -10
- data/Rakefile +8 -11
- data/bench/broadcast_fp32.rb +28 -26
- data/bench/cumo_bench.rb +18 -16
- data/bench/numo_bench.rb +18 -16
- data/bench/reduction_fp32.rb +14 -12
- data/bin/console +1 -0
- data/cumo.gemspec +6 -9
- data/docker-build.sh +4 -0
- data/docker-launch.sh +4 -0
- data/docs/src-tree.md +1 -1
- data/ext/cumo/cuda/cudnn.c +2 -2
- data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
- data/ext/cumo/cuda/driver.c +8 -0
- data/ext/cumo/cumo.c +7 -3
- data/ext/cumo/depend.erb +15 -13
- data/ext/cumo/extconf.rb +33 -47
- data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +13 -1
- data/ext/cumo/include/cumo/template.h +2 -4
- data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/float_macro.h +2 -2
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +8 -6
- data/ext/cumo/narray/data.c +48 -28
- data/ext/cumo/narray/gen/cogen.rb +8 -7
- data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
- data/ext/cumo/narray/gen/def/bit.rb +3 -1
- data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/int16.rb +2 -0
- data/ext/cumo/narray/gen/def/int32.rb +2 -0
- data/ext/cumo/narray/gen/def/int64.rb +2 -0
- data/ext/cumo/narray/gen/def/int8.rb +2 -0
- data/ext/cumo/narray/gen/def/robject.rb +2 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/uint16.rb +2 -0
- data/ext/cumo/narray/gen/def/uint32.rb +2 -0
- data/ext/cumo/narray/gen/def/uint64.rb +2 -0
- data/ext/cumo/narray/gen/def/uint8.rb +2 -0
- data/ext/cumo/narray/gen/erbln.rb +9 -7
- data/ext/cumo/narray/gen/erbpp2.rb +26 -24
- data/ext/cumo/narray/gen/narray_def.rb +13 -11
- data/ext/cumo/narray/gen/spec.rb +58 -55
- data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
- data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
- data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
- data/ext/cumo/narray/gen/tmpl/at.c +34 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
- data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
- data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
- data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
- data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
- data/ext/cumo/narray/gen/tmpl/each.c +4 -2
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
- data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
- data/ext/cumo/narray/gen/tmpl/median.c +2 -2
- data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
- data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
- data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
- data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
- data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
- data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
- data/ext/cumo/narray/index.c +244 -40
- data/ext/cumo/narray/index_kernel.cu +84 -0
- data/ext/cumo/narray/narray.c +57 -19
- data/ext/cumo/narray/ndloop.c +1 -1
- data/ext/cumo/narray/struct.c +1 -1
- data/lib/cumo/cuda/compile_error.rb +1 -1
- data/lib/cumo/cuda/compiler.rb +23 -22
- data/lib/cumo/cuda/cudnn.rb +1 -1
- data/lib/cumo/cuda/device.rb +1 -1
- data/lib/cumo/cuda/link_state.rb +2 -2
- data/lib/cumo/cuda/module.rb +1 -2
- data/lib/cumo/cuda/nvrtc_program.rb +3 -2
- data/lib/cumo/cuda.rb +2 -0
- data/lib/cumo/linalg.rb +2 -0
- data/lib/cumo/narray/extra.rb +297 -341
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo.rb +3 -1
- data/test/bit_test.rb +157 -0
- data/test/cuda/compiler_test.rb +69 -0
- data/test/cuda/device_test.rb +31 -0
- data/test/cuda/memory_pool_test.rb +45 -0
- data/test/cuda/nvrtc_test.rb +51 -0
- data/test/cuda/runtime_test.rb +28 -0
- data/test/cudnn_test.rb +498 -0
- data/test/cumo_test.rb +27 -0
- data/test/narray_test.rb +745 -0
- data/test/ractor_test.rb +52 -0
- data/test/test_helper.rb +31 -0
- metadata +34 -54
- data/.travis.yml +0 -5
- data/numo-narray-version +0 -1
data/ext/cumo/narray/gen/spec.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
def_id "cast"
|
|
2
4
|
def_id "eq"
|
|
3
5
|
def_id "ne"
|
|
@@ -14,8 +16,8 @@ if is_float
|
|
|
14
16
|
def_id "copysign"
|
|
15
17
|
end
|
|
16
18
|
if is_int
|
|
17
|
-
def_id "<<","left_shift"
|
|
18
|
-
def_id ">>","right_shift"
|
|
19
|
+
def_id "<<", "left_shift"
|
|
20
|
+
def_id ">>", "right_shift"
|
|
19
21
|
end
|
|
20
22
|
if is_comparable && !is_object
|
|
21
23
|
def_id "gt"
|
|
@@ -42,13 +44,13 @@ if is_object
|
|
|
42
44
|
def_id "nan?"
|
|
43
45
|
def_id "infinite?"
|
|
44
46
|
def_id "finite?"
|
|
45
|
-
def_id "==","eq"
|
|
46
|
-
def_id "!=","ne"
|
|
47
|
-
def_id ">" ,"gt"
|
|
48
|
-
def_id ">=","ge"
|
|
49
|
-
def_id "<" ,"lt"
|
|
50
|
-
def_id "<=","le"
|
|
51
|
-
def_id "<=>","ufo"
|
|
47
|
+
def_id "==", "eq"
|
|
48
|
+
def_id "!=", "ne"
|
|
49
|
+
def_id ">" , "gt"
|
|
50
|
+
def_id ">=", "ge"
|
|
51
|
+
def_id "<" , "lt"
|
|
52
|
+
def_id "<=", "le"
|
|
53
|
+
def_id "<=>", "ufo"
|
|
52
54
|
end
|
|
53
55
|
if (is_float || is_complex) && !is_object
|
|
54
56
|
def_id "gemm"
|
|
@@ -119,18 +121,18 @@ def_method "store" do
|
|
|
119
121
|
store_numeric
|
|
120
122
|
store_from "Bit"
|
|
121
123
|
if is_complex
|
|
122
|
-
store_from "DComplex","cumo_dcomplex","m_from_dcomplex"
|
|
123
|
-
store_from "SComplex","cumo_scomplex","m_from_scomplex"
|
|
124
|
+
store_from "DComplex", "cumo_dcomplex", "m_from_dcomplex"
|
|
125
|
+
store_from "SComplex", "cumo_scomplex", "m_from_scomplex"
|
|
124
126
|
end
|
|
125
|
-
store_from "DFloat","double", "m_from_real"
|
|
126
|
-
store_from "SFloat","float", "m_from_real"
|
|
127
|
+
store_from "DFloat", "double", "m_from_real"
|
|
128
|
+
store_from "SFloat", "float", "m_from_real"
|
|
127
129
|
store_from "Int64", "int64_t", "m_from_int64"
|
|
128
130
|
store_from "Int32", "int32_t", "m_from_int32"
|
|
129
131
|
store_from "Int16", "int16_t", "m_from_sint"
|
|
130
132
|
store_from "Int8", "int8_t", "m_from_sint"
|
|
131
|
-
store_from "UInt64","u_int64_t","m_from_uint64"
|
|
132
|
-
store_from "UInt32","u_int32_t","m_from_uint32"
|
|
133
|
-
store_from "UInt16","u_int16_t","m_from_sint"
|
|
133
|
+
store_from "UInt64", "u_int64_t", "m_from_uint64"
|
|
134
|
+
store_from "UInt32", "u_int32_t", "m_from_uint32"
|
|
135
|
+
store_from "UInt16", "u_int16_t", "m_from_sint"
|
|
134
136
|
store_from "UInt8", "u_int8_t", "m_from_sint"
|
|
135
137
|
store_from "RObject", "VALUE", "m_num_to_data"
|
|
136
138
|
store_array
|
|
@@ -144,6 +146,7 @@ def_singleton_method "cast"
|
|
|
144
146
|
def_method "aref", op:"[]"
|
|
145
147
|
def_method "aref_cpu"
|
|
146
148
|
def_method "aset", op:"[]="
|
|
149
|
+
def_method "at"
|
|
147
150
|
|
|
148
151
|
def_method "coerce_cast"
|
|
149
152
|
def_method "to_a"
|
|
@@ -167,15 +170,15 @@ if is_bit
|
|
|
167
170
|
binary "xor", "^"
|
|
168
171
|
binary "eq"
|
|
169
172
|
bit_count "count_true"
|
|
170
|
-
def_alias "count_1","count_true"
|
|
171
|
-
def_alias "count","count_true"
|
|
173
|
+
def_alias "count_1", "count_true"
|
|
174
|
+
def_alias "count", "count_true"
|
|
172
175
|
bit_count "count_false"
|
|
173
|
-
def_alias "count_0","count_false"
|
|
176
|
+
def_alias "count_0", "count_false"
|
|
174
177
|
bit_count_cpu "count_true_cpu"
|
|
175
|
-
def_alias "count_1_cpu","count_true_cpu"
|
|
176
|
-
def_alias "count_cpu","count_true_cpu"
|
|
178
|
+
def_alias "count_1_cpu", "count_true_cpu"
|
|
179
|
+
def_alias "count_cpu", "count_true_cpu"
|
|
177
180
|
bit_count_cpu "count_false_cpu"
|
|
178
|
-
def_alias "count_0_cpu","count_false_cpu"
|
|
181
|
+
def_alias "count_0_cpu", "count_false_cpu"
|
|
179
182
|
bit_reduce "all?", 1
|
|
180
183
|
bit_reduce "any?", 0
|
|
181
184
|
def_method "none?", "none_p"
|
|
@@ -215,17 +218,17 @@ if is_complex
|
|
|
215
218
|
unary2 "real", "rtype", "cRT"
|
|
216
219
|
unary2 "imag", "rtype", "cRT"
|
|
217
220
|
unary2 "arg", "rtype", "cRT"
|
|
218
|
-
def_alias "angle","arg"
|
|
221
|
+
def_alias "angle", "arg"
|
|
219
222
|
set2 "set_imag", "rtype", "cRT"
|
|
220
223
|
set2 "set_real", "rtype", "cRT"
|
|
221
|
-
def_alias "imag=","set_imag"
|
|
222
|
-
def_alias "real=","set_real"
|
|
224
|
+
def_alias "imag=", "set_imag"
|
|
225
|
+
def_alias "real=", "set_real"
|
|
223
226
|
else
|
|
224
227
|
def_alias "conj", "view"
|
|
225
228
|
def_alias "im", "view"
|
|
226
229
|
end
|
|
227
230
|
|
|
228
|
-
def_alias "conjugate","conj"
|
|
231
|
+
def_alias "conjugate", "conj"
|
|
229
232
|
|
|
230
233
|
# base_cond
|
|
231
234
|
|
|
@@ -278,9 +281,9 @@ if is_comparable
|
|
|
278
281
|
cond_binary "lt"
|
|
279
282
|
cond_binary "le"
|
|
280
283
|
def_alias ">", "gt"
|
|
281
|
-
def_alias ">=","ge"
|
|
284
|
+
def_alias ">=", "ge"
|
|
282
285
|
def_alias "<", "lt"
|
|
283
|
-
def_alias "<=","le"
|
|
286
|
+
def_alias "<=", "le"
|
|
284
287
|
def_method "clip"
|
|
285
288
|
end
|
|
286
289
|
|
|
@@ -296,32 +299,32 @@ end
|
|
|
296
299
|
|
|
297
300
|
if is_int
|
|
298
301
|
if is_unsigned
|
|
299
|
-
accum "sum","u_int64_t","cumo_cUInt64"
|
|
300
|
-
accum "prod","u_int64_t","cumo_cUInt64"
|
|
302
|
+
accum "sum", "u_int64_t", "cumo_cUInt64"
|
|
303
|
+
accum "prod", "u_int64_t", "cumo_cUInt64"
|
|
301
304
|
else
|
|
302
|
-
accum "sum","int64_t","cumo_cInt64"
|
|
303
|
-
accum "prod","int64_t","cumo_cInt64"
|
|
305
|
+
accum "sum", "int64_t", "cumo_cInt64"
|
|
306
|
+
accum "prod", "int64_t", "cumo_cInt64"
|
|
304
307
|
end
|
|
305
308
|
else
|
|
306
|
-
accum "sum","dtype","cT"
|
|
307
|
-
accum "prod","dtype","cT"
|
|
309
|
+
accum "sum", "dtype", "cT"
|
|
310
|
+
accum "prod", "dtype", "cT"
|
|
308
311
|
end
|
|
309
312
|
|
|
310
313
|
if is_double_precision
|
|
311
|
-
accum "kahan_sum","dtype","cT"
|
|
314
|
+
accum "kahan_sum", "dtype", "cT"
|
|
312
315
|
end
|
|
313
316
|
|
|
314
317
|
if is_float
|
|
315
|
-
accum "mean","dtype","cT"
|
|
316
|
-
accum "stddev","rtype","cRT"
|
|
317
|
-
accum "var","rtype","cRT"
|
|
318
|
-
accum "rms","rtype","cRT"
|
|
318
|
+
accum "mean", "dtype", "cT"
|
|
319
|
+
accum "stddev", "rtype", "cRT"
|
|
320
|
+
accum "var", "rtype", "cRT"
|
|
321
|
+
accum "rms", "rtype", "cRT"
|
|
319
322
|
end
|
|
320
323
|
|
|
321
324
|
if is_comparable
|
|
322
|
-
accum "min","dtype","cT"
|
|
323
|
-
accum "max","dtype","cT"
|
|
324
|
-
accum "ptp","dtype","cT"
|
|
325
|
+
accum "min", "dtype", "cT"
|
|
326
|
+
accum "max", "dtype", "cT"
|
|
327
|
+
accum "ptp", "dtype", "cT"
|
|
325
328
|
accum_index "max_index"
|
|
326
329
|
accum_index "min_index"
|
|
327
330
|
def_method "minmax"
|
|
@@ -333,8 +336,8 @@ if is_int && !is_object
|
|
|
333
336
|
def_method "bincount"
|
|
334
337
|
end
|
|
335
338
|
|
|
336
|
-
cum "cumsum","add"
|
|
337
|
-
cum "cumprod","mul"
|
|
339
|
+
cum "cumsum", "add"
|
|
340
|
+
cum "cumprod", "mul"
|
|
338
341
|
|
|
339
342
|
# dot
|
|
340
343
|
accum_binary "mulsum"
|
|
@@ -377,17 +380,17 @@ def_method "poly"
|
|
|
377
380
|
|
|
378
381
|
if is_comparable && !is_object
|
|
379
382
|
if is_float
|
|
380
|
-
qsort type_name,"dtype","*(dtype*)","_prnan"
|
|
381
|
-
qsort type_name,"dtype","*(dtype*)","_ignan"
|
|
383
|
+
qsort type_name, "dtype", "*(dtype*)", "_prnan"
|
|
384
|
+
qsort type_name, "dtype", "*(dtype*)", "_ignan"
|
|
382
385
|
else
|
|
383
|
-
qsort type_name,"dtype","*(dtype*)"
|
|
386
|
+
qsort type_name, "dtype", "*(dtype*)"
|
|
384
387
|
end
|
|
385
388
|
def_method "sort"
|
|
386
389
|
if is_float
|
|
387
|
-
qsort type_name+"_index","dtype*","**(dtype**)","_prnan"
|
|
388
|
-
qsort type_name+"_index","dtype*","**(dtype**)","_ignan"
|
|
390
|
+
qsort type_name + "_index", "dtype*", "**(dtype**)", "_prnan"
|
|
391
|
+
qsort type_name + "_index", "dtype*", "**(dtype**)", "_ignan"
|
|
389
392
|
else
|
|
390
|
-
qsort type_name+"_index","dtype*","**(dtype**)"
|
|
393
|
+
qsort type_name + "_index", "dtype*", "**(dtype**)"
|
|
391
394
|
end
|
|
392
395
|
def_method "sort_index"
|
|
393
396
|
def_method "median"
|
|
@@ -407,7 +410,7 @@ def_module do
|
|
|
407
410
|
set ns_var: "cT"
|
|
408
411
|
set class_name: cn
|
|
409
412
|
set name: "#{nm}_math"
|
|
410
|
-
set full_module_name: fn+"::NMath"
|
|
413
|
+
set full_module_name: fn + "::NMath"
|
|
411
414
|
set module_name: "Math"
|
|
412
415
|
set module_var: "mTM"
|
|
413
416
|
|
|
@@ -433,14 +436,14 @@ def_module do
|
|
|
433
436
|
math "atanh"
|
|
434
437
|
math "sinc"
|
|
435
438
|
if !is_c
|
|
436
|
-
math "atan2",2
|
|
437
|
-
math "hypot",2
|
|
439
|
+
math "atan2", 2
|
|
440
|
+
math "hypot", 2
|
|
438
441
|
math "erf"
|
|
439
442
|
math "erfc"
|
|
440
443
|
math "log1p"
|
|
441
444
|
math "expm1"
|
|
442
|
-
math "ldexp",2
|
|
443
|
-
math "frexp",1,"frexp"
|
|
445
|
+
math "ldexp", 2
|
|
446
|
+
math "frexp", 1, "frexp"
|
|
444
447
|
end
|
|
445
448
|
end
|
|
446
449
|
end
|
|
@@ -56,8 +56,8 @@ static void
|
|
|
56
56
|
<% else %>
|
|
57
57
|
@overload <%=name%>(axis:nil, keepdims:false)
|
|
58
58
|
<% end %>
|
|
59
|
-
@param [Numeric,Array,Range] axis
|
|
60
|
-
@param [TrueClass] keepdims
|
|
59
|
+
@param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
|
|
60
|
+
@param [TrueClass] keepdims If true, the reduced axes are left in the result array as dimensions with size one.
|
|
61
61
|
@return [Cumo::<%=class_name%>] returns result of <%=name%>.
|
|
62
62
|
*/
|
|
63
63
|
static VALUE
|
|
@@ -91,7 +91,7 @@ static VALUE
|
|
|
91
91
|
@overload <%=op_map%>(other, axis:nil, keepdims:false)
|
|
92
92
|
<% end %>
|
|
93
93
|
@param [Cumo::NArray,Numeric] other
|
|
94
|
-
@param [Numeric,Array,Range] axis
|
|
94
|
+
@param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
|
|
95
95
|
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
|
|
96
96
|
<% if is_float %>
|
|
97
97
|
@param [TrueClass] nan (keyword) If true, apply NaN-aware algorithm (avoid NaN if exists).
|
|
@@ -21,35 +21,35 @@ static VALUE
|
|
|
21
21
|
|
|
22
22
|
@example
|
|
23
23
|
a = Cumo::DFloat.new(4,5).seq
|
|
24
|
-
=> Cumo::DFloat#shape=[4,5]
|
|
25
|
-
[[0, 1, 2, 3, 4],
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
24
|
+
# => Cumo::DFloat#shape=[4,5]
|
|
25
|
+
# [[0, 1, 2, 3, 4],
|
|
26
|
+
# [5, 6, 7, 8, 9],
|
|
27
|
+
# [10, 11, 12, 13, 14],
|
|
28
|
+
# [15, 16, 17, 18, 19]]
|
|
29
29
|
|
|
30
30
|
a[7]
|
|
31
|
-
=> Cumo::DFloat#shape=[]
|
|
32
|
-
6.0
|
|
31
|
+
# => Cumo::DFloat#shape=[]
|
|
32
|
+
# 6.0
|
|
33
33
|
|
|
34
34
|
a[1,1]
|
|
35
|
-
=> Cumo::DFloat#shape=[]
|
|
36
|
-
6.0
|
|
35
|
+
# => Cumo::DFloat#shape=[]
|
|
36
|
+
# 6.0
|
|
37
37
|
|
|
38
38
|
a[1..3,1]
|
|
39
|
-
=> Cumo::DFloat#shape=[3]
|
|
40
|
-
[6, 11, 16]
|
|
39
|
+
# => Cumo::DFloat#shape=[3]
|
|
40
|
+
# [6, 11, 16]
|
|
41
41
|
|
|
42
42
|
a[1,[1,3,4]]
|
|
43
|
-
=> Cumo::DFloat#shape=[3]
|
|
44
|
-
[6, 8, 9]
|
|
43
|
+
# => Cumo::DFloat#shape=[3]
|
|
44
|
+
# [6, 8, 9]
|
|
45
45
|
|
|
46
46
|
a[true,2].fill(99)
|
|
47
47
|
a
|
|
48
|
-
=> Cumo::DFloat#shape=[4,5]
|
|
49
|
-
[[0, 1, 99, 3, 4],
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
48
|
+
# => Cumo::DFloat#shape=[4,5]
|
|
49
|
+
# [[0, 1, 99, 3, 4],
|
|
50
|
+
# [5, 6, 99, 8, 9],
|
|
51
|
+
# [10, 11, 99, 13, 14],
|
|
52
|
+
# [15, 16, 99, 18, 19]]
|
|
53
53
|
*/
|
|
54
54
|
static VALUE
|
|
55
55
|
<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
|
|
@@ -10,31 +10,31 @@
|
|
|
10
10
|
|
|
11
11
|
@example
|
|
12
12
|
a = Cumo::DFloat.new(3,4).seq
|
|
13
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
14
|
-
[[0, 1, 2, 3],
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
14
|
+
# [[0, 1, 2, 3],
|
|
15
|
+
# [4, 5, 6, 7],
|
|
16
|
+
# [8, 9, 10, 11]]
|
|
17
17
|
|
|
18
18
|
a[1,2]=99
|
|
19
19
|
a
|
|
20
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
21
|
-
[[0, 1, 2, 3],
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
21
|
+
# [[0, 1, 2, 3],
|
|
22
|
+
# [4, 5, 99, 7],
|
|
23
|
+
# [8, 9, 10, 11]]
|
|
24
24
|
|
|
25
25
|
a[1,[0,2]] = [101,102]
|
|
26
26
|
a
|
|
27
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
28
|
-
[[0, 1, 2, 3],
|
|
29
|
-
|
|
30
|
-
|
|
27
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
28
|
+
# [[0, 1, 2, 3],
|
|
29
|
+
# [101, 5, 102, 7],
|
|
30
|
+
# [8, 9, 10, 11]]
|
|
31
31
|
|
|
32
32
|
a[1,true]=99
|
|
33
33
|
a
|
|
34
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
35
|
-
[[0, 1, 2, 3],
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
35
|
+
# [[0, 1, 2, 3],
|
|
36
|
+
# [99, 99, 99, 99],
|
|
37
|
+
# [8, 9, 10, 11]]
|
|
38
38
|
|
|
39
39
|
*/
|
|
40
40
|
static VALUE
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Multi-dimensional array indexing.
|
|
3
|
+
Same as [] for one-dimensional NArray.
|
|
4
|
+
Similar to numpy's tuple indexing, i.e., `a[[1,2,..],[3,4,..]]`
|
|
5
|
+
@overload at(*indices)
|
|
6
|
+
@param [Numeric,Range,etc] *indices Multi-dimensional Index Arrays.
|
|
7
|
+
@return [Cumo::NArray::<%=class_name%>] one-dimensional NArray view.
|
|
8
|
+
|
|
9
|
+
@example
|
|
10
|
+
x = Cumo::DFloat.new(3,3,3).seq
|
|
11
|
+
=> Cumo::DFloat#shape=[3,3,3]
|
|
12
|
+
[[[0, 1, 2],
|
|
13
|
+
[3, 4, 5],
|
|
14
|
+
[6, 7, 8]],
|
|
15
|
+
[[9, 10, 11],
|
|
16
|
+
[12, 13, 14],
|
|
17
|
+
[15, 16, 17]],
|
|
18
|
+
[[18, 19, 20],
|
|
19
|
+
[21, 22, 23],
|
|
20
|
+
[24, 25, 26]]]
|
|
21
|
+
|
|
22
|
+
x.at([0,1,2],[0,1,2],[-1,-2,-3])
|
|
23
|
+
=> Cumo::DFloat(view)#shape=[3]
|
|
24
|
+
[2, 13, 24]
|
|
25
|
+
*/
|
|
26
|
+
static VALUE
|
|
27
|
+
<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
|
|
28
|
+
{
|
|
29
|
+
int result_nd;
|
|
30
|
+
size_t pos;
|
|
31
|
+
|
|
32
|
+
result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
|
|
33
|
+
return cumo_na_at_main(argc, argv, self, 0, result_nd, pos);
|
|
34
|
+
}
|
|
@@ -157,8 +157,11 @@ static VALUE
|
|
|
157
157
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
|
158
158
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
|
|
159
159
|
|
|
160
|
+
status = cudnnCreateTensorDescriptor(&bn_desc);
|
|
161
|
+
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
|
|
162
|
+
|
|
160
163
|
mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
|
|
161
|
-
status =
|
|
164
|
+
status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
|
|
162
165
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
|
|
163
166
|
// TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
|
|
164
167
|
|
|
@@ -193,7 +196,7 @@ BATCH_NORM_ERROR:
|
|
|
193
196
|
}
|
|
194
197
|
|
|
195
198
|
#else // CUDNN_FOUND
|
|
196
|
-
|
|
199
|
+
#include "cumo/cuda/cudnn.h"
|
|
197
200
|
|
|
198
201
|
static VALUE
|
|
199
202
|
<%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
|
|
@@ -134,8 +134,11 @@ static VALUE
|
|
|
134
134
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
|
135
135
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
|
136
136
|
|
|
137
|
+
status = cudnnCreateTensorDescriptor(&bn_desc);
|
|
138
|
+
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
|
139
|
+
|
|
137
140
|
mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
|
|
138
|
-
status =
|
|
141
|
+
status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
|
|
139
142
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
|
140
143
|
// TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
|
|
141
144
|
|
|
@@ -178,11 +181,11 @@ BATCH_NORM_BACKWARD_ERROR:
|
|
|
178
181
|
}
|
|
179
182
|
|
|
180
183
|
#else // CUDNN_FOUND
|
|
181
|
-
|
|
184
|
+
#include "cumo/cuda/cudnn.h"
|
|
182
185
|
|
|
183
186
|
static VALUE
|
|
184
187
|
<%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
|
|
185
188
|
{
|
|
186
|
-
rb_raise(
|
|
189
|
+
rb_raise(cumo_cuda_eCUDNNError, "cuDNN is not available");
|
|
187
190
|
}
|
|
188
191
|
#endif // CUDNN_FOUND
|
|
@@ -116,22 +116,22 @@ static VALUE
|
|
|
116
116
|
otherwise returns UInt32 or UInt64 depending on the size along last axis.
|
|
117
117
|
@example
|
|
118
118
|
Cumo::Int32[0..4].bincount
|
|
119
|
-
=> Cumo::UInt32#shape=[5]
|
|
120
|
-
|
|
119
|
+
# => Cumo::UInt32#shape=[5]
|
|
120
|
+
# [1, 1, 1, 1, 1]
|
|
121
121
|
|
|
122
122
|
Cumo::Int32[0, 1, 1, 3, 2, 1, 7].bincount
|
|
123
|
-
=> Cumo::UInt32#shape=[8]
|
|
124
|
-
|
|
123
|
+
# => Cumo::UInt32#shape=[8]
|
|
124
|
+
# [1, 3, 1, 1, 0, 0, 0, 1]
|
|
125
125
|
|
|
126
126
|
x = Cumo::Int32[0, 1, 1, 3, 2, 1, 7, 23]
|
|
127
127
|
x.bincount.size == x.max+1
|
|
128
|
-
=> true
|
|
128
|
+
# => true
|
|
129
129
|
|
|
130
130
|
w = Cumo::DFloat[0.3, 0.5, 0.2, 0.7, 1.0, -0.6]
|
|
131
131
|
x = Cumo::Int32[0, 1, 1, 2, 2, 2]
|
|
132
132
|
x.bincount(w)
|
|
133
|
-
=> Cumo::DFloat#shape=[3]
|
|
134
|
-
|
|
133
|
+
# => Cumo::DFloat#shape=[3]
|
|
134
|
+
# [0.3, 0.7, 1.1]
|
|
135
135
|
|
|
136
136
|
*/
|
|
137
137
|
static VALUE
|
|
@@ -75,28 +75,24 @@ static void
|
|
|
75
75
|
|
|
76
76
|
@example
|
|
77
77
|
a = Cumo::Int32.new(10).seq
|
|
78
|
-
|
|
79
|
-
# Cumo::Int32#shape=[10]
|
|
80
|
-
# [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
|
|
81
|
-
|
|
82
|
-
p a
|
|
83
|
-
# Cumo::Int32#shape=[10]
|
|
78
|
+
# => Cumo::Int32#shape=[10]
|
|
84
79
|
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
85
80
|
|
|
86
|
-
|
|
87
|
-
# Cumo::Int32
|
|
88
|
-
# [
|
|
81
|
+
a.clip(1,8)
|
|
82
|
+
# => Cumo::Int32#shape=[10]
|
|
83
|
+
# [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
|
|
89
84
|
|
|
90
|
-
|
|
91
|
-
|
|
85
|
+
a.inplace.clip(3,6)
|
|
86
|
+
a
|
|
87
|
+
# => Cumo::Int32#shape=[10]
|
|
92
88
|
# [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
|
|
93
89
|
|
|
94
|
-
|
|
95
|
-
# Cumo::Int32#shape=[10]
|
|
90
|
+
b = Cumo::Int32.new(10).seq
|
|
91
|
+
# => Cumo::Int32#shape=[10]
|
|
96
92
|
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
97
93
|
|
|
98
|
-
|
|
99
|
-
# Cumo::Int32#shape=[10]
|
|
94
|
+
b.clip([3,4,1,1,1,4,4,4,4,4], 8)
|
|
95
|
+
# => Cumo::Int32#shape=[10]
|
|
100
96
|
# [3, 4, 2, 3, 4, 5, 6, 7, 8, 8]
|
|
101
97
|
*/
|
|
102
98
|
static VALUE
|
|
@@ -95,6 +95,7 @@ static VALUE
|
|
|
95
95
|
CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[0], ngy->shape[1]);
|
|
96
96
|
CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[1], nx->shape[1]);
|
|
97
97
|
|
|
98
|
+
#if !defined(NDEBUG)
|
|
98
99
|
{
|
|
99
100
|
// shape check of gy
|
|
100
101
|
size_t *y_shape = ngy->shape;
|
|
@@ -105,6 +106,7 @@ static VALUE
|
|
|
105
106
|
x_shape[i + 2], sizet_w_shape[i + 2], int_stride[i], int_pad[i]));
|
|
106
107
|
}
|
|
107
108
|
}
|
|
109
|
+
#endif
|
|
108
110
|
|
|
109
111
|
x_cont = cumo_na_as_contiguous_array(x);
|
|
110
112
|
gy_cont = cumo_na_as_contiguous_array(gy);
|
|
@@ -173,7 +175,7 @@ CONV_GRAD_W_ERROR:
|
|
|
173
175
|
}
|
|
174
176
|
|
|
175
177
|
#else // CUDNN_FOUND
|
|
176
|
-
|
|
178
|
+
#include "cumo/cuda/cudnn.h"
|
|
177
179
|
|
|
178
180
|
static VALUE
|
|
179
181
|
<%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
|
|
@@ -30,7 +30,7 @@ static void
|
|
|
30
30
|
/*
|
|
31
31
|
<%=name%> of self.
|
|
32
32
|
@overload <%=name%>(axis:nil, nan:false)
|
|
33
|
-
@param [Numeric,Array,Range] axis
|
|
33
|
+
@param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
|
|
34
34
|
@param [TrueClass] nan If true, apply NaN-aware algorithm (avoid NaN if exists).
|
|
35
35
|
@return [Cumo::<%=class_name%>] <%=name%> of self.
|
|
36
36
|
*/
|
|
@@ -34,8 +34,10 @@ static void
|
|
|
34
34
|
passing that element as a parameter.
|
|
35
35
|
@overload <%=name%>
|
|
36
36
|
@return [Cumo::NArray] self
|
|
37
|
-
For a block {|x| ... }
|
|
38
|
-
@
|
|
37
|
+
For a block `{|x| ... }`,
|
|
38
|
+
@yieldparam [Numeric] x an element of NArray.
|
|
39
|
+
@see #each_with_index
|
|
40
|
+
@see #map
|
|
39
41
|
*/
|
|
40
42
|
static VALUE
|
|
41
43
|
<%=c_func(0)%>(VALUE self)
|
|
@@ -55,9 +55,12 @@ static void
|
|
|
55
55
|
Invokes the given block once for each element of self,
|
|
56
56
|
passing that element and indices along each axis as parameters.
|
|
57
57
|
@overload <%=name%>
|
|
58
|
+
For a block `{|x,i,j,...| ... }`,
|
|
59
|
+
@yieldparam [Numeric] x an element
|
|
60
|
+
@yieldparam [Integer] i,j,... multidimensional indices
|
|
58
61
|
@return [Cumo::NArray] self
|
|
59
|
-
|
|
60
|
-
@
|
|
62
|
+
@see #each
|
|
63
|
+
@see #map_with_index
|
|
61
64
|
*/
|
|
62
65
|
static VALUE
|
|
63
66
|
<%=c_func(0)%>(VALUE self)
|
|
@@ -106,8 +106,11 @@ static VALUE
|
|
|
106
106
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
|
107
107
|
if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
|
|
108
108
|
|
|
109
|
+
status = cudnnCreateTensorDescriptor(&bn_desc);
|
|
110
|
+
if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
|
|
111
|
+
|
|
109
112
|
mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
|
|
110
|
-
status =
|
|
113
|
+
status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
|
|
111
114
|
if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
|
|
112
115
|
// TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
|
|
113
116
|
|
|
@@ -139,7 +142,7 @@ FIXED_BATCH_NORM_ERROR:
|
|
|
139
142
|
}
|
|
140
143
|
|
|
141
144
|
#else // CUDNN_FOUND
|
|
142
|
-
|
|
145
|
+
#include "cumo/cuda/cudnn.h"
|
|
143
146
|
|
|
144
147
|
static VALUE
|
|
145
148
|
<%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
|