cumo 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +15 -0
  4. data/.rubocop_todo.yml +1252 -0
  5. data/3rd_party/mkmf-cu/Gemfile +2 -0
  6. data/3rd_party/mkmf-cu/Rakefile +2 -1
  7. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
  8. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
  9. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
  11. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
  12. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
  13. data/CHANGELOG.md +85 -0
  14. data/Dockerfile +34 -0
  15. data/Gemfile +6 -1
  16. data/README.md +2 -10
  17. data/Rakefile +8 -11
  18. data/bench/broadcast_fp32.rb +28 -26
  19. data/bench/cumo_bench.rb +18 -16
  20. data/bench/numo_bench.rb +18 -16
  21. data/bench/reduction_fp32.rb +14 -12
  22. data/bin/console +1 -0
  23. data/cumo.gemspec +6 -9
  24. data/docker-build.sh +4 -0
  25. data/docker-launch.sh +4 -0
  26. data/docs/src-tree.md +1 -1
  27. data/ext/cumo/cuda/cudnn.c +2 -2
  28. data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
  29. data/ext/cumo/cuda/driver.c +8 -0
  30. data/ext/cumo/cumo.c +7 -3
  31. data/ext/cumo/depend.erb +15 -13
  32. data/ext/cumo/extconf.rb +33 -47
  33. data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
  34. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
  35. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
  36. data/ext/cumo/include/cumo/intern.h +1 -0
  37. data/ext/cumo/include/cumo/narray.h +13 -1
  38. data/ext/cumo/include/cumo/template.h +2 -4
  39. data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
  40. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
  41. data/ext/cumo/include/cumo/types/float_macro.h +2 -2
  42. data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
  43. data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
  44. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
  45. data/ext/cumo/include/cumo.h +2 -2
  46. data/ext/cumo/narray/array.c +8 -6
  47. data/ext/cumo/narray/data.c +48 -28
  48. data/ext/cumo/narray/gen/cogen.rb +8 -7
  49. data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
  50. data/ext/cumo/narray/gen/def/bit.rb +3 -1
  51. data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
  52. data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
  53. data/ext/cumo/narray/gen/def/int16.rb +2 -0
  54. data/ext/cumo/narray/gen/def/int32.rb +2 -0
  55. data/ext/cumo/narray/gen/def/int64.rb +2 -0
  56. data/ext/cumo/narray/gen/def/int8.rb +2 -0
  57. data/ext/cumo/narray/gen/def/robject.rb +2 -0
  58. data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
  59. data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
  60. data/ext/cumo/narray/gen/def/uint16.rb +2 -0
  61. data/ext/cumo/narray/gen/def/uint32.rb +2 -0
  62. data/ext/cumo/narray/gen/def/uint64.rb +2 -0
  63. data/ext/cumo/narray/gen/def/uint8.rb +2 -0
  64. data/ext/cumo/narray/gen/erbln.rb +9 -7
  65. data/ext/cumo/narray/gen/erbpp2.rb +26 -24
  66. data/ext/cumo/narray/gen/narray_def.rb +13 -11
  67. data/ext/cumo/narray/gen/spec.rb +58 -55
  68. data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
  69. data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
  70. data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
  71. data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
  72. data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
  73. data/ext/cumo/narray/gen/tmpl/at.c +34 -0
  74. data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
  75. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
  76. data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
  77. data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
  78. data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
  79. data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
  80. data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
  81. data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
  82. data/ext/cumo/narray/gen/tmpl/each.c +4 -2
  83. data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
  84. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
  85. data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
  86. data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
  87. data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
  88. data/ext/cumo/narray/gen/tmpl/median.c +2 -2
  89. data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
  91. data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
  92. data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
  93. data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
  94. data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
  95. data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
  96. data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
  97. data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
  98. data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
  99. data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
  100. data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
  101. data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
  102. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
  103. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
  104. data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
  105. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
  106. data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
  107. data/ext/cumo/narray/index.c +244 -40
  108. data/ext/cumo/narray/index_kernel.cu +84 -0
  109. data/ext/cumo/narray/narray.c +57 -19
  110. data/ext/cumo/narray/ndloop.c +1 -1
  111. data/ext/cumo/narray/struct.c +1 -1
  112. data/lib/cumo/cuda/compile_error.rb +1 -1
  113. data/lib/cumo/cuda/compiler.rb +23 -22
  114. data/lib/cumo/cuda/cudnn.rb +1 -1
  115. data/lib/cumo/cuda/device.rb +1 -1
  116. data/lib/cumo/cuda/link_state.rb +2 -2
  117. data/lib/cumo/cuda/module.rb +1 -2
  118. data/lib/cumo/cuda/nvrtc_program.rb +3 -2
  119. data/lib/cumo/cuda.rb +2 -0
  120. data/lib/cumo/linalg.rb +2 -0
  121. data/lib/cumo/narray/extra.rb +297 -341
  122. data/lib/cumo/narray.rb +2 -0
  123. data/lib/cumo.rb +3 -1
  124. data/test/bit_test.rb +157 -0
  125. data/test/cuda/compiler_test.rb +69 -0
  126. data/test/cuda/device_test.rb +31 -0
  127. data/test/cuda/memory_pool_test.rb +45 -0
  128. data/test/cuda/nvrtc_test.rb +51 -0
  129. data/test/cuda/runtime_test.rb +28 -0
  130. data/test/cudnn_test.rb +498 -0
  131. data/test/cumo_test.rb +27 -0
  132. data/test/narray_test.rb +745 -0
  133. data/test/ractor_test.rb +52 -0
  134. data/test/test_helper.rb +31 -0
  135. metadata +34 -54
  136. data/.travis.yml +0 -5
  137. data/numo-narray-version +0 -1
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  def_id "cast"
2
4
  def_id "eq"
3
5
  def_id "ne"
@@ -14,8 +16,8 @@ if is_float
14
16
  def_id "copysign"
15
17
  end
16
18
  if is_int
17
- def_id "<<","left_shift"
18
- def_id ">>","right_shift"
19
+ def_id "<<", "left_shift"
20
+ def_id ">>", "right_shift"
19
21
  end
20
22
  if is_comparable && !is_object
21
23
  def_id "gt"
@@ -42,13 +44,13 @@ if is_object
42
44
  def_id "nan?"
43
45
  def_id "infinite?"
44
46
  def_id "finite?"
45
- def_id "==","eq"
46
- def_id "!=","ne"
47
- def_id ">" ,"gt"
48
- def_id ">=","ge"
49
- def_id "<" ,"lt"
50
- def_id "<=","le"
51
- def_id "<=>","ufo"
47
+ def_id "==", "eq"
48
+ def_id "!=", "ne"
49
+ def_id ">" , "gt"
50
+ def_id ">=", "ge"
51
+ def_id "<" , "lt"
52
+ def_id "<=", "le"
53
+ def_id "<=>", "ufo"
52
54
  end
53
55
  if (is_float || is_complex) && !is_object
54
56
  def_id "gemm"
@@ -119,18 +121,18 @@ def_method "store" do
119
121
  store_numeric
120
122
  store_from "Bit"
121
123
  if is_complex
122
- store_from "DComplex","cumo_dcomplex","m_from_dcomplex"
123
- store_from "SComplex","cumo_scomplex","m_from_scomplex"
124
+ store_from "DComplex", "cumo_dcomplex", "m_from_dcomplex"
125
+ store_from "SComplex", "cumo_scomplex", "m_from_scomplex"
124
126
  end
125
- store_from "DFloat","double", "m_from_real"
126
- store_from "SFloat","float", "m_from_real"
127
+ store_from "DFloat", "double", "m_from_real"
128
+ store_from "SFloat", "float", "m_from_real"
127
129
  store_from "Int64", "int64_t", "m_from_int64"
128
130
  store_from "Int32", "int32_t", "m_from_int32"
129
131
  store_from "Int16", "int16_t", "m_from_sint"
130
132
  store_from "Int8", "int8_t", "m_from_sint"
131
- store_from "UInt64","u_int64_t","m_from_uint64"
132
- store_from "UInt32","u_int32_t","m_from_uint32"
133
- store_from "UInt16","u_int16_t","m_from_sint"
133
+ store_from "UInt64", "u_int64_t", "m_from_uint64"
134
+ store_from "UInt32", "u_int32_t", "m_from_uint32"
135
+ store_from "UInt16", "u_int16_t", "m_from_sint"
134
136
  store_from "UInt8", "u_int8_t", "m_from_sint"
135
137
  store_from "RObject", "VALUE", "m_num_to_data"
136
138
  store_array
@@ -144,6 +146,7 @@ def_singleton_method "cast"
144
146
  def_method "aref", op:"[]"
145
147
  def_method "aref_cpu"
146
148
  def_method "aset", op:"[]="
149
+ def_method "at"
147
150
 
148
151
  def_method "coerce_cast"
149
152
  def_method "to_a"
@@ -167,15 +170,15 @@ if is_bit
167
170
  binary "xor", "^"
168
171
  binary "eq"
169
172
  bit_count "count_true"
170
- def_alias "count_1","count_true"
171
- def_alias "count","count_true"
173
+ def_alias "count_1", "count_true"
174
+ def_alias "count", "count_true"
172
175
  bit_count "count_false"
173
- def_alias "count_0","count_false"
176
+ def_alias "count_0", "count_false"
174
177
  bit_count_cpu "count_true_cpu"
175
- def_alias "count_1_cpu","count_true_cpu"
176
- def_alias "count_cpu","count_true_cpu"
178
+ def_alias "count_1_cpu", "count_true_cpu"
179
+ def_alias "count_cpu", "count_true_cpu"
177
180
  bit_count_cpu "count_false_cpu"
178
- def_alias "count_0_cpu","count_false_cpu"
181
+ def_alias "count_0_cpu", "count_false_cpu"
179
182
  bit_reduce "all?", 1
180
183
  bit_reduce "any?", 0
181
184
  def_method "none?", "none_p"
@@ -215,17 +218,17 @@ if is_complex
215
218
  unary2 "real", "rtype", "cRT"
216
219
  unary2 "imag", "rtype", "cRT"
217
220
  unary2 "arg", "rtype", "cRT"
218
- def_alias "angle","arg"
221
+ def_alias "angle", "arg"
219
222
  set2 "set_imag", "rtype", "cRT"
220
223
  set2 "set_real", "rtype", "cRT"
221
- def_alias "imag=","set_imag"
222
- def_alias "real=","set_real"
224
+ def_alias "imag=", "set_imag"
225
+ def_alias "real=", "set_real"
223
226
  else
224
227
  def_alias "conj", "view"
225
228
  def_alias "im", "view"
226
229
  end
227
230
 
228
- def_alias "conjugate","conj"
231
+ def_alias "conjugate", "conj"
229
232
 
230
233
  # base_cond
231
234
 
@@ -278,9 +281,9 @@ if is_comparable
278
281
  cond_binary "lt"
279
282
  cond_binary "le"
280
283
  def_alias ">", "gt"
281
- def_alias ">=","ge"
284
+ def_alias ">=", "ge"
282
285
  def_alias "<", "lt"
283
- def_alias "<=","le"
286
+ def_alias "<=", "le"
284
287
  def_method "clip"
285
288
  end
286
289
 
@@ -296,32 +299,32 @@ end
296
299
 
297
300
  if is_int
298
301
  if is_unsigned
299
- accum "sum","u_int64_t","cumo_cUInt64"
300
- accum "prod","u_int64_t","cumo_cUInt64"
302
+ accum "sum", "u_int64_t", "cumo_cUInt64"
303
+ accum "prod", "u_int64_t", "cumo_cUInt64"
301
304
  else
302
- accum "sum","int64_t","cumo_cInt64"
303
- accum "prod","int64_t","cumo_cInt64"
305
+ accum "sum", "int64_t", "cumo_cInt64"
306
+ accum "prod", "int64_t", "cumo_cInt64"
304
307
  end
305
308
  else
306
- accum "sum","dtype","cT"
307
- accum "prod","dtype","cT"
309
+ accum "sum", "dtype", "cT"
310
+ accum "prod", "dtype", "cT"
308
311
  end
309
312
 
310
313
  if is_double_precision
311
- accum "kahan_sum","dtype","cT"
314
+ accum "kahan_sum", "dtype", "cT"
312
315
  end
313
316
 
314
317
  if is_float
315
- accum "mean","dtype","cT"
316
- accum "stddev","rtype","cRT"
317
- accum "var","rtype","cRT"
318
- accum "rms","rtype","cRT"
318
+ accum "mean", "dtype", "cT"
319
+ accum "stddev", "rtype", "cRT"
320
+ accum "var", "rtype", "cRT"
321
+ accum "rms", "rtype", "cRT"
319
322
  end
320
323
 
321
324
  if is_comparable
322
- accum "min","dtype","cT"
323
- accum "max","dtype","cT"
324
- accum "ptp","dtype","cT"
325
+ accum "min", "dtype", "cT"
326
+ accum "max", "dtype", "cT"
327
+ accum "ptp", "dtype", "cT"
325
328
  accum_index "max_index"
326
329
  accum_index "min_index"
327
330
  def_method "minmax"
@@ -333,8 +336,8 @@ if is_int && !is_object
333
336
  def_method "bincount"
334
337
  end
335
338
 
336
- cum "cumsum","add"
337
- cum "cumprod","mul"
339
+ cum "cumsum", "add"
340
+ cum "cumprod", "mul"
338
341
 
339
342
  # dot
340
343
  accum_binary "mulsum"
@@ -377,17 +380,17 @@ def_method "poly"
377
380
 
378
381
  if is_comparable && !is_object
379
382
  if is_float
380
- qsort type_name,"dtype","*(dtype*)","_prnan"
381
- qsort type_name,"dtype","*(dtype*)","_ignan"
383
+ qsort type_name, "dtype", "*(dtype*)", "_prnan"
384
+ qsort type_name, "dtype", "*(dtype*)", "_ignan"
382
385
  else
383
- qsort type_name,"dtype","*(dtype*)"
386
+ qsort type_name, "dtype", "*(dtype*)"
384
387
  end
385
388
  def_method "sort"
386
389
  if is_float
387
- qsort type_name+"_index","dtype*","**(dtype**)","_prnan"
388
- qsort type_name+"_index","dtype*","**(dtype**)","_ignan"
390
+ qsort type_name + "_index", "dtype*", "**(dtype**)", "_prnan"
391
+ qsort type_name + "_index", "dtype*", "**(dtype**)", "_ignan"
389
392
  else
390
- qsort type_name+"_index","dtype*","**(dtype**)"
393
+ qsort type_name + "_index", "dtype*", "**(dtype**)"
391
394
  end
392
395
  def_method "sort_index"
393
396
  def_method "median"
@@ -407,7 +410,7 @@ def_module do
407
410
  set ns_var: "cT"
408
411
  set class_name: cn
409
412
  set name: "#{nm}_math"
410
- set full_module_name: fn+"::NMath"
413
+ set full_module_name: fn + "::NMath"
411
414
  set module_name: "Math"
412
415
  set module_var: "mTM"
413
416
 
@@ -433,14 +436,14 @@ def_module do
433
436
  math "atanh"
434
437
  math "sinc"
435
438
  if !is_c
436
- math "atan2",2
437
- math "hypot",2
439
+ math "atan2", 2
440
+ math "hypot", 2
438
441
  math "erf"
439
442
  math "erfc"
440
443
  math "log1p"
441
444
  math "expm1"
442
- math "ldexp",2
443
- math "frexp",1,"frexp"
445
+ math "ldexp", 2
446
+ math "frexp", 1, "frexp"
444
447
  end
445
448
  end
446
449
  end
@@ -56,8 +56,8 @@ static void
56
56
  <% else %>
57
57
  @overload <%=name%>(axis:nil, keepdims:false)
58
58
  <% end %>
59
- @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
60
- @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
59
+ @param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
60
+ @param [TrueClass] keepdims If true, the reduced axes are left in the result array as dimensions with size one.
61
61
  @return [Cumo::<%=class_name%>] returns result of <%=name%>.
62
62
  */
63
63
  static VALUE
@@ -91,7 +91,7 @@ static VALUE
91
91
  @overload <%=op_map%>(other, axis:nil, keepdims:false)
92
92
  <% end %>
93
93
  @param [Cumo::NArray,Numeric] other
94
- @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
94
+ @param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
95
95
  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
96
96
  <% if is_float %>
97
97
  @param [TrueClass] nan (keyword) If true, apply NaN-aware algorithm (avoid NaN if exists).
@@ -85,7 +85,7 @@ static const rb_data_type_t <%=type_name%>_data_type = {
85
85
  {0, <%=type_name%>_free, <%=type_name%>_memsize,},
86
86
  &cumo_na_data_type,
87
87
  &<%=type_name%>_info,
88
- 0, // flags
88
+ RUBY_TYPED_FROZEN_SHAREABLE, // flags
89
89
  };
90
90
 
91
91
  <% end %>
@@ -21,35 +21,35 @@ static VALUE
21
21
 
22
22
  @example
23
23
  a = Cumo::DFloat.new(4,5).seq
24
- => Cumo::DFloat#shape=[4,5]
25
- [[0, 1, 2, 3, 4],
26
- [5, 6, 7, 8, 9],
27
- [10, 11, 12, 13, 14],
28
- [15, 16, 17, 18, 19]]
24
+ # => Cumo::DFloat#shape=[4,5]
25
+ # [[0, 1, 2, 3, 4],
26
+ # [5, 6, 7, 8, 9],
27
+ # [10, 11, 12, 13, 14],
28
+ # [15, 16, 17, 18, 19]]
29
29
 
30
30
  a[7]
31
- => Cumo::DFloat#shape=[]
32
- 6.0
31
+ # => Cumo::DFloat#shape=[]
32
+ # 6.0
33
33
 
34
34
  a[1,1]
35
- => Cumo::DFloat#shape=[]
36
- 6.0
35
+ # => Cumo::DFloat#shape=[]
36
+ # 6.0
37
37
 
38
38
  a[1..3,1]
39
- => Cumo::DFloat#shape=[3]
40
- [6, 11, 16]
39
+ # => Cumo::DFloat#shape=[3]
40
+ # [6, 11, 16]
41
41
 
42
42
  a[1,[1,3,4]]
43
- => Cumo::DFloat#shape=[3]
44
- [6, 8, 9]
43
+ # => Cumo::DFloat#shape=[3]
44
+ # [6, 8, 9]
45
45
 
46
46
  a[true,2].fill(99)
47
47
  a
48
- => Cumo::DFloat#shape=[4,5]
49
- [[0, 1, 99, 3, 4],
50
- [5, 6, 99, 8, 9],
51
- [10, 11, 99, 13, 14],
52
- [15, 16, 99, 18, 19]]
48
+ # => Cumo::DFloat#shape=[4,5]
49
+ # [[0, 1, 99, 3, 4],
50
+ # [5, 6, 99, 8, 9],
51
+ # [10, 11, 99, 13, 14],
52
+ # [15, 16, 99, 18, 19]]
53
53
  */
54
54
  static VALUE
55
55
  <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
@@ -10,31 +10,31 @@
10
10
 
11
11
  @example
12
12
  a = Cumo::DFloat.new(3,4).seq
13
- => Cumo::DFloat#shape=[3,4]
14
- [[0, 1, 2, 3],
15
- [4, 5, 6, 7],
16
- [8, 9, 10, 11]]
13
+ # => Cumo::DFloat#shape=[3,4]
14
+ # [[0, 1, 2, 3],
15
+ # [4, 5, 6, 7],
16
+ # [8, 9, 10, 11]]
17
17
 
18
18
  a[1,2]=99
19
19
  a
20
- => Cumo::DFloat#shape=[3,4]
21
- [[0, 1, 2, 3],
22
- [4, 5, 99, 7],
23
- [8, 9, 10, 11]]
20
+ # => Cumo::DFloat#shape=[3,4]
21
+ # [[0, 1, 2, 3],
22
+ # [4, 5, 99, 7],
23
+ # [8, 9, 10, 11]]
24
24
 
25
25
  a[1,[0,2]] = [101,102]
26
26
  a
27
- => Cumo::DFloat#shape=[3,4]
28
- [[0, 1, 2, 3],
29
- [101, 5, 102, 7],
30
- [8, 9, 10, 11]]
27
+ # => Cumo::DFloat#shape=[3,4]
28
+ # [[0, 1, 2, 3],
29
+ # [101, 5, 102, 7],
30
+ # [8, 9, 10, 11]]
31
31
 
32
32
  a[1,true]=99
33
33
  a
34
- => Cumo::DFloat#shape=[3,4]
35
- [[0, 1, 2, 3],
36
- [99, 99, 99, 99],
37
- [8, 9, 10, 11]]
34
+ # => Cumo::DFloat#shape=[3,4]
35
+ # [[0, 1, 2, 3],
36
+ # [99, 99, 99, 99],
37
+ # [8, 9, 10, 11]]
38
38
 
39
39
  */
40
40
  static VALUE
@@ -0,0 +1,34 @@
1
+ /*
2
+ Multi-dimensional array indexing.
3
+ Same as [] for one-dimensional NArray.
4
+ Similar to numpy's tuple indexing, i.e., `a[[1,2,..],[3,4,..]]`
5
+ @overload at(*indices)
6
+ @param [Numeric,Range,etc] *indices Multi-dimensional Index Arrays.
7
+ @return [Cumo::NArray::<%=class_name%>] one-dimensional NArray view.
8
+
9
+ @example
10
+ x = Cumo::DFloat.new(3,3,3).seq
11
+ => Cumo::DFloat#shape=[3,3,3]
12
+ [[[0, 1, 2],
13
+ [3, 4, 5],
14
+ [6, 7, 8]],
15
+ [[9, 10, 11],
16
+ [12, 13, 14],
17
+ [15, 16, 17]],
18
+ [[18, 19, 20],
19
+ [21, 22, 23],
20
+ [24, 25, 26]]]
21
+
22
+ x.at([0,1,2],[0,1,2],[-1,-2,-3])
23
+ => Cumo::DFloat(view)#shape=[3]
24
+ [2, 13, 24]
25
+ */
26
+ static VALUE
27
+ <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
28
+ {
29
+ int result_nd;
30
+ size_t pos;
31
+
32
+ result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
33
+ return cumo_na_at_main(argc, argv, self, 0, result_nd, pos);
34
+ }
@@ -157,8 +157,11 @@ static VALUE
157
157
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
158
158
  if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
159
159
 
160
+ status = cudnnCreateTensorDescriptor(&bn_desc);
161
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
162
+
160
163
  mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
161
- status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
164
+ status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
162
165
  if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
163
166
  // TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
164
167
 
@@ -193,7 +196,7 @@ BATCH_NORM_ERROR:
193
196
  }
194
197
 
195
198
  #else // CUDNN_FOUND
196
- VALUE cumo_cuda_eCUDNNError;
199
+ #include "cumo/cuda/cudnn.h"
197
200
 
198
201
  static VALUE
199
202
  <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
@@ -134,8 +134,11 @@ static VALUE
134
134
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
135
135
  if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
136
136
 
137
+ status = cudnnCreateTensorDescriptor(&bn_desc);
138
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
139
+
137
140
  mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
138
- status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
141
+ status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
139
142
  if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
140
143
  // TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
141
144
 
@@ -178,11 +181,11 @@ BATCH_NORM_BACKWARD_ERROR:
178
181
  }
179
182
 
180
183
  #else // CUDNN_FOUND
181
- VALUE cumo_cuda_eCudnnError;
184
+ #include "cumo/cuda/cudnn.h"
182
185
 
183
186
  static VALUE
184
187
  <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
185
188
  {
186
- rb_raise(cumo_cuda_eCudnnError, "cuDNN is not available");
189
+ rb_raise(cumo_cuda_eCUDNNError, "cuDNN is not available");
187
190
  }
188
191
  #endif // CUDNN_FOUND
@@ -116,22 +116,22 @@ static VALUE
116
116
  otherwise returns UInt32 or UInt64 depending on the size along last axis.
117
117
  @example
118
118
  Cumo::Int32[0..4].bincount
119
- => Cumo::UInt32#shape=[5]
120
- [1, 1, 1, 1, 1]
119
+ # => Cumo::UInt32#shape=[5]
120
+ # [1, 1, 1, 1, 1]
121
121
 
122
122
  Cumo::Int32[0, 1, 1, 3, 2, 1, 7].bincount
123
- => Cumo::UInt32#shape=[8]
124
- [1, 3, 1, 1, 0, 0, 0, 1]
123
+ # => Cumo::UInt32#shape=[8]
124
+ # [1, 3, 1, 1, 0, 0, 0, 1]
125
125
 
126
126
  x = Cumo::Int32[0, 1, 1, 3, 2, 1, 7, 23]
127
127
  x.bincount.size == x.max+1
128
- => true
128
+ # => true
129
129
 
130
130
  w = Cumo::DFloat[0.3, 0.5, 0.2, 0.7, 1.0, -0.6]
131
131
  x = Cumo::Int32[0, 1, 1, 2, 2, 2]
132
132
  x.bincount(w)
133
- => Cumo::DFloat#shape=[3]
134
- [0.3, 0.7, 1.1]
133
+ # => Cumo::DFloat#shape=[3]
134
+ # [0.3, 0.7, 1.1]
135
135
 
136
136
  */
137
137
  static VALUE
@@ -75,28 +75,24 @@ static void
75
75
 
76
76
  @example
77
77
  a = Cumo::Int32.new(10).seq
78
- p a.clip(1,8)
79
- # Cumo::Int32#shape=[10]
80
- # [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
81
-
82
- p a
83
- # Cumo::Int32#shape=[10]
78
+ # => Cumo::Int32#shape=[10]
84
79
  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
85
80
 
86
- p a.inplace.clip(3,6)
87
- # Cumo::Int32(view)#shape=[10]
88
- # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
81
+ a.clip(1,8)
82
+ # => Cumo::Int32#shape=[10]
83
+ # [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
89
84
 
90
- p a
91
- # Cumo::Int32#shape=[10]
85
+ a.inplace.clip(3,6)
86
+ a
87
+ # => Cumo::Int32#shape=[10]
92
88
  # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
93
89
 
94
- p a = Cumo::Int32.new(10).seq
95
- # Cumo::Int32#shape=[10]
90
+ b = Cumo::Int32.new(10).seq
91
+ # => Cumo::Int32#shape=[10]
96
92
  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
97
93
 
98
- p a.clip([3,4,1,1,1,4,4,4,4,4], 8)
99
- # Cumo::Int32#shape=[10]
94
+ b.clip([3,4,1,1,1,4,4,4,4,4], 8)
95
+ # => Cumo::Int32#shape=[10]
100
96
  # [3, 4, 2, 3, 4, 5, 6, 7, 8, 8]
101
97
  */
102
98
  static VALUE
@@ -206,7 +206,7 @@ CONV_ERROR:
206
206
  }
207
207
 
208
208
  #else // CUDNN_FOUND
209
- VALUE cumo_cuda_eCUDNNError;
209
+ #include "cumo/cuda/cudnn.h"
210
210
 
211
211
  static VALUE
212
212
  <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
@@ -95,6 +95,7 @@ static VALUE
95
95
  CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[0], ngy->shape[1]);
96
96
  CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[1], nx->shape[1]);
97
97
 
98
+ #if !defined(NDEBUG)
98
99
  {
99
100
  // shape check of gy
100
101
  size_t *y_shape = ngy->shape;
@@ -105,6 +106,7 @@ static VALUE
105
106
  x_shape[i + 2], sizet_w_shape[i + 2], int_stride[i], int_pad[i]));
106
107
  }
107
108
  }
109
+ #endif
108
110
 
109
111
  x_cont = cumo_na_as_contiguous_array(x);
110
112
  gy_cont = cumo_na_as_contiguous_array(gy);
@@ -173,7 +175,7 @@ CONV_GRAD_W_ERROR:
173
175
  }
174
176
 
175
177
  #else // CUDNN_FOUND
176
- VALUE cumo_cuda_eCUDNNError;
178
+ #include "cumo/cuda/cudnn.h"
177
179
 
178
180
  static VALUE
179
181
  <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
@@ -234,7 +234,7 @@ CONV_TRANSPOSE_ERROR:
234
234
  }
235
235
 
236
236
  #else // CUDNN_FOUND
237
- VALUE cumo_cuda_eCUDNNError;
237
+ #include "cumo/cuda/cudnn.h"
238
238
 
239
239
  static VALUE
240
240
  <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
@@ -30,7 +30,7 @@ static void
30
30
  /*
31
31
  <%=name%> of self.
32
32
  @overload <%=name%>(axis:nil, nan:false)
33
- @param [Numeric,Array,Range] axis Affected dimensions.
33
+ @param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
34
34
  @param [TrueClass] nan If true, apply NaN-aware algorithm (avoid NaN if exists).
35
35
  @return [Cumo::<%=class_name%>] <%=name%> of self.
36
36
  */
@@ -34,8 +34,10 @@ static void
34
34
  passing that element as a parameter.
35
35
  @overload <%=name%>
36
36
  @return [Cumo::NArray] self
37
- For a block {|x| ... }
38
- @yield [x] x is element of NArray.
37
+ For a block `{|x| ... }`,
38
+ @yieldparam [Numeric] x an element of NArray.
39
+ @see #each_with_index
40
+ @see #map
39
41
  */
40
42
  static VALUE
41
43
  <%=c_func(0)%>(VALUE self)
@@ -55,9 +55,12 @@ static void
55
55
  Invokes the given block once for each element of self,
56
56
  passing that element and indices along each axis as parameters.
57
57
  @overload <%=name%>
58
+ For a block `{|x,i,j,...| ... }`,
59
+ @yieldparam [Numeric] x an element
60
+ @yieldparam [Integer] i,j,... multidimensional indices
58
61
  @return [Cumo::NArray] self
59
- For a block {|x,i,j,...| ... }
60
- @yield [x,i,j,...] x is an element, i,j,... are multidimensional indices.
62
+ @see #each
63
+ @see #map_with_index
61
64
  */
62
65
  static VALUE
63
66
  <%=c_func(0)%>(VALUE self)
@@ -106,8 +106,11 @@ static VALUE
106
106
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
107
107
  if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
108
108
 
109
+ status = cudnnCreateTensorDescriptor(&bn_desc);
110
+ if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
111
+
109
112
  mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
110
- status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
113
+ status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
111
114
  if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
112
115
  // TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
113
116
 
@@ -139,7 +142,7 @@ FIXED_BATCH_NORM_ERROR:
139
142
  }
140
143
 
141
144
  #else // CUDNN_FOUND
142
- VALUE cumo_cuda_eCUDNNError;
145
+ #include "cumo/cuda/cudnn.h"
143
146
 
144
147
  static VALUE
145
148
  <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
@@ -14,6 +14,7 @@
14
14
  rb_hash_aset(hCast, rb_cArray, cT);
15
15
  <% for x in upcast %>
16
16
  <%= x %><% end %>
17
+ rb_obj_freeze(hCast);
17
18
 
18
19
  <% @children.each do |m| %>
19
20
  <%= m.init_def %><% end %>