cumo 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/3rd_party/LICENSE.txt +60 -0
  4. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
  5. data/LICENSE.txt +1 -62
  6. data/README.md +33 -29
  7. data/bench/cumo_bench.rb +47 -25
  8. data/bench/numo_bench.rb +27 -25
  9. data/docs/src-tree.md +16 -0
  10. data/ext/cumo/cuda/cublas.c +69 -219
  11. data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
  12. data/ext/cumo/cuda/runtime.c +2 -14
  13. data/ext/cumo/cumo.c +16 -16
  14. data/ext/cumo/include/cumo.h +2 -2
  15. data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
  16. data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
  17. data/ext/cumo/include/cumo/indexer.h +46 -63
  18. data/ext/cumo/include/cumo/intern.h +58 -112
  19. data/ext/cumo/include/cumo/narray.h +214 -185
  20. data/ext/cumo/include/cumo/narray_kernel.h +66 -37
  21. data/ext/cumo/include/cumo/ndloop.h +42 -42
  22. data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
  23. data/ext/cumo/include/cumo/template.h +56 -51
  24. data/ext/cumo/include/cumo/template_kernel.h +31 -31
  25. data/ext/cumo/include/cumo/types/bit.h +3 -3
  26. data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
  27. data/ext/cumo/include/cumo/types/complex.h +126 -126
  28. data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
  29. data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
  30. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
  31. data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
  32. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
  33. data/ext/cumo/include/cumo/types/int_macro.h +1 -1
  34. data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
  35. data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
  36. data/ext/cumo/include/cumo/types/scomplex.h +5 -5
  37. data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
  38. data/ext/cumo/narray/array.c +143 -143
  39. data/ext/cumo/narray/data.c +184 -184
  40. data/ext/cumo/narray/gen/cogen.rb +5 -2
  41. data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
  42. data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
  43. data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
  44. data/ext/cumo/narray/gen/erbln.rb +132 -0
  45. data/ext/cumo/narray/gen/erbpp2.rb +18 -13
  46. data/ext/cumo/narray/gen/narray_def.rb +3 -3
  47. data/ext/cumo/narray/gen/spec.rb +2 -2
  48. data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
  49. data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
  50. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
  51. data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
  52. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
  53. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
  54. data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
  55. data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
  56. data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
  57. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
  58. data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
  59. data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
  60. data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
  61. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
  62. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
  63. data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
  64. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
  65. data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
  66. data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
  67. data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
  68. data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
  69. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
  70. data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
  71. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
  72. data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
  73. data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
  74. data/ext/cumo/narray/gen/tmpl/each.c +9 -9
  75. data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
  76. data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
  77. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
  78. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
  79. data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
  80. data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
  81. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
  82. data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
  83. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
  84. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
  85. data/ext/cumo/narray/gen/tmpl/format.c +11 -11
  86. data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
  87. data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
  88. data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
  89. data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
  91. data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
  92. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
  93. data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
  94. data/ext/cumo/narray/gen/tmpl/median.c +10 -10
  95. data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
  96. data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
  97. data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
  98. data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
  99. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
  100. data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
  101. data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
  102. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
  103. data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
  104. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
  105. data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
  106. data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
  107. data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
  108. data/ext/cumo/narray/gen/tmpl/store.c +6 -6
  109. data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
  110. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
  111. data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
  112. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
  113. data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
  114. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
  115. data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
  116. data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
  117. data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
  118. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
  119. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
  120. data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
  121. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
  122. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
  123. data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
  124. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
  125. data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
  126. data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
  127. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
  128. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
  129. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
  130. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
  131. data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
  132. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
  133. data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
  134. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
  135. data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
  136. data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
  137. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
  138. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
  139. data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
  140. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
  141. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
  142. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
  143. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
  144. data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
  145. data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
  146. data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
  147. data/ext/cumo/narray/index.c +213 -213
  148. data/ext/cumo/narray/math.c +27 -27
  149. data/ext/cumo/narray/narray.c +484 -484
  150. data/ext/cumo/narray/ndloop.c +259 -258
  151. data/ext/cumo/narray/rand.c +3 -3
  152. data/ext/cumo/narray/step.c +70 -70
  153. data/ext/cumo/narray/struct.c +139 -139
  154. metadata +6 -7
  155. data/ext/cumo/include/cumo/intern_fwd.h +0 -38
  156. data/lib/erbpp.rb +0 -294
  157. data/lib/erbpp/line_number.rb +0 -137
  158. data/lib/erbpp/narray_def.rb +0 -381
@@ -17,5 +17,5 @@ static VALUE
17
17
  <%=c_func(0)%>(VALUE ary)
18
18
  {
19
19
  cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
20
- return na_ndloop_inspect(ary, <%=c_iter%>, Qnil);
20
+ return cumo_na_ndloop_inspect(ary, <%=c_iter%>, Qnil);
21
21
  }
@@ -21,7 +21,7 @@
21
21
 
22
22
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
23
23
 
24
- <% id_decl.each do |x| %>
24
+ <% cumo_id_decl.each do |x| %>
25
25
  <%= x %>
26
26
  <% end %>
27
27
 
@@ -41,7 +41,7 @@ Init_<%=lib_name%>(void)
41
41
 
42
42
  <%=ns_var%> = rb_define_module("Cumo");
43
43
 
44
- <% id_assign.each do |x| %>
44
+ <% cumo_id_assign.each do |x| %>
45
45
  <%= x %><% end %>
46
46
 
47
47
  <% children.each do |c| %>
@@ -11,7 +11,7 @@ void <%="cumo_#{c_iter}_stride_kernel_launch"%>(char *p1, ssize_t s1, seq_data_t
11
11
  <% end %>
12
12
 
13
13
  static void
14
- <%=c_iter%>(na_loop_t *const lp)
14
+ <%=c_iter%>(cumo_na_loop_t *const lp)
15
15
  {
16
16
  size_t i;
17
17
  char *p1;
@@ -21,8 +21,8 @@ static void
21
21
  seq_count_t c;
22
22
  logseq_opt_t *g;
23
23
 
24
- INIT_COUNTER(lp, i);
25
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
24
+ CUMO_INIT_COUNTER(lp, i);
25
+ CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
26
26
  g = (logseq_opt_t*)(lp->opt_ptr);
27
27
  beg = g->beg;
28
28
  step = g->step;
@@ -31,7 +31,7 @@ static void
31
31
  <% if is_object %>
32
32
  {
33
33
  dtype x;
34
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
34
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
35
35
  if (idx1) {
36
36
  for (; i--;) {
37
37
  x = f_seq(beg,step,c++);
@@ -85,8 +85,8 @@ static VALUE
85
85
  {
86
86
  logseq_opt_t *g;
87
87
  VALUE vbeg, vstep, vbase;
88
- ndfunc_arg_in_t ain[1] = {{OVERWRITE,0}};
89
- ndfunc_t ndf = {<%=c_iter%>, FULL_LOOP, 1,0, ain,0};
88
+ cumo_ndfunc_arg_in_t ain[1] = {{CUMO_OVERWRITE,0}};
89
+ cumo_ndfunc_t ndf = {<%=c_iter%>, CUMO_FULL_LOOP, 1,0, ain,0};
90
90
 
91
91
  g = ALLOCA_N(logseq_opt_t,1);
92
92
  rb_scan_args(argc, args, "21", &vbeg, &vstep, &vbase);
@@ -97,6 +97,6 @@ static VALUE
97
97
  } else {
98
98
  g->base = m_num_to_data(vbase);
99
99
  }
100
- na_ndloop3(&ndf, g, 1, self);
100
+ cumo_na_ndloop3(&ndf, g, 1, self);
101
101
  return self;
102
102
  }
@@ -17,15 +17,15 @@ __global__ void <%="cumo_#{c_iter}_stride_kernel"%>(char *p1, size_t s1, seq_dat
17
17
 
18
18
  void <%="cumo_#{c_iter}_index_kernel_launch"%>(char *p1, size_t* idx1, seq_data_t beg, seq_data_t step, seq_data_t base, seq_count_t c, uint64_t n)
19
19
  {
20
- size_t gridDim = get_gridDim(n);
21
- size_t blockDim = get_blockDim(n);
22
- <%="cumo_#{c_iter}_index_kernel"%><<<gridDim, blockDim>>>(p1,idx1,beg,step,base,c,n);
20
+ size_t grid_dim = cumo_get_grid_dim(n);
21
+ size_t block_dim = cumo_get_block_dim(n);
22
+ <%="cumo_#{c_iter}_index_kernel"%><<<grid_dim, block_dim>>>(p1,idx1,beg,step,base,c,n);
23
23
  }
24
24
 
25
25
  void <%="cumo_#{c_iter}_stride_kernel_launch"%>(char *p1, ssize_t s1, seq_data_t beg, seq_data_t step, seq_data_t base, seq_count_t c, uint64_t n)
26
26
  {
27
- size_t gridDim = get_gridDim(n);
28
- size_t blockDim = get_blockDim(n);
29
- <%="cumo_#{c_iter}_stride_kernel"%><<<gridDim, blockDim>>>(p1,s1,beg,step,base,c,n);
27
+ size_t grid_dim = cumo_get_grid_dim(n);
28
+ size_t block_dim = cumo_get_block_dim(n);
29
+ <%="cumo_#{c_iter}_stride_kernel"%><<<grid_dim, block_dim>>>(p1,s1,beg,step,base,c,n);
30
30
  }
31
31
  <% end %>
@@ -13,7 +13,7 @@ yield_map_with_index(dtype x, size_t *c, VALUE *a, int nd, int md)
13
13
  }
14
14
 
15
15
  static void
16
- <%=c_iter%>(na_loop_t *const lp)
16
+ <%=c_iter%>(cumo_na_loop_t *const lp)
17
17
  {
18
18
  size_t i;
19
19
  char *p1, *p2;
@@ -30,43 +30,43 @@ static void
30
30
  md = nd + 2;
31
31
  a = ALLOCA_N(VALUE,md);
32
32
 
33
- INIT_COUNTER(lp, i);
34
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
35
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
33
+ CUMO_INIT_COUNTER(lp, i);
34
+ CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
35
+ CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
36
36
 
37
- SHOW_SYNCHRONIZE_WARNING_ONCE("<%=name%>", "<%=type_name%>");
37
+ CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("<%=name%>", "<%=type_name%>");
38
38
  cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
39
39
 
40
40
  c[nd] = 0;
41
41
  if (idx1) {
42
42
  if (idx2) {
43
43
  for (; i--;) {
44
- GET_DATA_INDEX(p1,idx1,dtype,x);
44
+ CUMO_GET_DATA_INDEX(p1,idx1,dtype,x);
45
45
  x = yield_map_with_index(x,c,a,nd,md);
46
- SET_DATA_INDEX(p2,idx2,dtype,x);
46
+ CUMO_SET_DATA_INDEX(p2,idx2,dtype,x);
47
47
  c[nd]++;
48
48
  }
49
49
  } else {
50
50
  for (; i--;) {
51
- GET_DATA_INDEX(p1,idx1,dtype,x);
51
+ CUMO_GET_DATA_INDEX(p1,idx1,dtype,x);
52
52
  x = yield_map_with_index(x,c,a,nd,md);
53
- SET_DATA_STRIDE(p2,s2,dtype,x);
53
+ CUMO_SET_DATA_STRIDE(p2,s2,dtype,x);
54
54
  c[nd]++;
55
55
  }
56
56
  }
57
57
  } else {
58
58
  if (idx2) {
59
59
  for (; i--;) {
60
- GET_DATA_STRIDE(p1,s1,dtype,x);
60
+ CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
61
61
  x = yield_map_with_index(x,c,a,nd,md);
62
- SET_DATA_INDEX(p2,idx2,dtype,x);
62
+ CUMO_SET_DATA_INDEX(p2,idx2,dtype,x);
63
63
  c[nd]++;
64
64
  }
65
65
  } else {
66
66
  for (; i--;) {
67
- GET_DATA_STRIDE(p1,s1,dtype,x);
67
+ CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
68
68
  x = yield_map_with_index(x,c,a,nd,md);
69
- SET_DATA_STRIDE(p2,s2,dtype,x);
69
+ CUMO_SET_DATA_STRIDE(p2,s2,dtype,x);
70
70
  c[nd]++;
71
71
  }
72
72
  }
@@ -90,9 +90,9 @@ static void
90
90
  static VALUE
91
91
  <%=c_func(0)%>(VALUE self)
92
92
  {
93
- ndfunc_arg_in_t ain[1] = {{Qnil,0}};
94
- ndfunc_arg_out_t aout[1] = {{cT,0}};
95
- ndfunc_t ndf = {<%=c_iter%>, FULL_LOOP, 1,1, ain,aout};
93
+ cumo_ndfunc_arg_in_t ain[1] = {{Qnil,0}};
94
+ cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
95
+ cumo_ndfunc_t ndf = {<%=c_iter%>, CUMO_FULL_LOOP, 1,1, ain,aout};
96
96
 
97
- return na_ndloop_with_index(&ndf, 1, self);
97
+ return cumo_na_ndloop_with_index(&ndf, 1, self);
98
98
  }
@@ -1,17 +1,17 @@
1
1
  <% (is_float ? ["_ignan","_prnan"] : [""]).each do |j| %>
2
2
  static void
3
- <%=c_iter%><%=j%>(na_loop_t *const lp)
3
+ <%=c_iter%><%=j%>(cumo_na_loop_t *const lp)
4
4
  {
5
5
  size_t n;
6
6
  char *p1, *p2;
7
7
  dtype *buf;
8
8
 
9
- INIT_COUNTER(lp, n);
9
+ CUMO_INIT_COUNTER(lp, n);
10
10
  p1 = (lp->args[0]).ptr + (lp->args[0].iter[0]).pos;
11
11
  p2 = (lp->args[1]).ptr + (lp->args[1].iter[0]).pos;
12
12
  buf = (dtype*)p1;
13
13
 
14
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
14
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
15
15
  <%=type_name%>_qsort<%=j%>(buf, n, sizeof(dtype));
16
16
 
17
17
  <% if is_float %>
@@ -49,18 +49,18 @@ static VALUE
49
49
  <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
50
50
  {
51
51
  VALUE v, reduce;
52
- ndfunc_arg_in_t ain[2] = {{OVERWRITE,0},{sym_reduce,0}};
53
- ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
54
- ndfunc_t ndf = {0, NDF_HAS_LOOP|NDF_FLAT_REDUCE, 2,1, ain,aout};
52
+ cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{cumo_sym_reduce,0}};
53
+ cumo_ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
54
+ cumo_ndfunc_t ndf = {0, CUMO_NDF_HAS_LOOP|CUMO_NDF_FLAT_REDUCE, 2,1, ain,aout};
55
55
 
56
- self = na_copy(self); // as temporary buffer
56
+ self = cumo_na_copy(self); // as temporary buffer
57
57
  <% if is_float %>
58
58
  ndf.func = <%=c_iter%>_ignan;
59
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_prnan);
59
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_prnan);
60
60
  <% else %>
61
61
  ndf.func = <%=c_iter%>;
62
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
62
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
63
63
  <% end %>
64
- v = na_ndloop(&ndf, 2, self, reduce);
64
+ v = cumo_na_ndloop(&ndf, 2, self, reduce);
65
65
  return <%=type_name%>_extract(v);
66
66
  }
@@ -1,16 +1,16 @@
1
1
  <% (is_float ? ["","_nan"] : [""]).each do |j| %>
2
2
  static void
3
- <%=c_iter%><%=j%>(na_loop_t *const lp)
3
+ <%=c_iter%><%=j%>(cumo_na_loop_t *const lp)
4
4
  {
5
5
  size_t n;
6
6
  char *p1;
7
7
  ssize_t s1;
8
8
  dtype xmin,xmax;
9
9
 
10
- INIT_COUNTER(lp, n);
11
- INIT_PTR(lp, 0, p1, s1);
10
+ CUMO_INIT_COUNTER(lp, n);
11
+ CUMO_INIT_PTR(lp, 0, p1, s1);
12
12
 
13
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
13
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
14
14
  f_<%=name%><%=j%>(n,p1,s1,&xmin,&xmax);
15
15
 
16
16
  *(dtype*)(lp->args[1].ptr + lp->args[1].iter[0].pos) = xmin;
@@ -34,14 +34,14 @@ static VALUE
34
34
  <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
35
35
  {
36
36
  VALUE reduce;
37
- ndfunc_arg_in_t ain[2] = {{cT,0},{sym_reduce,0}};
38
- ndfunc_arg_out_t aout[2] = {{cT,0},{cT,0}};
39
- ndfunc_t ndf = {<%=c_iter%>, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT, 2,2, ain,aout};
37
+ cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cumo_sym_reduce,0}};
38
+ cumo_ndfunc_arg_out_t aout[2] = {{cT,0},{cT,0}};
39
+ cumo_ndfunc_t ndf = {<%=c_iter%>, CUMO_STRIDE_LOOP_NIP|CUMO_NDF_FLAT_REDUCE|CUMO_NDF_EXTRACT, 2,2, ain,aout};
40
40
 
41
41
  <% if is_float %>
42
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_nan);
42
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_nan);
43
43
  <% else %>
44
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
44
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
45
45
  <% end %>
46
- return na_ndloop(&ndf, 2, self, reduce);
46
+ return cumo_na_ndloop(&ndf, 2, self, reduce);
47
47
  }
@@ -6,10 +6,10 @@ static VALUE
6
6
  VALUE v;
7
7
  dtype *ptr;
8
8
 
9
- v = nary_new(cT, 0, NULL);
10
- ptr = (dtype*)na_get_pointer_for_write(v);
9
+ v = cumo_na_new(cT, 0, NULL);
10
+ ptr = (dtype*)cumo_na_get_pointer_for_write(v);
11
11
  <%="cumo_#{c_func(:nodef)}_kernel_launch"%>(ptr, x);
12
12
 
13
- na_release_lock(v);
13
+ cumo_na_release_lock(v);
14
14
  return v;
15
15
  }
@@ -1,10 +1,10 @@
1
1
  static void
2
- <%=c_iter%>(na_loop_t *const lp)
2
+ <%=c_iter%>(cumo_na_loop_t *const lp)
3
3
  {
4
4
  size_t i;
5
5
  dtype x, y, a;
6
6
 
7
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
7
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
8
8
  x = *(dtype*)(lp->args[0].ptr + lp->args[0].iter[0].pos);
9
9
  i = lp->narg - 2;
10
10
  y = *(dtype*)(lp->args[i].ptr + lp->args[i].iter[0].pos);
@@ -30,12 +30,12 @@ static VALUE
30
30
  int argc, i;
31
31
  VALUE *argv;
32
32
  volatile VALUE v, a;
33
- ndfunc_arg_out_t aout[1] = {{cT,0}};
34
- ndfunc_t ndf = { <%=c_iter%>, NO_LOOP, 0, 1, 0, aout };
33
+ cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
34
+ cumo_ndfunc_t ndf = { <%=c_iter%>, CUMO_NO_LOOP, 0, 1, 0, aout };
35
35
 
36
36
  argc = RARRAY_LEN(args);
37
37
  ndf.nin = argc+1;
38
- ndf.ain = ALLOCA_N(ndfunc_arg_in_t,argc+1);
38
+ ndf.ain = ALLOCA_N(cumo_ndfunc_arg_in_t,argc+1);
39
39
  for (i=0; i<argc+1; i++) {
40
40
  ndf.ain[i].type = cT;
41
41
  }
@@ -45,6 +45,6 @@ static VALUE
45
45
  argv[i+1] = RARRAY_PTR(args)[i];
46
46
  }
47
47
  a = rb_ary_new4(argc+1, argv);
48
- v = na_ndloop2(&ndf, a);
48
+ v = cumo_na_ndloop2(&ndf, a);
49
49
  return <%=type_name%>_extract(v);
50
50
  }
@@ -4,24 +4,24 @@ void <%="cumo_#{c_iter}_int32_kernel_launch"%>(char *p1, char *p2, char *p3, ssi
4
4
  <% end %>
5
5
 
6
6
  static void
7
- <%=c_iter%>(na_loop_t *const lp)
7
+ <%=c_iter%>(cumo_na_loop_t *const lp)
8
8
  {
9
9
  size_t i;
10
10
  char *p1, *p2, *p3;
11
11
  ssize_t s1, s2, s3;
12
- INIT_COUNTER(lp, i);
13
- INIT_PTR(lp, 0, p1, s1);
14
- INIT_PTR(lp, 1, p2, s2);
15
- INIT_PTR(lp, 2, p3, s3);
12
+ CUMO_INIT_COUNTER(lp, i);
13
+ CUMO_INIT_PTR(lp, 0, p1, s1);
14
+ CUMO_INIT_PTR(lp, 1, p2, s2);
15
+ CUMO_INIT_PTR(lp, 2, p3, s3);
16
16
  <% if type_name == 'robject' %>
17
17
  {
18
18
  dtype x, y;
19
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
19
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
20
20
  for (; i--;) {
21
- GET_DATA_STRIDE(p1,s1,dtype,x);
22
- GET_DATA_STRIDE(p2,s2,dtype,y);
21
+ CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
22
+ CUMO_GET_DATA_STRIDE(p2,s2,dtype,y);
23
23
  x = m_pow(x,y);
24
- SET_DATA_STRIDE(p3,s3,dtype,x);
24
+ CUMO_SET_DATA_STRIDE(p3,s3,dtype,x);
25
25
  }
26
26
  }
27
27
  <% else %>
@@ -30,25 +30,25 @@ static void
30
30
  }
31
31
 
32
32
  static void
33
- <%=c_iter%>_int32(na_loop_t *const lp)
33
+ <%=c_iter%>_int32(cumo_na_loop_t *const lp)
34
34
  {
35
35
  size_t i;
36
36
  char *p1, *p2, *p3;
37
37
  ssize_t s1, s2, s3;
38
- INIT_COUNTER(lp, i);
39
- INIT_PTR(lp, 0, p1, s1);
40
- INIT_PTR(lp, 1, p2, s2);
41
- INIT_PTR(lp, 2, p3, s3);
38
+ CUMO_INIT_COUNTER(lp, i);
39
+ CUMO_INIT_PTR(lp, 0, p1, s1);
40
+ CUMO_INIT_PTR(lp, 1, p2, s2);
41
+ CUMO_INIT_PTR(lp, 2, p3, s3);
42
42
  <% if type_name == 'robject' %>
43
43
  {
44
44
  dtype x;
45
45
  int32_t y;
46
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_int32", "<%=type_name%>");
46
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_int32", "<%=type_name%>");
47
47
  for (; i--;) {
48
- GET_DATA_STRIDE(p1,s1,dtype,x);
49
- GET_DATA_STRIDE(p2,s2,int32_t,y);
48
+ CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
49
+ CUMO_GET_DATA_STRIDE(p2,s2,int32_t,y);
50
50
  x = m_pow_int(x,y);
51
- SET_DATA_STRIDE(p3,s3,dtype,x);
51
+ CUMO_SET_DATA_STRIDE(p3,s3,dtype,x);
52
52
  }
53
53
  }
54
54
  <% else %>
@@ -59,17 +59,17 @@ static void
59
59
  static VALUE
60
60
  <%=c_func%>_self(VALUE self, VALUE other)
61
61
  {
62
- ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
63
- ndfunc_arg_in_t ain_i[2] = {{cT,0},{cumo_cInt32,0}};
64
- ndfunc_arg_out_t aout[1] = {{cT,0}};
65
- ndfunc_t ndf = { <%=c_iter%>, STRIDE_LOOP, 2, 1, ain, aout };
66
- ndfunc_t ndf_i = { <%=c_iter%>_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
62
+ cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
63
+ cumo_ndfunc_arg_in_t ain_i[2] = {{cT,0},{cumo_cInt32,0}};
64
+ cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
65
+ cumo_ndfunc_t ndf = { <%=c_iter%>, CUMO_STRIDE_LOOP, 2, 1, ain, aout };
66
+ cumo_ndfunc_t ndf_i = { <%=c_iter%>_int32, CUMO_STRIDE_LOOP, 2, 1, ain_i, aout };
67
67
 
68
68
  // fixme : use na.integer?
69
69
  if (FIXNUM_P(other) || rb_obj_is_kind_of(other,cumo_cInt32)) {
70
- return na_ndloop(&ndf_i, 2, self, other);
70
+ return cumo_na_ndloop(&ndf_i, 2, self, other);
71
71
  } else {
72
- return na_ndloop(&ndf, 2, self, other);
72
+ return cumo_na_ndloop(&ndf, 2, self, other);
73
73
  }
74
74
  }
75
75
 
@@ -86,12 +86,12 @@ static VALUE
86
86
  return <%=c_func%>_self(self,other);
87
87
  <% else %>
88
88
  VALUE klass, v;
89
- klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
89
+ klass = cumo_na_upcast(rb_obj_class(self),rb_obj_class(other));
90
90
  if (klass==cT) {
91
91
  return <%=c_func%>_self(self,other);
92
92
  } else {
93
- v = rb_funcall(klass, id_cast, 1, self);
94
- return rb_funcall(v, id_pow, 1, other);
93
+ v = rb_funcall(klass, cumo_id_cast, 1, self);
94
+ return rb_funcall(v, cumo_id_pow, 1, other);
95
95
  }
96
96
  <% end %>
97
97
  }
@@ -15,15 +15,15 @@ __global__ void <%="cumo_#{c_iter}_int32_kernel"%>(char *p1, char *p2, char *p3,
15
15
 
16
16
  void <%="cumo_#{c_iter}_kernel_launch"%>(char *p1, char *p2, char *p3, ssize_t s1, ssize_t s2, ssize_t s3, uint64_t n)
17
17
  {
18
- size_t gridDim = get_gridDim(n);
19
- size_t blockDim = get_blockDim(n);
20
- <%="cumo_#{c_iter}_kernel"%><<<gridDim, blockDim>>>(p1,p2,p3,s1,s2,s3,n);
18
+ size_t grid_dim = cumo_get_grid_dim(n);
19
+ size_t block_dim = cumo_get_block_dim(n);
20
+ <%="cumo_#{c_iter}_kernel"%><<<grid_dim, block_dim>>>(p1,p2,p3,s1,s2,s3,n);
21
21
  }
22
22
 
23
23
  void <%="cumo_#{c_iter}_int32_kernel_launch"%>(char *p1, char *p2, char *p3, ssize_t s1, ssize_t s2, ssize_t s3, uint64_t n)
24
24
  {
25
- size_t gridDim = get_gridDim(n);
26
- size_t blockDim = get_blockDim(n);
27
- <%="cumo_#{c_iter}_int32_kernel"%><<<gridDim, blockDim>>>(p1,p2,p3,s1,s2,s3,n);
25
+ size_t grid_dim = cumo_get_grid_dim(n);
26
+ size_t block_dim = cumo_get_block_dim(n);
27
+ <%="cumo_#{c_iter}_int32_kernel"%><<<grid_dim, block_dim>>>(p1,p2,p3,s1,s2,s3,n);
28
28
  }
29
29
  <% end %>
@@ -72,7 +72,7 @@ typedef struct {
72
72
  } rand_opt_t;
73
73
 
74
74
  static void
75
- <%=c_iter%>(na_loop_t *const lp)
75
+ <%=c_iter%>(cumo_na_loop_t *const lp)
76
76
  {
77
77
  size_t i;
78
78
  char *p1;
@@ -84,25 +84,25 @@ static void
84
84
  <%=rand_type%> max;
85
85
  <%=shift_def%>
86
86
 
87
- INIT_COUNTER(lp, i);
88
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
87
+ CUMO_INIT_COUNTER(lp, i);
88
+ CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
89
89
  g = (rand_opt_t*)(lp->opt_ptr);
90
90
  low = g->low;
91
91
  max = g->max;
92
92
  <%=shift_set%>
93
93
 
94
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
95
- SHOW_SYNCHRONIZE_WARNING_ONCE("<%=name%>", "<%=type_name%>");
94
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
95
+ CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("<%=name%>", "<%=type_name%>");
96
96
  cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
97
97
  if (idx1) {
98
98
  for (; i--;) {
99
99
  x = m_add(<%=m_rand%>,low);
100
- SET_DATA_INDEX(p1,idx1,dtype,x);
100
+ CUMO_SET_DATA_INDEX(p1,idx1,dtype,x);
101
101
  }
102
102
  } else {
103
103
  for (; i--;) {
104
104
  x = m_add(<%=m_rand%>,low);
105
- SET_DATA_STRIDE(p1,s1,dtype,x);
105
+ CUMO_SET_DATA_STRIDE(p1,s1,dtype,x);
106
106
  }
107
107
  }
108
108
  }
@@ -131,8 +131,8 @@ static VALUE
131
131
  rand_opt_t g;
132
132
  VALUE v1=Qnil, v2=Qnil;
133
133
  dtype high;
134
- ndfunc_arg_in_t ain[1] = {{OVERWRITE,0}};
135
- ndfunc_t ndf = {<%=c_iter%>, FULL_LOOP, 1,0, ain,0};
134
+ cumo_ndfunc_arg_in_t ain[1] = {{CUMO_OVERWRITE,0}};
135
+ cumo_ndfunc_t ndf = {<%=c_iter%>, CUMO_FULL_LOOP, 1,0, ain,0};
136
136
 
137
137
  <% if is_int && !is_object %>
138
138
  rb_scan_args(argc, args, "11", &v1, &v2);
@@ -163,6 +163,6 @@ static VALUE
163
163
  rb_raise(rb_eArgError,"high must be larger than low");
164
164
  }
165
165
  <% end %>
166
- na_ndloop3(&ndf, &g, 1, self);
166
+ cumo_na_ndloop3(&ndf, &g, 1, self);
167
167
  return self;
168
168
  }