cumo 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/3rd_party/LICENSE.txt +60 -0
  4. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
  5. data/LICENSE.txt +1 -62
  6. data/README.md +33 -29
  7. data/bench/cumo_bench.rb +47 -25
  8. data/bench/numo_bench.rb +27 -25
  9. data/docs/src-tree.md +16 -0
  10. data/ext/cumo/cuda/cublas.c +69 -219
  11. data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
  12. data/ext/cumo/cuda/runtime.c +2 -14
  13. data/ext/cumo/cumo.c +16 -16
  14. data/ext/cumo/include/cumo.h +2 -2
  15. data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
  16. data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
  17. data/ext/cumo/include/cumo/indexer.h +46 -63
  18. data/ext/cumo/include/cumo/intern.h +58 -112
  19. data/ext/cumo/include/cumo/narray.h +214 -185
  20. data/ext/cumo/include/cumo/narray_kernel.h +66 -37
  21. data/ext/cumo/include/cumo/ndloop.h +42 -42
  22. data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
  23. data/ext/cumo/include/cumo/template.h +56 -51
  24. data/ext/cumo/include/cumo/template_kernel.h +31 -31
  25. data/ext/cumo/include/cumo/types/bit.h +3 -3
  26. data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
  27. data/ext/cumo/include/cumo/types/complex.h +126 -126
  28. data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
  29. data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
  30. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
  31. data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
  32. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
  33. data/ext/cumo/include/cumo/types/int_macro.h +1 -1
  34. data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
  35. data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
  36. data/ext/cumo/include/cumo/types/scomplex.h +5 -5
  37. data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
  38. data/ext/cumo/narray/array.c +143 -143
  39. data/ext/cumo/narray/data.c +184 -184
  40. data/ext/cumo/narray/gen/cogen.rb +5 -2
  41. data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
  42. data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
  43. data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
  44. data/ext/cumo/narray/gen/erbln.rb +132 -0
  45. data/ext/cumo/narray/gen/erbpp2.rb +18 -13
  46. data/ext/cumo/narray/gen/narray_def.rb +3 -3
  47. data/ext/cumo/narray/gen/spec.rb +2 -2
  48. data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
  49. data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
  50. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
  51. data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
  52. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
  53. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
  54. data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
  55. data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
  56. data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
  57. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
  58. data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
  59. data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
  60. data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
  61. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
  62. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
  63. data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
  64. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
  65. data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
  66. data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
  67. data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
  68. data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
  69. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
  70. data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
  71. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
  72. data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
  73. data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
  74. data/ext/cumo/narray/gen/tmpl/each.c +9 -9
  75. data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
  76. data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
  77. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
  78. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
  79. data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
  80. data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
  81. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
  82. data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
  83. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
  84. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
  85. data/ext/cumo/narray/gen/tmpl/format.c +11 -11
  86. data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
  87. data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
  88. data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
  89. data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
  91. data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
  92. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
  93. data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
  94. data/ext/cumo/narray/gen/tmpl/median.c +10 -10
  95. data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
  96. data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
  97. data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
  98. data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
  99. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
  100. data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
  101. data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
  102. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
  103. data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
  104. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
  105. data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
  106. data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
  107. data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
  108. data/ext/cumo/narray/gen/tmpl/store.c +6 -6
  109. data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
  110. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
  111. data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
  112. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
  113. data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
  114. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
  115. data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
  116. data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
  117. data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
  118. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
  119. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
  120. data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
  121. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
  122. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
  123. data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
  124. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
  125. data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
  126. data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
  127. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
  128. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
  129. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
  130. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
  131. data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
  132. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
  133. data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
  134. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
  135. data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
  136. data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
  137. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
  138. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
  139. data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
  140. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
  141. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
  142. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
  143. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
  144. data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
  145. data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
  146. data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
  147. data/ext/cumo/narray/index.c +213 -213
  148. data/ext/cumo/narray/math.c +27 -27
  149. data/ext/cumo/narray/narray.c +484 -484
  150. data/ext/cumo/narray/ndloop.c +259 -258
  151. data/ext/cumo/narray/rand.c +3 -3
  152. data/ext/cumo/narray/step.c +70 -70
  153. data/ext/cumo/narray/struct.c +139 -139
  154. metadata +6 -7
  155. data/ext/cumo/include/cumo/intern_fwd.h +0 -38
  156. data/lib/erbpp.rb +0 -294
  157. data/lib/erbpp/line_number.rb +0 -137
  158. data/lib/erbpp/narray_def.rb +0 -381
@@ -52,9 +52,9 @@ void <%="cumo_#{type_name}_mulsum#{nan}_reduce_kernel_launch"%>(char *p1, char *
52
52
 
53
53
  void <%="cumo_#{type_name}_mulsum#{nan}_kernel_launch"%>(char *p1, char *p2, char *p3, ssize_t s1, ssize_t s2, ssize_t s3, uint64_t n)
54
54
  {
55
- size_t gridDim = get_gridDim(n);
56
- size_t blockDim = get_blockDim(n);
57
- <%="cumo_#{type_name}_mulsum#{nan}_kernel"%><<<gridDim, blockDim>>>(p1,p2,p3,s1,s2,s3,n);
55
+ size_t grid_dim = cumo_get_grid_dim(n);
56
+ size_t block_dim = cumo_get_block_dim(n);
57
+ <%="cumo_#{type_name}_mulsum#{nan}_kernel"%><<<grid_dim, block_dim>>>(p1,p2,p3,s1,s2,s3,n);
58
58
  }
59
59
  //<% end %>
60
60
  <% end %>
@@ -2,12 +2,12 @@
2
2
 
3
3
  <% [64,32].each do |i| %>
4
4
  <% unless type_name == 'robject' %>
5
- void cumo_<%=type_name%>_<%=name%><%=nan%>_int<%=i%>_kernel_launch(na_reduction_arg_t* arg);
5
+ void cumo_<%=type_name%>_<%=name%><%=nan%>_int<%=i%>_kernel_launch(cumo_na_reduction_arg_t* arg);
6
6
  <% end %>
7
7
 
8
8
  #define idx_t int<%=i%>_t
9
9
  static void
10
- <%=c_iter%>_index<%=i%><%=nan%>(na_loop_t *const lp)
10
+ <%=c_iter%>_index<%=i%><%=nan%>(cumo_na_loop_t *const lp)
11
11
  {
12
12
  // TODO(sonots): Support nan in CUDA
13
13
  <% if type_name == 'robject' || nan == '_nan' %>
@@ -16,18 +16,18 @@ static void
16
16
  char *d_ptr, *i_ptr, *o_ptr;
17
17
  ssize_t d_step, i_step;
18
18
 
19
- INIT_COUNTER(lp, n);
20
- INIT_PTR(lp, 0, d_ptr, d_step);
21
- INIT_PTR(lp, 1, i_ptr, i_step);
22
- o_ptr = NDL_PTR(lp,2);
19
+ CUMO_INIT_COUNTER(lp, n);
20
+ CUMO_INIT_PTR(lp, 0, d_ptr, d_step);
21
+ CUMO_INIT_PTR(lp, 1, i_ptr, i_step);
22
+ o_ptr = CUMO_NDL_PTR(lp,2);
23
23
 
24
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=nan%>", "<%=type_name%>");
24
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=nan%>", "<%=type_name%>");
25
25
  idx = f_<%=name%><%=nan%>(n,d_ptr,d_step);
26
26
  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
27
27
  }
28
28
  <% else %>
29
29
  {
30
- na_reduction_arg_t arg = na_make_reduction_arg(lp);
30
+ cumo_na_reduction_arg_t arg = cumo_na_make_reduction_arg(lp);
31
31
  cumo_<%=type_name%>_<%=name%><%=nan%>_int<%=i%>_kernel_launch(&arg);
32
32
  }
33
33
  <% end %>
@@ -53,67 +53,67 @@ static VALUE
53
53
  {
54
54
  //<% if type_name == 'robject' %>
55
55
  {
56
- narray_t *na;
56
+ cumo_narray_t *na;
57
57
  VALUE idx, reduce;
58
- ndfunc_arg_in_t ain[3] = {{Qnil,0},{Qnil,0},{sym_reduce,0}};
59
- ndfunc_arg_out_t aout[1] = {{0,0,0}};
60
- ndfunc_t ndf = {0, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT, 3,1, ain,aout};
58
+ cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{Qnil,0},{cumo_sym_reduce,0}};
59
+ cumo_ndfunc_arg_out_t aout[1] = {{0,0,0}};
60
+ cumo_ndfunc_t ndf = {0, CUMO_STRIDE_LOOP_NIP|CUMO_NDF_FLAT_REDUCE|CUMO_NDF_EXTRACT, 3,1, ain,aout};
61
61
 
62
- GetNArray(self,na);
62
+ CumoGetNArray(self,na);
63
63
  if (na->ndim==0) {
64
64
  return INT2FIX(0);
65
65
  }
66
66
  if (na->size > (~(u_int32_t)0)) {
67
67
  aout[0].type = cumo_cInt64;
68
- idx = nary_new(cumo_cInt64, na->ndim, na->shape);
68
+ idx = cumo_na_new(cumo_cInt64, na->ndim, na->shape);
69
69
  ndf.func = <%=c_iter%>_index64;
70
70
  <% if is_float %>
71
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index64_nan);
71
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index64_nan);
72
72
  <% else %>
73
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
73
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
74
74
  <% end %>
75
75
  } else {
76
76
  aout[0].type = cumo_cInt32;
77
- idx = nary_new(cumo_cInt32, na->ndim, na->shape);
77
+ idx = cumo_na_new(cumo_cInt32, na->ndim, na->shape);
78
78
  ndf.func = <%=c_iter%>_index32;
79
79
  <% if is_float %>
80
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index32_nan);
80
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index32_nan);
81
81
  <% else %>
82
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
82
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
83
83
  <% end %>
84
84
  }
85
85
  rb_funcall(idx, rb_intern("seq"), 0);
86
86
 
87
- return na_ndloop(&ndf, 3, self, idx, reduce);
87
+ return cumo_na_ndloop(&ndf, 3, self, idx, reduce);
88
88
  }
89
89
  <% else %>
90
90
  {
91
- narray_t *na;
91
+ cumo_narray_t *na;
92
92
  VALUE reduce;
93
- ndfunc_arg_in_t ain[2] = {{Qnil,0},{sym_reduce,0}};
94
- ndfunc_arg_out_t aout[1] = {{0,0,0}};
95
- ndfunc_t ndf = {0, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT|NDF_INDEXER_LOOP, 2,1, ain,aout};
93
+ cumo_ndfunc_arg_in_t ain[2] = {{Qnil,0},{cumo_sym_reduce,0}};
94
+ cumo_ndfunc_arg_out_t aout[1] = {{0,0,0}};
95
+ cumo_ndfunc_t ndf = {0, CUMO_STRIDE_LOOP_NIP|CUMO_NDF_FLAT_REDUCE|CUMO_NDF_EXTRACT|CUMO_NDF_INDEXER_LOOP, 2,1, ain,aout};
96
96
 
97
- GetNArray(self,na);
97
+ CumoGetNArray(self,na);
98
98
  if (na->size > (~(u_int32_t)0)) {
99
99
  aout[0].type = cumo_cInt64;
100
100
  ndf.func = <%=c_iter%>_index64;
101
101
  <% if is_float %>
102
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index64_nan);
102
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index64_nan);
103
103
  <% else %>
104
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
104
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
105
105
  <% end %>
106
106
  } else {
107
107
  aout[0].type = cumo_cInt32;
108
108
  ndf.func = <%=c_iter%>_index32;
109
109
  <% if is_float %>
110
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index32_nan);
110
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index32_nan);
111
111
  <% else %>
112
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
112
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
113
113
  <% end %>
114
114
  }
115
115
 
116
- return na_ndloop(&ndf, 2, self, reduce);
116
+ return cumo_na_ndloop(&ndf, 2, self, reduce);
117
117
  }
118
118
  <% end %>
119
119
  }
@@ -49,12 +49,12 @@ extern "C" {
49
49
  #endif
50
50
  #endif
51
51
 
52
- void cumo_<%=type_name%>_min_index_int<%=i%>_kernel_launch(na_reduction_arg_t* arg)
52
+ void cumo_<%=type_name%>_min_index_int<%=i%>_kernel_launch(cumo_na_reduction_arg_t* arg)
53
53
  {
54
54
  cumo_reduce<dtype, idx_t, cumo_<%=type_name%>_min_index_int<%=i%>_impl>(*arg, cumo_<%=type_name%>_min_index_int<%=i%>_impl{});
55
55
  }
56
56
 
57
- void cumo_<%=type_name%>_max_index_int<%=i%>_kernel_launch(na_reduction_arg_t* arg)
57
+ void cumo_<%=type_name%>_max_index_int<%=i%>_kernel_launch(cumo_na_reduction_arg_t* arg)
58
58
  {
59
59
  cumo_reduce<dtype, idx_t, cumo_<%=type_name%>_max_index_int<%=i%>_impl>(*arg, cumo_<%=type_name%>_max_index_int<%=i%>_impl{});
60
60
  }
@@ -2,11 +2,11 @@
2
2
  <% $cumo_narray_gen_tmpl_accum_kernel_included = 1 %>
3
3
 
4
4
  <% if type_name.include?('int') %>
5
- <% f = File.join(File.dirname(__FILE__), 'real_accum_kernel.cu'); ERB.new(File.read(f)).tap {|erb| erb.filename = f }.result(binding) %>
5
+ <%= load_erb("real_accum").result(binding) %>
6
6
  <% elsif type_name.include?('float') %>
7
- <%= f = File.join(File.dirname(__FILE__), 'float_accum_kernel.cu'); ERB.new(File.read(f)).tap {|erb| erb.filename = f }.result(binding) %>
7
+ <%= load_erb("float_accum").result(binding) %>
8
8
  <% elsif type_name.include?('complex') %>
9
- <%= f = File.join(File.dirname(__FILE__), 'complex_accum_kernel.cu'); ERB.new(File.read(f)).tap {|erb| erb.filename = f }.result(binding) %>
9
+ <%= load_erb("complex_accum").result(binding) %>
10
10
  <% end %>
11
11
 
12
12
  <% end %>
@@ -1,14 +1,14 @@
1
1
  static size_t
2
2
  <%=type_name%>_memsize(const void* ptr)
3
3
  {
4
- size_t size = sizeof(narray_data_t);
5
- const narray_data_t *na = (const narray_data_t*)ptr;
4
+ size_t size = sizeof(cumo_narray_data_t);
5
+ const cumo_narray_data_t *na = (const cumo_narray_data_t*)ptr;
6
6
 
7
- assert(na->base.type == NARRAY_DATA_T);
7
+ assert(na->base.type == CUMO_NARRAY_DATA_T);
8
8
 
9
9
  if (na->ptr != NULL) {
10
10
  <% if is_bit %>
11
- size += ((na->base.size-1)/8/sizeof(BIT_DIGIT)+1)*sizeof(BIT_DIGIT);
11
+ size += ((na->base.size-1)/8/sizeof(CUMO_BIT_DIGIT)+1)*sizeof(CUMO_BIT_DIGIT);
12
12
  <% else %>
13
13
  size += na->base.size * sizeof(dtype);
14
14
  <% end %>
@@ -24,9 +24,9 @@ static size_t
24
24
  static void
25
25
  <%=type_name%>_free(void* ptr)
26
26
  {
27
- narray_data_t *na = (narray_data_t*)ptr;
27
+ cumo_narray_data_t *na = (cumo_narray_data_t*)ptr;
28
28
 
29
- assert(na->base.type == NARRAY_DATA_T);
29
+ assert(na->base.type == CUMO_NARRAY_DATA_T);
30
30
 
31
31
  if (na->ptr != NULL) {
32
32
  cumo_cuda_runtime_free(na->ptr);
@@ -41,7 +41,7 @@ static void
41
41
  xfree(na);
42
42
  }
43
43
 
44
- static narray_type_info_t <%=type_name%>_info = {
44
+ static cumo_narray_type_info_t <%=type_name%>_info = {
45
45
  <% if is_bit %>
46
46
  1, // element_bits
47
47
  0, // element_bytes
@@ -59,7 +59,7 @@ static void
59
59
  {
60
60
  size_t n, i;
61
61
  VALUE *a;
62
- narray_data_t *na = ptr;
62
+ cumo_narray_data_t *na = ptr;
63
63
 
64
64
  if (na->ptr) {
65
65
  a = (VALUE*)(na->ptr);
@@ -73,7 +73,7 @@ static void
73
73
  static const rb_data_type_t <%=type_name%>_data_type = {
74
74
  "<%=full_class_name%>",
75
75
  {<%=type_name%>_gc_mark, <%=type_name%>_free, <%=type_name%>_memsize,},
76
- &na_data_type,
76
+ &cumo_na_data_type,
77
77
  &<%=type_name%>_info,
78
78
  0, // flags
79
79
  };
@@ -83,7 +83,7 @@ static const rb_data_type_t <%=type_name%>_data_type = {
83
83
  static const rb_data_type_t <%=type_name%>_data_type = {
84
84
  "<%=full_class_name%>",
85
85
  {0, <%=type_name%>_free, <%=type_name%>_memsize,},
86
- &na_data_type,
86
+ &cumo_na_data_type,
87
87
  &<%=type_name%>_info,
88
88
  0, // flags
89
89
  };
@@ -93,12 +93,12 @@ static const rb_data_type_t <%=type_name%>_data_type = {
93
93
  static VALUE
94
94
  <%=c_func(0)%>(VALUE klass)
95
95
  {
96
- narray_data_t *na = ALLOC(narray_data_t);
96
+ cumo_narray_data_t *na = ALLOC(cumo_narray_data_t);
97
97
 
98
98
  na->base.ndim = 0;
99
- na->base.type = NARRAY_DATA_T;
100
- na->base.flag[0] = NA_FL0_INIT;
101
- na->base.flag[1] = NA_FL1_INIT;
99
+ na->base.type = CUMO_NARRAY_DATA_T;
100
+ na->base.flag[0] = CUMO_NA_FL0_INIT;
101
+ na->base.flag[1] = CUMO_NA_FL1_INIT;
102
102
  na->base.size = 0;
103
103
  na->base.shape = NULL;
104
104
  na->base.reduce = INT2FIX(0);
@@ -1,14 +1,14 @@
1
1
  static VALUE
2
2
  <%=c_func(0)%>(VALUE self)
3
3
  {
4
- narray_t *na;
4
+ cumo_narray_t *na;
5
5
  void *ptr;
6
6
 
7
- GetNArray(self,na);
7
+ CumoGetNArray(self,na);
8
8
 
9
- switch(NA_TYPE(na)) {
10
- case NARRAY_DATA_T:
11
- ptr = NA_DATA_PTR(na);
9
+ switch(CUMO_NA_TYPE(na)) {
10
+ case CUMO_NARRAY_DATA_T:
11
+ ptr = CUMO_NA_DATA_PTR(na);
12
12
  if (na->size > 0 && ptr == NULL) {
13
13
  <% if is_object %>
14
14
  ptr = xmalloc(sizeof(dtype) * na->size);
@@ -21,17 +21,17 @@ static VALUE
21
21
  <% else %>
22
22
  ptr = cumo_cuda_runtime_malloc(sizeof(dtype) * na->size);
23
23
  <% end %>
24
- NA_DATA_PTR(na) = ptr;
24
+ CUMO_NA_DATA_PTR(na) = ptr;
25
25
  }
26
26
  break;
27
- case NARRAY_VIEW_T:
28
- rb_funcall(NA_VIEW_DATA(na), rb_intern("allocate"), 0);
27
+ case CUMO_NARRAY_VIEW_T:
28
+ rb_funcall(CUMO_NA_VIEW_DATA(na), rb_intern("allocate"), 0);
29
29
  break;
30
- case NARRAY_FILEMAP_T:
31
- //ptr = ((narray_filemap_t*)na)->ptr;
30
+ case CUMO_NARRAY_FILEMAP_T:
31
+ //ptr = ((cumo_narray_filemap_t*)na)->ptr;
32
32
  // to be implemented
33
33
  default:
34
- rb_bug("invalid narray type : %d",NA_TYPE(na));
34
+ rb_bug("invalid narray type : %d",CUMO_NA_TYPE(na));
35
35
  }
36
36
  return self;
37
37
  }
@@ -60,7 +60,7 @@ static VALUE
60
60
  int result_nd;
61
61
  size_t pos;
62
62
 
63
- result_nd = na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
64
- return na_aref_main(argc, argv, self, 0, result_nd, pos);
63
+ result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
64
+ return cumo_na_aref_main(argc, argv, self, 0, result_nd, pos);
65
65
  }
66
66
  }
@@ -38,12 +38,12 @@ static VALUE
38
38
  size_t pos;
39
39
  char *ptr;
40
40
 
41
- result_nd = na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
41
+ result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
42
42
  if (result_nd) {
43
- return na_aref_main(argc, argv, self, 0, result_nd, pos);
43
+ return cumo_na_aref_main(argc, argv, self, 0, result_nd, pos);
44
44
  } else {
45
- ptr = na_get_pointer_for_read(self) + pos;
46
- SHOW_SYNCHRONIZE_WARNING_ONCE("<%=name%>", "<%=type_name%>");
45
+ ptr = cumo_na_get_pointer_for_read(self) + pos;
46
+ CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("<%=name%>", "<%=type_name%>");
47
47
  cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
48
48
  return m_extract(ptr);
49
49
  }
@@ -48,8 +48,8 @@ static VALUE
48
48
  if (argc==0) {
49
49
  <%=c_func.sub(/_aset/,"_store")%>(self, argv[argc]);
50
50
  } else {
51
- nd = na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
52
- a = na_aref_main(argc, argv, self, 0, nd, pos);
51
+ nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
52
+ a = cumo_na_aref_main(argc, argv, self, 0, nd, pos);
53
53
  <%=c_func.sub(/_aset/,"_store")%>(a, argv[argc]);
54
54
  }
55
55
  return argv[argc];
@@ -12,11 +12,11 @@
12
12
  //<% end %>
13
13
 
14
14
  <% unless type_name == 'robject' %>
15
- void <%="cumo_#{c_iter}_kernel_launch"%>(na_iarray_t* a1, na_iarray_t* a2, na_iarray_t* a3, na_indexer_t* indexer);
15
+ void <%="cumo_#{c_iter}_kernel_launch"%>(cumo_na_iarray_t* a1, cumo_na_iarray_t* a2, cumo_na_iarray_t* a3, cumo_na_indexer_t* indexer);
16
16
  <% end %>
17
17
 
18
18
  static void
19
- <%=c_iter%>(na_loop_t *const lp)
19
+ <%=c_iter%>(cumo_na_loop_t *const lp)
20
20
  {
21
21
  <% if type_name == 'robject' %>
22
22
  {
@@ -24,16 +24,16 @@ static void
24
24
  char *p1, *p2, *p3;
25
25
  ssize_t s1, s2, s3;
26
26
 
27
- INIT_COUNTER(lp, n);
28
- INIT_PTR(lp, 0, p1, s1);
29
- INIT_PTR(lp, 1, p2, s2);
30
- INIT_PTR(lp, 2, p3, s3);
27
+ CUMO_INIT_COUNTER(lp, n);
28
+ CUMO_INIT_PTR(lp, 0, p1, s1);
29
+ CUMO_INIT_PTR(lp, 1, p2, s2);
30
+ CUMO_INIT_PTR(lp, 2, p3, s3);
31
31
 
32
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
32
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
33
33
  //<% if need_align %>
34
- if (is_aligned(p1,sizeof(dtype)) &&
35
- is_aligned(p2,sizeof(dtype)) &&
36
- is_aligned(p3,sizeof(dtype)) ) {
34
+ if (cumo_is_aligned(p1,sizeof(dtype)) &&
35
+ cumo_is_aligned(p2,sizeof(dtype)) &&
36
+ cumo_is_aligned(p3,sizeof(dtype)) ) {
37
37
 
38
38
  if (s1 == sizeof(dtype) &&
39
39
  s2 == sizeof(dtype) &&
@@ -52,9 +52,9 @@ static void
52
52
  }
53
53
  return;
54
54
  }
55
- if (is_aligned_step(s1,sizeof(dtype)) &&
56
- is_aligned_step(s2,sizeof(dtype)) &&
57
- is_aligned_step(s3,sizeof(dtype)) ) {
55
+ if (cumo_is_aligned_step(s1,sizeof(dtype)) &&
56
+ cumo_is_aligned_step(s2,sizeof(dtype)) &&
57
+ cumo_is_aligned_step(s3,sizeof(dtype)) ) {
58
58
  //<% end %>
59
59
 
60
60
  if (s2 == 0){ // Broadcasting from scalar value.
@@ -102,20 +102,20 @@ static void
102
102
  }
103
103
  for (i=0; i<n; i++) {
104
104
  dtype x, y, z;
105
- GET_DATA_STRIDE(p1,s1,dtype,x);
106
- GET_DATA_STRIDE(p2,s2,dtype,y);
105
+ CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
106
+ CUMO_GET_DATA_STRIDE(p2,s2,dtype,y);
107
107
  check_intdivzero(y);
108
108
  z = m_<%=name%>(x,y);
109
- SET_DATA_STRIDE(p3,s3,dtype,z);
109
+ CUMO_SET_DATA_STRIDE(p3,s3,dtype,z);
110
110
  }
111
111
  //<% end %>
112
112
  }
113
113
  <% else %>
114
114
  {
115
- na_iarray_t a1 = na_make_iarray(&lp->args[0]);
116
- na_iarray_t a2 = na_make_iarray(&lp->args[1]);
117
- na_iarray_t a3 = na_make_iarray(&lp->args[2]);
118
- na_indexer_t indexer = na_make_indexer(&lp->args[0]);
115
+ cumo_na_iarray_t a1 = cumo_na_make_iarray(&lp->args[0]);
116
+ cumo_na_iarray_t a2 = cumo_na_make_iarray(&lp->args[1]);
117
+ cumo_na_iarray_t a3 = cumo_na_make_iarray(&lp->args[2]);
118
+ cumo_na_indexer_t indexer = cumo_na_make_indexer(&lp->args[0]);
119
119
 
120
120
  <%="cumo_#{c_iter}_kernel_launch"%>(&a1,&a2,&a3,&indexer);
121
121
  }
@@ -126,15 +126,15 @@ static void
126
126
  static VALUE
127
127
  <%=c_func%>_self(VALUE self, VALUE other)
128
128
  {
129
- ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
130
- ndfunc_arg_out_t aout[1] = {{cT,0}};
129
+ cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
130
+ cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
131
131
  <% if type_name == 'robject' %>
132
- ndfunc_t ndf = { <%=c_iter%>, STRIDE_LOOP, 2, 1, ain, aout };
132
+ cumo_ndfunc_t ndf = { <%=c_iter%>, CUMO_STRIDE_LOOP, 2, 1, ain, aout };
133
133
  <% else %>
134
- ndfunc_t ndf = { <%=c_iter%>, STRIDE_LOOP|NDF_INDEXER_LOOP, 2, 1, ain, aout };
134
+ cumo_ndfunc_t ndf = { <%=c_iter%>, CUMO_STRIDE_LOOP|CUMO_NDF_INDEXER_LOOP, 2, 1, ain, aout };
135
135
  <% end %>
136
136
 
137
- return na_ndloop(&ndf, 2, self, other);
137
+ return cumo_na_ndloop(&ndf, 2, self, other);
138
138
  }
139
139
 
140
140
  /*
@@ -151,12 +151,12 @@ static VALUE
151
151
  <% else %>
152
152
  VALUE klass, v;
153
153
 
154
- klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
154
+ klass = cumo_na_upcast(rb_obj_class(self),rb_obj_class(other));
155
155
  if (klass==cT) {
156
156
  return <%=c_func%>_self(self, other);
157
157
  } else {
158
- v = rb_funcall(klass, id_cast, 1, self);
159
- return rb_funcall(v, <%=id_op%>, 1, other);
158
+ v = rb_funcall(klass, cumo_id_cast, 1, self);
159
+ return rb_funcall(v, <%=cumo_id_op%>, 1, other);
160
160
  }
161
161
  <% end %>
162
162
  }