cumo 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/3rd_party/LICENSE.txt +60 -0
  4. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
  5. data/LICENSE.txt +1 -62
  6. data/README.md +33 -29
  7. data/bench/cumo_bench.rb +47 -25
  8. data/bench/numo_bench.rb +27 -25
  9. data/docs/src-tree.md +16 -0
  10. data/ext/cumo/cuda/cublas.c +69 -219
  11. data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
  12. data/ext/cumo/cuda/runtime.c +2 -14
  13. data/ext/cumo/cumo.c +16 -16
  14. data/ext/cumo/include/cumo.h +2 -2
  15. data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
  16. data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
  17. data/ext/cumo/include/cumo/indexer.h +46 -63
  18. data/ext/cumo/include/cumo/intern.h +58 -112
  19. data/ext/cumo/include/cumo/narray.h +214 -185
  20. data/ext/cumo/include/cumo/narray_kernel.h +66 -37
  21. data/ext/cumo/include/cumo/ndloop.h +42 -42
  22. data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
  23. data/ext/cumo/include/cumo/template.h +56 -51
  24. data/ext/cumo/include/cumo/template_kernel.h +31 -31
  25. data/ext/cumo/include/cumo/types/bit.h +3 -3
  26. data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
  27. data/ext/cumo/include/cumo/types/complex.h +126 -126
  28. data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
  29. data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
  30. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
  31. data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
  32. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
  33. data/ext/cumo/include/cumo/types/int_macro.h +1 -1
  34. data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
  35. data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
  36. data/ext/cumo/include/cumo/types/scomplex.h +5 -5
  37. data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
  38. data/ext/cumo/narray/array.c +143 -143
  39. data/ext/cumo/narray/data.c +184 -184
  40. data/ext/cumo/narray/gen/cogen.rb +5 -2
  41. data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
  42. data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
  43. data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
  44. data/ext/cumo/narray/gen/erbln.rb +132 -0
  45. data/ext/cumo/narray/gen/erbpp2.rb +18 -13
  46. data/ext/cumo/narray/gen/narray_def.rb +3 -3
  47. data/ext/cumo/narray/gen/spec.rb +2 -2
  48. data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
  49. data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
  50. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
  51. data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
  52. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
  53. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
  54. data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
  55. data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
  56. data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
  57. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
  58. data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
  59. data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
  60. data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
  61. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
  62. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
  63. data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
  64. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
  65. data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
  66. data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
  67. data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
  68. data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
  69. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
  70. data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
  71. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
  72. data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
  73. data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
  74. data/ext/cumo/narray/gen/tmpl/each.c +9 -9
  75. data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
  76. data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
  77. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
  78. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
  79. data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
  80. data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
  81. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
  82. data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
  83. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
  84. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
  85. data/ext/cumo/narray/gen/tmpl/format.c +11 -11
  86. data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
  87. data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
  88. data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
  89. data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
  91. data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
  92. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
  93. data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
  94. data/ext/cumo/narray/gen/tmpl/median.c +10 -10
  95. data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
  96. data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
  97. data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
  98. data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
  99. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
  100. data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
  101. data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
  102. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
  103. data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
  104. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
  105. data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
  106. data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
  107. data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
  108. data/ext/cumo/narray/gen/tmpl/store.c +6 -6
  109. data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
  110. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
  111. data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
  112. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
  113. data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
  114. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
  115. data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
  116. data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
  117. data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
  118. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
  119. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
  120. data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
  121. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
  122. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
  123. data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
  124. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
  125. data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
  126. data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
  127. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
  128. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
  129. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
  130. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
  131. data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
  132. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
  133. data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
  134. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
  135. data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
  136. data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
  137. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
  138. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
  139. data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
  140. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
  141. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
  142. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
  143. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
  144. data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
  145. data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
  146. data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
  147. data/ext/cumo/narray/index.c +213 -213
  148. data/ext/cumo/narray/math.c +27 -27
  149. data/ext/cumo/narray/narray.c +484 -484
  150. data/ext/cumo/narray/ndloop.c +259 -258
  151. data/ext/cumo/narray/rand.c +3 -3
  152. data/ext/cumo/narray/step.c +70 -70
  153. data/ext/cumo/narray/struct.c +139 -139
  154. metadata +6 -7
  155. data/ext/cumo/include/cumo/intern_fwd.h +0 -38
  156. data/lib/erbpp.rb +0 -294
  157. data/lib/erbpp/line_number.rb +0 -137
  158. data/lib/erbpp/narray_def.rb +0 -381
@@ -10,8 +10,8 @@ extern "C" {
10
10
  #endif
11
11
  #endif
12
12
 
13
- #define CUMO_VERSION "0.1.0"
14
- #define CUMO_VERSION_CODE 10
13
+ #define CUMO_VERSION "0.1.1"
14
+ #define CUMO_VERSION_CODE 11
15
15
 
16
16
  bool cumo_compatible_mode_enabled_p();
17
17
 
@@ -11,137 +11,14 @@ extern "C" {
11
11
  #endif
12
12
  #endif
13
13
 
14
- #define option_value cumo_cublas_option_value
15
- extern VALUE cumo_cublas_option_value(VALUE value, VALUE default_value);
14
+ void
15
+ cumo_cuda_cublas_check_status(cublasStatus_t status);
16
16
 
17
- //#define option_order cumo_cublas_option_order
18
- //extern enum CBLAS_ORDER cumo_cublas_option_order(VALUE order);
17
+ cublasHandle_t
18
+ cumo_cuda_cublas_handle();
19
19
 
20
- #define option_trans cumo_cublas_option_trans
21
- extern cublasOperation_t cumo_cublas_option_trans(VALUE trans);
22
-
23
- #define option_uplo cumo_cublas_option_uplo
24
- extern cublasFillMode_t cumo_cublas_option_uplo(VALUE uplo);
25
-
26
- #define option_diag cumo_cublas_option_diag
27
- extern cublasDiagType_t cumo_cublas_option_diag(VALUE diag);
28
-
29
- #define option_side cumo_cublas_option_side
30
- extern cublasSideMode_t cumo_cublas_option_side(VALUE side);
31
-
32
- //#define check_func cumo_cublas_check_func
33
- //extern void cumo_cublas_check_func(void **func, const char *name);
34
-
35
- // TODO: Check if a and b are row_major?
36
- #define SWAP_IFROW(a,b,tmp) \
37
- {(tmp)=(a);(a)=(b);(b)=(tmp);}
38
-
39
- #define SWAP_IFTR(trans,a,b,tmp) \
40
- { if ((trans)!=CUBLAS_OP_N) \
41
- {(tmp)=(a);(a)=(b);(b)=(tmp);} \
42
- }
43
-
44
- /*
45
- //#define SWAP_IFCOLTR(order,trans,a,b,tmp) \
46
- // { if (((order)==CblasRowMajor && (trans)!=CblasNoTrans) || \
47
- // ((order)!=CblasRowMajor && (trans)==CblasNoTrans)) \
48
- // {(tmp)=(a);(a)=(b);(b)=(tmp);} \
49
- // }
50
-
51
- //#define SWAP_IFCOL(order,a,b,tmp) \
52
- // { if ((order)==CblasColMajor) {(tmp)=(a);(a)=(b);(b)=(tmp);} }
53
- //
54
- //#define SWAP_IFROW(order,a,b,tmp) \
55
- // { if ((order)==CblasRowMajor) {(tmp)=(a);(a)=(b);(b)=(tmp);} }
56
- //
57
- //#define SWAP_IFCOLTR(order,trans,a,b,tmp) \
58
- // { if (((order)==CblasRowMajor && (trans)!=CblasNoTrans) || \
59
- // ((order)!=CblasRowMajor && (trans)==CblasNoTrans)) \
60
- // {(tmp)=(a);(a)=(b);(b)=(tmp);} \
61
- // }
62
- //
63
- //#define CHECK_FUNC(fptr, fname) \
64
- // { if ((fptr)==0) { check_func((void*)(&(fptr)),fname); } }
65
- */
66
-
67
- #define ROW_SIZE(na) ((na)->shape[(na)->ndim-2])
68
- #define COL_SIZE(na) ((na)->shape[(na)->ndim-1])
69
-
70
- #define CHECK_NARRAY_TYPE(x,t) \
71
- if (CLASS_OF(x)!=(t)) { \
72
- rb_raise(rb_eTypeError,"invalid NArray type (class)"); \
73
- }
74
-
75
- // Error Class ??
76
- #define CHECK_DIM_GE(na,nd) \
77
- if ((na)->ndim<(nd)) { \
78
- rb_raise(nary_eShapeError, \
79
- "n-dimension=%d, but >=%d is expected", \
80
- (na)->ndim, (nd)); \
81
- }
82
-
83
- #define CHECK_DIM_EQ(na1,nd) \
84
- if ((na1)->ndim != (nd)) { \
85
- rb_raise(nary_eShapeError, \
86
- "dimention mismatch: %d != %d", \
87
- (na1)->ndim, (nd)); \
88
- }
89
-
90
- #define CHECK_SQUARE(name,na) \
91
- if ((na)->shape[(na)->ndim-1] != (na)->shape[(na)->ndim-2]) { \
92
- rb_raise(nary_eShapeError,"%s is not square matrix",name); \
93
- }
94
-
95
- #define CHECK_SIZE_GE(na,sz) \
96
- if ((na)->size < (size_t)(sz)) { \
97
- rb_raise(nary_eShapeError, \
98
- "NArray size must be >= %"SZF"u",(size_t)(sz));\
99
- }
100
- #define CHECK_NON_EMPTY(na) \
101
- if ((na)->size==0) { \
102
- rb_raise(nary_eShapeError,"empty NArray"); \
103
- }
104
-
105
- #define CHECK_SIZE_EQ(n,m) \
106
- if ((n)!=(m)) { \
107
- rb_raise(nary_eShapeError, \
108
- "size mismatch: %"SZF"d != %"SZF"d", \
109
- (size_t)(n),(size_t)(m)); \
110
- }
111
-
112
- #define CHECK_SAME_SHAPE(na1,na2) \
113
- { int i; \
114
- CHECK_DIM_EQ(na1,na2->ndim); \
115
- for (i=0; i<na1->ndim; i++) { \
116
- CHECK_SIZE_EQ(na1->shape[i],na2->shape[i]); \
117
- } \
118
- }
119
-
120
- #define CHECK_INT_EQ(sm,m,sn,n) \
121
- if ((m) != (n)) { \
122
- rb_raise(nary_eShapeError, \
123
- "%s must be == %s: %s=%d %s=%d", \
124
- sm,sn,sm,m,sn,n); \
125
- }
126
-
127
- // Error Class ??
128
- #define CHECK_LEADING_GE(sld,ld,sn,n) \
129
- if ((ld) < (n)) { \
130
- rb_raise(nary_eShapeError, \
131
- "%s must be >= max(%s,1): %s=%d %s=%d", \
132
- sld,sn,sld,ld,sn,n); \
133
- }
134
-
135
- #define COPY_OR_CAST_TO(a,T) \
136
- { \
137
- if (CLASS_OF(a) == (T)) { \
138
- if (!TEST_INPLACE(a)) { \
139
- a = na_copy(a); \
140
- } \
141
- } else { \
142
- a = rb_funcall(T,rb_intern("cast"),1,a); \
143
- } \
144
- }
20
+ VALUE
21
+ cumo_cuda_cublas_option_value(VALUE value, VALUE default_value);
145
22
 
146
23
  #if defined(__cplusplus)
147
24
  #if 0
@@ -21,6 +21,22 @@ cumo_cuda_runtime_check_status(cudaError_t status)
21
21
  }
22
22
  }
23
23
 
24
+ static inline int
25
+ cumo_cuda_runtime_get_device_count()
26
+ {
27
+ int device_count;
28
+ cumo_cuda_runtime_check_status(cudaGetDeviceCount(&device_count));
29
+ return device_count;
30
+ }
31
+
32
+ static inline int
33
+ cumo_cuda_runtime_get_device()
34
+ {
35
+ int device;
36
+ cumo_cuda_runtime_check_status(cudaGetDevice(&device));
37
+ return device;
38
+ }
39
+
24
40
  static inline bool
25
41
  cumo_cuda_runtime_is_device_memory(void* ptr)
26
42
  {
@@ -1,11 +1,6 @@
1
1
  #ifndef CUMO_INDEXER_H
2
2
  #define CUMO_INDEXER_H
3
3
 
4
- /* Add cumo_ prefix */
5
- #define na_indexer_t cumo_na_indexer_t
6
- #define na_iarray_t cumo_na_iarray_t
7
- #define na_reduction_arg_t cumo_na_reduction_arg_t
8
-
9
4
  #ifndef __CUDACC__
10
5
  #include "cumo/narray.h"
11
6
  #include "cumo/ndloop.h"
@@ -21,10 +16,10 @@
21
16
  typedef struct {
22
17
  unsigned char ndim; // # of dimensions
23
18
  size_t total_size; // # of total elements
24
- size_t shape[NA_MAX_DIMENSION]; // # of elements for each dimension
25
- uint64_t index[NA_MAX_DIMENSION]; // indicies for each dimension
19
+ size_t shape[CUMO_NA_MAX_DIMENSION]; // # of elements for each dimension
20
+ uint64_t index[CUMO_NA_MAX_DIMENSION]; // indicies for each dimension
26
21
  uint64_t raw_index;
27
- } na_indexer_t;
22
+ } cumo_na_indexer_t;
28
23
 
29
24
  /* A structure to get data address with indexer.
30
25
  *
@@ -32,24 +27,23 @@ typedef struct {
32
27
  */
33
28
  typedef struct {
34
29
  char* ptr;
35
- ssize_t step[NA_MAX_DIMENSION]; // or strides
36
- } na_iarray_t;
30
+ ssize_t step[CUMO_NA_MAX_DIMENSION]; // or strides
31
+ } cumo_na_iarray_t;
37
32
 
38
33
  typedef struct {
39
- na_iarray_t in;
40
- na_iarray_t out;
41
- na_indexer_t in_indexer;
42
- na_indexer_t out_indexer;
43
- na_indexer_t reduce_indexer;
44
- } na_reduction_arg_t;
34
+ cumo_na_iarray_t in;
35
+ cumo_na_iarray_t out;
36
+ cumo_na_indexer_t in_indexer;
37
+ cumo_na_indexer_t out_indexer;
38
+ } cumo_na_reduction_arg_t;
45
39
 
46
40
  #ifndef __CUDACC__
47
- extern int na_debug_flag; // narray.c
41
+ extern int cumo_na_debug_flag; // narray.c
48
42
 
49
43
  static void
50
- print_na_indexer_t(na_indexer_t* indexer)
44
+ print_cumo_na_indexer_t(cumo_na_indexer_t* indexer)
51
45
  {
52
- printf("na_indexer_t = 0x%"SZF"x {\n", (size_t)indexer);
46
+ printf("cumo_na_indexer_t = 0x%"SZF"x {\n", (size_t)indexer);
53
47
  printf(" ndim = %d\n", indexer->ndim);
54
48
  printf(" total_size = %ld\n", indexer->total_size);
55
49
  printf(" shape = 0x%"SZF"x\n", (size_t)indexer->shape);
@@ -60,9 +54,9 @@ print_na_indexer_t(na_indexer_t* indexer)
60
54
  }
61
55
 
62
56
  static void
63
- print_na_iarray_t(na_iarray_t* iarray, unsigned char ndim)
57
+ print_cumo_na_iarray_t(cumo_na_iarray_t* iarray, unsigned char ndim)
64
58
  {
65
- printf("na_iarray_t = 0x%"SZF"x {\n", (size_t)iarray);
59
+ printf("cumo_na_iarray_t = 0x%"SZF"x {\n", (size_t)iarray);
66
60
  printf(" ptr = 0x%"SZF"x\n", (size_t)iarray->ptr);
67
61
  printf(" step = 0x%"SZF"x\n", (size_t)iarray->step);
68
62
  for (int i = 0; i < ndim; ++i) {
@@ -72,27 +66,25 @@ print_na_iarray_t(na_iarray_t* iarray, unsigned char ndim)
72
66
  }
73
67
 
74
68
  static void
75
- print_na_reduction_arg_t(na_reduction_arg_t* arg)
69
+ print_cumo_na_reduction_arg_t(cumo_na_reduction_arg_t* arg)
76
70
  {
77
- printf("na_reduction_arg_t = 0x%"SZF"x {\n", (size_t)arg);
71
+ printf("cumo_na_reduction_arg_t = 0x%"SZF"x {\n", (size_t)arg);
78
72
  printf("--in--\n");
79
- print_na_iarray_t(&arg->in, arg->in_indexer.ndim);
73
+ print_cumo_na_iarray_t(&arg->in, arg->in_indexer.ndim);
80
74
  printf("--out--\n");
81
- print_na_iarray_t(&arg->out, arg->out_indexer.ndim);
75
+ print_cumo_na_iarray_t(&arg->out, arg->out_indexer.ndim);
82
76
  printf("--in_indexer--\n");
83
- print_na_indexer_t(&arg->in_indexer);
77
+ print_cumo_na_indexer_t(&arg->in_indexer);
84
78
  printf("--out_indexer--\n");
85
- print_na_indexer_t(&arg->out_indexer);
86
- printf("--reduce_indexer--\n");
87
- print_na_indexer_t(&arg->reduce_indexer);
79
+ print_cumo_na_indexer_t(&arg->out_indexer);
88
80
  printf("}\n");
89
81
  }
90
82
 
91
- // Note that you, then, have to call na_indexer_set to create index[]
92
- static na_indexer_t
93
- na_make_indexer(na_loop_args_t* arg)
83
+ // Note that you, then, have to call cumo_na_indexer_set to create index[]
84
+ static cumo_na_indexer_t
85
+ cumo_na_make_indexer(cumo_na_loop_args_t* arg)
94
86
  {
95
- na_indexer_t indexer;
87
+ cumo_na_indexer_t indexer;
96
88
  indexer.ndim = arg->ndim;
97
89
  indexer.total_size = 1;
98
90
  for (int i = 0; i < arg->ndim; ++i) {
@@ -102,10 +94,10 @@ na_make_indexer(na_loop_args_t* arg)
102
94
  return indexer;
103
95
  }
104
96
 
105
- static na_iarray_t
106
- na_make_iarray_given_ndim(na_loop_args_t* arg, int ndim)
97
+ static cumo_na_iarray_t
98
+ cumo_na_make_iarray_given_ndim(cumo_na_loop_args_t* arg, int ndim)
107
99
  {
108
- na_iarray_t iarray;
100
+ cumo_na_iarray_t iarray;
109
101
  iarray.ptr = arg->ptr + arg->iter[0].pos;
110
102
  for (int idim = ndim; --idim >= 0;) {
111
103
  iarray.step[idim] = arg->iter[idim].step;
@@ -113,16 +105,16 @@ na_make_iarray_given_ndim(na_loop_args_t* arg, int ndim)
113
105
  return iarray;
114
106
  }
115
107
 
116
- static na_iarray_t
117
- na_make_iarray(na_loop_args_t* arg)
108
+ static cumo_na_iarray_t
109
+ cumo_na_make_iarray(cumo_na_loop_args_t* arg)
118
110
  {
119
- return na_make_iarray_given_ndim(arg, arg->ndim);
111
+ return cumo_na_make_iarray_given_ndim(arg, arg->ndim);
120
112
  }
121
113
 
122
- static na_reduction_arg_t
123
- na_make_reduction_arg(na_loop_t* lp_user)
114
+ static cumo_na_reduction_arg_t
115
+ cumo_na_make_reduction_arg(cumo_na_loop_t* lp_user)
124
116
  {
125
- na_reduction_arg_t arg;
117
+ cumo_na_reduction_arg_t arg;
126
118
  int i;
127
119
  int in_ndim = lp_user->args[0].ndim;
128
120
 
@@ -131,33 +123,24 @@ na_make_reduction_arg(na_loop_t* lp_user)
131
123
  // out shape = (2, 4, 6)
132
124
  // reduce shape = (3, 5)
133
125
 
134
- arg.in = na_make_iarray(&lp_user->args[0]);
135
- arg.in_indexer = na_make_indexer(&lp_user->args[0]);
126
+ arg.in = cumo_na_make_iarray(&lp_user->args[0]);
127
+ arg.in_indexer = cumo_na_make_indexer(&lp_user->args[0]);
136
128
 
137
- arg.reduce_indexer.ndim = 0;
138
- arg.reduce_indexer.total_size = 1;
139
129
  arg.out_indexer.ndim = 0;
140
130
  arg.out_indexer.total_size = 1;
141
131
  for (i = 0; i < in_ndim; ++i) {
142
- if (na_test_reduce(lp_user->reduce, i)) {
143
- arg.reduce_indexer.shape[arg.reduce_indexer.ndim] = arg.in_indexer.shape[i];
144
- arg.reduce_indexer.total_size *= arg.in_indexer.shape[i];
145
- ++arg.reduce_indexer.ndim;
146
- } else {
132
+ if (!cumo_na_test_reduce(lp_user->reduce, i)) {
147
133
  arg.out_indexer.shape[arg.out_indexer.ndim] = arg.in_indexer.shape[i];
148
134
  arg.out_indexer.total_size *= arg.in_indexer.shape[i];
149
135
  ++arg.out_indexer.ndim;
150
136
  }
151
137
  }
152
- arg.out = na_make_iarray_given_ndim(&lp_user->args[1], arg.out_indexer.ndim);
138
+ arg.out = cumo_na_make_iarray_given_ndim(&lp_user->args[1], arg.out_indexer.ndim);
153
139
 
154
- if (na_debug_flag) {
155
- print_na_reduction_arg_t(&arg);
140
+ if (cumo_na_debug_flag) {
141
+ print_cumo_na_reduction_arg_t(&arg);
156
142
  }
157
143
 
158
- assert(arg.reduce_indexer.ndim == lp_user->reduce_dim);
159
- assert(arg.in_indexer.ndim == arg.reduce_indexer.ndim + arg.out_indexer.ndim);
160
-
161
144
  return arg;
162
145
  }
163
146
 
@@ -169,7 +152,7 @@ na_make_reduction_arg(na_loop_t* lp_user)
169
152
 
170
153
  __host__ __device__
171
154
  static inline void
172
- cumo_na_indexer_set_dim(na_indexer_t* indexer, uint64_t i) {
155
+ cumo_na_indexer_set_dim(cumo_na_indexer_t* indexer, uint64_t i) {
173
156
  indexer->raw_index = i;
174
157
  for (int j = indexer->ndim; --j >= 0;) {
175
158
  indexer->index[j] = i % indexer->shape[j];
@@ -181,7 +164,7 @@ cumo_na_indexer_set_dim(na_indexer_t* indexer, uint64_t i) {
181
164
  #define CUMO_NA_INDEXER_SET(NDIM) \
182
165
  __host__ __device__ \
183
166
  static inline void \
184
- cumo_na_indexer_set_dim##NDIM(na_indexer_t* indexer, uint64_t i) { \
167
+ cumo_na_indexer_set_dim##NDIM(cumo_na_indexer_t* indexer, uint64_t i) { \
185
168
  indexer->raw_index = i; \
186
169
  for (int j = NDIM; --j >= 0;) { \
187
170
  indexer->index[j] = i % indexer->shape[j]; \
@@ -196,13 +179,13 @@ CUMO_NA_INDEXER_SET(0)
196
179
 
197
180
  __host__ __device__
198
181
  static inline void
199
- cumo_na_indexer_set_dim1(na_indexer_t* indexer, uint64_t i) {
182
+ cumo_na_indexer_set_dim1(cumo_na_indexer_t* indexer, uint64_t i) {
200
183
  indexer->raw_index = i;
201
184
  }
202
185
 
203
186
  __host__ __device__
204
187
  static inline char*
205
- cumo_na_iarray_at_dim(na_iarray_t* iarray, na_indexer_t* indexer) {
188
+ cumo_na_iarray_at_dim(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) {
206
189
  char* ptr = iarray->ptr;
207
190
  for (int idim = 0; idim < indexer->ndim; ++idim) {
208
191
  ptr += iarray->step[idim] * indexer->index[idim];
@@ -214,7 +197,7 @@ cumo_na_iarray_at_dim(na_iarray_t* iarray, na_indexer_t* indexer) {
214
197
  #define CUMO_NA_IARRAY_AT(NDIM) \
215
198
  __host__ __device__ \
216
199
  static inline char* \
217
- cumo_na_iarray_at_dim##NDIM(na_iarray_t* iarray, na_indexer_t* indexer) { \
200
+ cumo_na_iarray_at_dim##NDIM(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) { \
218
201
  char* ptr = iarray->ptr; \
219
202
  for (int idim = 0; idim < NDIM; ++idim) { \
220
203
  ptr += iarray->step[idim] * indexer->index[idim]; \
@@ -229,7 +212,7 @@ CUMO_NA_IARRAY_AT(0)
229
212
 
230
213
  __host__ __device__
231
214
  static inline char*
232
- cumo_na_iarray_at_dim1(na_iarray_t* iarray, na_indexer_t* indexer) {
215
+ cumo_na_iarray_at_dim1(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) {
233
216
  return iarray->ptr + iarray->step[0] * indexer->raw_index;
234
217
  }
235
218
 
@@ -3,132 +3,78 @@
3
3
 
4
4
  void cumo_debug_breakpoint(void);
5
5
 
6
- /* Add cumo_ prefix to avoid C symbol collisions with Numo without modifying C implementations */
7
-
8
- #define rb_narray_new cumo_nary_new
9
- #define nary_new cumo_nary_new
10
- VALUE cumo_nary_new(VALUE elem, int ndim, size_t *shape);
11
- #define rb_narray_view_new cumo_nary_view_new
12
- #define nary_view_new cumo_nary_view_new
13
- VALUE cumo_nary_view_new(VALUE elem, int ndim, size_t *shape);
14
- #define rb_narray_debug_info cumo_nary_debug_info
15
- #define nary_debug_info cumo_nary_debug_info
16
- VALUE cumo_nary_debug_info(VALUE);
17
-
18
- #define na_make_view cumo_nary_make_view
19
- VALUE cumo_nary_make_view(VALUE self);
20
-
21
- #define na_s_allocate cumo_nary_s_allocate
22
- VALUE cumo_nary_s_allocate(VALUE klass);
23
- #define na_s_allocate_view cumo_nary_s_allocate_view
24
- VALUE cumo_nary_s_allocate_view(VALUE klass);
25
- #define na_s_new_like cumo_nary_s_new_like
26
- VALUE cumo_nary_s_new_like(VALUE type, VALUE obj);
27
-
28
- #define na_alloc_shape cumo_na_alloc_shape
29
- void cumo_na_alloc_shape(narray_t *na, int ndim);
30
- #define na_array_to_internal_shape cumo_na_array_to_internal_shape
6
+ VALUE cumo_na_new(VALUE elem, int ndim, size_t *shape);
7
+ VALUE cumo_na_view_new(VALUE elem, int ndim, size_t *shape);
8
+ VALUE cumo_na_debug_info(VALUE);
9
+
10
+ VALUE cumo_na_make_view(VALUE self);
11
+
12
+ VALUE cumo_na_s_allocate(VALUE klass);
13
+ VALUE cumo_na_s_allocate_view(VALUE klass);
14
+ VALUE cumo_na_s_new_like(VALUE type, VALUE obj);
15
+
16
+ void cumo_na_alloc_shape(cumo_narray_t *na, int ndim);
31
17
  void cumo_na_array_to_internal_shape(VALUE self, VALUE ary, size_t *shape);
32
- #define na_index_arg_to_internal_order cumo_na_index_arg_to_internal_order
33
18
  void cumo_na_index_arg_to_internal_order(int argc, VALUE *argv, VALUE self);
34
- #define na_setup_shape cumo_na_setup_shape
35
- void cumo_na_setup_shape(narray_t *na, int ndim, size_t *shape);
36
-
37
- #define na_get_elmsz cumo_nary_element_stride
38
- #define nary_element_stride cumo_nary_element_stride
39
- //#define na_element_stride cumo_nary_element_stride
40
- unsigned int cumo_nary_element_stride(VALUE nary);
41
- #define na_dtype_elmsz cumo_nary_dtype_element_stride
42
- size_t cumo_nary_dtype_element_stride(VALUE klass);
43
-
44
- #define na_get_pointer cumo_nary_get_pointer
45
- char *cumo_nary_get_pointer(VALUE);
46
- #define na_get_pointer_for_write cumo_nary_get_pointer_for_write
47
- char *cumo_nary_get_pointer_for_write(VALUE);
48
- #define na_get_pointer_for_read cumo_nary_get_pointer_for_read
49
- char *cumo_nary_get_pointer_for_read(VALUE);
50
- #define na_get_pointer_for_read_write cumo_nary_get_pointer_for_read_write
51
- char *cumo_nary_get_pointer_for_read_write(VALUE);
52
- #define na_get_offset cumo_nary_get_offset
53
- size_t cumo_nary_get_offset(VALUE self);
54
-
55
- #define na_copy_flags cumo_nary_copy_flags
56
- void cumo_nary_copy_flags(VALUE src, VALUE dst);
57
-
58
- #define na_check_ladder cumo_nary_check_ladder
59
- VALUE cumo_nary_check_ladder(VALUE self, int start_dim);
60
- #define na_check_contiguous cumo_nary_check_contiguous
61
- VALUE cumo_nary_check_contiguous(VALUE self);
62
-
63
- #define na_flatten_dim cumo_nary_flatten_dim
64
- VALUE cumo_nary_flatten_dim(VALUE self, int sd);
65
-
66
- #define na_flatten cumo_nary_flatten
67
- VALUE cumo_nary_flatten(VALUE);
68
-
69
- #define na_copy cumo_nary_dup
70
- VALUE cumo_nary_dup(VALUE);
71
-
72
- #define na_store cumo_nary_store
73
- VALUE cumo_nary_store(VALUE self, VALUE src);
74
-
75
- #define na_upcast cumo_na_upcast
19
+ void cumo_na_setup_shape(cumo_narray_t *na, int ndim, size_t *shape);
20
+
21
+ unsigned int cumo_na_element_stride(VALUE nary);
22
+ size_t cumo_na_dtype_element_stride(VALUE klass);
23
+
24
+ char *cumo_na_get_pointer(VALUE);
25
+ char *cumo_na_get_pointer_for_write(VALUE);
26
+ char *cumo_na_get_pointer_for_read(VALUE);
27
+ char *cumo_na_get_pointer_for_read_write(VALUE);
28
+ size_t cumo_na_get_offset(VALUE self);
29
+
30
+ void cumo_na_copy_flags(VALUE src, VALUE dst);
31
+
32
+ VALUE cumo_na_check_ladder(VALUE self, int start_dim);
33
+ VALUE cumo_na_check_contiguous(VALUE self);
34
+
35
+ VALUE cumo_na_flatten_dim(VALUE self, int sd);
36
+
37
+ VALUE cumo_na_flatten(VALUE);
38
+
39
+ VALUE cumo_na_copy(VALUE);
40
+
41
+ VALUE cumo_na_store(VALUE self, VALUE src);
42
+
76
43
  VALUE cumo_na_upcast(VALUE type1, VALUE type2);
77
44
 
78
- #define na_release_lock cumo_na_release_lock
79
45
  void cumo_na_release_lock(VALUE); // currently do nothing
80
46
 
81
47
  // used in reduce methods
82
- #define nary_reduce_dimension cumo_nary_reduce_dimension
83
- #define na_reduce_dimension cumo_nary_reduce_dimension
84
- VALUE cumo_nary_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
85
- ndfunc_t *ndf, na_iter_func_t nan_iter);
48
+ VALUE cumo_na_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
49
+ cumo_ndfunc_t *ndf, cumo_na_iter_func_t nan_iter);
86
50
 
87
- #define nary_reduce_options cumo_nary_reduce_options
88
- #define na_reduce_options cumo_nary_reduce_options
89
- VALUE cumo_nary_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
90
- ndfunc_t *ndf);
51
+ VALUE cumo_na_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
52
+ cumo_ndfunc_t *ndf);
91
53
 
92
54
  // ndloop
93
- #define na_ndloop cumo_na_ndloop
94
- VALUE cumo_na_ndloop(ndfunc_t *nf, int argc, ...);
95
- #define na_ndloop2 cumo_na_ndloop2
96
- VALUE cumo_na_ndloop2(ndfunc_t *nf, VALUE args);
97
- #define na_ndloop3 cumo_na_ndloop3
98
- VALUE cumo_na_ndloop3(ndfunc_t *nf, void *ptr, int argc, ...);
99
- #define na_ndloop4 cumo_na_ndloop4
100
- VALUE cumo_na_ndloop4(ndfunc_t *nf, void *ptr, VALUE args);
101
-
102
- #define na_ndloop_cast_narray_to_rarray cumo_na_ndloop_cast_narray_to_rarray
103
- VALUE cumo_na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt);
104
- #define na_ndloop_store_rarray cumo_na_ndloop_store_rarray
105
- VALUE cumo_na_ndloop_store_rarray(ndfunc_t *nf, VALUE nary, VALUE rary);
106
- #define na_ndloop_store_rarray2 cumo_na_ndloop_store_rarray2
107
- VALUE cumo_na_ndloop_store_rarray2(ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt);
108
- #define na_ndloop_inspect cumo_na_ndloop_inspect
109
- VALUE cumo_na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt);
110
- #define na_ndloop_with_index cumo_na_ndloop_with_index
111
- VALUE cumo_na_ndloop_with_index(ndfunc_t *nf, int argc, ...);
112
-
113
- #define na_info_str cumo_nary_info_str
114
- VALUE cumo_nary_info_str(VALUE);
115
-
116
- #define na_test_reduce cumo_nary_test_reduce
117
- bool cumo_nary_test_reduce(VALUE reduce, int dim);
118
-
119
- #define nary_step_array_index cumo_nary_step_array_index
120
- void cumo_nary_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
121
- #define nary_step_sequence cumo_nary_step_sequence
122
- void cumo_nary_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
55
+ VALUE cumo_na_ndloop(cumo_ndfunc_t *nf, int argc, ...);
56
+ VALUE cumo_na_ndloop2(cumo_ndfunc_t *nf, VALUE args);
57
+ VALUE cumo_na_ndloop3(cumo_ndfunc_t *nf, void *ptr, int argc, ...);
58
+ VALUE cumo_na_ndloop4(cumo_ndfunc_t *nf, void *ptr, VALUE args);
59
+
60
+ VALUE cumo_na_ndloop_cast_narray_to_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE fmt);
61
+ VALUE cumo_na_ndloop_store_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE rary);
62
+ VALUE cumo_na_ndloop_store_rarray2(cumo_ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt);
63
+ VALUE cumo_na_ndloop_inspect(VALUE nary, cumo_na_text_func_t func, VALUE opt);
64
+ VALUE cumo_na_ndloop_with_index(cumo_ndfunc_t *nf, int argc, ...);
65
+
66
+ VALUE cumo_na_info_str(VALUE);
67
+
68
+ bool cumo_na_test_reduce(VALUE reduce, int dim);
69
+
70
+ void cumo_na_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
71
+ void cumo_na_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
123
72
 
124
73
  // used in aref, aset
125
- #define na_get_result_dimension cumo_nary_get_result_dimension
126
- int cumo_nary_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
127
- #define na_aref_main cumo_nary_aref_main
128
- VALUE cumo_nary_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
74
+ int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
75
+ VALUE cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
129
76
 
130
77
  // defined in array, used in math
131
- #define na_ary_composition_dtype cumo_na_ary_composition_dtype
132
78
  VALUE cumo_na_ary_composition_dtype(VALUE ary);
133
79
 
134
80
  #include "ruby/version.h"