cumo 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/3rd_party/LICENSE.txt +60 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
- data/LICENSE.txt +1 -62
- data/README.md +33 -29
- data/bench/cumo_bench.rb +47 -25
- data/bench/numo_bench.rb +27 -25
- data/docs/src-tree.md +16 -0
- data/ext/cumo/cuda/cublas.c +69 -219
- data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
- data/ext/cumo/cuda/runtime.c +2 -14
- data/ext/cumo/cumo.c +16 -16
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
- data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
- data/ext/cumo/include/cumo/indexer.h +46 -63
- data/ext/cumo/include/cumo/intern.h +58 -112
- data/ext/cumo/include/cumo/narray.h +214 -185
- data/ext/cumo/include/cumo/narray_kernel.h +66 -37
- data/ext/cumo/include/cumo/ndloop.h +42 -42
- data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
- data/ext/cumo/include/cumo/template.h +56 -51
- data/ext/cumo/include/cumo/template_kernel.h +31 -31
- data/ext/cumo/include/cumo/types/bit.h +3 -3
- data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
- data/ext/cumo/include/cumo/types/complex.h +126 -126
- data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
- data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
- data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
- data/ext/cumo/include/cumo/types/scomplex.h +5 -5
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
- data/ext/cumo/narray/array.c +143 -143
- data/ext/cumo/narray/data.c +184 -184
- data/ext/cumo/narray/gen/cogen.rb +5 -2
- data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
- data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
- data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
- data/ext/cumo/narray/gen/erbln.rb +132 -0
- data/ext/cumo/narray/gen/erbpp2.rb +18 -13
- data/ext/cumo/narray/gen/narray_def.rb +3 -3
- data/ext/cumo/narray/gen/spec.rb +2 -2
- data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
- data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
- data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
- data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
- data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
- data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
- data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
- data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
- data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/each.c +9 -9
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
- data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
- data/ext/cumo/narray/gen/tmpl/format.c +11 -11
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
- data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
- data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
- data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
- data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
- data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
- data/ext/cumo/narray/gen/tmpl/median.c +10 -10
- data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
- data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
- data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
- data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
- data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
- data/ext/cumo/narray/gen/tmpl/store.c +6 -6
- data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
- data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
- data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
- data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
- data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
- data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
- data/ext/cumo/narray/index.c +213 -213
- data/ext/cumo/narray/math.c +27 -27
- data/ext/cumo/narray/narray.c +484 -484
- data/ext/cumo/narray/ndloop.c +259 -258
- data/ext/cumo/narray/rand.c +3 -3
- data/ext/cumo/narray/step.c +70 -70
- data/ext/cumo/narray/struct.c +139 -139
- metadata +6 -7
- data/ext/cumo/include/cumo/intern_fwd.h +0 -38
- data/lib/erbpp.rb +0 -294
- data/lib/erbpp/line_number.rb +0 -137
- data/lib/erbpp/narray_def.rb +0 -381
data/ext/cumo/include/cumo.h
CHANGED
@@ -11,137 +11,14 @@ extern "C" {
|
|
11
11
|
#endif
|
12
12
|
#endif
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
void
|
15
|
+
cumo_cuda_cublas_check_status(cublasStatus_t status);
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
cublasHandle_t
|
18
|
+
cumo_cuda_cublas_handle();
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
#define option_uplo cumo_cublas_option_uplo
|
24
|
-
extern cublasFillMode_t cumo_cublas_option_uplo(VALUE uplo);
|
25
|
-
|
26
|
-
#define option_diag cumo_cublas_option_diag
|
27
|
-
extern cublasDiagType_t cumo_cublas_option_diag(VALUE diag);
|
28
|
-
|
29
|
-
#define option_side cumo_cublas_option_side
|
30
|
-
extern cublasSideMode_t cumo_cublas_option_side(VALUE side);
|
31
|
-
|
32
|
-
//#define check_func cumo_cublas_check_func
|
33
|
-
//extern void cumo_cublas_check_func(void **func, const char *name);
|
34
|
-
|
35
|
-
// TODO: Check if a and b are row_major?
|
36
|
-
#define SWAP_IFROW(a,b,tmp) \
|
37
|
-
{(tmp)=(a);(a)=(b);(b)=(tmp);}
|
38
|
-
|
39
|
-
#define SWAP_IFTR(trans,a,b,tmp) \
|
40
|
-
{ if ((trans)!=CUBLAS_OP_N) \
|
41
|
-
{(tmp)=(a);(a)=(b);(b)=(tmp);} \
|
42
|
-
}
|
43
|
-
|
44
|
-
/*
|
45
|
-
//#define SWAP_IFCOLTR(order,trans,a,b,tmp) \
|
46
|
-
// { if (((order)==CblasRowMajor && (trans)!=CblasNoTrans) || \
|
47
|
-
// ((order)!=CblasRowMajor && (trans)==CblasNoTrans)) \
|
48
|
-
// {(tmp)=(a);(a)=(b);(b)=(tmp);} \
|
49
|
-
// }
|
50
|
-
|
51
|
-
//#define SWAP_IFCOL(order,a,b,tmp) \
|
52
|
-
// { if ((order)==CblasColMajor) {(tmp)=(a);(a)=(b);(b)=(tmp);} }
|
53
|
-
//
|
54
|
-
//#define SWAP_IFROW(order,a,b,tmp) \
|
55
|
-
// { if ((order)==CblasRowMajor) {(tmp)=(a);(a)=(b);(b)=(tmp);} }
|
56
|
-
//
|
57
|
-
//#define SWAP_IFCOLTR(order,trans,a,b,tmp) \
|
58
|
-
// { if (((order)==CblasRowMajor && (trans)!=CblasNoTrans) || \
|
59
|
-
// ((order)!=CblasRowMajor && (trans)==CblasNoTrans)) \
|
60
|
-
// {(tmp)=(a);(a)=(b);(b)=(tmp);} \
|
61
|
-
// }
|
62
|
-
//
|
63
|
-
//#define CHECK_FUNC(fptr, fname) \
|
64
|
-
// { if ((fptr)==0) { check_func((void*)(&(fptr)),fname); } }
|
65
|
-
*/
|
66
|
-
|
67
|
-
#define ROW_SIZE(na) ((na)->shape[(na)->ndim-2])
|
68
|
-
#define COL_SIZE(na) ((na)->shape[(na)->ndim-1])
|
69
|
-
|
70
|
-
#define CHECK_NARRAY_TYPE(x,t) \
|
71
|
-
if (CLASS_OF(x)!=(t)) { \
|
72
|
-
rb_raise(rb_eTypeError,"invalid NArray type (class)"); \
|
73
|
-
}
|
74
|
-
|
75
|
-
// Error Class ??
|
76
|
-
#define CHECK_DIM_GE(na,nd) \
|
77
|
-
if ((na)->ndim<(nd)) { \
|
78
|
-
rb_raise(nary_eShapeError, \
|
79
|
-
"n-dimension=%d, but >=%d is expected", \
|
80
|
-
(na)->ndim, (nd)); \
|
81
|
-
}
|
82
|
-
|
83
|
-
#define CHECK_DIM_EQ(na1,nd) \
|
84
|
-
if ((na1)->ndim != (nd)) { \
|
85
|
-
rb_raise(nary_eShapeError, \
|
86
|
-
"dimention mismatch: %d != %d", \
|
87
|
-
(na1)->ndim, (nd)); \
|
88
|
-
}
|
89
|
-
|
90
|
-
#define CHECK_SQUARE(name,na) \
|
91
|
-
if ((na)->shape[(na)->ndim-1] != (na)->shape[(na)->ndim-2]) { \
|
92
|
-
rb_raise(nary_eShapeError,"%s is not square matrix",name); \
|
93
|
-
}
|
94
|
-
|
95
|
-
#define CHECK_SIZE_GE(na,sz) \
|
96
|
-
if ((na)->size < (size_t)(sz)) { \
|
97
|
-
rb_raise(nary_eShapeError, \
|
98
|
-
"NArray size must be >= %"SZF"u",(size_t)(sz));\
|
99
|
-
}
|
100
|
-
#define CHECK_NON_EMPTY(na) \
|
101
|
-
if ((na)->size==0) { \
|
102
|
-
rb_raise(nary_eShapeError,"empty NArray"); \
|
103
|
-
}
|
104
|
-
|
105
|
-
#define CHECK_SIZE_EQ(n,m) \
|
106
|
-
if ((n)!=(m)) { \
|
107
|
-
rb_raise(nary_eShapeError, \
|
108
|
-
"size mismatch: %"SZF"d != %"SZF"d", \
|
109
|
-
(size_t)(n),(size_t)(m)); \
|
110
|
-
}
|
111
|
-
|
112
|
-
#define CHECK_SAME_SHAPE(na1,na2) \
|
113
|
-
{ int i; \
|
114
|
-
CHECK_DIM_EQ(na1,na2->ndim); \
|
115
|
-
for (i=0; i<na1->ndim; i++) { \
|
116
|
-
CHECK_SIZE_EQ(na1->shape[i],na2->shape[i]); \
|
117
|
-
} \
|
118
|
-
}
|
119
|
-
|
120
|
-
#define CHECK_INT_EQ(sm,m,sn,n) \
|
121
|
-
if ((m) != (n)) { \
|
122
|
-
rb_raise(nary_eShapeError, \
|
123
|
-
"%s must be == %s: %s=%d %s=%d", \
|
124
|
-
sm,sn,sm,m,sn,n); \
|
125
|
-
}
|
126
|
-
|
127
|
-
// Error Class ??
|
128
|
-
#define CHECK_LEADING_GE(sld,ld,sn,n) \
|
129
|
-
if ((ld) < (n)) { \
|
130
|
-
rb_raise(nary_eShapeError, \
|
131
|
-
"%s must be >= max(%s,1): %s=%d %s=%d", \
|
132
|
-
sld,sn,sld,ld,sn,n); \
|
133
|
-
}
|
134
|
-
|
135
|
-
#define COPY_OR_CAST_TO(a,T) \
|
136
|
-
{ \
|
137
|
-
if (CLASS_OF(a) == (T)) { \
|
138
|
-
if (!TEST_INPLACE(a)) { \
|
139
|
-
a = na_copy(a); \
|
140
|
-
} \
|
141
|
-
} else { \
|
142
|
-
a = rb_funcall(T,rb_intern("cast"),1,a); \
|
143
|
-
} \
|
144
|
-
}
|
20
|
+
VALUE
|
21
|
+
cumo_cuda_cublas_option_value(VALUE value, VALUE default_value);
|
145
22
|
|
146
23
|
#if defined(__cplusplus)
|
147
24
|
#if 0
|
@@ -21,6 +21,22 @@ cumo_cuda_runtime_check_status(cudaError_t status)
|
|
21
21
|
}
|
22
22
|
}
|
23
23
|
|
24
|
+
static inline int
|
25
|
+
cumo_cuda_runtime_get_device_count()
|
26
|
+
{
|
27
|
+
int device_count;
|
28
|
+
cumo_cuda_runtime_check_status(cudaGetDeviceCount(&device_count));
|
29
|
+
return device_count;
|
30
|
+
}
|
31
|
+
|
32
|
+
static inline int
|
33
|
+
cumo_cuda_runtime_get_device()
|
34
|
+
{
|
35
|
+
int device;
|
36
|
+
cumo_cuda_runtime_check_status(cudaGetDevice(&device));
|
37
|
+
return device;
|
38
|
+
}
|
39
|
+
|
24
40
|
static inline bool
|
25
41
|
cumo_cuda_runtime_is_device_memory(void* ptr)
|
26
42
|
{
|
@@ -1,11 +1,6 @@
|
|
1
1
|
#ifndef CUMO_INDEXER_H
|
2
2
|
#define CUMO_INDEXER_H
|
3
3
|
|
4
|
-
/* Add cumo_ prefix */
|
5
|
-
#define na_indexer_t cumo_na_indexer_t
|
6
|
-
#define na_iarray_t cumo_na_iarray_t
|
7
|
-
#define na_reduction_arg_t cumo_na_reduction_arg_t
|
8
|
-
|
9
4
|
#ifndef __CUDACC__
|
10
5
|
#include "cumo/narray.h"
|
11
6
|
#include "cumo/ndloop.h"
|
@@ -21,10 +16,10 @@
|
|
21
16
|
typedef struct {
|
22
17
|
unsigned char ndim; // # of dimensions
|
23
18
|
size_t total_size; // # of total elements
|
24
|
-
size_t shape[
|
25
|
-
uint64_t index[
|
19
|
+
size_t shape[CUMO_NA_MAX_DIMENSION]; // # of elements for each dimension
|
20
|
+
uint64_t index[CUMO_NA_MAX_DIMENSION]; // indicies for each dimension
|
26
21
|
uint64_t raw_index;
|
27
|
-
}
|
22
|
+
} cumo_na_indexer_t;
|
28
23
|
|
29
24
|
/* A structure to get data address with indexer.
|
30
25
|
*
|
@@ -32,24 +27,23 @@ typedef struct {
|
|
32
27
|
*/
|
33
28
|
typedef struct {
|
34
29
|
char* ptr;
|
35
|
-
ssize_t step[
|
36
|
-
}
|
30
|
+
ssize_t step[CUMO_NA_MAX_DIMENSION]; // or strides
|
31
|
+
} cumo_na_iarray_t;
|
37
32
|
|
38
33
|
typedef struct {
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
} na_reduction_arg_t;
|
34
|
+
cumo_na_iarray_t in;
|
35
|
+
cumo_na_iarray_t out;
|
36
|
+
cumo_na_indexer_t in_indexer;
|
37
|
+
cumo_na_indexer_t out_indexer;
|
38
|
+
} cumo_na_reduction_arg_t;
|
45
39
|
|
46
40
|
#ifndef __CUDACC__
|
47
|
-
extern int
|
41
|
+
extern int cumo_na_debug_flag; // narray.c
|
48
42
|
|
49
43
|
static void
|
50
|
-
|
44
|
+
print_cumo_na_indexer_t(cumo_na_indexer_t* indexer)
|
51
45
|
{
|
52
|
-
printf("
|
46
|
+
printf("cumo_na_indexer_t = 0x%"SZF"x {\n", (size_t)indexer);
|
53
47
|
printf(" ndim = %d\n", indexer->ndim);
|
54
48
|
printf(" total_size = %ld\n", indexer->total_size);
|
55
49
|
printf(" shape = 0x%"SZF"x\n", (size_t)indexer->shape);
|
@@ -60,9 +54,9 @@ print_na_indexer_t(na_indexer_t* indexer)
|
|
60
54
|
}
|
61
55
|
|
62
56
|
static void
|
63
|
-
|
57
|
+
print_cumo_na_iarray_t(cumo_na_iarray_t* iarray, unsigned char ndim)
|
64
58
|
{
|
65
|
-
printf("
|
59
|
+
printf("cumo_na_iarray_t = 0x%"SZF"x {\n", (size_t)iarray);
|
66
60
|
printf(" ptr = 0x%"SZF"x\n", (size_t)iarray->ptr);
|
67
61
|
printf(" step = 0x%"SZF"x\n", (size_t)iarray->step);
|
68
62
|
for (int i = 0; i < ndim; ++i) {
|
@@ -72,27 +66,25 @@ print_na_iarray_t(na_iarray_t* iarray, unsigned char ndim)
|
|
72
66
|
}
|
73
67
|
|
74
68
|
static void
|
75
|
-
|
69
|
+
print_cumo_na_reduction_arg_t(cumo_na_reduction_arg_t* arg)
|
76
70
|
{
|
77
|
-
printf("
|
71
|
+
printf("cumo_na_reduction_arg_t = 0x%"SZF"x {\n", (size_t)arg);
|
78
72
|
printf("--in--\n");
|
79
|
-
|
73
|
+
print_cumo_na_iarray_t(&arg->in, arg->in_indexer.ndim);
|
80
74
|
printf("--out--\n");
|
81
|
-
|
75
|
+
print_cumo_na_iarray_t(&arg->out, arg->out_indexer.ndim);
|
82
76
|
printf("--in_indexer--\n");
|
83
|
-
|
77
|
+
print_cumo_na_indexer_t(&arg->in_indexer);
|
84
78
|
printf("--out_indexer--\n");
|
85
|
-
|
86
|
-
printf("--reduce_indexer--\n");
|
87
|
-
print_na_indexer_t(&arg->reduce_indexer);
|
79
|
+
print_cumo_na_indexer_t(&arg->out_indexer);
|
88
80
|
printf("}\n");
|
89
81
|
}
|
90
82
|
|
91
|
-
// Note that you, then, have to call
|
92
|
-
static
|
93
|
-
|
83
|
+
// Note that you, then, have to call cumo_na_indexer_set to create index[]
|
84
|
+
static cumo_na_indexer_t
|
85
|
+
cumo_na_make_indexer(cumo_na_loop_args_t* arg)
|
94
86
|
{
|
95
|
-
|
87
|
+
cumo_na_indexer_t indexer;
|
96
88
|
indexer.ndim = arg->ndim;
|
97
89
|
indexer.total_size = 1;
|
98
90
|
for (int i = 0; i < arg->ndim; ++i) {
|
@@ -102,10 +94,10 @@ na_make_indexer(na_loop_args_t* arg)
|
|
102
94
|
return indexer;
|
103
95
|
}
|
104
96
|
|
105
|
-
static
|
106
|
-
|
97
|
+
static cumo_na_iarray_t
|
98
|
+
cumo_na_make_iarray_given_ndim(cumo_na_loop_args_t* arg, int ndim)
|
107
99
|
{
|
108
|
-
|
100
|
+
cumo_na_iarray_t iarray;
|
109
101
|
iarray.ptr = arg->ptr + arg->iter[0].pos;
|
110
102
|
for (int idim = ndim; --idim >= 0;) {
|
111
103
|
iarray.step[idim] = arg->iter[idim].step;
|
@@ -113,16 +105,16 @@ na_make_iarray_given_ndim(na_loop_args_t* arg, int ndim)
|
|
113
105
|
return iarray;
|
114
106
|
}
|
115
107
|
|
116
|
-
static
|
117
|
-
|
108
|
+
static cumo_na_iarray_t
|
109
|
+
cumo_na_make_iarray(cumo_na_loop_args_t* arg)
|
118
110
|
{
|
119
|
-
return
|
111
|
+
return cumo_na_make_iarray_given_ndim(arg, arg->ndim);
|
120
112
|
}
|
121
113
|
|
122
|
-
static
|
123
|
-
|
114
|
+
static cumo_na_reduction_arg_t
|
115
|
+
cumo_na_make_reduction_arg(cumo_na_loop_t* lp_user)
|
124
116
|
{
|
125
|
-
|
117
|
+
cumo_na_reduction_arg_t arg;
|
126
118
|
int i;
|
127
119
|
int in_ndim = lp_user->args[0].ndim;
|
128
120
|
|
@@ -131,33 +123,24 @@ na_make_reduction_arg(na_loop_t* lp_user)
|
|
131
123
|
// out shape = (2, 4, 6)
|
132
124
|
// reduce shape = (3, 5)
|
133
125
|
|
134
|
-
arg.in =
|
135
|
-
arg.in_indexer =
|
126
|
+
arg.in = cumo_na_make_iarray(&lp_user->args[0]);
|
127
|
+
arg.in_indexer = cumo_na_make_indexer(&lp_user->args[0]);
|
136
128
|
|
137
|
-
arg.reduce_indexer.ndim = 0;
|
138
|
-
arg.reduce_indexer.total_size = 1;
|
139
129
|
arg.out_indexer.ndim = 0;
|
140
130
|
arg.out_indexer.total_size = 1;
|
141
131
|
for (i = 0; i < in_ndim; ++i) {
|
142
|
-
if (
|
143
|
-
arg.reduce_indexer.shape[arg.reduce_indexer.ndim] = arg.in_indexer.shape[i];
|
144
|
-
arg.reduce_indexer.total_size *= arg.in_indexer.shape[i];
|
145
|
-
++arg.reduce_indexer.ndim;
|
146
|
-
} else {
|
132
|
+
if (!cumo_na_test_reduce(lp_user->reduce, i)) {
|
147
133
|
arg.out_indexer.shape[arg.out_indexer.ndim] = arg.in_indexer.shape[i];
|
148
134
|
arg.out_indexer.total_size *= arg.in_indexer.shape[i];
|
149
135
|
++arg.out_indexer.ndim;
|
150
136
|
}
|
151
137
|
}
|
152
|
-
arg.out =
|
138
|
+
arg.out = cumo_na_make_iarray_given_ndim(&lp_user->args[1], arg.out_indexer.ndim);
|
153
139
|
|
154
|
-
if (
|
155
|
-
|
140
|
+
if (cumo_na_debug_flag) {
|
141
|
+
print_cumo_na_reduction_arg_t(&arg);
|
156
142
|
}
|
157
143
|
|
158
|
-
assert(arg.reduce_indexer.ndim == lp_user->reduce_dim);
|
159
|
-
assert(arg.in_indexer.ndim == arg.reduce_indexer.ndim + arg.out_indexer.ndim);
|
160
|
-
|
161
144
|
return arg;
|
162
145
|
}
|
163
146
|
|
@@ -169,7 +152,7 @@ na_make_reduction_arg(na_loop_t* lp_user)
|
|
169
152
|
|
170
153
|
__host__ __device__
|
171
154
|
static inline void
|
172
|
-
cumo_na_indexer_set_dim(
|
155
|
+
cumo_na_indexer_set_dim(cumo_na_indexer_t* indexer, uint64_t i) {
|
173
156
|
indexer->raw_index = i;
|
174
157
|
for (int j = indexer->ndim; --j >= 0;) {
|
175
158
|
indexer->index[j] = i % indexer->shape[j];
|
@@ -181,7 +164,7 @@ cumo_na_indexer_set_dim(na_indexer_t* indexer, uint64_t i) {
|
|
181
164
|
#define CUMO_NA_INDEXER_SET(NDIM) \
|
182
165
|
__host__ __device__ \
|
183
166
|
static inline void \
|
184
|
-
cumo_na_indexer_set_dim##NDIM(
|
167
|
+
cumo_na_indexer_set_dim##NDIM(cumo_na_indexer_t* indexer, uint64_t i) { \
|
185
168
|
indexer->raw_index = i; \
|
186
169
|
for (int j = NDIM; --j >= 0;) { \
|
187
170
|
indexer->index[j] = i % indexer->shape[j]; \
|
@@ -196,13 +179,13 @@ CUMO_NA_INDEXER_SET(0)
|
|
196
179
|
|
197
180
|
__host__ __device__
|
198
181
|
static inline void
|
199
|
-
cumo_na_indexer_set_dim1(
|
182
|
+
cumo_na_indexer_set_dim1(cumo_na_indexer_t* indexer, uint64_t i) {
|
200
183
|
indexer->raw_index = i;
|
201
184
|
}
|
202
185
|
|
203
186
|
__host__ __device__
|
204
187
|
static inline char*
|
205
|
-
cumo_na_iarray_at_dim(
|
188
|
+
cumo_na_iarray_at_dim(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) {
|
206
189
|
char* ptr = iarray->ptr;
|
207
190
|
for (int idim = 0; idim < indexer->ndim; ++idim) {
|
208
191
|
ptr += iarray->step[idim] * indexer->index[idim];
|
@@ -214,7 +197,7 @@ cumo_na_iarray_at_dim(na_iarray_t* iarray, na_indexer_t* indexer) {
|
|
214
197
|
#define CUMO_NA_IARRAY_AT(NDIM) \
|
215
198
|
__host__ __device__ \
|
216
199
|
static inline char* \
|
217
|
-
cumo_na_iarray_at_dim##NDIM(
|
200
|
+
cumo_na_iarray_at_dim##NDIM(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) { \
|
218
201
|
char* ptr = iarray->ptr; \
|
219
202
|
for (int idim = 0; idim < NDIM; ++idim) { \
|
220
203
|
ptr += iarray->step[idim] * indexer->index[idim]; \
|
@@ -229,7 +212,7 @@ CUMO_NA_IARRAY_AT(0)
|
|
229
212
|
|
230
213
|
__host__ __device__
|
231
214
|
static inline char*
|
232
|
-
cumo_na_iarray_at_dim1(
|
215
|
+
cumo_na_iarray_at_dim1(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) {
|
233
216
|
return iarray->ptr + iarray->step[0] * indexer->raw_index;
|
234
217
|
}
|
235
218
|
|
@@ -3,132 +3,78 @@
|
|
3
3
|
|
4
4
|
void cumo_debug_breakpoint(void);
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
VALUE
|
11
|
-
|
12
|
-
|
13
|
-
VALUE
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
#define na_make_view cumo_nary_make_view
|
19
|
-
VALUE cumo_nary_make_view(VALUE self);
|
20
|
-
|
21
|
-
#define na_s_allocate cumo_nary_s_allocate
|
22
|
-
VALUE cumo_nary_s_allocate(VALUE klass);
|
23
|
-
#define na_s_allocate_view cumo_nary_s_allocate_view
|
24
|
-
VALUE cumo_nary_s_allocate_view(VALUE klass);
|
25
|
-
#define na_s_new_like cumo_nary_s_new_like
|
26
|
-
VALUE cumo_nary_s_new_like(VALUE type, VALUE obj);
|
27
|
-
|
28
|
-
#define na_alloc_shape cumo_na_alloc_shape
|
29
|
-
void cumo_na_alloc_shape(narray_t *na, int ndim);
|
30
|
-
#define na_array_to_internal_shape cumo_na_array_to_internal_shape
|
6
|
+
VALUE cumo_na_new(VALUE elem, int ndim, size_t *shape);
|
7
|
+
VALUE cumo_na_view_new(VALUE elem, int ndim, size_t *shape);
|
8
|
+
VALUE cumo_na_debug_info(VALUE);
|
9
|
+
|
10
|
+
VALUE cumo_na_make_view(VALUE self);
|
11
|
+
|
12
|
+
VALUE cumo_na_s_allocate(VALUE klass);
|
13
|
+
VALUE cumo_na_s_allocate_view(VALUE klass);
|
14
|
+
VALUE cumo_na_s_new_like(VALUE type, VALUE obj);
|
15
|
+
|
16
|
+
void cumo_na_alloc_shape(cumo_narray_t *na, int ndim);
|
31
17
|
void cumo_na_array_to_internal_shape(VALUE self, VALUE ary, size_t *shape);
|
32
|
-
#define na_index_arg_to_internal_order cumo_na_index_arg_to_internal_order
|
33
18
|
void cumo_na_index_arg_to_internal_order(int argc, VALUE *argv, VALUE self);
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
#define na_check_ladder cumo_nary_check_ladder
|
59
|
-
VALUE cumo_nary_check_ladder(VALUE self, int start_dim);
|
60
|
-
#define na_check_contiguous cumo_nary_check_contiguous
|
61
|
-
VALUE cumo_nary_check_contiguous(VALUE self);
|
62
|
-
|
63
|
-
#define na_flatten_dim cumo_nary_flatten_dim
|
64
|
-
VALUE cumo_nary_flatten_dim(VALUE self, int sd);
|
65
|
-
|
66
|
-
#define na_flatten cumo_nary_flatten
|
67
|
-
VALUE cumo_nary_flatten(VALUE);
|
68
|
-
|
69
|
-
#define na_copy cumo_nary_dup
|
70
|
-
VALUE cumo_nary_dup(VALUE);
|
71
|
-
|
72
|
-
#define na_store cumo_nary_store
|
73
|
-
VALUE cumo_nary_store(VALUE self, VALUE src);
|
74
|
-
|
75
|
-
#define na_upcast cumo_na_upcast
|
19
|
+
void cumo_na_setup_shape(cumo_narray_t *na, int ndim, size_t *shape);
|
20
|
+
|
21
|
+
unsigned int cumo_na_element_stride(VALUE nary);
|
22
|
+
size_t cumo_na_dtype_element_stride(VALUE klass);
|
23
|
+
|
24
|
+
char *cumo_na_get_pointer(VALUE);
|
25
|
+
char *cumo_na_get_pointer_for_write(VALUE);
|
26
|
+
char *cumo_na_get_pointer_for_read(VALUE);
|
27
|
+
char *cumo_na_get_pointer_for_read_write(VALUE);
|
28
|
+
size_t cumo_na_get_offset(VALUE self);
|
29
|
+
|
30
|
+
void cumo_na_copy_flags(VALUE src, VALUE dst);
|
31
|
+
|
32
|
+
VALUE cumo_na_check_ladder(VALUE self, int start_dim);
|
33
|
+
VALUE cumo_na_check_contiguous(VALUE self);
|
34
|
+
|
35
|
+
VALUE cumo_na_flatten_dim(VALUE self, int sd);
|
36
|
+
|
37
|
+
VALUE cumo_na_flatten(VALUE);
|
38
|
+
|
39
|
+
VALUE cumo_na_copy(VALUE);
|
40
|
+
|
41
|
+
VALUE cumo_na_store(VALUE self, VALUE src);
|
42
|
+
|
76
43
|
VALUE cumo_na_upcast(VALUE type1, VALUE type2);
|
77
44
|
|
78
|
-
#define na_release_lock cumo_na_release_lock
|
79
45
|
void cumo_na_release_lock(VALUE); // currently do nothing
|
80
46
|
|
81
47
|
// used in reduce methods
|
82
|
-
|
83
|
-
|
84
|
-
VALUE cumo_nary_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
|
85
|
-
ndfunc_t *ndf, na_iter_func_t nan_iter);
|
48
|
+
VALUE cumo_na_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
|
49
|
+
cumo_ndfunc_t *ndf, cumo_na_iter_func_t nan_iter);
|
86
50
|
|
87
|
-
|
88
|
-
|
89
|
-
VALUE cumo_nary_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
|
90
|
-
ndfunc_t *ndf);
|
51
|
+
VALUE cumo_na_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
|
52
|
+
cumo_ndfunc_t *ndf);
|
91
53
|
|
92
54
|
// ndloop
|
93
|
-
|
94
|
-
VALUE
|
95
|
-
|
96
|
-
VALUE
|
97
|
-
|
98
|
-
VALUE
|
99
|
-
|
100
|
-
VALUE
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
#define na_ndloop_with_index cumo_na_ndloop_with_index
|
111
|
-
VALUE cumo_na_ndloop_with_index(ndfunc_t *nf, int argc, ...);
|
112
|
-
|
113
|
-
#define na_info_str cumo_nary_info_str
|
114
|
-
VALUE cumo_nary_info_str(VALUE);
|
115
|
-
|
116
|
-
#define na_test_reduce cumo_nary_test_reduce
|
117
|
-
bool cumo_nary_test_reduce(VALUE reduce, int dim);
|
118
|
-
|
119
|
-
#define nary_step_array_index cumo_nary_step_array_index
|
120
|
-
void cumo_nary_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
|
121
|
-
#define nary_step_sequence cumo_nary_step_sequence
|
122
|
-
void cumo_nary_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
|
55
|
+
VALUE cumo_na_ndloop(cumo_ndfunc_t *nf, int argc, ...);
|
56
|
+
VALUE cumo_na_ndloop2(cumo_ndfunc_t *nf, VALUE args);
|
57
|
+
VALUE cumo_na_ndloop3(cumo_ndfunc_t *nf, void *ptr, int argc, ...);
|
58
|
+
VALUE cumo_na_ndloop4(cumo_ndfunc_t *nf, void *ptr, VALUE args);
|
59
|
+
|
60
|
+
VALUE cumo_na_ndloop_cast_narray_to_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE fmt);
|
61
|
+
VALUE cumo_na_ndloop_store_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE rary);
|
62
|
+
VALUE cumo_na_ndloop_store_rarray2(cumo_ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt);
|
63
|
+
VALUE cumo_na_ndloop_inspect(VALUE nary, cumo_na_text_func_t func, VALUE opt);
|
64
|
+
VALUE cumo_na_ndloop_with_index(cumo_ndfunc_t *nf, int argc, ...);
|
65
|
+
|
66
|
+
VALUE cumo_na_info_str(VALUE);
|
67
|
+
|
68
|
+
bool cumo_na_test_reduce(VALUE reduce, int dim);
|
69
|
+
|
70
|
+
void cumo_na_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
|
71
|
+
void cumo_na_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
|
123
72
|
|
124
73
|
// used in aref, aset
|
125
|
-
|
126
|
-
|
127
|
-
#define na_aref_main cumo_nary_aref_main
|
128
|
-
VALUE cumo_nary_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
74
|
+
int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
|
75
|
+
VALUE cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
129
76
|
|
130
77
|
// defined in array, used in math
|
131
|
-
#define na_ary_composition_dtype cumo_na_ary_composition_dtype
|
132
78
|
VALUE cumo_na_ary_composition_dtype(VALUE ary);
|
133
79
|
|
134
80
|
#include "ruby/version.h"
|