cumo 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/3rd_party/LICENSE.txt +60 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
- data/LICENSE.txt +1 -62
- data/README.md +33 -29
- data/bench/cumo_bench.rb +47 -25
- data/bench/numo_bench.rb +27 -25
- data/docs/src-tree.md +16 -0
- data/ext/cumo/cuda/cublas.c +69 -219
- data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
- data/ext/cumo/cuda/runtime.c +2 -14
- data/ext/cumo/cumo.c +16 -16
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
- data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
- data/ext/cumo/include/cumo/indexer.h +46 -63
- data/ext/cumo/include/cumo/intern.h +58 -112
- data/ext/cumo/include/cumo/narray.h +214 -185
- data/ext/cumo/include/cumo/narray_kernel.h +66 -37
- data/ext/cumo/include/cumo/ndloop.h +42 -42
- data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
- data/ext/cumo/include/cumo/template.h +56 -51
- data/ext/cumo/include/cumo/template_kernel.h +31 -31
- data/ext/cumo/include/cumo/types/bit.h +3 -3
- data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
- data/ext/cumo/include/cumo/types/complex.h +126 -126
- data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
- data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
- data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
- data/ext/cumo/include/cumo/types/scomplex.h +5 -5
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
- data/ext/cumo/narray/array.c +143 -143
- data/ext/cumo/narray/data.c +184 -184
- data/ext/cumo/narray/gen/cogen.rb +5 -2
- data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
- data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
- data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
- data/ext/cumo/narray/gen/erbln.rb +132 -0
- data/ext/cumo/narray/gen/erbpp2.rb +18 -13
- data/ext/cumo/narray/gen/narray_def.rb +3 -3
- data/ext/cumo/narray/gen/spec.rb +2 -2
- data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
- data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
- data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
- data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
- data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
- data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
- data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
- data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
- data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/each.c +9 -9
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
- data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
- data/ext/cumo/narray/gen/tmpl/format.c +11 -11
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
- data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
- data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
- data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
- data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
- data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
- data/ext/cumo/narray/gen/tmpl/median.c +10 -10
- data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
- data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
- data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
- data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
- data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
- data/ext/cumo/narray/gen/tmpl/store.c +6 -6
- data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
- data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
- data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
- data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
- data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
- data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
- data/ext/cumo/narray/index.c +213 -213
- data/ext/cumo/narray/math.c +27 -27
- data/ext/cumo/narray/narray.c +484 -484
- data/ext/cumo/narray/ndloop.c +259 -258
- data/ext/cumo/narray/rand.c +3 -3
- data/ext/cumo/narray/step.c +70 -70
- data/ext/cumo/narray/struct.c +139 -139
- metadata +6 -7
- data/ext/cumo/include/cumo/intern_fwd.h +0 -38
- data/lib/erbpp.rb +0 -294
- data/lib/erbpp/line_number.rb +0 -137
- data/lib/erbpp/narray_def.rb +0 -381
@@ -1,146 +1,151 @@
|
|
1
1
|
#ifndef CUMO_TEMPLATE_H
|
2
2
|
#define CUMO_TEMPLATE_H
|
3
3
|
|
4
|
-
#define
|
4
|
+
#define CUMO_INIT_COUNTER( lp, c ) \
|
5
5
|
{ c = (lp)->n[0]; }
|
6
6
|
|
7
|
-
#define
|
8
|
-
#define
|
9
|
-
#define
|
10
|
-
#define
|
11
|
-
#define
|
12
|
-
#define
|
7
|
+
#define CUMO_NDL_CNT(lp) ((lp)->n[0])
|
8
|
+
#define CUMO_NDL_ARG(lp,i) ((lp)->args[i])
|
9
|
+
#define CUMO_NDL_PTR(lp,i) ((lp)->args[i].ptr + (lp)->args[i].iter[0].pos)
|
10
|
+
#define CUMO_NDL_STEP(lp,i) ((lp)->args[i].iter[0].step)
|
11
|
+
#define CUMO_NDL_IDX(lp,i) ((lp)->args[i].iter[0].idx)
|
12
|
+
#define CUMO_NDL_ESZ(lp,i) ((lp)->args[i].elmsz)
|
13
|
+
#define CUMO_NDL_SHAPE(lp,i) ((lp)->args[i].shape)
|
13
14
|
|
14
|
-
#define
|
15
|
+
#define CUMO_NDL_ARG_STEP(arg,idim) ((arg).iter[idim].step)
|
16
|
+
#define CUMO_NDL_ARG_IDX(arg,idim) ((arg).iter[idim].idx)
|
17
|
+
#define CUMO_NDL_ARG_SHAPE(arg,idim) ((arg).shape[idim])
|
18
|
+
|
19
|
+
#define CUMO_INIT_PTR( lp, i, pt, st ) \
|
15
20
|
{ \
|
16
21
|
pt = ((lp)->args[i]).ptr + ((lp)->args[i].iter[0]).pos; \
|
17
22
|
st = ((lp)->args[i].iter[0]).step; \
|
18
23
|
}
|
19
24
|
|
20
|
-
#define
|
25
|
+
#define CUMO_INIT_PTR_IDX( lp, i, pt, st, id ) \
|
21
26
|
{ \
|
22
27
|
pt = ((lp)->args[i]).ptr + ((lp)->args[i].iter[0]).pos; \
|
23
28
|
st = ((lp)->args[i].iter[0]).step; \
|
24
29
|
id = ((lp)->args[i].iter[0]).idx; \
|
25
30
|
}
|
26
31
|
|
27
|
-
#define
|
32
|
+
#define CUMO_INIT_ELMSIZE( lp, i, es ) \
|
28
33
|
{ \
|
29
34
|
es = ((lp)->args[i]).elmsz; \
|
30
35
|
}
|
31
36
|
|
32
|
-
#define
|
37
|
+
#define CUMO_INIT_PTR_BIT( lp, i, ad, ps, st ) \
|
33
38
|
{ \
|
34
39
|
ps = ((lp)->args[i].iter[0]).pos; \
|
35
|
-
ad = (
|
36
|
-
ps %=
|
40
|
+
ad = (CUMO_BIT_DIGIT*)(((lp)->args[i]).ptr) + ps/CUMO_NB; \
|
41
|
+
ps %= CUMO_NB; \
|
37
42
|
st = ((lp)->args[i].iter[0]).step; \
|
38
43
|
}
|
39
44
|
|
40
|
-
#define
|
45
|
+
#define CUMO_INIT_PTR_BIT_IDX( lp, i, ad, ps, st, id ) \
|
41
46
|
{ \
|
42
47
|
ps = ((lp)->args[i].iter[0]).pos; \
|
43
|
-
ad = (
|
44
|
-
ps %=
|
48
|
+
ad = (CUMO_BIT_DIGIT*)(((lp)->args[i]).ptr) + ps/CUMO_NB; \
|
49
|
+
ps %= CUMO_NB; \
|
45
50
|
st = ((lp)->args[i].iter[0]).step; \
|
46
51
|
id = ((lp)->args[i].iter[0]).idx; \
|
47
52
|
}
|
48
53
|
|
49
|
-
#define
|
54
|
+
#define CUMO_GET_DATA( ptr, type, val ) \
|
50
55
|
{ \
|
51
56
|
val = *(type*)(ptr); \
|
52
57
|
}
|
53
58
|
|
54
|
-
#define
|
59
|
+
#define CUMO_SET_DATA( ptr, type, val ) \
|
55
60
|
{ \
|
56
61
|
*(type*)(ptr) = val; \
|
57
62
|
}
|
58
63
|
|
59
|
-
#define
|
64
|
+
#define CUMO_GET_DATA_STRIDE( ptr, step, type, val ) \
|
60
65
|
{ \
|
61
66
|
val = *(type*)(ptr); \
|
62
67
|
ptr += step; \
|
63
68
|
}
|
64
69
|
|
65
|
-
#define
|
70
|
+
#define CUMO_GET_DATA_INDEX( ptr, idx, type, val ) \
|
66
71
|
{ \
|
67
72
|
val = *(type*)(ptr + *idx); \
|
68
73
|
idx++; \
|
69
74
|
}
|
70
75
|
|
71
|
-
#define
|
76
|
+
#define CUMO_SET_DATA_STRIDE( ptr, step, type, val ) \
|
72
77
|
{ \
|
73
78
|
*(type*)(ptr) = val; \
|
74
79
|
ptr += step; \
|
75
80
|
}
|
76
81
|
|
77
|
-
#define
|
82
|
+
#define CUMO_SET_DATA_INDEX( ptr, idx, type, val ) \
|
78
83
|
{ \
|
79
84
|
*(type*)(ptr + *idx) = val; \
|
80
85
|
idx++; \
|
81
86
|
}
|
82
87
|
|
83
|
-
#define
|
88
|
+
#define CUMO_LOAD_BIT( adr, pos, val ) \
|
84
89
|
{ \
|
85
|
-
size_t dig = (pos) /
|
86
|
-
int bit = (pos) %
|
87
|
-
val = (((
|
90
|
+
size_t dig = (pos) / CUMO_NB; \
|
91
|
+
int bit = (pos) % CUMO_NB; \
|
92
|
+
val = (((CUMO_BIT_DIGIT*)(adr))[dig]>>(bit)) & 1u; \
|
88
93
|
}
|
89
94
|
|
90
|
-
#define
|
95
|
+
#define CUMO_LOAD_BIT_STEP( adr, pos, step, idx, val ) \
|
91
96
|
{ \
|
92
97
|
size_t dig; int bit; \
|
93
98
|
if (idx) { \
|
94
|
-
dig = ((pos) + *(idx)) /
|
95
|
-
bit = ((pos) + *(idx)) %
|
99
|
+
dig = ((pos) + *(idx)) / CUMO_NB; \
|
100
|
+
bit = ((pos) + *(idx)) % CUMO_NB; \
|
96
101
|
idx++; \
|
97
102
|
} else { \
|
98
|
-
dig = (pos) /
|
99
|
-
bit = (pos) %
|
103
|
+
dig = (pos) / CUMO_NB; \
|
104
|
+
bit = (pos) % CUMO_NB; \
|
100
105
|
pos += step; \
|
101
106
|
} \
|
102
|
-
val = (((
|
107
|
+
val = (((CUMO_BIT_DIGIT*)(adr))[dig]>>bit) & 1u; \
|
103
108
|
}
|
104
109
|
|
105
|
-
#define
|
110
|
+
#define CUMO_STORE_BIT(adr,pos,val) \
|
106
111
|
{ \
|
107
|
-
size_t dig = (pos) /
|
108
|
-
int bit = (pos) %
|
109
|
-
((
|
110
|
-
(((
|
112
|
+
size_t dig = (pos) / CUMO_NB; \
|
113
|
+
int bit = (pos) % CUMO_NB; \
|
114
|
+
((CUMO_BIT_DIGIT*)(adr))[dig] = \
|
115
|
+
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
|
111
116
|
}
|
112
117
|
// val -> val&1 ??
|
113
118
|
|
114
|
-
#define
|
119
|
+
#define CUMO_STORE_BIT_STEP( adr, pos, step, idx, val )\
|
115
120
|
{ \
|
116
121
|
size_t dig; int bit; \
|
117
122
|
if (idx) { \
|
118
|
-
dig = ((pos) + *(idx)) /
|
119
|
-
bit = ((pos) + *(idx)) %
|
123
|
+
dig = ((pos) + *(idx)) / CUMO_NB; \
|
124
|
+
bit = ((pos) + *(idx)) % CUMO_NB; \
|
120
125
|
idx++; \
|
121
126
|
} else { \
|
122
|
-
dig = (pos) /
|
123
|
-
bit = (pos) %
|
127
|
+
dig = (pos) / CUMO_NB; \
|
128
|
+
bit = (pos) % CUMO_NB; \
|
124
129
|
pos += step; \
|
125
130
|
} \
|
126
|
-
((
|
127
|
-
(((
|
131
|
+
((CUMO_BIT_DIGIT*)(adr))[dig] = \
|
132
|
+
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
|
128
133
|
}
|
129
134
|
// val -> val&1 ??
|
130
135
|
|
131
136
|
static inline int
|
132
|
-
|
137
|
+
cumo_is_aligned(const void *ptr, const size_t alignment)
|
133
138
|
{
|
134
139
|
return ((size_t)(ptr) & ((alignment)-1)) == 0;
|
135
140
|
}
|
136
141
|
|
137
142
|
static inline int
|
138
|
-
|
143
|
+
cumo_is_aligned_step(const ssize_t step, const size_t alignment)
|
139
144
|
{
|
140
145
|
return ((step) & ((alignment)-1)) == 0;
|
141
146
|
}
|
142
147
|
|
143
|
-
#define
|
148
|
+
#define CUMO_SHOW_WARNING_ONCE( c_str ) \
|
144
149
|
{ \
|
145
150
|
static bool show_warning = true; \
|
146
151
|
if (show_warning) { \
|
@@ -149,10 +154,10 @@ is_aligned_step(const ssize_t step, const size_t alignment)
|
|
149
154
|
} \
|
150
155
|
}
|
151
156
|
|
152
|
-
#define
|
153
|
-
|
157
|
+
#define CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE( func_name, type_name ) \
|
158
|
+
CUMO_SHOW_WARNING_ONCE("Warning: FIXME: Method \"" func_name "\" for dtype \"" type_name "\" synchronizes with CPU.\n")
|
154
159
|
|
155
|
-
#define
|
156
|
-
|
160
|
+
#define CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE( func_name, type_name ) \
|
161
|
+
CUMO_SHOW_WARNING_ONCE("Warning: Method \"" func_name "\" for dtype \"" type_name "\" synchronizes with CPU.\n")
|
157
162
|
|
158
163
|
#endif /* ifndef CUMO_TEMPLATE_H */
|
@@ -1,76 +1,76 @@
|
|
1
1
|
#ifndef CUMO_TEMPLATE_KERNEL_H
|
2
2
|
#define CUMO_TEMPLATE_KERNEL_H
|
3
3
|
|
4
|
-
#define
|
4
|
+
#define CUMO_LOAD_BIT( adr, pos, val ) \
|
5
5
|
{ \
|
6
|
-
size_t dig = (size_t)(pos) /
|
7
|
-
int bit = (size_t)(pos) %
|
8
|
-
val = (((
|
6
|
+
size_t dig = (size_t)(pos) / CUMO_NB; \
|
7
|
+
int bit = (size_t)(pos) % CUMO_NB; \
|
8
|
+
val = (((CUMO_BIT_DIGIT*)(adr))[dig]>>(bit)) & 1u; \
|
9
9
|
}
|
10
10
|
|
11
|
-
#define
|
11
|
+
#define CUMO_LOAD_BIT_STEP( adr, pos, step, idx, val ) \
|
12
12
|
{ \
|
13
13
|
size_t dig; int bit; \
|
14
14
|
if (idx) { \
|
15
|
-
dig = (size_t)((pos) + *(idx)) /
|
16
|
-
bit = (size_t)((pos) + *(idx)) %
|
15
|
+
dig = (size_t)((pos) + *(idx)) / CUMO_NB; \
|
16
|
+
bit = (size_t)((pos) + *(idx)) % CUMO_NB; \
|
17
17
|
idx++; \
|
18
18
|
} else { \
|
19
|
-
dig = (size_t)(pos) /
|
20
|
-
bit = (size_t)(pos) %
|
19
|
+
dig = (size_t)(pos) / CUMO_NB; \
|
20
|
+
bit = (size_t)(pos) % CUMO_NB; \
|
21
21
|
pos += step; \
|
22
22
|
} \
|
23
|
-
val = (((
|
23
|
+
val = (((CUMO_BIT_DIGIT*)(adr))[dig]>>bit) & 1u; \
|
24
24
|
}
|
25
25
|
|
26
|
-
#define
|
26
|
+
#define CUMO_STORE_BIT(adr,pos,val) \
|
27
27
|
{ \
|
28
|
-
size_t dig = (size_t)(pos) /
|
29
|
-
int bit = (size_t)(pos) %
|
28
|
+
size_t dig = (size_t)(pos) / CUMO_NB; \
|
29
|
+
int bit = (size_t)(pos) % CUMO_NB; \
|
30
30
|
if (val) { \
|
31
|
-
atomicOr((
|
31
|
+
atomicOr((CUMO_BIT_DIGIT*)(adr) + (dig), (val)<<(bit)); \
|
32
32
|
} else { \
|
33
|
-
atomicAnd((
|
33
|
+
atomicAnd((CUMO_BIT_DIGIT*)(adr) + (dig), ~(1u<<(bit))); \
|
34
34
|
} \
|
35
35
|
}
|
36
36
|
// val -> val&1 ??
|
37
37
|
|
38
|
-
#define
|
38
|
+
#define CUMO_STORE_BIT_STEP( adr, pos, step, idx, val ) \
|
39
39
|
{ \
|
40
40
|
size_t dig; int bit; \
|
41
41
|
if (idx) { \
|
42
|
-
dig = (size_t)((pos) + *(idx)) /
|
43
|
-
bit = (size_t)((pos) + *(idx)) %
|
42
|
+
dig = (size_t)((pos) + *(idx)) / CUMO_NB; \
|
43
|
+
bit = (size_t)((pos) + *(idx)) % CUMO_NB; \
|
44
44
|
idx++; \
|
45
45
|
} else { \
|
46
|
-
dig = (size_t)(pos) /
|
47
|
-
bit = (size_t)(pos) %
|
46
|
+
dig = (size_t)(pos) / CUMO_NB; \
|
47
|
+
bit = (size_t)(pos) % CUMO_NB; \
|
48
48
|
pos += step; \
|
49
49
|
} \
|
50
50
|
if (val) { \
|
51
|
-
atomicOr((
|
51
|
+
atomicOr((CUMO_BIT_DIGIT*)(adr) + (dig), (val)<<(bit)); \
|
52
52
|
} else { \
|
53
|
-
atomicAnd((
|
53
|
+
atomicAnd((CUMO_BIT_DIGIT*)(adr) + (dig), ~((1u)<<(bit))); \
|
54
54
|
} \
|
55
55
|
}
|
56
56
|
// val -> val&1 ??
|
57
57
|
|
58
|
-
#define
|
59
|
-
#define
|
58
|
+
#define CUMO_MAX_BLOCK_DIM 128
|
59
|
+
#define CUMO_MAX_GRID_DIM 2147483647 // ref. http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
|
60
60
|
|
61
61
|
static inline size_t
|
62
|
-
|
62
|
+
cumo_get_grid_dim(size_t n)
|
63
63
|
{
|
64
|
-
size_t
|
65
|
-
if (
|
66
|
-
return
|
64
|
+
size_t grid_dim = (n / CUMO_MAX_BLOCK_DIM) + 1;
|
65
|
+
if (grid_dim > CUMO_MAX_GRID_DIM) grid_dim = CUMO_MAX_GRID_DIM;
|
66
|
+
return grid_dim;
|
67
67
|
}
|
68
68
|
|
69
69
|
static inline size_t
|
70
|
-
|
70
|
+
cumo_get_block_dim(size_t n)
|
71
71
|
{
|
72
|
-
size_t
|
73
|
-
return
|
72
|
+
size_t block_dim = (n > CUMO_MAX_BLOCK_DIM) ? CUMO_MAX_BLOCK_DIM : n;
|
73
|
+
return block_dim;
|
74
74
|
}
|
75
75
|
|
76
76
|
|
@@ -1,5 +1,5 @@
|
|
1
|
-
typedef
|
2
|
-
typedef
|
1
|
+
typedef CUMO_BIT_DIGIT dtype;
|
2
|
+
typedef CUMO_BIT_DIGIT rtype;
|
3
3
|
#define cT cumo_cBit
|
4
4
|
#define cRT cT
|
5
5
|
|
@@ -30,7 +30,7 @@ typedef BIT_DIGIT rtype;
|
|
30
30
|
#define m_count_false(x) ((x)==0)
|
31
31
|
#define m_count_false_cpu(x) ((x)==0)
|
32
32
|
|
33
|
-
static inline
|
33
|
+
static inline CUMO_BIT_DIGIT m_num_to_data(VALUE num) {
|
34
34
|
if (RTEST(num)) {
|
35
35
|
if (!RTEST(rb_equal(num,INT2FIX(0)))) {
|
36
36
|
return 1;
|
@@ -1,165 +1,165 @@
|
|
1
1
|
static inline dtype c_new(rtype r, rtype i) {
|
2
2
|
dtype z;
|
3
|
-
|
4
|
-
|
3
|
+
CUMO_REAL(z) = r;
|
4
|
+
CUMO_IMAG(z) = i;
|
5
5
|
return z;
|
6
6
|
}
|
7
7
|
|
8
8
|
static inline dtype c_set_real(dtype x, rtype r) {
|
9
|
-
|
9
|
+
CUMO_REAL(x)=r;
|
10
10
|
return x;
|
11
11
|
}
|
12
12
|
|
13
13
|
static inline dtype c_set_imag(dtype x, rtype i) {
|
14
|
-
|
14
|
+
CUMO_IMAG(x)=i;
|
15
15
|
return x;
|
16
16
|
}
|
17
17
|
|
18
18
|
static inline VALUE COMP2NUM(dtype x) {
|
19
19
|
VALUE v;
|
20
20
|
v = rb_funcall(rb_intern("Kernel"), rb_intern("Complex"), 2,
|
21
|
-
rb_float_new(
|
21
|
+
rb_float_new(CUMO_REAL(x)), rb_float_new(CUMO_IMAG(x)));
|
22
22
|
return v;
|
23
23
|
}
|
24
24
|
|
25
25
|
static inline dtype NUM2COMP(VALUE v) {
|
26
26
|
dtype z;
|
27
|
-
|
28
|
-
|
27
|
+
CUMO_REAL(z) = NUM2DBL(rb_funcall(v,cumo_id_real,0));
|
28
|
+
CUMO_IMAG(z) = NUM2DBL(rb_funcall(v,cumo_id_imag,0));
|
29
29
|
return z;
|
30
30
|
}
|
31
31
|
|
32
|
-
#define c_is_zero(x) (
|
33
|
-
#define c_eq(x,y) (
|
34
|
-
#define c_ne(x,y) (
|
35
|
-
#define c_isnan(x) (isnan(
|
36
|
-
#define c_isinf(x) (isinf(
|
37
|
-
#define c_isposinf(x) ((isinf(
|
38
|
-
(isinf(
|
39
|
-
#define c_isneginf(x) ((isinf(
|
40
|
-
(isinf(
|
41
|
-
#define c_isfinite(x) (isfinite(
|
32
|
+
#define c_is_zero(x) (CUMO_REAL(x)==0 && CUMO_IMAG(x)==0)
|
33
|
+
#define c_eq(x,y) (CUMO_REAL(x)==CUMO_REAL(y) && CUMO_IMAG(x)==CUMO_IMAG(y))
|
34
|
+
#define c_ne(x,y) (CUMO_REAL(x)!=CUMO_REAL(y) || CUMO_IMAG(x)!=CUMO_IMAG(y))
|
35
|
+
#define c_isnan(x) (isnan(CUMO_REAL(x)) || isnan(CUMO_IMAG(x)))
|
36
|
+
#define c_isinf(x) (isinf(CUMO_REAL(x)) || isinf(CUMO_IMAG(x)))
|
37
|
+
#define c_isposinf(x) ((isinf(CUMO_REAL(x)) && signbit(CUMO_REAL(x))==0) || \
|
38
|
+
(isinf(CUMO_IMAG(x)) && signbit(CUMO_IMAG(x))==0))
|
39
|
+
#define c_isneginf(x) ((isinf(CUMO_REAL(x)) && signbit(CUMO_REAL(x))) || \
|
40
|
+
(isinf(CUMO_IMAG(x)) && signbit(CUMO_IMAG(x))))
|
41
|
+
#define c_isfinite(x) (isfinite(CUMO_REAL(x)) && isfinite(CUMO_IMAG(x)))
|
42
42
|
|
43
43
|
static inline dtype c_zero() {
|
44
44
|
dtype z;
|
45
|
-
|
46
|
-
|
45
|
+
CUMO_REAL(z) = 0;
|
46
|
+
CUMO_IMAG(z) = 0;
|
47
47
|
return z;
|
48
48
|
}
|
49
49
|
|
50
50
|
static inline dtype c_one() {
|
51
51
|
dtype z;
|
52
|
-
|
53
|
-
|
52
|
+
CUMO_REAL(z) = 1;
|
53
|
+
CUMO_IMAG(z) = 0;
|
54
54
|
return z;
|
55
55
|
}
|
56
56
|
|
57
57
|
static inline dtype c_minus(dtype x) {
|
58
58
|
dtype z;
|
59
|
-
|
60
|
-
|
59
|
+
CUMO_REAL(z) = -CUMO_REAL(x);
|
60
|
+
CUMO_IMAG(z) = -CUMO_IMAG(x);
|
61
61
|
return z;
|
62
62
|
}
|
63
63
|
|
64
64
|
static inline dtype c_im(dtype x) {
|
65
65
|
dtype z;
|
66
|
-
|
67
|
-
|
66
|
+
CUMO_REAL(z) = -CUMO_IMAG(x);
|
67
|
+
CUMO_IMAG(z) = CUMO_REAL(x);
|
68
68
|
return z;
|
69
69
|
}
|
70
70
|
|
71
71
|
static inline dtype c_add(dtype x, dtype y) {
|
72
72
|
dtype z;
|
73
|
-
|
74
|
-
|
73
|
+
CUMO_REAL(z) = CUMO_REAL(x)+CUMO_REAL(y);
|
74
|
+
CUMO_IMAG(z) = CUMO_IMAG(x)+CUMO_IMAG(y);
|
75
75
|
return z;
|
76
76
|
}
|
77
77
|
|
78
78
|
static inline dtype c_sub(dtype x, dtype y) {
|
79
79
|
dtype z;
|
80
|
-
|
81
|
-
|
80
|
+
CUMO_REAL(z) = CUMO_REAL(x)-CUMO_REAL(y);
|
81
|
+
CUMO_IMAG(z) = CUMO_IMAG(x)-CUMO_IMAG(y);
|
82
82
|
return z;
|
83
83
|
}
|
84
84
|
|
85
85
|
|
86
86
|
static inline dtype c_mul(dtype x, dtype y) {
|
87
87
|
dtype z;
|
88
|
-
|
89
|
-
|
88
|
+
CUMO_REAL(z) = CUMO_REAL(x)*CUMO_REAL(y)-CUMO_IMAG(x)*CUMO_IMAG(y);
|
89
|
+
CUMO_IMAG(z) = CUMO_REAL(x)*CUMO_IMAG(y)+CUMO_IMAG(x)*CUMO_REAL(y);
|
90
90
|
return z;
|
91
91
|
}
|
92
92
|
|
93
93
|
static inline dtype c_mul_r(dtype x, rtype y) {
|
94
94
|
dtype z;
|
95
|
-
|
96
|
-
|
95
|
+
CUMO_REAL(z) = CUMO_REAL(x)*y;
|
96
|
+
CUMO_IMAG(z) = CUMO_IMAG(x)*y;
|
97
97
|
return z;
|
98
98
|
}
|
99
99
|
|
100
100
|
static inline dtype c_div(dtype x, dtype y) {
|
101
101
|
dtype z;
|
102
102
|
rtype s,yr,yi;
|
103
|
-
s = r_hypot(
|
104
|
-
yr =
|
105
|
-
yi =
|
106
|
-
|
107
|
-
|
103
|
+
s = r_hypot(CUMO_REAL(y),CUMO_IMAG(y));
|
104
|
+
yr = CUMO_REAL(y)/s;
|
105
|
+
yi = CUMO_IMAG(y)/s;
|
106
|
+
CUMO_REAL(z) = (CUMO_REAL(x)*yr+CUMO_IMAG(x)*yi)/s;
|
107
|
+
CUMO_IMAG(z) = (CUMO_IMAG(x)*yr-CUMO_REAL(x)*yi)/s;
|
108
108
|
return z;
|
109
109
|
}
|
110
110
|
|
111
111
|
static inline dtype c_div_r(dtype x, rtype y) {
|
112
112
|
dtype z;
|
113
|
-
|
114
|
-
|
113
|
+
CUMO_REAL(z) = CUMO_REAL(x)/y;
|
114
|
+
CUMO_IMAG(z) = CUMO_IMAG(x)/y;
|
115
115
|
return z;
|
116
116
|
}
|
117
117
|
|
118
118
|
static inline dtype c_reciprocal(dtype x) {
|
119
119
|
dtype z;
|
120
|
-
if ( r_abs(
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
120
|
+
if ( r_abs(CUMO_REAL(x)) > r_abs(CUMO_IMAG(x)) ) {
|
121
|
+
CUMO_IMAG(z) = CUMO_IMAG(x)/CUMO_REAL(x);
|
122
|
+
CUMO_REAL(z) = (1+CUMO_IMAG(z)*CUMO_IMAG(z))*CUMO_REAL(x);
|
123
|
+
CUMO_IMAG(z) /= -CUMO_REAL(z);
|
124
|
+
CUMO_REAL(z) = 1/CUMO_REAL(z);
|
125
125
|
} else {
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
126
|
+
CUMO_REAL(z) = CUMO_REAL(x)/CUMO_IMAG(x);
|
127
|
+
CUMO_IMAG(z) = (1+CUMO_REAL(z)*CUMO_REAL(z))*CUMO_IMAG(x);
|
128
|
+
CUMO_REAL(z) /= CUMO_IMAG(z);
|
129
|
+
CUMO_IMAG(z) = -1/CUMO_IMAG(z);
|
130
130
|
}
|
131
131
|
return z;
|
132
132
|
}
|
133
133
|
|
134
134
|
static inline dtype c_square(dtype x) {
|
135
135
|
dtype z;
|
136
|
-
|
137
|
-
|
136
|
+
CUMO_REAL(z) = CUMO_REAL(x)*CUMO_REAL(x)-CUMO_IMAG(x)*CUMO_IMAG(x);
|
137
|
+
CUMO_IMAG(z) = 2*CUMO_REAL(x)*CUMO_IMAG(x);
|
138
138
|
return z;
|
139
139
|
}
|
140
140
|
|
141
141
|
static inline dtype c_sqrt(dtype x) {
|
142
142
|
dtype z;
|
143
143
|
rtype xr, xi, r;
|
144
|
-
xr =
|
145
|
-
xi =
|
144
|
+
xr = CUMO_REAL(x)/2;
|
145
|
+
xi = CUMO_IMAG(x)/2;
|
146
146
|
r = r_hypot(xr,xi);
|
147
147
|
if (xr>0) {
|
148
|
-
|
149
|
-
|
148
|
+
CUMO_REAL(z) = sqrt(r+xr);
|
149
|
+
CUMO_IMAG(z) = xi/CUMO_REAL(z);
|
150
150
|
} else if ( (r-=xr)!=0 ) {
|
151
|
-
|
152
|
-
|
151
|
+
CUMO_IMAG(z) = (xi>=0) ? sqrt(r):-sqrt(r);
|
152
|
+
CUMO_REAL(z) = xi/CUMO_IMAG(z);
|
153
153
|
} else {
|
154
|
-
|
154
|
+
CUMO_REAL(z) = CUMO_IMAG(z) = 0;
|
155
155
|
}
|
156
156
|
return z;
|
157
157
|
}
|
158
158
|
|
159
159
|
static inline dtype c_log(dtype x) {
|
160
160
|
dtype z;
|
161
|
-
|
162
|
-
|
161
|
+
CUMO_REAL(z) = r_log(r_hypot(CUMO_REAL(x),CUMO_IMAG(x)));
|
162
|
+
CUMO_IMAG(z) = r_atan2(CUMO_IMAG(x),CUMO_REAL(x));
|
163
163
|
return z;
|
164
164
|
}
|
165
165
|
|
@@ -179,73 +179,73 @@ static inline dtype c_log10(dtype x) {
|
|
179
179
|
|
180
180
|
static inline dtype c_exp(dtype x) {
|
181
181
|
dtype z;
|
182
|
-
rtype a = r_exp(
|
183
|
-
|
184
|
-
|
182
|
+
rtype a = r_exp(CUMO_REAL(x));
|
183
|
+
CUMO_REAL(z) = a*r_cos(CUMO_IMAG(x));
|
184
|
+
CUMO_IMAG(z) = a*r_sin(CUMO_IMAG(x));
|
185
185
|
return z;
|
186
186
|
}
|
187
187
|
|
188
188
|
static inline dtype c_exp2(dtype x) {
|
189
189
|
dtype z;
|
190
|
-
rtype a = r_exp(
|
191
|
-
|
192
|
-
|
190
|
+
rtype a = r_exp(CUMO_REAL(x)*M_LN2);
|
191
|
+
CUMO_REAL(z) = a*r_cos(CUMO_IMAG(x));
|
192
|
+
CUMO_IMAG(z) = a*r_sin(CUMO_IMAG(x));
|
193
193
|
return z;
|
194
194
|
}
|
195
195
|
|
196
196
|
static inline dtype c_exp10(dtype x) {
|
197
197
|
dtype z;
|
198
|
-
rtype a = r_exp(
|
199
|
-
|
200
|
-
|
198
|
+
rtype a = r_exp(CUMO_REAL(x)*M_LN10);
|
199
|
+
CUMO_REAL(z) = a*r_cos(CUMO_IMAG(x));
|
200
|
+
CUMO_IMAG(z) = a*r_sin(CUMO_IMAG(x));
|
201
201
|
return z;
|
202
202
|
}
|
203
203
|
|
204
204
|
static inline dtype c_sin(dtype x) {
|
205
205
|
dtype z;
|
206
|
-
|
207
|
-
|
206
|
+
CUMO_REAL(z) = r_sin(CUMO_REAL(x))*r_cosh(CUMO_IMAG(x));
|
207
|
+
CUMO_IMAG(z) = r_cos(CUMO_REAL(x))*r_sinh(CUMO_IMAG(x));
|
208
208
|
return z;
|
209
209
|
}
|
210
210
|
|
211
211
|
static inline dtype c_sinh(dtype x) {
|
212
212
|
dtype z;
|
213
|
-
|
214
|
-
|
213
|
+
CUMO_REAL(z) = r_sinh(CUMO_REAL(x))*r_cos(CUMO_IMAG(x));
|
214
|
+
CUMO_IMAG(z) = r_cosh(CUMO_REAL(x))*r_sin(CUMO_IMAG(x));
|
215
215
|
return z;
|
216
216
|
}
|
217
217
|
|
218
218
|
static inline dtype c_cos(dtype x) {
|
219
219
|
dtype z;
|
220
|
-
|
221
|
-
|
220
|
+
CUMO_REAL(z) = r_cos(CUMO_REAL(x))*r_cosh(CUMO_IMAG(x));
|
221
|
+
CUMO_IMAG(z) = -r_sin(CUMO_REAL(x))*r_sinh(CUMO_IMAG(x));
|
222
222
|
return z;
|
223
223
|
}
|
224
224
|
|
225
225
|
static inline dtype c_cosh(dtype x) {
|
226
226
|
dtype z;
|
227
|
-
|
228
|
-
|
227
|
+
CUMO_REAL(z) = r_cosh(CUMO_REAL(x))*r_cos(CUMO_IMAG(x));
|
228
|
+
CUMO_IMAG(z) = r_sinh(CUMO_REAL(x))*r_sin(CUMO_IMAG(x));
|
229
229
|
return z;
|
230
230
|
}
|
231
231
|
|
232
232
|
static inline dtype c_tan(dtype x) {
|
233
233
|
dtype z;
|
234
234
|
rtype c, d;
|
235
|
-
if (r_abs(
|
236
|
-
c = r_cos(
|
237
|
-
d = r_sinh(
|
235
|
+
if (r_abs(CUMO_IMAG(x))<1) {
|
236
|
+
c = r_cos(CUMO_REAL(x));
|
237
|
+
d = r_sinh(CUMO_IMAG(x));
|
238
238
|
d = c*c + d*d;
|
239
|
-
|
240
|
-
|
239
|
+
CUMO_REAL(z) = 0.5*r_sin(2*CUMO_REAL(x))/d;
|
240
|
+
CUMO_IMAG(z) = 0.5*r_sinh(2*CUMO_IMAG(x))/d;
|
241
241
|
} else {
|
242
|
-
d = r_exp(-
|
242
|
+
d = r_exp(-CUMO_IMAG(x));
|
243
243
|
c = 2*d/(1-d*d);
|
244
244
|
c = c*c;
|
245
|
-
d = r_cos(
|
245
|
+
d = r_cos(CUMO_REAL(x));
|
246
246
|
d = 1.0 + d*d*c;
|
247
|
-
|
248
|
-
|
247
|
+
CUMO_REAL(z) = 0.5*r_sin(2*CUMO_REAL(x))*c/d;
|
248
|
+
CUMO_IMAG(z) = 1/r_tanh(CUMO_IMAG(x))/d;
|
249
249
|
}
|
250
250
|
return z;
|
251
251
|
}
|
@@ -253,17 +253,17 @@ static inline dtype c_tan(dtype x) {
|
|
253
253
|
static inline dtype c_tanh(dtype x) {
|
254
254
|
dtype z;
|
255
255
|
rtype c, d, s;
|
256
|
-
c = r_cos(
|
257
|
-
s = r_sinh(
|
256
|
+
c = r_cos(CUMO_IMAG(x));
|
257
|
+
s = r_sinh(CUMO_REAL(x));
|
258
258
|
d = c*c + s*s;
|
259
|
-
if (r_abs(
|
260
|
-
|
261
|
-
|
259
|
+
if (r_abs(CUMO_REAL(x))<1) {
|
260
|
+
CUMO_REAL(z) = s*r_cosh(CUMO_REAL(x))/d;
|
261
|
+
CUMO_IMAG(z) = 0.5*r_sin(2*CUMO_IMAG(x))/d;
|
262
262
|
} else {
|
263
263
|
c = c / s;
|
264
264
|
c = 1 + c*c;
|
265
|
-
|
266
|
-
|
265
|
+
CUMO_REAL(z) = 1/(r_tanh(CUMO_REAL(x))*c);
|
266
|
+
CUMO_IMAG(z) = 0.5*r_sin(2*CUMO_IMAG(x))/d;
|
267
267
|
}
|
268
268
|
return z;
|
269
269
|
}
|
@@ -271,24 +271,24 @@ static inline dtype c_tanh(dtype x) {
|
|
271
271
|
static inline dtype c_asin(dtype x) {
|
272
272
|
dtype z, y;
|
273
273
|
y = c_square(x);
|
274
|
-
|
275
|
-
|
274
|
+
CUMO_REAL(y) = 1-CUMO_REAL(y);
|
275
|
+
CUMO_IMAG(y) = -CUMO_IMAG(y);
|
276
276
|
y = c_sqrt(y);
|
277
|
-
|
278
|
-
|
277
|
+
CUMO_REAL(y) -= CUMO_IMAG(x);
|
278
|
+
CUMO_IMAG(y) += CUMO_REAL(x);
|
279
279
|
y = c_log(y);
|
280
|
-
|
281
|
-
|
280
|
+
CUMO_REAL(z) = CUMO_IMAG(y);
|
281
|
+
CUMO_IMAG(z) = -CUMO_REAL(y);
|
282
282
|
return z;
|
283
283
|
}
|
284
284
|
|
285
285
|
static inline dtype c_asinh(dtype x) {
|
286
286
|
dtype z, y;
|
287
287
|
y = c_square(x);
|
288
|
-
|
288
|
+
CUMO_REAL(y) += 1;
|
289
289
|
y = c_sqrt(y);
|
290
|
-
|
291
|
-
|
290
|
+
CUMO_REAL(y) += CUMO_REAL(x);
|
291
|
+
CUMO_IMAG(y) += CUMO_IMAG(x);
|
292
292
|
z = c_log(y);
|
293
293
|
return z;
|
294
294
|
}
|
@@ -296,51 +296,51 @@ static inline dtype c_asinh(dtype x) {
|
|
296
296
|
static inline dtype c_acos(dtype x) {
|
297
297
|
dtype z, y;
|
298
298
|
y = c_square(x);
|
299
|
-
|
300
|
-
|
299
|
+
CUMO_REAL(y) = 1-CUMO_REAL(y);
|
300
|
+
CUMO_IMAG(y) = -CUMO_IMAG(y);
|
301
301
|
y = c_sqrt(y);
|
302
|
-
|
303
|
-
|
302
|
+
CUMO_REAL(z) = CUMO_REAL(x)-CUMO_IMAG(y);
|
303
|
+
CUMO_IMAG(z) = CUMO_IMAG(x)+CUMO_REAL(y);
|
304
304
|
y = c_log(z);
|
305
|
-
|
306
|
-
|
305
|
+
CUMO_REAL(z) = CUMO_IMAG(y);
|
306
|
+
CUMO_IMAG(z) = -CUMO_REAL(y);
|
307
307
|
return z;
|
308
308
|
}
|
309
309
|
|
310
310
|
static inline dtype c_acosh(dtype x) {
|
311
311
|
dtype z, y;
|
312
312
|
y = c_square(x);
|
313
|
-
|
313
|
+
CUMO_REAL(y) -= 1;
|
314
314
|
y = c_sqrt(y);
|
315
|
-
|
316
|
-
|
315
|
+
CUMO_REAL(y) += CUMO_REAL(x);
|
316
|
+
CUMO_IMAG(y) += CUMO_IMAG(x);
|
317
317
|
z = c_log(y);
|
318
318
|
return z;
|
319
319
|
}
|
320
320
|
|
321
321
|
static inline dtype c_atan(dtype x) {
|
322
322
|
dtype z, y;
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
323
|
+
CUMO_REAL(y) = -CUMO_REAL(x);
|
324
|
+
CUMO_IMAG(y) = 1-CUMO_IMAG(x);
|
325
|
+
CUMO_REAL(z) = CUMO_REAL(x);
|
326
|
+
CUMO_IMAG(z) = 1+CUMO_IMAG(x);
|
327
327
|
y = c_div(z,y);
|
328
328
|
y = c_log(y);
|
329
|
-
|
330
|
-
|
329
|
+
CUMO_REAL(z) = -CUMO_IMAG(y)/2;
|
330
|
+
CUMO_IMAG(z) = CUMO_REAL(y)/2;
|
331
331
|
return z;
|
332
332
|
}
|
333
333
|
|
334
334
|
static inline dtype c_atanh(dtype x) {
|
335
335
|
dtype z, y;
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
336
|
+
CUMO_REAL(y) = 1-CUMO_REAL(x);
|
337
|
+
CUMO_IMAG(y) = -CUMO_IMAG(x);
|
338
|
+
CUMO_REAL(z) = 1+CUMO_REAL(x);
|
339
|
+
CUMO_IMAG(z) = CUMO_IMAG(x);
|
340
340
|
y = c_div(z,y);
|
341
341
|
y = c_log(y);
|
342
|
-
|
343
|
-
|
342
|
+
CUMO_REAL(z) = CUMO_REAL(y)/2;
|
343
|
+
CUMO_IMAG(z) = CUMO_IMAG(y)/2;
|
344
344
|
return z;
|
345
345
|
}
|
346
346
|
|
@@ -349,7 +349,7 @@ static inline dtype c_pow(dtype x, dtype y)
|
|
349
349
|
dtype z;
|
350
350
|
if (c_is_zero(y)) {
|
351
351
|
z = c_one();
|
352
|
-
} else if (c_is_zero(x) &&
|
352
|
+
} else if (c_is_zero(x) && CUMO_REAL(y)>0 && CUMO_IMAG(y)==0) {
|
353
353
|
z = c_zero();
|
354
354
|
} else {
|
355
355
|
z = c_log(x);
|
@@ -386,11 +386,11 @@ static inline dtype c_cbrt(dtype x) {
|
|
386
386
|
}
|
387
387
|
|
388
388
|
static inline rtype c_abs(dtype x) {
|
389
|
-
return r_hypot(
|
389
|
+
return r_hypot(CUMO_REAL(x),CUMO_IMAG(x));
|
390
390
|
}
|
391
391
|
|
392
392
|
static inline rtype c_abs_square(dtype x) {
|
393
|
-
return
|
393
|
+
return CUMO_REAL(x)*CUMO_REAL(x)+CUMO_IMAG(x)*CUMO_IMAG(x);
|
394
394
|
}
|
395
395
|
|
396
396
|
|