cumo 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +23 -24
- data/bench/cumo_bench.rb +1 -0
- data/ext/cumo/cuda/memory_pool.cpp +9 -1
- data/ext/cumo/cuda/memory_pool_impl.cpp +2 -13
- data/ext/cumo/cumo.c +4 -4
- data/ext/cumo/depend.erb +1 -1
- data/ext/cumo/extconf.rb +2 -0
- data/ext/cumo/include/cumo.h +4 -4
- data/ext/cumo/include/cumo/indexer.h +50 -0
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +20 -1
- data/ext/cumo/include/cumo/narray_kernel.h +10 -0
- data/ext/cumo/include/cumo/ndloop.h +1 -1
- data/ext/cumo/narray/array.c +8 -2
- data/ext/cumo/narray/gen/tmpl/store_array.c +15 -3
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +10 -2
- data/ext/cumo/narray/index.c +77 -43
- data/ext/cumo/narray/narray.c +11 -2
- data/ext/cumo/narray/ndloop.c +49 -1
- data/ext/cumo/narray/ndloop_kernel.cu +97 -0
- data/ext/cumo/narray/step.c +56 -250
- data/lib/cumo/narray/extra.rb +50 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1b28beaea182d622d304bcb3153e56aa3280993ec079aea44c00b915d1e92b77
|
4
|
+
data.tar.gz: 26fc0e1942a444e5f9cb4641b3e36f9985593a10de26188f0a4142e72314d82a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a678cb7965fbbc9febf6b5f2f557f8be34f28c051fc0437a87506d3a067a34778a73b75dbeb56da14fd538062a8454355efd06bb686056db5b4df7cab9c04e86
|
7
|
+
data.tar.gz: 30ce98cae4e84ee7e9e73eae3ad76bcaca1e636462301d1afe1aa50e1f50633ed1b16756b90aaeba1a3e0870179d7e2dbee41696b175f0a454efee93e5f89591
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Cumo
|
2
2
|
|
3
|
-
Cumo (pronounced
|
4
|
-
This library provides the benefit of speedup using GPU by replacing Numo with only a small piece of codes.
|
3
|
+
Cumo (pronounced "koomo") is a CUDA-aware, GPU-optimized numerical library that offers a significant performance boost over [Ruby Numo](https://github.com/ruby-numo), while (mostly) maintaining drop-in compatibility.
|
5
4
|
|
6
5
|
<img src="https://raw.githubusercontent.com/sonots/cumo-logo/master/logo_transparent.png" alt="cumo logo" title="cumo logo" width="50%">
|
7
6
|
|
@@ -13,7 +12,7 @@ This library provides the benefit of speedup using GPU by replacing Numo with on
|
|
13
12
|
|
14
13
|
## Preparation
|
15
14
|
|
16
|
-
Install CUDA and
|
15
|
+
Install CUDA and set your environment variables as follows:
|
17
16
|
|
18
17
|
```bash
|
19
18
|
export CUDA_PATH="/usr/local/cuda"
|
@@ -25,7 +24,7 @@ export LIBRARY_PATH="$CUDA_PATH/lib64:$CUDA_PATH/lib:$LIBRARY_PATH"
|
|
25
24
|
|
26
25
|
## Installation
|
27
26
|
|
28
|
-
Add
|
27
|
+
Add the following line to your Gemfile:
|
29
28
|
|
30
29
|
```ruby
|
31
30
|
gem 'cumo'
|
@@ -63,15 +62,15 @@ An example:
|
|
63
62
|
=> 15
|
64
63
|
```
|
65
64
|
|
66
|
-
###
|
65
|
+
### Switching from Numo to Cumo
|
67
66
|
|
68
|
-
|
67
|
+
The following find-and-replace should just work:
|
69
68
|
|
70
69
|
```
|
71
70
|
find . -type f | xargs sed -i -e 's/Numo/Cumo/g' -e 's/numo/cumo/g'
|
72
71
|
```
|
73
72
|
|
74
|
-
If you want to switch Numo and Cumo
|
73
|
+
If you want to dynamically switch between Numo and Cumo, something like the following will work:
|
75
74
|
|
76
75
|
```ruby
|
77
76
|
if gpu
|
@@ -87,17 +86,17 @@ a = xm::DFloat.new(3,5).seq
|
|
87
86
|
|
88
87
|
### Incompatibility With Numo
|
89
88
|
|
90
|
-
|
89
|
+
The following methods behave incompatibly with Numo by default for performance reasons:
|
91
90
|
|
92
91
|
* `extract`
|
93
92
|
* `[]`
|
94
93
|
* `count_true`
|
95
94
|
* `count_false`
|
96
95
|
|
97
|
-
Numo returns a Ruby numeric object for 0-dimensional NArray,
|
98
|
-
|
96
|
+
Numo returns a Ruby numeric object for 0-dimensional NArray, while Cumo returns the 0-dimensional NArray instead of a Ruby numeric object.
|
97
|
+
Cumo differs in this way to avoid synchronization and minimize CPU ⇄ GPU data transfer.
|
99
98
|
|
100
|
-
|
99
|
+
Set the `CUMO_COMPATIBLE_MODE` environment variable to `ON` to force Numo NArray compatibility (for worse performance).
|
101
100
|
|
102
101
|
You may enable or disable `compatible_mode` as:
|
103
102
|
|
@@ -109,7 +108,7 @@ Cumo.disable_compatible_mode # disable
|
|
109
108
|
Cumo.compatible_mode_enabled? #=> false
|
110
109
|
```
|
111
110
|
|
112
|
-
You can also use following methods which
|
111
|
+
You can also use the following methods which behave like Numo's NArray methods. The behavior of these methods does not depend on `compatible_mode`.
|
113
112
|
|
114
113
|
* `extract_cpu`
|
115
114
|
* `aref_cpu(*idx)`
|
@@ -118,7 +117,7 @@ You can also use following methods which behaves as Numo NArray's methods. Behav
|
|
118
117
|
|
119
118
|
### Select a GPU device ID
|
120
119
|
|
121
|
-
Set `CUDA_VISIBLE_DEVICES=id` environment variable, or
|
120
|
+
Set the `CUDA_VISIBLE_DEVICES=id` environment variable, or
|
122
121
|
|
123
122
|
```
|
124
123
|
require 'cumo'
|
@@ -129,7 +128,7 @@ where `id` is an integer.
|
|
129
128
|
|
130
129
|
### Disable GPU Memory Pool
|
131
130
|
|
132
|
-
GPU memory pool is enabled
|
131
|
+
GPU memory pool is enabled by default. To disable it, set `CUMO_MEMORY_POOL=OFF`, or:
|
133
132
|
|
134
133
|
```
|
135
134
|
require 'cumo'
|
@@ -138,11 +137,11 @@ Cumo::CUDA::MemoryPool.disable
|
|
138
137
|
|
139
138
|
## Documentation
|
140
139
|
|
141
|
-
See https://github.com/ruby-numo/numo-narray#documentation
|
140
|
+
See https://github.com/ruby-numo/numo-narray#documentation, replacing Numo with Cumo.
|
142
141
|
|
143
142
|
## Contributions
|
144
143
|
|
145
|
-
This project is
|
144
|
+
This project is under active development. See [issues](https://github.com/sonots/cumo/issues) for future work.
|
146
145
|
|
147
146
|
## Development
|
148
147
|
|
@@ -170,12 +169,12 @@ Generate docs:
|
|
170
169
|
bundle exec rake docs
|
171
170
|
```
|
172
171
|
|
173
|
-
## Advanced Tips
|
172
|
+
## Advanced Development Tips
|
174
173
|
|
175
174
|
### ccache
|
176
175
|
|
177
176
|
[ccache](https://ccache.samba.org/) is useful for speeding up compilation.
|
178
|
-
Install ccache and
|
177
|
+
Install ccache and configure with:
|
179
178
|
|
180
179
|
|
181
180
|
```bash
|
@@ -187,7 +186,7 @@ ln -sf "$HOME/opt/ccache/bin/ccache" "$HOME/opt/ccache/bin/nvcc"
|
|
187
186
|
|
188
187
|
### Build in parallel
|
189
188
|
|
190
|
-
|
189
|
+
Set `MAKEFLAGS` to specify `make` command options. You can build in parallel as:
|
191
190
|
|
192
191
|
```
|
193
192
|
bundle exec env MAKEFLAG=-j8 rake compile
|
@@ -199,11 +198,11 @@ bundle exec env MAKEFLAG=-j8 rake compile
|
|
199
198
|
bundle exec env CUMO_NVCC_GENERATE_CODE=arch=compute_60,code=sm_60 rake compile
|
200
199
|
```
|
201
200
|
|
202
|
-
This is useful even on development because it makes possible to skip JIT compilation of PTX to cubin
|
201
|
+
This is useful even during development because it makes it possible to skip JIT compilation of PTX to cubin at runtime.
|
203
202
|
|
204
203
|
### Run tests with gdb
|
205
204
|
|
206
|
-
Compile with
|
205
|
+
Compile with debugging enabled:
|
207
206
|
|
208
207
|
```
|
209
208
|
bundle exec DEBUG=1 rake compile
|
@@ -242,7 +241,7 @@ bundle exec DTYPE=dfloat ruby test/narray_test.rb
|
|
242
241
|
bundle exec CUDA_LAUNCH_BLOCKING=1
|
243
242
|
```
|
244
243
|
|
245
|
-
### Show GPU
|
244
|
+
### Show GPU synchronization warnings
|
246
245
|
|
247
246
|
Cumo shows warnings if CPU and GPU synchronization occurs if:
|
248
247
|
|
@@ -250,8 +249,8 @@ Cumo shows warnings if CPU and GPU synchronization occurs if:
|
|
250
249
|
export CUMO_SHOW_WARNING=ON
|
251
250
|
```
|
252
251
|
|
253
|
-
|
254
|
-
|
252
|
+
By default, Cumo shows warnings that occurred at the same place only once.
|
253
|
+
To show every occurrence of a repeated warning, set:
|
255
254
|
|
256
255
|
```
|
257
256
|
export CUMO_SHOW_WARNING=ON
|
data/bench/cumo_bench.rb
CHANGED
@@ -29,7 +29,15 @@ cumo_cuda_runtime_malloc(size_t size)
|
|
29
29
|
} catch (const cumo::internal::CUDARuntimeError& e) {
|
30
30
|
cumo_cuda_runtime_check_status(e.status());
|
31
31
|
} catch (const cumo::internal::OutOfMemoryError& e) {
|
32
|
-
|
32
|
+
// retry after GC
|
33
|
+
rb_funcall(rb_define_module("GC"), rb_intern("start"), 0);
|
34
|
+
try {
|
35
|
+
return reinterpret_cast<char*>(pool.Malloc(size));
|
36
|
+
} catch (const cumo::internal::CUDARuntimeError& e) {
|
37
|
+
cumo_cuda_runtime_check_status(e.status());
|
38
|
+
} catch (const cumo::internal::OutOfMemoryError& e) {
|
39
|
+
rb_raise(cumo_cuda_eOutOfMemoryError, "%s", e.what());
|
40
|
+
}
|
33
41
|
}
|
34
42
|
} else {
|
35
43
|
void *ptr = 0;
|
@@ -139,6 +139,8 @@ intptr_t SingleDeviceMemoryPool::Malloc(size_t size, cudaStream_t stream_ptr) {
|
|
139
139
|
if (e.status() != cudaErrorMemoryAllocation) {
|
140
140
|
throw;
|
141
141
|
}
|
142
|
+
// Retry after free all free blocks.
|
143
|
+
// NOTE: Another retry after GC is done at cumo_cuda_runtime_malloc.
|
142
144
|
FreeAllBlocks();
|
143
145
|
try {
|
144
146
|
mem = std::make_shared<Memory>(size);
|
@@ -146,21 +148,8 @@ intptr_t SingleDeviceMemoryPool::Malloc(size_t size, cudaStream_t stream_ptr) {
|
|
146
148
|
if (e.status() != cudaErrorMemoryAllocation) {
|
147
149
|
throw;
|
148
150
|
}
|
149
|
-
#ifdef NO_RUBY // cpp test does not bind with libruby
|
150
151
|
size_t total = size + GetTotalBytes();
|
151
152
|
throw OutOfMemoryError(size, total);
|
152
|
-
#else
|
153
|
-
rb_funcall(rb_define_module("GC"), rb_intern("start"), 0);
|
154
|
-
try {
|
155
|
-
mem = std::make_shared<Memory>(size);
|
156
|
-
} catch (const CUDARuntimeError& e) {
|
157
|
-
if (e.status() != cudaErrorMemoryAllocation) {
|
158
|
-
throw;
|
159
|
-
}
|
160
|
-
size_t total = size + GetTotalBytes();
|
161
|
-
throw OutOfMemoryError(size, total);
|
162
|
-
}
|
163
|
-
#endif
|
164
153
|
}
|
165
154
|
}
|
166
155
|
chunk = std::make_shared<Chunk>(mem, 0, size, stream_ptr);
|
data/ext/cumo/cumo.c
CHANGED
@@ -54,11 +54,11 @@ bool cumo_show_warning_enabled_p()
|
|
54
54
|
return cumo_show_warning_enabled;
|
55
55
|
}
|
56
56
|
|
57
|
-
static bool
|
57
|
+
static bool cumo_show_warning_once_enabled;
|
58
58
|
|
59
|
-
bool
|
59
|
+
bool cumo_show_warning_once_enabled_p()
|
60
60
|
{
|
61
|
-
return
|
61
|
+
return cumo_show_warning_once_enabled;
|
62
62
|
}
|
63
63
|
|
64
64
|
/*
|
@@ -130,7 +130,7 @@ Init_cumo()
|
|
130
130
|
|
131
131
|
// default is true
|
132
132
|
env = getenv("CUMO_SHOW_WARNING_ONCE");
|
133
|
-
|
133
|
+
cumo_show_warning_once_enabled = env == NULL || (strcmp(env, "OFF") != 0 && strcmp(env, "0") != 0 && strcmp(env, "NO") != 0);
|
134
134
|
|
135
135
|
Init_cumo_narray();
|
136
136
|
|
data/ext/cumo/depend.erb
CHANGED
@@ -53,6 +53,6 @@ run-ctest : cuda/memory_pool_impl_test.exe
|
|
53
53
|
./$<
|
54
54
|
|
55
55
|
cuda/memory_pool_impl_test.exe: cuda/memory_pool_impl_test.cpp cuda/memory_pool_impl.cpp cuda/memory_pool_impl.hpp
|
56
|
-
nvcc -
|
56
|
+
nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< cuda/memory_pool_impl.cpp
|
57
57
|
|
58
58
|
CLEANOBJS = *.o */*.o */*/*.o *.bak narray/types/*.c narray/types/*_kernel.cu *.exe */*.exe
|
data/ext/cumo/extconf.rb
CHANGED
@@ -68,6 +68,7 @@ narray/step
|
|
68
68
|
narray/index
|
69
69
|
narray/index_kernel
|
70
70
|
narray/ndloop
|
71
|
+
narray/ndloop_kernel
|
71
72
|
narray/data
|
72
73
|
narray/data_kernel
|
73
74
|
narray/types/bit
|
@@ -158,6 +159,7 @@ unless have_type("u_int64_t", stdint)
|
|
158
159
|
have_type("uint64_t", stdint)
|
159
160
|
end
|
160
161
|
have_func("exp10")
|
162
|
+
have_func("rb_arithmetic_sequence_extract")
|
161
163
|
|
162
164
|
have_var("rb_cComplex")
|
163
165
|
have_func("rb_thread_call_without_gvl")
|
data/ext/cumo/include/cumo.h
CHANGED
@@ -10,17 +10,17 @@ extern "C" {
|
|
10
10
|
#endif
|
11
11
|
#endif
|
12
12
|
|
13
|
-
#define CUMO_VERSION "0.2.
|
14
|
-
#define CUMO_VERSION_CODE
|
13
|
+
#define CUMO_VERSION "0.2.5"
|
14
|
+
#define CUMO_VERSION_CODE 25
|
15
15
|
|
16
16
|
bool cumo_compatible_mode_enabled_p();
|
17
17
|
bool cumo_show_warning_enabled_p();
|
18
|
-
bool
|
18
|
+
bool cumo_show_warning_once_enabled_p();
|
19
19
|
|
20
20
|
#define CUMO_SHOW_WARNING_ONCE( c_str ) \
|
21
21
|
{ \
|
22
22
|
if (cumo_show_warning_enabled_p()) { \
|
23
|
-
if (
|
23
|
+
if (cumo_show_warning_once_enabled_p()) { \
|
24
24
|
static bool show_warning = true; \
|
25
25
|
if (show_warning) { \
|
26
26
|
fprintf(stderr, (c_str)); \
|
@@ -30,6 +30,11 @@ typedef struct {
|
|
30
30
|
ssize_t step[CUMO_NA_MAX_DIMENSION]; // or strides
|
31
31
|
} cumo_na_iarray_t;
|
32
32
|
|
33
|
+
typedef struct {
|
34
|
+
char* ptr;
|
35
|
+
cumo_stridx_t stridx[CUMO_NA_MAX_DIMENSION];
|
36
|
+
} cumo_na_iarray_stridx_t;
|
37
|
+
|
33
38
|
typedef struct {
|
34
39
|
cumo_na_iarray_t in;
|
35
40
|
cumo_na_iarray_t out;
|
@@ -216,6 +221,51 @@ cumo_na_iarray_at_dim1(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) {
|
|
216
221
|
return iarray->ptr + iarray->step[0] * indexer->raw_index;
|
217
222
|
}
|
218
223
|
|
224
|
+
__host__ __device__
|
225
|
+
static inline char*
|
226
|
+
cumo_na_iarray_stridx_at_dim(cumo_na_iarray_stridx_t* iarray, cumo_na_indexer_t* indexer) {
|
227
|
+
char* ptr = iarray->ptr;
|
228
|
+
for (int idim = 0; idim < indexer->ndim; ++idim) {
|
229
|
+
if (CUMO_SDX_IS_INDEX(iarray->stridx[idim])) {
|
230
|
+
ptr += CUMO_SDX_GET_INDEX(iarray->stridx[idim])[indexer->index[idim]];
|
231
|
+
} else {
|
232
|
+
ptr += CUMO_SDX_GET_STRIDE(iarray->stridx[idim]) * indexer->index[idim];
|
233
|
+
}
|
234
|
+
}
|
235
|
+
return ptr;
|
236
|
+
}
|
237
|
+
|
238
|
+
// Let compiler optimize
|
239
|
+
#define CUMO_NA_IARRAY_STRIDX_AT(NDIM) \
|
240
|
+
__host__ __device__ \
|
241
|
+
static inline char* \
|
242
|
+
cumo_na_iarray_stridx_at_dim##NDIM(cumo_na_iarray_stridx_t* iarray, cumo_na_indexer_t* indexer) { \
|
243
|
+
char* ptr = iarray->ptr; \
|
244
|
+
for (int idim = 0; idim < NDIM; ++idim) { \
|
245
|
+
if (CUMO_SDX_IS_INDEX(iarray->stridx[idim])) { \
|
246
|
+
ptr += CUMO_SDX_GET_INDEX(iarray->stridx[idim])[indexer->index[idim]]; \
|
247
|
+
} else { \
|
248
|
+
ptr += CUMO_SDX_GET_STRIDE(iarray->stridx[idim]) * indexer->index[idim]; \
|
249
|
+
} \
|
250
|
+
} \
|
251
|
+
return ptr; \
|
252
|
+
}
|
253
|
+
|
254
|
+
CUMO_NA_IARRAY_STRIDX_AT(4)
|
255
|
+
CUMO_NA_IARRAY_STRIDX_AT(3)
|
256
|
+
CUMO_NA_IARRAY_STRIDX_AT(2)
|
257
|
+
CUMO_NA_IARRAY_STRIDX_AT(0)
|
258
|
+
|
259
|
+
__host__ __device__
|
260
|
+
static inline char*
|
261
|
+
cumo_na_iarray_stridx_at_dim1(cumo_na_iarray_stridx_t* iarray, cumo_na_indexer_t* indexer) {
|
262
|
+
if (CUMO_SDX_IS_INDEX(iarray->stridx[0])) {
|
263
|
+
return iarray->ptr + CUMO_SDX_GET_INDEX(iarray->stridx[0])[indexer->raw_index];
|
264
|
+
} else {
|
265
|
+
return iarray->ptr + CUMO_SDX_GET_STRIDE(iarray->stridx[0]) * indexer->raw_index;
|
266
|
+
}
|
267
|
+
}
|
268
|
+
|
219
269
|
#endif // #ifdef __CUDACC__
|
220
270
|
|
221
271
|
#endif // CUMO_INDEXER_H
|
@@ -69,6 +69,7 @@ bool cumo_na_test_reduce(VALUE reduce, int dim);
|
|
69
69
|
|
70
70
|
void cumo_na_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
|
71
71
|
void cumo_na_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
|
72
|
+
void cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep);
|
72
73
|
|
73
74
|
// used in aref, aset
|
74
75
|
int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
|
@@ -196,10 +196,12 @@ extern VALUE cumo_cUInt32;
|
|
196
196
|
extern VALUE cumo_cUInt16;
|
197
197
|
extern VALUE cumo_cUInt8;
|
198
198
|
extern VALUE cumo_cRObject;
|
199
|
-
extern VALUE cumo_na_cStep;
|
200
199
|
#ifndef HAVE_RB_CCOMPLEX
|
201
200
|
extern VALUE rb_cComplex;
|
202
201
|
#endif
|
202
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
203
|
+
extern VALUE rb_cArithSeq;
|
204
|
+
#endif
|
203
205
|
|
204
206
|
extern VALUE cumo_sym_reduce;
|
205
207
|
extern VALUE cumo_sym_option;
|
@@ -265,6 +267,23 @@ typedef struct {
|
|
265
267
|
unsigned int element_stride;
|
266
268
|
} cumo_narray_type_info_t;
|
267
269
|
|
270
|
+
// from ruby/enumerator.c
|
271
|
+
typedef struct {
|
272
|
+
VALUE obj;
|
273
|
+
ID meth;
|
274
|
+
VALUE args;
|
275
|
+
// use only above in this source
|
276
|
+
VALUE fib;
|
277
|
+
VALUE dst;
|
278
|
+
VALUE lookahead;
|
279
|
+
VALUE feedvalue;
|
280
|
+
VALUE stop_exc;
|
281
|
+
VALUE size;
|
282
|
+
// incompatible below depending on ruby version
|
283
|
+
//VALUE procs; // ruby 2.4
|
284
|
+
//rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
|
285
|
+
//VALUE (*size_fn)(ANYARGS); // ruby 2.0
|
286
|
+
} cumo_enumerator_t;
|
268
287
|
|
269
288
|
static inline cumo_narray_t *
|
270
289
|
cumo_na_get_narray_t(VALUE obj)
|
@@ -165,6 +165,16 @@ typedef unsigned int CUMO_BIT_DIGIT;
|
|
165
165
|
#define CUMO_BALL (~(CUMO_BIT_DIGIT)0)
|
166
166
|
#define CUMO_SLB(n) (((n)==CUMO_NB)?~(CUMO_BIT_DIGIT)0:(~(~(CUMO_BIT_DIGIT)0<<(n))))
|
167
167
|
|
168
|
+
typedef union {
|
169
|
+
ssize_t stride;
|
170
|
+
size_t *index;
|
171
|
+
} cumo_stridx_t;
|
172
|
+
|
173
|
+
#define CUMO_SDX_IS_STRIDE(x) ((x).stride&0x1)
|
174
|
+
#define CUMO_SDX_IS_INDEX(x) (!CUMO_SDX_IS_STRIDE(x))
|
175
|
+
#define CUMO_SDX_GET_STRIDE(x) ((x).stride>>1)
|
176
|
+
#define CUMO_SDX_GET_INDEX(x) ((x).index)
|
177
|
+
|
168
178
|
#include "cumo/indexer.h"
|
169
179
|
#include "cumo/intern_kernel.h"
|
170
180
|
|