cumo 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +23 -24
- data/bench/cumo_bench.rb +1 -0
- data/ext/cumo/cuda/memory_pool.cpp +9 -1
- data/ext/cumo/cuda/memory_pool_impl.cpp +2 -13
- data/ext/cumo/cumo.c +4 -4
- data/ext/cumo/depend.erb +1 -1
- data/ext/cumo/extconf.rb +2 -0
- data/ext/cumo/include/cumo.h +4 -4
- data/ext/cumo/include/cumo/indexer.h +50 -0
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +20 -1
- data/ext/cumo/include/cumo/narray_kernel.h +10 -0
- data/ext/cumo/include/cumo/ndloop.h +1 -1
- data/ext/cumo/narray/array.c +8 -2
- data/ext/cumo/narray/gen/tmpl/store_array.c +15 -3
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +10 -2
- data/ext/cumo/narray/index.c +77 -43
- data/ext/cumo/narray/narray.c +11 -2
- data/ext/cumo/narray/ndloop.c +49 -1
- data/ext/cumo/narray/ndloop_kernel.cu +97 -0
- data/ext/cumo/narray/step.c +56 -250
- data/lib/cumo/narray/extra.rb +50 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1b28beaea182d622d304bcb3153e56aa3280993ec079aea44c00b915d1e92b77
|
4
|
+
data.tar.gz: 26fc0e1942a444e5f9cb4641b3e36f9985593a10de26188f0a4142e72314d82a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a678cb7965fbbc9febf6b5f2f557f8be34f28c051fc0437a87506d3a067a34778a73b75dbeb56da14fd538062a8454355efd06bb686056db5b4df7cab9c04e86
|
7
|
+
data.tar.gz: 30ce98cae4e84ee7e9e73eae3ad76bcaca1e636462301d1afe1aa50e1f50633ed1b16756b90aaeba1a3e0870179d7e2dbee41696b175f0a454efee93e5f89591
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Cumo
|
2
2
|
|
3
|
-
Cumo (pronounced
|
4
|
-
This library provides the benefit of speedup using GPU by replacing Numo with only a small piece of codes.
|
3
|
+
Cumo (pronounced "koomo") is a CUDA-aware, GPU-optimized numerical library that offers a significant performance boost over [Ruby Numo](https://github.com/ruby-numo), while (mostly) maintaining drop-in compatibility.
|
5
4
|
|
6
5
|
<img src="https://raw.githubusercontent.com/sonots/cumo-logo/master/logo_transparent.png" alt="cumo logo" title="cumo logo" width="50%">
|
7
6
|
|
@@ -13,7 +12,7 @@ This library provides the benefit of speedup using GPU by replacing Numo with on
|
|
13
12
|
|
14
13
|
## Preparation
|
15
14
|
|
16
|
-
Install CUDA and
|
15
|
+
Install CUDA and set your environment variables as follows:
|
17
16
|
|
18
17
|
```bash
|
19
18
|
export CUDA_PATH="/usr/local/cuda"
|
@@ -25,7 +24,7 @@ export LIBRARY_PATH="$CUDA_PATH/lib64:$CUDA_PATH/lib:$LIBRARY_PATH"
|
|
25
24
|
|
26
25
|
## Installation
|
27
26
|
|
28
|
-
Add
|
27
|
+
Add the following line to your Gemfile:
|
29
28
|
|
30
29
|
```ruby
|
31
30
|
gem 'cumo'
|
@@ -63,15 +62,15 @@ An example:
|
|
63
62
|
=> 15
|
64
63
|
```
|
65
64
|
|
66
|
-
###
|
65
|
+
### Switching from Numo to Cumo
|
67
66
|
|
68
|
-
|
67
|
+
The following find-and-replace should just work:
|
69
68
|
|
70
69
|
```
|
71
70
|
find . -type f | xargs sed -i -e 's/Numo/Cumo/g' -e 's/numo/cumo/g'
|
72
71
|
```
|
73
72
|
|
74
|
-
If you want to switch Numo and Cumo
|
73
|
+
If you want to dynamically switch between Numo and Cumo, something like the following will work:
|
75
74
|
|
76
75
|
```ruby
|
77
76
|
if gpu
|
@@ -87,17 +86,17 @@ a = xm::DFloat.new(3,5).seq
|
|
87
86
|
|
88
87
|
### Incompatibility With Numo
|
89
88
|
|
90
|
-
|
89
|
+
The following methods behave incompatibly with Numo by default for performance reasons:
|
91
90
|
|
92
91
|
* `extract`
|
93
92
|
* `[]`
|
94
93
|
* `count_true`
|
95
94
|
* `count_false`
|
96
95
|
|
97
|
-
Numo returns a Ruby numeric object for 0-dimensional NArray,
|
98
|
-
|
96
|
+
Numo returns a Ruby numeric object for 0-dimensional NArray, while Cumo returns the 0-dimensional NArray instead of a Ruby numeric object.
|
97
|
+
Cumo differs in this way to avoid synchronization and minimize CPU ⇄ GPU data transfer.
|
99
98
|
|
100
|
-
|
99
|
+
Set the `CUMO_COMPATIBLE_MODE` environment variable to `ON` to force Numo NArray compatibility (for worse performance).
|
101
100
|
|
102
101
|
You may enable or disable `compatible_mode` as:
|
103
102
|
|
@@ -109,7 +108,7 @@ Cumo.disable_compatible_mode # disable
|
|
109
108
|
Cumo.compatible_mode_enabled? #=> false
|
110
109
|
```
|
111
110
|
|
112
|
-
You can also use following methods which
|
111
|
+
You can also use the following methods which behave like Numo's NArray methods. The behavior of these methods does not depend on `compatible_mode`.
|
113
112
|
|
114
113
|
* `extract_cpu`
|
115
114
|
* `aref_cpu(*idx)`
|
@@ -118,7 +117,7 @@ You can also use following methods which behaves as Numo NArray's methods. Behav
|
|
118
117
|
|
119
118
|
### Select a GPU device ID
|
120
119
|
|
121
|
-
Set `CUDA_VISIBLE_DEVICES=id` environment variable, or
|
120
|
+
Set the `CUDA_VISIBLE_DEVICES=id` environment variable, or
|
122
121
|
|
123
122
|
```
|
124
123
|
require 'cumo'
|
@@ -129,7 +128,7 @@ where `id` is an integer.
|
|
129
128
|
|
130
129
|
### Disable GPU Memory Pool
|
131
130
|
|
132
|
-
GPU memory pool is enabled
|
131
|
+
GPU memory pool is enabled by default. To disable it, set `CUMO_MEMORY_POOL=OFF`, or:
|
133
132
|
|
134
133
|
```
|
135
134
|
require 'cumo'
|
@@ -138,11 +137,11 @@ Cumo::CUDA::MemoryPool.disable
|
|
138
137
|
|
139
138
|
## Documentation
|
140
139
|
|
141
|
-
See https://github.com/ruby-numo/numo-narray#documentation
|
140
|
+
See https://github.com/ruby-numo/numo-narray#documentation, replacing Numo with Cumo.
|
142
141
|
|
143
142
|
## Contributions
|
144
143
|
|
145
|
-
This project is
|
144
|
+
This project is under active development. See [issues](https://github.com/sonots/cumo/issues) for planned future work.
|
146
145
|
|
147
146
|
## Development
|
148
147
|
|
@@ -170,12 +169,12 @@ Generate docs:
|
|
170
169
|
bundle exec rake docs
|
171
170
|
```
|
172
171
|
|
173
|
-
## Advanced Tips
|
172
|
+
## Advanced Development Tips
|
174
173
|
|
175
174
|
### ccache
|
176
175
|
|
177
176
|
[ccache](https://ccache.samba.org/) is useful for speeding up compilation.
|
178
|
-
Install ccache and
|
177
|
+
Install ccache and configure with:
|
179
178
|
|
180
179
|
|
181
180
|
```bash
|
@@ -187,7 +186,7 @@ ln -sf "$HOME/opt/ccache/bin/ccache" "$HOME/opt/ccache/bin/nvcc"
|
|
187
186
|
|
188
187
|
### Build in parallel
|
189
188
|
|
190
|
-
|
189
|
+
Set `MAKEFLAGS` to specify `make` command options. You can build in parallel as:
|
191
190
|
|
192
191
|
```
|
193
192
|
bundle exec env MAKEFLAGS=-j8 rake compile
|
@@ -199,11 +198,11 @@ bundle exec env MAKEFLAG=-j8 rake compile
|
|
199
198
|
bundle exec env CUMO_NVCC_GENERATE_CODE=arch=compute_60,code=sm_60 rake compile
|
200
199
|
```
|
201
200
|
|
202
|
-
This is useful even on development because it makes possible to skip JIT compilation of PTX to cubin
|
201
|
+
This is useful even during development because it makes it possible to skip JIT compilation of PTX to cubin at runtime.
|
203
202
|
|
204
203
|
### Run tests with gdb
|
205
204
|
|
206
|
-
Compile with
|
205
|
+
Compile with debugging enabled:
|
207
206
|
|
208
207
|
```
|
209
208
|
bundle exec env DEBUG=1 rake compile
|
@@ -242,7 +241,7 @@ bundle exec DTYPE=dfloat ruby test/narray_test.rb
|
|
242
241
|
bundle exec CUDA_LAUNCH_BLOCKING=1
|
243
242
|
```
|
244
243
|
|
245
|
-
### Show GPU
|
244
|
+
### Show GPU synchronization warnings
|
246
245
|
|
247
246
|
Cumo shows warnings when CPU and GPU synchronization occurs if you set:
|
248
247
|
|
@@ -250,8 +249,8 @@ Cumo shows warnings if CPU and GPU synchronization occurs if:
|
|
250
249
|
export CUMO_SHOW_WARNING=ON
|
251
250
|
```
|
252
251
|
|
253
|
-
|
254
|
-
|
252
|
+
By default, Cumo shows warnings that occurred at the same place only once.
|
253
|
+
To show every occurrence of a warning, set:
|
255
254
|
|
256
255
|
```
|
257
256
|
export CUMO_SHOW_WARNING=ON
|
data/bench/cumo_bench.rb
CHANGED
@@ -29,7 +29,15 @@ cumo_cuda_runtime_malloc(size_t size)
|
|
29
29
|
} catch (const cumo::internal::CUDARuntimeError& e) {
|
30
30
|
cumo_cuda_runtime_check_status(e.status());
|
31
31
|
} catch (const cumo::internal::OutOfMemoryError& e) {
|
32
|
-
|
32
|
+
// retry after GC
|
33
|
+
rb_funcall(rb_define_module("GC"), rb_intern("start"), 0);
|
34
|
+
try {
|
35
|
+
return reinterpret_cast<char*>(pool.Malloc(size));
|
36
|
+
} catch (const cumo::internal::CUDARuntimeError& e) {
|
37
|
+
cumo_cuda_runtime_check_status(e.status());
|
38
|
+
} catch (const cumo::internal::OutOfMemoryError& e) {
|
39
|
+
rb_raise(cumo_cuda_eOutOfMemoryError, "%s", e.what());
|
40
|
+
}
|
33
41
|
}
|
34
42
|
} else {
|
35
43
|
void *ptr = 0;
|
@@ -139,6 +139,8 @@ intptr_t SingleDeviceMemoryPool::Malloc(size_t size, cudaStream_t stream_ptr) {
|
|
139
139
|
if (e.status() != cudaErrorMemoryAllocation) {
|
140
140
|
throw;
|
141
141
|
}
|
142
|
+
// Retry after freeing all free blocks.
|
143
|
+
// NOTE: Another retry after GC is done at cumo_cuda_runtime_malloc.
|
142
144
|
FreeAllBlocks();
|
143
145
|
try {
|
144
146
|
mem = std::make_shared<Memory>(size);
|
@@ -146,21 +148,8 @@ intptr_t SingleDeviceMemoryPool::Malloc(size_t size, cudaStream_t stream_ptr) {
|
|
146
148
|
if (e.status() != cudaErrorMemoryAllocation) {
|
147
149
|
throw;
|
148
150
|
}
|
149
|
-
#ifdef NO_RUBY // cpp test does not bind with libruby
|
150
151
|
size_t total = size + GetTotalBytes();
|
151
152
|
throw OutOfMemoryError(size, total);
|
152
|
-
#else
|
153
|
-
rb_funcall(rb_define_module("GC"), rb_intern("start"), 0);
|
154
|
-
try {
|
155
|
-
mem = std::make_shared<Memory>(size);
|
156
|
-
} catch (const CUDARuntimeError& e) {
|
157
|
-
if (e.status() != cudaErrorMemoryAllocation) {
|
158
|
-
throw;
|
159
|
-
}
|
160
|
-
size_t total = size + GetTotalBytes();
|
161
|
-
throw OutOfMemoryError(size, total);
|
162
|
-
}
|
163
|
-
#endif
|
164
153
|
}
|
165
154
|
}
|
166
155
|
chunk = std::make_shared<Chunk>(mem, 0, size, stream_ptr);
|
data/ext/cumo/cumo.c
CHANGED
@@ -54,11 +54,11 @@ bool cumo_show_warning_enabled_p()
|
|
54
54
|
return cumo_show_warning_enabled;
|
55
55
|
}
|
56
56
|
|
57
|
-
static bool
|
57
|
+
static bool cumo_show_warning_once_enabled;
|
58
58
|
|
59
|
-
bool
|
59
|
+
bool cumo_show_warning_once_enabled_p()
|
60
60
|
{
|
61
|
-
return
|
61
|
+
return cumo_show_warning_once_enabled;
|
62
62
|
}
|
63
63
|
|
64
64
|
/*
|
@@ -130,7 +130,7 @@ Init_cumo()
|
|
130
130
|
|
131
131
|
// default is true
|
132
132
|
env = getenv("CUMO_SHOW_WARNING_ONCE");
|
133
|
-
|
133
|
+
cumo_show_warning_once_enabled = env == NULL || (strcmp(env, "OFF") != 0 && strcmp(env, "0") != 0 && strcmp(env, "NO") != 0);
|
134
134
|
|
135
135
|
Init_cumo_narray();
|
136
136
|
|
data/ext/cumo/depend.erb
CHANGED
@@ -53,6 +53,6 @@ run-ctest : cuda/memory_pool_impl_test.exe
|
|
53
53
|
./$<
|
54
54
|
|
55
55
|
cuda/memory_pool_impl_test.exe: cuda/memory_pool_impl_test.cpp cuda/memory_pool_impl.cpp cuda/memory_pool_impl.hpp
|
56
|
-
nvcc -
|
56
|
+
nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< cuda/memory_pool_impl.cpp
|
57
57
|
|
58
58
|
CLEANOBJS = *.o */*.o */*/*.o *.bak narray/types/*.c narray/types/*_kernel.cu *.exe */*.exe
|
data/ext/cumo/extconf.rb
CHANGED
@@ -68,6 +68,7 @@ narray/step
|
|
68
68
|
narray/index
|
69
69
|
narray/index_kernel
|
70
70
|
narray/ndloop
|
71
|
+
narray/ndloop_kernel
|
71
72
|
narray/data
|
72
73
|
narray/data_kernel
|
73
74
|
narray/types/bit
|
@@ -158,6 +159,7 @@ unless have_type("u_int64_t", stdint)
|
|
158
159
|
have_type("uint64_t", stdint)
|
159
160
|
end
|
160
161
|
have_func("exp10")
|
162
|
+
have_func("rb_arithmetic_sequence_extract")
|
161
163
|
|
162
164
|
have_var("rb_cComplex")
|
163
165
|
have_func("rb_thread_call_without_gvl")
|
data/ext/cumo/include/cumo.h
CHANGED
@@ -10,17 +10,17 @@ extern "C" {
|
|
10
10
|
#endif
|
11
11
|
#endif
|
12
12
|
|
13
|
-
#define CUMO_VERSION "0.2.
|
14
|
-
#define CUMO_VERSION_CODE
|
13
|
+
#define CUMO_VERSION "0.2.5"
|
14
|
+
#define CUMO_VERSION_CODE 25
|
15
15
|
|
16
16
|
bool cumo_compatible_mode_enabled_p();
|
17
17
|
bool cumo_show_warning_enabled_p();
|
18
|
-
bool
|
18
|
+
bool cumo_show_warning_once_enabled_p();
|
19
19
|
|
20
20
|
#define CUMO_SHOW_WARNING_ONCE( c_str ) \
|
21
21
|
{ \
|
22
22
|
if (cumo_show_warning_enabled_p()) { \
|
23
|
-
if (
|
23
|
+
if (cumo_show_warning_once_enabled_p()) { \
|
24
24
|
static bool show_warning = true; \
|
25
25
|
if (show_warning) { \
|
26
26
|
fprintf(stderr, (c_str)); \
|
@@ -30,6 +30,11 @@ typedef struct {
|
|
30
30
|
ssize_t step[CUMO_NA_MAX_DIMENSION]; // or strides
|
31
31
|
} cumo_na_iarray_t;
|
32
32
|
|
33
|
+
typedef struct {
|
34
|
+
char* ptr;
|
35
|
+
cumo_stridx_t stridx[CUMO_NA_MAX_DIMENSION];
|
36
|
+
} cumo_na_iarray_stridx_t;
|
37
|
+
|
33
38
|
typedef struct {
|
34
39
|
cumo_na_iarray_t in;
|
35
40
|
cumo_na_iarray_t out;
|
@@ -216,6 +221,51 @@ cumo_na_iarray_at_dim1(cumo_na_iarray_t* iarray, cumo_na_indexer_t* indexer) {
|
|
216
221
|
return iarray->ptr + iarray->step[0] * indexer->raw_index;
|
217
222
|
}
|
218
223
|
|
224
|
+
__host__ __device__
|
225
|
+
static inline char*
|
226
|
+
cumo_na_iarray_stridx_at_dim(cumo_na_iarray_stridx_t* iarray, cumo_na_indexer_t* indexer) {
|
227
|
+
char* ptr = iarray->ptr;
|
228
|
+
for (int idim = 0; idim < indexer->ndim; ++idim) {
|
229
|
+
if (CUMO_SDX_IS_INDEX(iarray->stridx[idim])) {
|
230
|
+
ptr += CUMO_SDX_GET_INDEX(iarray->stridx[idim])[indexer->index[idim]];
|
231
|
+
} else {
|
232
|
+
ptr += CUMO_SDX_GET_STRIDE(iarray->stridx[idim]) * indexer->index[idim];
|
233
|
+
}
|
234
|
+
}
|
235
|
+
return ptr;
|
236
|
+
}
|
237
|
+
|
238
|
+
// Let compiler optimize
|
239
|
+
#define CUMO_NA_IARRAY_STRIDX_AT(NDIM) \
|
240
|
+
__host__ __device__ \
|
241
|
+
static inline char* \
|
242
|
+
cumo_na_iarray_stridx_at_dim##NDIM(cumo_na_iarray_stridx_t* iarray, cumo_na_indexer_t* indexer) { \
|
243
|
+
char* ptr = iarray->ptr; \
|
244
|
+
for (int idim = 0; idim < NDIM; ++idim) { \
|
245
|
+
if (CUMO_SDX_IS_INDEX(iarray->stridx[idim])) { \
|
246
|
+
ptr += CUMO_SDX_GET_INDEX(iarray->stridx[idim])[indexer->index[idim]]; \
|
247
|
+
} else { \
|
248
|
+
ptr += CUMO_SDX_GET_STRIDE(iarray->stridx[idim]) * indexer->index[idim]; \
|
249
|
+
} \
|
250
|
+
} \
|
251
|
+
return ptr; \
|
252
|
+
}
|
253
|
+
|
254
|
+
CUMO_NA_IARRAY_STRIDX_AT(4)
|
255
|
+
CUMO_NA_IARRAY_STRIDX_AT(3)
|
256
|
+
CUMO_NA_IARRAY_STRIDX_AT(2)
|
257
|
+
CUMO_NA_IARRAY_STRIDX_AT(0)
|
258
|
+
|
259
|
+
__host__ __device__
|
260
|
+
static inline char*
|
261
|
+
cumo_na_iarray_stridx_at_dim1(cumo_na_iarray_stridx_t* iarray, cumo_na_indexer_t* indexer) {
|
262
|
+
if (CUMO_SDX_IS_INDEX(iarray->stridx[0])) {
|
263
|
+
return iarray->ptr + CUMO_SDX_GET_INDEX(iarray->stridx[0])[indexer->raw_index];
|
264
|
+
} else {
|
265
|
+
return iarray->ptr + CUMO_SDX_GET_STRIDE(iarray->stridx[0]) * indexer->raw_index;
|
266
|
+
}
|
267
|
+
}
|
268
|
+
|
219
269
|
#endif // #ifdef __CUDACC__
|
220
270
|
|
221
271
|
#endif // CUMO_INDEXER_H
|
@@ -69,6 +69,7 @@ bool cumo_na_test_reduce(VALUE reduce, int dim);
|
|
69
69
|
|
70
70
|
void cumo_na_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
|
71
71
|
void cumo_na_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
|
72
|
+
void cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep);
|
72
73
|
|
73
74
|
// used in aref, aset
|
74
75
|
int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
|
@@ -196,10 +196,12 @@ extern VALUE cumo_cUInt32;
|
|
196
196
|
extern VALUE cumo_cUInt16;
|
197
197
|
extern VALUE cumo_cUInt8;
|
198
198
|
extern VALUE cumo_cRObject;
|
199
|
-
extern VALUE cumo_na_cStep;
|
200
199
|
#ifndef HAVE_RB_CCOMPLEX
|
201
200
|
extern VALUE rb_cComplex;
|
202
201
|
#endif
|
202
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
203
|
+
extern VALUE rb_cArithSeq;
|
204
|
+
#endif
|
203
205
|
|
204
206
|
extern VALUE cumo_sym_reduce;
|
205
207
|
extern VALUE cumo_sym_option;
|
@@ -265,6 +267,23 @@ typedef struct {
|
|
265
267
|
unsigned int element_stride;
|
266
268
|
} cumo_narray_type_info_t;
|
267
269
|
|
270
|
+
// from ruby/enumerator.c
|
271
|
+
typedef struct {
|
272
|
+
VALUE obj;
|
273
|
+
ID meth;
|
274
|
+
VALUE args;
|
275
|
+
// use only above in this source
|
276
|
+
VALUE fib;
|
277
|
+
VALUE dst;
|
278
|
+
VALUE lookahead;
|
279
|
+
VALUE feedvalue;
|
280
|
+
VALUE stop_exc;
|
281
|
+
VALUE size;
|
282
|
+
// incompatible below depending on ruby version
|
283
|
+
//VALUE procs; // ruby 2.4
|
284
|
+
//rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
|
285
|
+
//VALUE (*size_fn)(ANYARGS); // ruby 2.0
|
286
|
+
} cumo_enumerator_t;
|
268
287
|
|
269
288
|
static inline cumo_narray_t *
|
270
289
|
cumo_na_get_narray_t(VALUE obj)
|
@@ -165,6 +165,16 @@ typedef unsigned int CUMO_BIT_DIGIT;
|
|
165
165
|
#define CUMO_BALL (~(CUMO_BIT_DIGIT)0)
|
166
166
|
#define CUMO_SLB(n) (((n)==CUMO_NB)?~(CUMO_BIT_DIGIT)0:(~(~(CUMO_BIT_DIGIT)0<<(n))))
|
167
167
|
|
168
|
+
typedef union {
|
169
|
+
ssize_t stride;
|
170
|
+
size_t *index;
|
171
|
+
} cumo_stridx_t;
|
172
|
+
|
173
|
+
#define CUMO_SDX_IS_STRIDE(x) ((x).stride&0x1)
|
174
|
+
#define CUMO_SDX_IS_INDEX(x) (!CUMO_SDX_IS_STRIDE(x))
|
175
|
+
#define CUMO_SDX_GET_STRIDE(x) ((x).stride>>1)
|
176
|
+
#define CUMO_SDX_GET_INDEX(x) ((x).index)
|
177
|
+
|
168
178
|
#include "cumo/indexer.h"
|
169
179
|
#include "cumo/intern_kernel.h"
|
170
180
|
|