cumo 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/cumo/extconf.rb +2 -0
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/data.c +21 -24
- data/ext/cumo/narray/data_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +1 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +1 -0
- data/ext/cumo/narray/index.c +15 -36
- data/ext/cumo/narray/index_kernel.cu +86 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac2b9873bc48d45afcac57ff6e45ba84cc69ed1c61430cb13236a5c1ce018d0c
|
4
|
+
data.tar.gz: c001063b6a66de3055f98789420d5574b5b2d53357624dc6ffbe750ce2f727f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f18aa1652ddd921ae91da6f75e28a5b9338091111a07b1cc97b586b19e0a755fcb195b2307626a27f95def01845629ed710786f12932888a14e7dcfc55b0d034
|
7
|
+
data.tar.gz: ed082b7188a9b517074eb78216fd0a15333ae84e7eeb271c6f7675d8c4bfecd0ef8ccf575017dfe450c75b85e5f275b801c11295e985be447cd9c08703601c86
|
data/CHANGELOG.md
CHANGED
data/ext/cumo/extconf.rb
CHANGED
data/ext/cumo/include/cumo.h
CHANGED
data/ext/cumo/narray/data.c
CHANGED
@@ -53,15 +53,23 @@ static ID cumo_id_swap_byte;
|
|
53
53
|
} \
|
54
54
|
}
|
55
55
|
|
56
|
-
|
56
|
+
/*
 * Launcher implemented in data_kernel.cu (exported with C linkage).
 * Copies n elements of elmsz bytes each from p1 to p2. For each element i the
 * source address is p1 + idx1[i] when idx1 is non-NULL, otherwise p1 + i * s1;
 * the destination address is computed the same way from p2, idx2 and s2.
 *
 * The parameter types must mirror the definition exactly (uint64_t n,
 * ssize_t elmsz): the linker does not type-check C-linkage symbols, so a
 * narrower type here (e.g. int elmsz) makes the call undefined behavior.
 */
void cumo_iter_copy_bytes_kernel_launch(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, uint64_t n, ssize_t elmsz);
|
57
|
+
// #define m_memcpy(src,dst) memcpy(dst,src,e)
|
58
|
+
|
57
59
|
static void
|
58
60
|
iter_copy_bytes(cumo_na_loop_t *const lp)
|
59
61
|
{
|
60
|
-
size_t
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
62
|
+
size_t n;
|
63
|
+
ssize_t s1, s2;
|
64
|
+
char *p1, *p2;
|
65
|
+
size_t *idx1, *idx2;
|
66
|
+
CUMO_INIT_COUNTER(lp, n);
|
67
|
+
CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
68
|
+
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
69
|
+
cumo_iter_copy_bytes_kernel_launch(p1, p2, s1, s2, idx1, idx2, n, lp->args[0].elmsz);
|
70
|
+
// size_t e;
|
71
|
+
// e = lp->args[0].elmsz;
|
72
|
+
// LOOP_UNARY_PTR(lp,m_memcpy);
|
65
73
|
}
|
66
74
|
|
67
75
|
VALUE
|
@@ -562,6 +570,10 @@ cumo_na_flatten(VALUE self)
|
|
562
570
|
|
563
571
|
#define MIN(a,b) (((a)<(b))?(a):(b))
|
564
572
|
|
573
|
+
void cumo_na_diagonal_index_index_kernel_launch(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n);
|
574
|
+
void cumo_na_diagonal_index_stride_kernel_launch(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n);
|
575
|
+
void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n);
|
576
|
+
|
565
577
|
/*
|
566
578
|
Returns a diagonal view of NArray
|
567
579
|
@overload diagonal([offset,axes])
|
@@ -601,7 +613,6 @@ static VALUE
|
|
601
613
|
cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
|
602
614
|
{
|
603
615
|
int i, k, nd;
|
604
|
-
size_t j;
|
605
616
|
size_t *idx0, *idx1, *diag_idx;
|
606
617
|
size_t *shape;
|
607
618
|
size_t diag_size;
|
@@ -754,20 +765,12 @@ cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
|
|
754
765
|
idx0 = CUMO_SDX_GET_INDEX(na1->stridx[ax[0]]);
|
755
766
|
// diag_idx = ALLOC_N(size_t, diag_size);
|
756
767
|
diag_idx = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*diag_size);
|
757
|
-
|
758
|
-
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_diagonal", "any");
|
759
|
-
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
760
|
-
|
761
768
|
if (CUMO_SDX_IS_INDEX(na1->stridx[ax[1]])) {
|
762
769
|
idx1 = CUMO_SDX_GET_INDEX(na1->stridx[ax[1]]);
|
763
|
-
|
764
|
-
diag_idx[j] = idx0[j+k0] + idx1[j+k1];
|
765
|
-
}
|
770
|
+
cumo_na_diagonal_index_index_kernel_launch(diag_idx, idx0, idx1, k0, k1, diag_size);
|
766
771
|
} else {
|
767
772
|
stride1 = CUMO_SDX_GET_STRIDE(na1->stridx[ax[1]]);
|
768
|
-
|
769
|
-
diag_idx[j] = idx0[j+k0] + stride1*(j+k1);
|
770
|
-
}
|
773
|
+
cumo_na_diagonal_index_stride_kernel_launch(diag_idx, idx0, stride1, k0, k1, diag_size);
|
771
774
|
}
|
772
775
|
CUMO_SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
|
773
776
|
} else {
|
@@ -776,13 +779,7 @@ cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
|
|
776
779
|
idx1 = CUMO_SDX_GET_INDEX(na1->stridx[ax[1]]);
|
777
780
|
// diag_idx = ALLOC_N(size_t, diag_size);
|
778
781
|
diag_idx = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*diag_size);
|
779
|
-
|
780
|
-
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_diagonal", "any");
|
781
|
-
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
782
|
-
|
783
|
-
for (j=0; j<diag_size; j++) {
|
784
|
-
diag_idx[j] = stride0*(j+k0) + idx1[j+k1];
|
785
|
-
}
|
782
|
+
cumo_na_diagonal_stride_index_kernel_launch(diag_idx, stride0, idx1, k0, k1, diag_size);
|
786
783
|
CUMO_SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
|
787
784
|
} else {
|
788
785
|
stride1 = CUMO_SDX_GET_STRIDE(na1->stridx[ax[1]]);
|
@@ -0,0 +1,75 @@
|
|
1
|
+
#include "cumo/narray_kernel.h"
|
2
|
+
|
3
|
+
#if defined(__cplusplus)
|
4
|
+
extern "C" {
|
5
|
+
#if 0
|
6
|
+
} /* satisfy cc-mode */
|
7
|
+
#endif
|
8
|
+
#endif
|
9
|
+
|
10
|
+
__global__ void cumo_iter_copy_bytes_kernel(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, uint64_t n, ssize_t elmsz)
|
11
|
+
{
|
12
|
+
char *p1_ = NULL;
|
13
|
+
char *p2_ = NULL;
|
14
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
15
|
+
p1_ = p1 + (idx1 ? idx1[i] : i * s1);
|
16
|
+
p2_ = p2 + (idx2 ? idx2[i] : i * s2);
|
17
|
+
memcpy(p2_, p1_, elmsz);
|
18
|
+
}
|
19
|
+
}
|
20
|
+
|
21
|
+
__global__ void cumo_na_diagonal_index_index_kernel(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
|
22
|
+
{
|
23
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
24
|
+
idx[i] = idx0[i+k0] + idx1[i+k1];
|
25
|
+
}
|
26
|
+
}
|
27
|
+
|
28
|
+
__global__ void cumo_na_diagonal_index_stride_kernel(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n)
|
29
|
+
{
|
30
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
31
|
+
idx[i] = idx0[i+k0] + s1*(i+k1);
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
__global__ void cumo_na_diagonal_stride_index_kernel(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
|
36
|
+
{
|
37
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
38
|
+
idx[i] = s0*(i+k0) + idx1[i+k1];
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
void cumo_iter_copy_bytes_kernel_launch(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, uint64_t n, ssize_t elmsz)
|
43
|
+
{
|
44
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
45
|
+
size_t block_dim = cumo_get_block_dim(n);
|
46
|
+
cumo_iter_copy_bytes_kernel<<<grid_dim, block_dim>>>(p1, p2, s1, s2, idx1, idx2, n, elmsz);
|
47
|
+
}
|
48
|
+
|
49
|
+
void cumo_na_diagonal_index_index_kernel_launch(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
|
50
|
+
{
|
51
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
52
|
+
size_t block_dim = cumo_get_block_dim(n);
|
53
|
+
cumo_na_diagonal_index_index_kernel<<<grid_dim, block_dim>>>(idx, idx0, idx1, k0, k1, n);
|
54
|
+
}
|
55
|
+
|
56
|
+
void cumo_na_diagonal_index_stride_kernel_launch(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n)
|
57
|
+
{
|
58
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
59
|
+
size_t block_dim = cumo_get_block_dim(n);
|
60
|
+
cumo_na_diagonal_index_stride_kernel<<<grid_dim, block_dim>>>(idx, idx0, s1, k0, k1, n);
|
61
|
+
}
|
62
|
+
|
63
|
+
void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
|
64
|
+
{
|
65
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
66
|
+
size_t block_dim = cumo_get_block_dim(n);
|
67
|
+
cumo_na_diagonal_stride_index_kernel<<<grid_dim, block_dim>>>(idx, s0, idx1, k0, k1, n);
|
68
|
+
}
|
69
|
+
|
70
|
+
#if defined(__cplusplus)
|
71
|
+
#if 0
|
72
|
+
{ /* satisfy cc-mode */
|
73
|
+
#endif
|
74
|
+
} /* extern "C" { */
|
75
|
+
#endif
|
@@ -9,6 +9,7 @@ static void
|
|
9
9
|
CUMO_INIT_COUNTER(lp, n);
|
10
10
|
CUMO_INIT_PTR(lp, 0, ptr, step);
|
11
11
|
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
|
12
|
+
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
12
13
|
<%=type_name%>_qsort<%=j%>(ptr, n, step);
|
13
14
|
}
|
14
15
|
<% end %>
|
data/ext/cumo/narray/index.c
CHANGED
@@ -391,12 +391,14 @@ cumo_na_get_strides_nadata(const cumo_narray_data_t *na, ssize_t *strides, ssize
|
|
391
391
|
}
|
392
392
|
}
|
393
393
|
|
394
|
+
void cumo_na_index_aref_nadata_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n);
|
395
|
+
|
394
396
|
static void
|
395
397
|
cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
|
396
398
|
cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
|
397
399
|
{
|
398
400
|
int i, j;
|
399
|
-
ssize_t size,
|
401
|
+
ssize_t size, total=1;
|
400
402
|
ssize_t stride1;
|
401
403
|
ssize_t *strides_na1;
|
402
404
|
size_t *index;
|
@@ -425,15 +427,10 @@ cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
|
|
425
427
|
|
426
428
|
// array index
|
427
429
|
if (q[i].idx != NULL) {
|
428
|
-
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_nadata", "any");
|
429
|
-
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
430
|
-
|
431
430
|
index = q[i].idx;
|
432
431
|
CUMO_SDX_SET_INDEX(na2->stridx[j],index);
|
433
432
|
q[i].idx = NULL;
|
434
|
-
|
435
|
-
index[k] = index[k] * stride1;
|
436
|
-
}
|
433
|
+
cumo_na_index_aref_nadata_index_stride_kernel_launch(index, stride1, size);
|
437
434
|
} else {
|
438
435
|
beg = q[i].beg;
|
439
436
|
step = q[i].step;
|
@@ -447,6 +444,11 @@ cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
|
|
447
444
|
}
|
448
445
|
|
449
446
|
|
447
|
+
void cumo_na_index_aref_naview_index_index_kernel_launch(size_t *idx, size_t *idx1, uint64_t n);
|
448
|
+
void cumo_na_index_aref_naview_index_stride_last_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n);
|
449
|
+
void cumo_na_index_aref_naview_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n);
|
450
|
+
void cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n);
|
451
|
+
|
450
452
|
static void
|
451
453
|
cumo_na_index_aref_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
|
452
454
|
cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
|
@@ -481,64 +483,41 @@ cumo_na_index_aref_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
|
|
481
483
|
}
|
482
484
|
else if (q[i].idx != NULL && CUMO_SDX_IS_INDEX(sdx1)) {
|
483
485
|
// index <- index
|
484
|
-
int k;
|
485
486
|
size_t *index = q[i].idx;
|
486
|
-
|
487
|
-
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
|
488
|
-
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
489
|
-
|
487
|
+
size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
|
490
488
|
CUMO_SDX_SET_INDEX(na2->stridx[j], index);
|
491
489
|
q[i].idx = NULL;
|
492
|
-
|
493
|
-
for (k=0; k<size; k++) {
|
494
|
-
index[k] = CUMO_SDX_GET_INDEX(sdx1)[index[k]];
|
495
|
-
}
|
490
|
+
cumo_na_index_aref_naview_index_index_kernel_launch(index, index1, size);
|
496
491
|
}
|
497
492
|
else if (q[i].idx != NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
|
498
493
|
// index <- step
|
499
494
|
ssize_t stride1 = CUMO_SDX_GET_STRIDE(sdx1);
|
500
495
|
size_t *index = q[i].idx;
|
501
|
-
|
502
|
-
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
|
503
|
-
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
504
|
-
|
505
496
|
CUMO_SDX_SET_INDEX(na2->stridx[j],index);
|
506
497
|
q[i].idx = NULL;
|
507
498
|
|
508
499
|
if (stride1<0) {
|
509
500
|
size_t last;
|
510
|
-
int k;
|
511
501
|
stride1 = -stride1;
|
512
502
|
last = na1->base.shape[q[i].orig_dim] - 1;
|
513
503
|
if (na2->offset < last * stride1) {
|
514
504
|
rb_raise(rb_eStandardError,"bug: negative offset");
|
515
505
|
}
|
516
506
|
na2->offset -= last * stride1;
|
517
|
-
|
518
|
-
index[k] = (last - index[k]) * stride1;
|
519
|
-
}
|
507
|
+
cumo_na_index_aref_naview_index_stride_last_kernel_launch(index, stride1, last, size);
|
520
508
|
} else {
|
521
|
-
|
522
|
-
for (k=0; k<size; k++) {
|
523
|
-
index[k] = index[k] * stride1;
|
524
|
-
}
|
509
|
+
cumo_na_index_aref_naview_index_stride_kernel_launch(index, stride1, size);
|
525
510
|
}
|
526
511
|
}
|
527
512
|
else if (q[i].idx == NULL && CUMO_SDX_IS_INDEX(sdx1)) {
|
528
513
|
// step <- index
|
529
|
-
int k;
|
530
514
|
size_t beg = q[i].beg;
|
531
515
|
ssize_t step = q[i].step;
|
532
516
|
// size_t *index = ALLOC_N(size_t, size);
|
533
517
|
size_t *index = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*size);
|
518
|
+
size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
|
534
519
|
CUMO_SDX_SET_INDEX(na2->stridx[j],index);
|
535
|
-
|
536
|
-
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
|
537
|
-
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
538
|
-
|
539
|
-
for (k=0; k<size; k++) {
|
540
|
-
index[k] = CUMO_SDX_GET_INDEX(sdx1)[beg+step*k];
|
541
|
-
}
|
520
|
+
cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(index, index1, beg, step, size);
|
542
521
|
}
|
543
522
|
else if (q[i].idx == NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
|
544
523
|
// step <- step
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#include "cumo/narray_kernel.h"
|
2
|
+
|
3
|
+
#if defined(__cplusplus)
|
4
|
+
extern "C" {
|
5
|
+
#if 0
|
6
|
+
} /* satisfy cc-mode */
|
7
|
+
#endif
|
8
|
+
#endif
|
9
|
+
|
10
|
+
__global__ void cumo_na_index_aref_nadata_index_stride_kernel(size_t *idx, ssize_t s1, uint64_t n)
|
11
|
+
{
|
12
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
13
|
+
idx[i] = idx[i] * s1;
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
__global__ void cumo_na_index_aref_naview_index_index_kernel(size_t *idx, size_t *idx1, uint64_t n)
|
18
|
+
{
|
19
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
20
|
+
idx[i] = idx1[idx[i]];
|
21
|
+
}
|
22
|
+
}
|
23
|
+
|
24
|
+
__global__ void cumo_na_index_aref_naview_index_stride_last_kernel(size_t *idx, ssize_t s1, size_t last, uint64_t n)
|
25
|
+
{
|
26
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
27
|
+
idx[i] = (last - idx[i]) * s1;
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
__global__ void cumo_na_index_aref_naview_index_stride_kernel(size_t *idx, ssize_t s1, uint64_t n)
|
32
|
+
{
|
33
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
34
|
+
idx[i] = idx[i] * s1;
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
__global__ void cumo_na_index_aref_naview_index_index_beg_step_kernel(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
|
39
|
+
{
|
40
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
|
41
|
+
idx[i] = idx1[beg + step * i];
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
void cumo_na_index_aref_nadata_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n)
|
46
|
+
{
|
47
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
48
|
+
size_t block_dim = cumo_get_block_dim(n);
|
49
|
+
cumo_na_index_aref_nadata_index_stride_kernel<<<grid_dim, block_dim>>>(idx, s1, n);
|
50
|
+
}
|
51
|
+
|
52
|
+
void cumo_na_index_aref_naview_index_index_kernel_launch(size_t *idx, size_t *idx1, uint64_t n)
|
53
|
+
{
|
54
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
55
|
+
size_t block_dim = cumo_get_block_dim(n);
|
56
|
+
cumo_na_index_aref_naview_index_index_kernel<<<grid_dim, block_dim>>>(idx, idx1, n);
|
57
|
+
}
|
58
|
+
|
59
|
+
void cumo_na_index_aref_naview_index_stride_last_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n)
|
60
|
+
{
|
61
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
62
|
+
size_t block_dim = cumo_get_block_dim(n);
|
63
|
+
cumo_na_index_aref_naview_index_stride_last_kernel<<<grid_dim, block_dim>>>(idx, s1, last, n);
|
64
|
+
}
|
65
|
+
|
66
|
+
void cumo_na_index_aref_naview_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n)
|
67
|
+
{
|
68
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
69
|
+
size_t block_dim = cumo_get_block_dim(n);
|
70
|
+
cumo_na_index_aref_naview_index_stride_kernel<<<grid_dim, block_dim>>>(idx, s1, n);
|
71
|
+
}
|
72
|
+
|
73
|
+
void cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
|
74
|
+
{
|
75
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
76
|
+
size_t block_dim = cumo_get_block_dim(n);
|
77
|
+
cumo_na_index_aref_naview_index_index_beg_step_kernel<<<grid_dim, block_dim>>>(idx, idx1, beg, step, n);
|
78
|
+
}
|
79
|
+
|
80
|
+
#if defined(__cplusplus)
|
81
|
+
#if 0
|
82
|
+
{ /* satisfy cc-mode */
|
83
|
+
#endif
|
84
|
+
} /* extern "C" { */
|
85
|
+
#endif
|
86
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cumo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -170,6 +170,7 @@ files:
|
|
170
170
|
- ext/cumo/narray/SFMT.h
|
171
171
|
- ext/cumo/narray/array.c
|
172
172
|
- ext/cumo/narray/data.c
|
173
|
+
- ext/cumo/narray/data_kernel.cu
|
173
174
|
- ext/cumo/narray/gen/cogen.rb
|
174
175
|
- ext/cumo/narray/gen/cogen_kernel.rb
|
175
176
|
- ext/cumo/narray/gen/def/bit.rb
|
@@ -305,6 +306,7 @@ files:
|
|
305
306
|
- ext/cumo/narray/gen/tmpl_bit/where.c
|
306
307
|
- ext/cumo/narray/gen/tmpl_bit/where2.c
|
307
308
|
- ext/cumo/narray/index.c
|
309
|
+
- ext/cumo/narray/index_kernel.cu
|
308
310
|
- ext/cumo/narray/kwargs.c
|
309
311
|
- ext/cumo/narray/math.c
|
310
312
|
- ext/cumo/narray/narray.c
|