cumo 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a909afef2849be43637539db86170d49f2063e53df43d90e4237b1b7857bb1b0
-  data.tar.gz: 831c2071af2e8ac4f5da2506d488d164f90358268dcd8b6b891dc549d387c38b
+  metadata.gz: ac2b9873bc48d45afcac57ff6e45ba84cc69ed1c61430cb13236a5c1ce018d0c
+  data.tar.gz: c001063b6a66de3055f98789420d5574b5b2d53357624dc6ffbe750ce2f727f1
 SHA512:
-  metadata.gz: fbc6727d8f2fe31292b36c1a962aae090c2ac9d00037b4b04f674ace1e99db49203141966d93cb1689830a66196dc6432ea90501cd70c57a4dff9f47374f1170
-  data.tar.gz: '096263752329c9dee63ddd22aaa1658b127c251b08997e728aa3b0304b3243937ba58ef7a2f3096eda5e2cd2afdbb40908bfdbb1169d8f0eec43ad1a76cb0511'
+  metadata.gz: f18aa1652ddd921ae91da6f75e28a5b9338091111a07b1cc97b586b19e0a755fcb195b2307626a27f95def01845629ed710786f12932888a14e7dcfc55b0d034
+  data.tar.gz: ed082b7188a9b517074eb78216fd0a15333ae84e7eeb271c6f7675d8c4bfecd0ef8ccf575017dfe450c75b85e5f275b801c11295e985be447cd9c08703601c86
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+# 0.2.2 (2018-11-13)
+
+Enhancements:
+
+* CUDA kernelize na_index_aref_naview
+* CUDA kernelize na_index_aref_nadata
+* CUDA kernelize diagonal
+* CUDA kernelize copy
+
 # 0.2.1 (2018-11-12)
 
 Enhancements:
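All four "CUDA kernelize" items in this release share one shape: a host-side loop over an index array, which previously forced a cudaDeviceSynchronize, is replaced by a device kernel built around a grid-stride loop. A minimal, self-contained sketch of that pattern follows; scale_kernel and the launch sizing are illustrative stand-ins, not cumo code.

// Sketch of the grid-stride loop pattern used by every kernel added in
// this release. scale_kernel is hypothetical, not part of cumo.
#include <cstdint>
#include <cstdio>

__global__ void scale_kernel(size_t *idx, long s, uint64_t n)
{
    // Each thread starts at its global id and advances by the total
    // number of launched threads, so any n is covered by one launch.
    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
         i += blockDim.x * gridDim.x) {
        idx[i] = idx[i] * s;
    }
}

int main()
{
    const uint64_t n = 1 << 20;
    size_t *idx;
    cudaMallocManaged(&idx, n * sizeof(size_t));
    for (uint64_t i = 0; i < n; ++i) idx[i] = i;

    const unsigned block_dim = 128;  // illustrative sizing; cumo uses its own helpers
    const unsigned grid_dim = (unsigned)((n + block_dim - 1) / block_dim);
    scale_kernel<<<grid_dim, block_dim>>>(idx, 8, n);
    cudaDeviceSynchronize();         // required before the host reads idx

    printf("idx[3] = %zu\n", idx[3]);  // prints 24
    cudaFree(idx);
    return 0;
}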
data/ext/cumo/extconf.rb CHANGED
@@ -66,8 +66,10 @@ narray/narray
 narray/array
 narray/step
 narray/index
+narray/index_kernel
 narray/ndloop
 narray/data
+narray/data_kernel
 narray/types/bit
 narray/types/int8
 narray/types/int16
@@ -10,8 +10,8 @@ extern "C" {
 #endif
 #endif
 
-#define CUMO_VERSION "0.2.1"
-#define CUMO_VERSION_CODE 21
+#define CUMO_VERSION "0.2.2"
+#define CUMO_VERSION_CODE 22
 
 bool cumo_compatible_mode_enabled_p();
 bool cumo_show_warning_enabled_p();
data/ext/cumo/narray/data.c CHANGED
@@ -53,15 +53,23 @@ static ID cumo_id_swap_byte;
     } \
 }
 
-#define m_memcpy(src,dst) memcpy(dst,src,e)
+void cumo_iter_copy_bytes_kernel_launch(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, size_t n, int elmsz);
+// #define m_memcpy(src,dst) memcpy(dst,src,e)
+
 static void
 iter_copy_bytes(cumo_na_loop_t *const lp)
 {
-    size_t e;
-    e = lp->args[0].elmsz;
-    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_copy_bytes", "any");
-    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-    LOOP_UNARY_PTR(lp,m_memcpy);
+    size_t n;
+    ssize_t s1, s2;
+    char *p1, *p2;
+    size_t *idx1, *idx2;
+    CUMO_INIT_COUNTER(lp, n);
+    CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+    CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
+    cumo_iter_copy_bytes_kernel_launch(p1, p2, s1, s2, idx1, idx2, n, lp->args[0].elmsz);
+    // size_t e;
+    // e = lp->args[0].elmsz;
+    // LOOP_UNARY_PTR(lp,m_memcpy);
 }
 
 VALUE
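For reference, what the new launch computes per element (the kernel body itself appears in data_kernel.cu below): each operand contributes either an index-array byte offset, when the operand is an index view, or a linear stride. A plain host-side equivalent, written out for illustration only, not cumo code:

// Host-side reference for cumo_iter_copy_bytes_kernel: per element, the
// byte offset comes from the index array when present, otherwise from
// the stride; elmsz bytes are copied per element.
#include <string.h>
#include <sys/types.h>

static void copy_bytes_reference(char *p1, char *p2, ssize_t s1, ssize_t s2,
                                 size_t *idx1, size_t *idx2,
                                 size_t n, ssize_t elmsz)
{
    for (size_t i = 0; i < n; ++i) {
        char *src = p1 + (idx1 ? idx1[i] : (ssize_t)i * s1);
        char *dst = p2 + (idx2 ? idx2[i] : (ssize_t)i * s2);
        memcpy(dst, src, elmsz);
    }
}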
@@ -562,6 +570,10 @@ cumo_na_flatten(VALUE self)
 
 #define MIN(a,b) (((a)<(b))?(a):(b))
 
+void cumo_na_diagonal_index_index_kernel_launch(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n);
+void cumo_na_diagonal_index_stride_kernel_launch(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n);
+void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n);
+
 /*
   Returns a diagonal view of NArray
   @overload diagonal([offset,axes])
@@ -601,7 +613,6 @@ static VALUE
 cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
 {
     int i, k, nd;
-    size_t j;
     size_t *idx0, *idx1, *diag_idx;
     size_t *shape;
     size_t diag_size;
@@ -754,20 +765,12 @@ cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
         idx0 = CUMO_SDX_GET_INDEX(na1->stridx[ax[0]]);
         // diag_idx = ALLOC_N(size_t, diag_size);
         diag_idx = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*diag_size);
-
-        CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_diagonal", "any");
-        cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
         if (CUMO_SDX_IS_INDEX(na1->stridx[ax[1]])) {
             idx1 = CUMO_SDX_GET_INDEX(na1->stridx[ax[1]]);
-            for (j=0; j<diag_size; j++) {
-                diag_idx[j] = idx0[j+k0] + idx1[j+k1];
-            }
+            cumo_na_diagonal_index_index_kernel_launch(diag_idx, idx0, idx1, k0, k1, diag_size);
         } else {
             stride1 = CUMO_SDX_GET_STRIDE(na1->stridx[ax[1]]);
-            for (j=0; j<diag_size; j++) {
-                diag_idx[j] = idx0[j+k0] + stride1*(j+k1);
-            }
+            cumo_na_diagonal_index_stride_kernel_launch(diag_idx, idx0, stride1, k0, k1, diag_size);
         }
         CUMO_SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
     } else {
@@ -776,13 +779,7 @@ cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
             idx1 = CUMO_SDX_GET_INDEX(na1->stridx[ax[1]]);
             // diag_idx = ALLOC_N(size_t, diag_size);
             diag_idx = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*diag_size);
-
-            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_diagonal", "any");
-            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
-            for (j=0; j<diag_size; j++) {
-                diag_idx[j] = stride0*(j+k0) + idx1[j+k1];
-            }
+            cumo_na_diagonal_stride_index_kernel_launch(diag_idx, stride0, idx1, k0, k1, diag_size);
             CUMO_SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
         } else {
             stride1 = CUMO_SDX_GET_STRIDE(na1->stridx[ax[1]]);
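The arithmetic itself is unchanged by the move to the device: diagonal element j sits at idx0[j+k0] + idx1[j+k1] when both axes are index-mapped, and the index term is swapped for stride*(j+k) when an axis is strided. Worked numbers for the index-by-stride case, with values made up for illustration:

// Index-by-stride diagonal offsets for a hypothetical 3x3 view of
// 8-byte elements: rows are index-mapped (idx0), columns are strided.
#include <stdio.h>
#include <sys/types.h>

int main(void)
{
    size_t idx0[] = {0, 24, 48};  // byte offsets of rows 0..2
    ssize_t stride1 = 8;          // byte stride along columns
    size_t k0 = 0, k1 = 0;        // zero diagonal offset
    size_t diag_idx[3];
    for (size_t j = 0; j < 3; ++j) {
        diag_idx[j] = idx0[j + k0] + stride1 * (j + k1);
        printf("diag_idx[%zu] = %zu\n", j, diag_idx[j]);  // 0, 32, 64
    }
    return 0;
}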
data/ext/cumo/narray/data_kernel.cu ADDED
@@ -0,0 +1,75 @@
+#include "cumo/narray_kernel.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#if 0
+} /* satisfy cc-mode */
+#endif
+#endif
+
+__global__ void cumo_iter_copy_bytes_kernel(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, uint64_t n, ssize_t elmsz)
+{
+    char *p1_ = NULL;
+    char *p2_ = NULL;
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        p1_ = p1 + (idx1 ? idx1[i] : i * s1);
+        p2_ = p2 + (idx2 ? idx2[i] : i * s2);
+        memcpy(p2_, p1_, elmsz);
+    }
+}
+
+__global__ void cumo_na_diagonal_index_index_kernel(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = idx0[i+k0] + idx1[i+k1];
+    }
+}
+
+__global__ void cumo_na_diagonal_index_stride_kernel(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = idx0[i+k0] + s1*(i+k1);
+    }
+}
+
+__global__ void cumo_na_diagonal_stride_index_kernel(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = s0*(i+k0) + idx1[i+k1];
+    }
+}
+
+void cumo_iter_copy_bytes_kernel_launch(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, uint64_t n, ssize_t elmsz)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_iter_copy_bytes_kernel<<<grid_dim, block_dim>>>(p1, p2, s1, s2, idx1, idx2, n, elmsz);
+}
+
+void cumo_na_diagonal_index_index_kernel_launch(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_diagonal_index_index_kernel<<<grid_dim, block_dim>>>(idx, idx0, idx1, k0, k1, n);
+}
+
+void cumo_na_diagonal_index_stride_kernel_launch(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_diagonal_index_stride_kernel<<<grid_dim, block_dim>>>(idx, idx0, s1, k0, k1, n);
+}
+
+void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_diagonal_stride_index_kernel<<<grid_dim, block_dim>>>(idx, s0, idx1, k0, k1, n);
+}
+
+#if defined(__cplusplus)
+#if 0
+{ /* satisfy cc-mode */
+#endif
+} /* extern "C" { */
+#endif
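A note on the file's shape: the launch wrappers are compiled by nvcc as C++ but are declared and called from plain C (data.c above), so they sit inside extern "C" blocks to suppress name mangling; the "#if 0 } #endif" lines exist only to keep editors' brace matching happy. The same split in miniature, with hypothetical file and function names:

// mylib_kernel.cu -- compiled by nvcc (hypothetical example of the split)
#include <cstddef>
#include <cstdint>

__global__ void scale_kernel(size_t *idx, long s, uint64_t n)
{
    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
         i += blockDim.x * gridDim.x)
        idx[i] *= s;
}

extern "C" void scale_launch(size_t *idx, long s, uint64_t n)
{
    // Unmangled symbol: callable from .c files with a plain prototype,
    // e.g.  void scale_launch(size_t *idx, long s, uint64_t n);
    scale_kernel<<<(unsigned)((n + 127) / 128), 128>>>(idx, s, n);
}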
@@ -130,6 +130,7 @@ static void
         r,
         swaptype,
         presorted;
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
 
 loop:SWAPINIT(a, es);
     if (n < 7)
@@ -9,6 +9,7 @@ static void
     CUMO_INIT_COUNTER(lp, n);
     CUMO_INIT_PTR(lp, 0, ptr, step);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
    <%=type_name%>_qsort<%=j%>(ptr, n, step);
 }
 <% end %>
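Both sort templates gain an explicit cudaDeviceSynchronize because the generated qsort runs on the CPU: any kernels still writing the buffer must finish before host code dereferences it. The hazard in miniature, as a hypothetical example rather than cumo code:

// Why sync before a host-side sort: the CPU may not touch device-visible
// memory while a prior kernel may still be writing it.
#include <cstdio>
#include <cstdlib>

__global__ void fill(int *a, int n)
{
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
         i += blockDim.x * gridDim.x)
        a[i] = n - i;  // descending values
}

static int cmp_int(const void *x, const void *y)
{
    return *(const int *)x - *(const int *)y;
}

int main()
{
    const int n = 1024;
    int *a;
    cudaMallocManaged(&a, n * sizeof(int));
    fill<<<8, 128>>>(a, n);
    cudaDeviceSynchronize();            // without this, qsort may read stale data
    qsort(a, n, sizeof(int), cmp_int);  // host-side sort, as in cumo's templates
    printf("%d %d\n", a[0], a[n - 1]);  // 1 1024
    cudaFree(a);
    return 0;
}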
data/ext/cumo/narray/index.c CHANGED
@@ -391,12 +391,14 @@ cumo_na_get_strides_nadata(const cumo_narray_data_t *na, ssize_t *strides, ssiz
     }
 }
 
+void cumo_na_index_aref_nadata_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n);
+
 static void
 cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
                           cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
 {
     int i, j;
-    ssize_t size, k, total=1;
+    ssize_t size, total=1;
     ssize_t stride1;
     ssize_t *strides_na1;
     size_t *index;
@@ -425,15 +427,10 @@ cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
 
         // array index
         if (q[i].idx != NULL) {
-            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_nadata", "any");
-            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
             index = q[i].idx;
             CUMO_SDX_SET_INDEX(na2->stridx[j],index);
             q[i].idx = NULL;
-            for (k=0; k<size; k++) {
-                index[k] = index[k] * stride1;
-            }
+            cumo_na_index_aref_nadata_index_stride_kernel_launch(index, stride1, size);
         } else {
             beg = q[i].beg;
             step = q[i].step;
@@ -447,6 +444,11 @@ cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
 }
 
 
+void cumo_na_index_aref_naview_index_index_kernel_launch(size_t *idx, size_t *idx1, uint64_t n);
+void cumo_na_index_aref_naview_index_stride_last_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n);
+void cumo_na_index_aref_naview_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n);
+void cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n);
+
 static void
 cumo_na_index_aref_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
                           cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
@@ -481,64 +483,41 @@ cumo_na_index_aref_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
         }
         else if (q[i].idx != NULL && CUMO_SDX_IS_INDEX(sdx1)) {
             // index <- index
-            int k;
             size_t *index = q[i].idx;
-
-            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
-            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
+            size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
             CUMO_SDX_SET_INDEX(na2->stridx[j], index);
             q[i].idx = NULL;
-
-            for (k=0; k<size; k++) {
-                index[k] = CUMO_SDX_GET_INDEX(sdx1)[index[k]];
-            }
+            cumo_na_index_aref_naview_index_index_kernel_launch(index, index1, size);
         }
         else if (q[i].idx != NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
             // index <- step
             ssize_t stride1 = CUMO_SDX_GET_STRIDE(sdx1);
             size_t *index = q[i].idx;
-
-            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
-            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
             CUMO_SDX_SET_INDEX(na2->stridx[j],index);
             q[i].idx = NULL;
 
             if (stride1<0) {
                 size_t last;
-                int k;
                 stride1 = -stride1;
                 last = na1->base.shape[q[i].orig_dim] - 1;
                 if (na2->offset < last * stride1) {
                     rb_raise(rb_eStandardError,"bug: negative offset");
                 }
                 na2->offset -= last * stride1;
-                for (k=0; k<size; k++) {
-                    index[k] = (last - index[k]) * stride1;
-                }
+                cumo_na_index_aref_naview_index_stride_last_kernel_launch(index, stride1, last, size);
             } else {
-                int k;
-                for (k=0; k<size; k++) {
-                    index[k] = index[k] * stride1;
-                }
+                cumo_na_index_aref_naview_index_stride_kernel_launch(index, stride1, size);
             }
         }
         else if (q[i].idx == NULL && CUMO_SDX_IS_INDEX(sdx1)) {
             // step <- index
-            int k;
             size_t beg = q[i].beg;
             ssize_t step = q[i].step;
             // size_t *index = ALLOC_N(size_t, size);
             size_t *index = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*size);
+            size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
             CUMO_SDX_SET_INDEX(na2->stridx[j],index);
-
-            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
-            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
-            for (k=0; k<size; k++) {
-                index[k] = CUMO_SDX_GET_INDEX(sdx1)[beg+step*k];
-            }
+            cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(index, index1, beg, step, size);
        }
        else if (q[i].idx == NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
            // step <- step
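The "index <- index" branch composes two gathers into one: the fresh selection is rewritten in place through the view's existing index array, idx[i] = idx1[idx[i]], which is what cumo_na_index_aref_naview_index_index_kernel (below) computes per element. With made-up values, as plain host code for illustration:

// Index-through-index composition: the result stays a single flat gather.
#include <stdio.h>

int main(void)
{
    size_t idx1[] = {0, 40, 80, 120, 160};  // view's existing byte offsets
    size_t idx[]  = {4, 2, 0};              // new selection into that view
    for (size_t i = 0; i < 3; ++i)
        idx[i] = idx1[idx[i]];              // compose into one gather
    printf("%zu %zu %zu\n", idx[0], idx[1], idx[2]);  // 160 80 0
    return 0;
}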
data/ext/cumo/narray/index_kernel.cu ADDED
@@ -0,0 +1,86 @@
+#include "cumo/narray_kernel.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#if 0
+} /* satisfy cc-mode */
+#endif
+#endif
+
+__global__ void cumo_na_index_aref_nadata_index_stride_kernel(size_t *idx, ssize_t s1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = idx[i] * s1;
+    }
+}
+
+__global__ void cumo_na_index_aref_naview_index_index_kernel(size_t *idx, size_t *idx1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = idx1[idx[i]];
+    }
+}
+
+__global__ void cumo_na_index_aref_naview_index_stride_last_kernel(size_t *idx, ssize_t s1, size_t last, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = (last - idx[i]) * s1;
+    }
+}
+
+__global__ void cumo_na_index_aref_naview_index_stride_kernel(size_t *idx, ssize_t s1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = idx[i] * s1;
+    }
+}
+
+__global__ void cumo_na_index_aref_naview_index_index_beg_step_kernel(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = idx1[beg + step * i];
+    }
+}
+
+void cumo_na_index_aref_nadata_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_aref_nadata_index_stride_kernel<<<grid_dim, block_dim>>>(idx, s1, n);
+}
+
+void cumo_na_index_aref_naview_index_index_kernel_launch(size_t *idx, size_t *idx1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_aref_naview_index_index_kernel<<<grid_dim, block_dim>>>(idx, idx1, n);
+}
+
+void cumo_na_index_aref_naview_index_stride_last_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_aref_naview_index_stride_last_kernel<<<grid_dim, block_dim>>>(idx, s1, last, n);
+}
+
+void cumo_na_index_aref_naview_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_aref_naview_index_stride_kernel<<<grid_dim, block_dim>>>(idx, s1, n);
+}
+
+void cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_aref_naview_index_index_beg_step_kernel<<<grid_dim, block_dim>>>(idx, idx1, beg, step, n);
+}
+
+#if defined(__cplusplus)
+#if 0
+{ /* satisfy cc-mode */
+#endif
+} /* extern "C" { */
+#endif
+
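The negative-stride branch in index.c pairs with cumo_na_index_aref_naview_index_stride_last_kernel: the host rebases the view offset by last * |stride| once, and the kernel then maps each selected position k to (last - k) * |stride|, so every per-element offset stays non-negative. Worked numbers, made up for illustration:

// A length-5 axis with byte stride -8: after the host subtracts
// last * |stride| = 32 from the view offset, selected positions map to
// (last - k) * |stride|.
#include <stdio.h>

int main(void)
{
    size_t last = 4;           // shape - 1
    long stride1 = 8;          // |original stride|, after negation
    size_t idx[] = {0, 2, 4};  // selected positions along the axis
    for (size_t i = 0; i < 3; ++i)
        idx[i] = (last - idx[i]) * stride1;
    printf("%zu %zu %zu\n", idx[0], idx[1], idx[2]);  // 32 16 0
    return 0;
}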
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cumo
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.2.2
 platform: ruby
 authors:
 - Naotoshi Seo
@@ -170,6 +170,7 @@ files:
 - ext/cumo/narray/SFMT.h
 - ext/cumo/narray/array.c
 - ext/cumo/narray/data.c
+- ext/cumo/narray/data_kernel.cu
 - ext/cumo/narray/gen/cogen.rb
 - ext/cumo/narray/gen/cogen_kernel.rb
 - ext/cumo/narray/gen/def/bit.rb
@@ -305,6 +306,7 @@ files:
 - ext/cumo/narray/gen/tmpl_bit/where.c
 - ext/cumo/narray/gen/tmpl_bit/where2.c
 - ext/cumo/narray/index.c
+- ext/cumo/narray/index_kernel.cu
 - ext/cumo/narray/kwargs.c
 - ext/cumo/narray/math.c
 - ext/cumo/narray/narray.c