cumo 0.2.1 → 0.2.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: a909afef2849be43637539db86170d49f2063e53df43d90e4237b1b7857bb1b0
- data.tar.gz: 831c2071af2e8ac4f5da2506d488d164f90358268dcd8b6b891dc549d387c38b
+ metadata.gz: ac2b9873bc48d45afcac57ff6e45ba84cc69ed1c61430cb13236a5c1ce018d0c
+ data.tar.gz: c001063b6a66de3055f98789420d5574b5b2d53357624dc6ffbe750ce2f727f1
  SHA512:
- metadata.gz: fbc6727d8f2fe31292b36c1a962aae090c2ac9d00037b4b04f674ace1e99db49203141966d93cb1689830a66196dc6432ea90501cd70c57a4dff9f47374f1170
- data.tar.gz: '096263752329c9dee63ddd22aaa1658b127c251b08997e728aa3b0304b3243937ba58ef7a2f3096eda5e2cd2afdbb40908bfdbb1169d8f0eec43ad1a76cb0511'
+ metadata.gz: f18aa1652ddd921ae91da6f75e28a5b9338091111a07b1cc97b586b19e0a755fcb195b2307626a27f95def01845629ed710786f12932888a14e7dcfc55b0d034
+ data.tar.gz: ed082b7188a9b517074eb78216fd0a15333ae84e7eeb271c6f7675d8c4bfecd0ef8ccf575017dfe450c75b85e5f275b801c11295e985be447cd9c08703601c86
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+ # 0.2.2 (2018-11-13)
+
+ Enhancements:
+
+ * CUDA kernelize na_index_aref_naview
+ * CUDA kernelize na_index_aref_nadata
+ * CUDA kernelize diagonal
+ * CUDA kernelize copy
+
  # 0.2.1 (2018-11-12)
 
  Enhancements:
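
The four "CUDA kernelize" entries above replace host-side index loops (each guarded by a cudaDeviceSynchronize) with device kernels. All of the new kernels in this release use the same grid-stride loop launch pattern; the sketch below is an illustrative, standalone version of that pattern, not part of cumo (the name scale_indices_kernel and the fixed 256-thread launch are hypothetical):

```
// Standalone sketch of the grid-stride loop pattern used by the new kernels.
// Compile with: nvcc -o sketch sketch.cu
#include <cstdio>
#include <cstddef>
#include <cstdint>

__global__ void scale_indices_kernel(size_t *idx, ptrdiff_t stride, uint64_t n)
{
    // Each thread processes i, i + blockDim*gridDim, i + 2*blockDim*gridDim, ...
    // so any grid/block size covers all n elements.
    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
         i += blockDim.x * gridDim.x) {
        idx[i] = idx[i] * stride;
    }
}

int main()
{
    const uint64_t n = 1024;
    size_t *idx;
    cudaMallocManaged(&idx, n * sizeof(size_t));   // unified memory for brevity
    for (uint64_t i = 0; i < n; ++i) idx[i] = i;

    scale_indices_kernel<<<(n + 255) / 256, 256>>>(idx, 8, n);
    cudaDeviceSynchronize();                       // wait before the host reads

    printf("idx[10] = %zu\n", idx[10]);            // prints 80
    cudaFree(idx);
    return 0;
}
```

cumo itself picks grid and block dimensions via cumo_get_grid_dim/cumo_get_block_dim, as the new *_kernel.cu files further down show.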
data/ext/cumo/extconf.rb CHANGED
@@ -66,8 +66,10 @@ narray/narray
  narray/array
  narray/step
  narray/index
+ narray/index_kernel
  narray/ndloop
  narray/data
+ narray/data_kernel
  narray/types/bit
  narray/types/int8
  narray/types/int16
@@ -10,8 +10,8 @@ extern "C" {
  #endif
  #endif
 
- #define CUMO_VERSION "0.2.1"
- #define CUMO_VERSION_CODE 21
+ #define CUMO_VERSION "0.2.2"
+ #define CUMO_VERSION_CODE 22
 
  bool cumo_compatible_mode_enabled_p();
  bool cumo_show_warning_enabled_p();
@@ -53,15 +53,23 @@ static ID cumo_id_swap_byte;
  } \
  }
 
- #define m_memcpy(src,dst) memcpy(dst,src,e)
+ void cumo_iter_copy_bytes_kernel_launch(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, size_t n, int elmsz);
+ // #define m_memcpy(src,dst) memcpy(dst,src,e)
+
  static void
  iter_copy_bytes(cumo_na_loop_t *const lp)
  {
- size_t e;
- e = lp->args[0].elmsz;
- CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_copy_bytes", "any");
- cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
- LOOP_UNARY_PTR(lp,m_memcpy);
+ size_t n;
+ ssize_t s1, s2;
+ char *p1, *p2;
+ size_t *idx1, *idx2;
+ CUMO_INIT_COUNTER(lp, n);
+ CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+ CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
+ cumo_iter_copy_bytes_kernel_launch(p1, p2, s1, s2, idx1, idx2, n, lp->args[0].elmsz);
+ // size_t e;
+ // e = lp->args[0].elmsz;
+ // LOOP_UNARY_PTR(lp,m_memcpy);
  }
 
  VALUE
@@ -562,6 +570,10 @@ cumo_na_flatten(VALUE self)
 
  #define MIN(a,b) (((a)<(b))?(a):(b))
 
+ void cumo_na_diagonal_index_index_kernel_launch(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n);
+ void cumo_na_diagonal_index_stride_kernel_launch(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n);
+ void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n);
+
  /*
  Returns a diagonal view of NArray
  @overload diagonal([offset,axes])
@@ -601,7 +613,6 @@ static VALUE
  cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
  {
  int i, k, nd;
- size_t j;
  size_t *idx0, *idx1, *diag_idx;
  size_t *shape;
  size_t diag_size;
@@ -754,20 +765,12 @@ cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
  idx0 = CUMO_SDX_GET_INDEX(na1->stridx[ax[0]]);
  // diag_idx = ALLOC_N(size_t, diag_size);
  diag_idx = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*diag_size);
-
- CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_diagonal", "any");
- cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
  if (CUMO_SDX_IS_INDEX(na1->stridx[ax[1]])) {
  idx1 = CUMO_SDX_GET_INDEX(na1->stridx[ax[1]]);
- for (j=0; j<diag_size; j++) {
- diag_idx[j] = idx0[j+k0] + idx1[j+k1];
- }
+ cumo_na_diagonal_index_index_kernel_launch(diag_idx, idx0, idx1, k0, k1, diag_size);
  } else {
  stride1 = CUMO_SDX_GET_STRIDE(na1->stridx[ax[1]]);
- for (j=0; j<diag_size; j++) {
- diag_idx[j] = idx0[j+k0] + stride1*(j+k1);
- }
+ cumo_na_diagonal_index_stride_kernel_launch(diag_idx, idx0, stride1, k0, k1, diag_size);
  }
  CUMO_SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
  } else {
@@ -776,13 +779,7 @@ cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
  idx1 = CUMO_SDX_GET_INDEX(na1->stridx[ax[1]]);
  // diag_idx = ALLOC_N(size_t, diag_size);
  diag_idx = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*diag_size);
-
- CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_diagonal", "any");
- cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
- for (j=0; j<diag_size; j++) {
- diag_idx[j] = stride0*(j+k0) + idx1[j+k1];
- }
+ cumo_na_diagonal_stride_index_kernel_launch(diag_idx, stride0, idx1, k0, k1, diag_size);
  CUMO_SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
  } else {
  stride1 = CUMO_SDX_GET_STRIDE(na1->stridx[ax[1]]);
@@ -0,0 +1,75 @@
+ #include "cumo/narray_kernel.h"
+
+ #if defined(__cplusplus)
+ extern "C" {
+ #if 0
+ } /* satisfy cc-mode */
+ #endif
+ #endif
+
+ __global__ void cumo_iter_copy_bytes_kernel(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, uint64_t n, ssize_t elmsz)
+ {
+ char *p1_ = NULL;
+ char *p2_ = NULL;
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ p1_ = p1 + (idx1 ? idx1[i] : i * s1);
+ p2_ = p2 + (idx2 ? idx2[i] : i * s2);
+ memcpy(p2_, p1_, elmsz);
+ }
+ }
+
+ __global__ void cumo_na_diagonal_index_index_kernel(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = idx0[i+k0] + idx1[i+k1];
+ }
+ }
+
+ __global__ void cumo_na_diagonal_index_stride_kernel(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = idx0[i+k0] + s1*(i+k1);
+ }
+ }
+
+ __global__ void cumo_na_diagonal_stride_index_kernel(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = s0*(i+k0) + idx1[i+k1];
+ }
+ }
+
+ void cumo_iter_copy_bytes_kernel_launch(char *p1, char *p2, ssize_t s1, ssize_t s2, size_t *idx1, size_t *idx2, uint64_t n, ssize_t elmsz)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_iter_copy_bytes_kernel<<<grid_dim, block_dim>>>(p1, p2, s1, s2, idx1, idx2, n, elmsz);
+ }
+
+ void cumo_na_diagonal_index_index_kernel_launch(size_t *idx, size_t *idx0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_diagonal_index_index_kernel<<<grid_dim, block_dim>>>(idx, idx0, idx1, k0, k1, n);
+ }
+
+ void cumo_na_diagonal_index_stride_kernel_launch(size_t *idx, size_t *idx0, ssize_t s1, size_t k0, size_t k1, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_diagonal_index_stride_kernel<<<grid_dim, block_dim>>>(idx, idx0, s1, k0, k1, n);
+ }
+
+ void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t *idx1, size_t k0, size_t k1, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_diagonal_stride_index_kernel<<<grid_dim, block_dim>>>(idx, s0, idx1, k0, k1, n);
+ }
+
+ #if defined(__cplusplus)
+ #if 0
+ { /* satisfy cc-mode */
+ #endif
+ } /* extern "C" { */
+ #endif
@@ -130,6 +130,7 @@ static void
  r,
  swaptype,
  presorted;
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
 
  loop:SWAPINIT(a, es);
  if (n < 7)
@@ -9,6 +9,7 @@ static void
  CUMO_INIT_COUNTER(lp, n);
  CUMO_INIT_PTR(lp, 0, ptr, step);
  CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
  <%=type_name%>_qsort<%=j%>(ptr, n, step);
  }
  <% end %>
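
Both sort-related hunks above only add a cumo_cuda_runtime_check_status(cudaDeviceSynchronize()) call: the qsort itself still runs on the CPU, so the device has to finish any pending kernels that write the array before the host touches it, because kernel launches are asynchronous. A minimal illustration of that synchronize-then-check pattern follows; the check_status and fill_kernel helpers are hypothetical, not cumo's API:

```
// Why a synchronize + status check precedes host-side access to device-written data.
#include <cstdio>
#include <cstdlib>

static void check_status(cudaError_t status)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        exit(1);
    }
}

static int compare_ints(const void *x, const void *y)
{
    return *(const int *)x - *(const int *)y;
}

__global__ void fill_kernel(int *a, int n)
{
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
         i += blockDim.x * gridDim.x) {
        a[i] = n - i;                           // device writes the data
    }
}

int main()
{
    const int n = 256;
    int *a;
    check_status(cudaMallocManaged(&a, n * sizeof(int)));
    fill_kernel<<<1, 128>>>(a, n);

    // Without this fence the host-side qsort below could read stale or
    // partially written data, since the launch above is asynchronous.
    check_status(cudaDeviceSynchronize());

    qsort(a, n, sizeof(int), compare_ints);     // host-side sort
    printf("a[0] = %d\n", a[0]);                // prints 1
    cudaFree(a);
    return 0;
}
```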
@@ -391,12 +391,14 @@ cumo_na_get_strides_nadata(const cumo_narray_data_t *na, ssize_t *strides, ssize
  }
  }
 
+ void cumo_na_index_aref_nadata_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n);
+
  static void
  cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
  cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
  {
  int i, j;
- ssize_t size, k, total=1;
+ ssize_t size, total=1;
  ssize_t stride1;
  ssize_t *strides_na1;
  size_t *index;
@@ -425,15 +427,10 @@ cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
 
  // array index
  if (q[i].idx != NULL) {
- CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_nadata", "any");
- cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
  index = q[i].idx;
  CUMO_SDX_SET_INDEX(na2->stridx[j],index);
  q[i].idx = NULL;
- for (k=0; k<size; k++) {
- index[k] = index[k] * stride1;
- }
+ cumo_na_index_aref_nadata_index_stride_kernel_launch(index, stride1, size);
  } else {
  beg = q[i].beg;
  step = q[i].step;
@@ -447,6 +444,11 @@ cumo_na_index_aref_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
  }
 
 
+ void cumo_na_index_aref_naview_index_index_kernel_launch(size_t *idx, size_t *idx1, uint64_t n);
+ void cumo_na_index_aref_naview_index_stride_last_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n);
+ void cumo_na_index_aref_naview_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n);
+ void cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n);
+
  static void
  cumo_na_index_aref_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
  cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
@@ -481,64 +483,41 @@ cumo_na_index_aref_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
  }
  else if (q[i].idx != NULL && CUMO_SDX_IS_INDEX(sdx1)) {
  // index <- index
- int k;
  size_t *index = q[i].idx;
-
- CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
- cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
+ size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
  CUMO_SDX_SET_INDEX(na2->stridx[j], index);
  q[i].idx = NULL;
-
- for (k=0; k<size; k++) {
- index[k] = CUMO_SDX_GET_INDEX(sdx1)[index[k]];
- }
+ cumo_na_index_aref_naview_index_index_kernel_launch(index, index1, size);
  }
  else if (q[i].idx != NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
  // index <- step
  ssize_t stride1 = CUMO_SDX_GET_STRIDE(sdx1);
  size_t *index = q[i].idx;
-
- CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
- cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
  CUMO_SDX_SET_INDEX(na2->stridx[j],index);
  q[i].idx = NULL;
 
  if (stride1<0) {
  size_t last;
- int k;
  stride1 = -stride1;
  last = na1->base.shape[q[i].orig_dim] - 1;
  if (na2->offset < last * stride1) {
  rb_raise(rb_eStandardError,"bug: negative offset");
  }
  na2->offset -= last * stride1;
- for (k=0; k<size; k++) {
- index[k] = (last - index[k]) * stride1;
- }
+ cumo_na_index_aref_naview_index_stride_last_kernel_launch(index, stride1, last, size);
  } else {
- int k;
- for (k=0; k<size; k++) {
- index[k] = index[k] * stride1;
- }
+ cumo_na_index_aref_naview_index_stride_kernel_launch(index, stride1, size);
  }
  }
  else if (q[i].idx == NULL && CUMO_SDX_IS_INDEX(sdx1)) {
  // step <- index
- int k;
  size_t beg = q[i].beg;
  ssize_t step = q[i].step;
  // size_t *index = ALLOC_N(size_t, size);
  size_t *index = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*size);
+ size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
  CUMO_SDX_SET_INDEX(na2->stridx[j],index);
-
- CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("na_index_aref_naview", "any");
- cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
-
- for (k=0; k<size; k++) {
- index[k] = CUMO_SDX_GET_INDEX(sdx1)[beg+step*k];
- }
+ cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(index, index1, beg, step, size);
  }
  else if (q[i].idx == NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
  // step <- step
@@ -0,0 +1,86 @@
+ #include "cumo/narray_kernel.h"
+
+ #if defined(__cplusplus)
+ extern "C" {
+ #if 0
+ } /* satisfy cc-mode */
+ #endif
+ #endif
+
+ __global__ void cumo_na_index_aref_nadata_index_stride_kernel(size_t *idx, ssize_t s1, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = idx[i] * s1;
+ }
+ }
+
+ __global__ void cumo_na_index_aref_naview_index_index_kernel(size_t *idx, size_t *idx1, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = idx1[idx[i]];
+ }
+ }
+
+ __global__ void cumo_na_index_aref_naview_index_stride_last_kernel(size_t *idx, ssize_t s1, size_t last, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = (last - idx[i]) * s1;
+ }
+ }
+
+ __global__ void cumo_na_index_aref_naview_index_stride_kernel(size_t *idx, ssize_t s1, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = idx[i] * s1;
+ }
+ }
+
+ __global__ void cumo_na_index_aref_naview_index_index_beg_step_kernel(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
+ {
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+ idx[i] = idx1[beg + step * i];
+ }
+ }
+
+ void cumo_na_index_aref_nadata_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_index_aref_nadata_index_stride_kernel<<<grid_dim, block_dim>>>(idx, s1, n);
+ }
+
+ void cumo_na_index_aref_naview_index_index_kernel_launch(size_t *idx, size_t *idx1, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_index_aref_naview_index_index_kernel<<<grid_dim, block_dim>>>(idx, idx1, n);
+ }
+
+ void cumo_na_index_aref_naview_index_stride_last_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_index_aref_naview_index_stride_last_kernel<<<grid_dim, block_dim>>>(idx, s1, last, n);
+ }
+
+ void cumo_na_index_aref_naview_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_index_aref_naview_index_stride_kernel<<<grid_dim, block_dim>>>(idx, s1, n);
+ }
+
+ void cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
+ {
+ size_t grid_dim = cumo_get_grid_dim(n);
+ size_t block_dim = cumo_get_block_dim(n);
+ cumo_na_index_aref_naview_index_index_beg_step_kernel<<<grid_dim, block_dim>>>(idx, idx1, beg, step, n);
+ }
+
+ #if defined(__cplusplus)
+ #if 0
+ { /* satisfy cc-mode */
+ #endif
+ } /* extern "C" { */
+ #endif
+
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: cumo
  version: !ruby/object:Gem::Version
- version: 0.2.1
+ version: 0.2.2
  platform: ruby
  authors:
  - Naotoshi Seo
@@ -170,6 +170,7 @@ files:
  - ext/cumo/narray/SFMT.h
  - ext/cumo/narray/array.c
  - ext/cumo/narray/data.c
+ - ext/cumo/narray/data_kernel.cu
  - ext/cumo/narray/gen/cogen.rb
  - ext/cumo/narray/gen/cogen_kernel.rb
  - ext/cumo/narray/gen/def/bit.rb
@@ -305,6 +306,7 @@ files:
  - ext/cumo/narray/gen/tmpl_bit/where.c
  - ext/cumo/narray/gen/tmpl_bit/where2.c
  - ext/cumo/narray/index.c
+ - ext/cumo/narray/index_kernel.cu
  - ext/cumo/narray/kwargs.c
  - ext/cumo/narray/math.c
  - ext/cumo/narray/narray.c