cumo 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +15 -0
- data/.rubocop_todo.yml +1272 -0
- data/3rd_party/mkmf-cu/Gemfile +2 -0
- data/3rd_party/mkmf-cu/Rakefile +2 -1
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
- data/CHANGELOG.md +69 -0
- data/Gemfile +6 -1
- data/README.md +2 -10
- data/Rakefile +8 -11
- data/bench/broadcast_fp32.rb +28 -26
- data/bench/cumo_bench.rb +18 -16
- data/bench/numo_bench.rb +18 -16
- data/bench/reduction_fp32.rb +14 -12
- data/bin/console +1 -0
- data/cumo.gemspec +5 -8
- data/ext/cumo/cuda/cudnn.c +2 -2
- data/ext/cumo/cumo.c +7 -3
- data/ext/cumo/depend.erb +15 -13
- data/ext/cumo/extconf.rb +32 -46
- data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +13 -1
- data/ext/cumo/include/cumo/template.h +2 -4
- data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
- data/ext/cumo/include/cumo/types/float_macro.h +2 -2
- data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +3 -3
- data/ext/cumo/narray/data.c +23 -2
- data/ext/cumo/narray/gen/cogen.rb +8 -7
- data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
- data/ext/cumo/narray/gen/def/bit.rb +3 -1
- data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/int16.rb +2 -0
- data/ext/cumo/narray/gen/def/int32.rb +2 -0
- data/ext/cumo/narray/gen/def/int64.rb +2 -0
- data/ext/cumo/narray/gen/def/int8.rb +2 -0
- data/ext/cumo/narray/gen/def/robject.rb +2 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/uint16.rb +2 -0
- data/ext/cumo/narray/gen/def/uint32.rb +2 -0
- data/ext/cumo/narray/gen/def/uint64.rb +2 -0
- data/ext/cumo/narray/gen/def/uint8.rb +2 -0
- data/ext/cumo/narray/gen/erbln.rb +9 -7
- data/ext/cumo/narray/gen/erbpp2.rb +26 -24
- data/ext/cumo/narray/gen/narray_def.rb +13 -11
- data/ext/cumo/narray/gen/spec.rb +58 -55
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
- data/ext/cumo/narray/gen/tmpl/at.c +34 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
- data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
- data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
- data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
- data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
- data/ext/cumo/narray/index.c +243 -39
- data/ext/cumo/narray/index_kernel.cu +84 -0
- data/ext/cumo/narray/narray.c +38 -1
- data/ext/cumo/narray/ndloop.c +1 -1
- data/ext/cumo/narray/struct.c +1 -1
- data/lib/cumo/cuda/compile_error.rb +1 -1
- data/lib/cumo/cuda/compiler.rb +23 -22
- data/lib/cumo/cuda/cudnn.rb +1 -1
- data/lib/cumo/cuda/device.rb +1 -1
- data/lib/cumo/cuda/link_state.rb +2 -2
- data/lib/cumo/cuda/module.rb +1 -2
- data/lib/cumo/cuda/nvrtc_program.rb +3 -2
- data/lib/cumo/cuda.rb +2 -0
- data/lib/cumo/linalg.rb +2 -0
- data/lib/cumo/narray/extra.rb +137 -185
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo.rb +3 -1
- data/test/bit_test.rb +157 -0
- data/test/cuda/compiler_test.rb +69 -0
- data/test/cuda/device_test.rb +30 -0
- data/test/cuda/memory_pool_test.rb +45 -0
- data/test/cuda/nvrtc_test.rb +51 -0
- data/test/cuda/runtime_test.rb +28 -0
- data/test/cudnn_test.rb +498 -0
- data/test/cumo_test.rb +27 -0
- data/test/narray_test.rb +745 -0
- data/test/ractor_test.rb +52 -0
- data/test/test_helper.rb +31 -0
- metadata +31 -54
- data/.travis.yml +0 -5
- data/numo-narray-version +0 -1

@@ -24,8 +24,7 @@ static void
             CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, y);
         }
     } else {
-        o1 = p1
-        o1 -= p3;
+        o1 = p1-p3;
        l1 = CUMO_NB+o1;
        r1 = CUMO_NB-o1;
        if (p3>0 || n<CUMO_NB) {
@@ -48,17 +47,32 @@ static void
            }
        } else {
            for (; n>=CUMO_NB; n-=CUMO_NB) {
-
-
-                if (o1>0)
+                if (o1==0) {
+                    x = *a1;
+                } else if (o1>0) {
+                    x = *a1>>o1 | *(a1+1)<<r1;
+                } else {
+                    x = *a1<<-o1 | *(a1-1)>>l1;
+                }
                a1++;
                y = m_<%=name%>(x);
                *(a3++) = y;
            }
        }
        if (n>0) {
-
-
+            if (o1==0) {
+                x = *a1;
+            } else if (o1>0) {
+                x = *a1>>o1;
+                if ((int)n>r1) {
+                    x |= *(a1+1)<<r1;
+                }
+            } else {
+                x = *(a1-1)>>l1;
+                if ((int)n>-o1) {
+                    x |= *a1<<-o1;
+                }
+            }
            y = m_<%=name%>(x);
            *a3 = (y & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
        }
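The rewritten branches above assemble one CUMO_NB-bit word whose first bit lies o1 bits away from the word a1 points at, pulling the missing bits from the neighboring storage word. A minimal standalone sketch of that bit-window read, assuming 64-bit storage words (the template itself works on CUMO_NB-bit words and masks the tail with CUMO_SLB; this is not the template code):

    #include <stdint.h>

    /* Read a full 64-bit window that starts `off` bits after (off > 0) or
     * before (off < 0) the word *p; mirrors the o1==0 / o1>0 / o1<0 cases.
     * Assumes -63 <= off <= 63 and that the neighboring word is readable. */
    static uint64_t read_bit_window(const uint64_t *p, int off)
    {
        if (off == 0) {
            return *p;                                   /* aligned case */
        } else if (off > 0) {
            /* low bits from *p, high bits from the next word (r1 = 64-off) */
            return (*p >> off) | (*(p + 1) << (64 - off));
        } else {
            /* low bits from the previous word (l1 = 64+off, off negative) */
            return (*p << -off) | (*(p - 1) >> (64 + off));
        }
    }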
data/ext/cumo/narray/index.c CHANGED
@@ -183,7 +183,6 @@ static void
 cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_na_index_arg_t *q)
 {
     int n;
-    VALUE excl_end;
     ssize_t beg, end, beg_orig, end_orig;
     const char *dot = "..", *edot = "...";

@@ -197,10 +196,15 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
         beg += size;
     }
     if (T_NIL == TYPE(x.end)) { // endless range
-        end = size -1;
+        end = size - 1;
         if (RTEST(x.exclude_end)) {
             dot = edot;
         }
+        if (beg < 0 || beg >= size) {
+            rb_raise(rb_eRangeError,
+                     "%"SZF"d%s is out of range for size=%"SZF"d",
+                     beg_orig, dot, size);
+        }
     } else {
         end = end_orig = NUM2SSIZET(x.end);
         if (end < 0) {
@@ -210,19 +214,15 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
             end--;
             dot = edot;
         }
-
-        if (beg < 0 || beg >= size || end < 0 || end >= size) {
-            if (T_NIL == TYPE(x.end)) { // endless range
-                rb_raise(rb_eRangeError,
-                         "%"SZF"d%s is out of range for size=%"SZF"d",
-                         beg_orig, dot, size);
-            } else {
+        if (beg < 0 || beg >= size || end < 0 || end >= size) {
             rb_raise(rb_eRangeError,
                      "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
                      beg_orig, dot, end_orig, size);
         }
     }
 #else
+    VALUE excl_end;
+
     beg = beg_orig = NUM2SSIZET(rb_funcall(range,cumo_id_beg,0));
     if (beg < 0) {
         beg += size;
@@ -258,7 +258,7 @@ cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep)
     if (!RB_TYPE_P(enum_obj, T_DATA)) {
         rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
     }
-    e = (
+    e = CUMO_RENUMERATOR_PTR(enum_obj);

     if (!rb_obj_is_kind_of(e->obj, rb_cRange)) {
         rb_raise(rb_eTypeError,"not Range object");
@@ -292,7 +292,7 @@ cumo_na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, cumo_na_ind
         rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
     }
     cumo_na_parse_enumerator_step(enum_obj, &step);
-    e = (
+    e = CUMO_RENUMERATOR_PTR(enum_obj);
     cumo_na_parse_range(e->obj, NUM2SSIZET(step), orig_dim, size, q); // e->obj : Range Object
 }

@@ -568,6 +568,188 @@ cumo_na_index_aref_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
     na2->base.size = total;
 }

+void cumo_na_index_at_nadata_index_stride_add_kernel_launch(size_t *idx, size_t *idx1, ssize_t s1, uint64_t n);
+void cumo_na_index_at_nadata_index_beg_step_stride_kernel_launch(size_t *idx, size_t beg, ssize_t step, ssize_t s1, uint64_t n);
+void cumo_na_index_at_nadata_index_beg_step_stride_add_kernel_launch(size_t *idx, size_t beg, ssize_t step, ssize_t s1, uint64_t n);
+
+static void
+cumo_na_index_at_nadata(cumo_narray_data_t *na1, cumo_narray_view_t *na2,
+                        cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
+{
+    int i;
+    size_t size = q[ndim-1].n;
+    ssize_t stride1;
+    ssize_t *strides_na1;
+    size_t *index;
+    ssize_t beg, step;
+    int use_cumo_cuda_runtime_malloc = 0;
+
+    strides_na1 = ALLOCA_N(ssize_t, na1->base.ndim);
+    cumo_na_get_strides_nadata(na1, strides_na1, elmsz);
+
+    if (q[ndim-1].idx != NULL) {
+        index = q[ndim-1].idx;
+    } else {
+        //index = ALLOC_N(size_t, size);
+        index = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*size);
+        use_cumo_cuda_runtime_malloc = 1;
+    }
+    CUMO_SDX_SET_INDEX(na2->stridx[0], index);
+
+    for (i=ndim-1; i>=0; i--) {
+        stride1 = strides_na1[q[i].orig_dim];
+        if (i==ndim-1) {
+            if (size == 0) {
+                rb_raise(cumo_na_eShapeError, "cannot get element of empty array");
+            }
+        } else {
+            if (size != q[i].n) {
+                rb_raise(cumo_na_eShapeError, "index array sizes mismatch");
+            }
+        }
+
+        if (q[i].idx != NULL) {
+            if (i==ndim-1) {
+                cumo_na_index_aref_nadata_index_stride_kernel_launch(index, stride1, size);
+                q[i].idx = NULL;
+            } else {
+                cumo_na_index_at_nadata_index_stride_add_kernel_launch(index, q[i].idx, stride1, size);
+            }
+        } else {
+            beg = q[i].beg;
+            step = q[i].step;
+            if (i==ndim-1) {
+                cumo_na_index_at_nadata_index_beg_step_stride_kernel_launch(index, beg, step, stride1, size);
+            } else {
+                cumo_na_index_at_nadata_index_beg_step_stride_add_kernel_launch(index, beg, step, stride1, size);
+            }
+        }
+
+    }
+    na2->base.size = size;
+    na2->base.shape[0] = size;
+    if (use_cumo_cuda_runtime_malloc) {
+        CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_index_at_nadata");
+        cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
+    }
+}
+
+void cumo_na_index_at_naview_index_index_index_add_kernel_launch(size_t *idx, size_t *idx1, size_t *idx2, uint64_t n);
+void cumo_na_index_at_naview_index_index_beg_step_add_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n);
+void cumo_na_index_at_naview_index_stride_last_add_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n);
+
+static void
+cumo_na_index_at_naview(cumo_narray_view_t *na1, cumo_narray_view_t *na2,
+                        cumo_na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
+{
+    int i;
+    size_t *index;
+    size_t size = q[ndim-1].n;
+    int use_cumo_cuda_runtime_malloc = 0;
+
+    if (q[ndim-1].idx != NULL) {
+        index = q[ndim-1].idx;
+    } else {
+        //index = ALLOC_N(size_t, size);
+        index = (size_t*)cumo_cuda_runtime_malloc(sizeof(size_t)*size);
+        use_cumo_cuda_runtime_malloc = 1;
+    }
+    CUMO_SDX_SET_INDEX(na2->stridx[0], index);
+
+    for (i=ndim-1; i>=0; i--) {
+        cumo_stridx_t sdx1 = na1->stridx[q[i].orig_dim];
+        if (i==ndim-1) {
+            if (size == 0) {
+                rb_raise(cumo_na_eShapeError, "cannot get element of empty array");
+            }
+        } else {
+            if (size != q[i].n) {
+                rb_raise(cumo_na_eShapeError, "index array sizes mismatch");
+            }
+        }
+
+        if (q[i].idx != NULL && CUMO_SDX_IS_INDEX(sdx1)) {
+            // index <- index
+            size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
+            if (i==ndim-1) {
+                cumo_na_index_aref_naview_index_index_kernel_launch(index, index1, size);
+                q[i].idx = NULL;
+            } else {
+                cumo_na_index_at_naview_index_index_index_add_kernel_launch(index, index1, q[i].idx, size);
+            }
+        }
+        else if (q[i].idx == NULL && CUMO_SDX_IS_INDEX(sdx1)) {
+            // step <- index
+            size_t beg = q[i].beg;
+            ssize_t step = q[i].step;
+            size_t *index1 = CUMO_SDX_GET_INDEX(sdx1);
+            if (i==ndim-1) {
+                cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(index, index1, beg, step, size);
+            } else {
+                cumo_na_index_at_naview_index_index_beg_step_add_kernel_launch(index, index1, beg, step, size);
+            }
+        }
+        else if (q[i].idx != NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
+            // index <- step
+            ssize_t stride1 = CUMO_SDX_GET_STRIDE(sdx1);
+            if (stride1<0) {
+                size_t last;
+                stride1 = -stride1;
+                last = na1->base.shape[q[i].orig_dim] - 1;
+                if (na2->offset < last * stride1) {
+                    rb_raise(rb_eStandardError,"bug: negative offset");
+                }
+                na2->offset -= last * stride1;
+                if (i==ndim-1) {
+                    cumo_na_index_aref_naview_index_stride_last_kernel_launch(index, stride1, last, size);
+                    q[i].idx = NULL;
+                } else {
+                    cumo_na_index_at_naview_index_stride_last_add_kernel_launch(index, stride1, last, size);
+                }
+            } else {
+                if (i==ndim-1) {
+                    cumo_na_index_aref_nadata_index_stride_kernel_launch(index, stride1, size);
+                    q[i].idx = NULL;
+                } else {
+                    cumo_na_index_at_nadata_index_stride_add_kernel_launch(index, q[i].idx, stride1, size);
+                }
+            }
+        }
+        else if (q[i].idx == NULL && CUMO_SDX_IS_STRIDE(sdx1)) {
+            // step <- step
+            size_t beg = q[i].beg;
+            ssize_t step = q[i].step;
+            ssize_t stride1 = CUMO_SDX_GET_STRIDE(sdx1);
+            if (stride1<0) {
+                size_t last;
+                stride1 = -stride1;
+                last = na1->base.shape[q[i].orig_dim] - 1;
+                if (na2->offset < last * stride1) {
+                    rb_raise(rb_eStandardError,"bug: negative offset");
+                }
+                na2->offset -= last * stride1;
+                if (i==ndim-1) {
+                    cumo_na_index_at_nadata_index_beg_step_stride_kernel_launch(index, last - beg, -step, stride1, size);
+                } else {
+                    cumo_na_index_at_nadata_index_beg_step_stride_add_kernel_launch(index, last - beg, -step, stride1, size);
+                }
+            } else {
+                if (i==ndim-1) {
+                    cumo_na_index_at_nadata_index_beg_step_stride_kernel_launch(index, beg, step, stride1, size);
+                } else {
+                    cumo_na_index_at_nadata_index_beg_step_stride_add_kernel_launch(index, beg, step, stride1, size);
+                }
+            }
+        }
+    }
+    na2->base.size = size;
+    na2->base.shape[0] = size;
+    if (use_cumo_cuda_runtime_malloc) {
+        CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_index_at_naview");
+        cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
+    }
+}
+
 static int
 cumo_na_ndim_new_narray(int ndim, const cumo_na_index_arg_t *q)
 {
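The two helpers added above build a single flat index vector for at-style indexing: the innermost dimension initializes idx[], and every outer dimension adds its contribution through the *_add_ kernel launches. For reference, a CPU-side sketch of that accumulation for the beg/step case only, with illustrative parameter arrays standing in for the values taken from q[] and the source strides (dimensions given an explicit index array use idx1[i]*stride instead; this is not library code):

    #include <stddef.h>
    #include <stdint.h>

    /* Element i of the result gets one flat offset, summed over dimensions. */
    static void at_offsets_reference(size_t *idx, uint64_t n, int ndim,
                                     const ptrdiff_t *beg, const ptrdiff_t *step,
                                     const ptrdiff_t *stride)
    {
        for (uint64_t i = 0; i < n; i++) {
            ptrdiff_t off = 0;
            for (int d = 0; d < ndim; d++) {
                /* one coordinate per dimension, all advancing together with i */
                off += (beg[d] + step[d] * (ptrdiff_t)i) * stride[d];
            }
            idx[i] = (size_t)off;
        }
    }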
@@ -587,6 +769,7 @@ typedef struct {
     cumo_narray_t *na1;
     int keep_dim;
     size_t pos; // offset position for 0-dimensional narray. 0-dimensional array does not use q.
+    int at_mode; // 0: aref, 1: at
 } cumo_na_aref_md_data_t;

 static cumo_na_index_arg_t*
@@ -614,6 +797,7 @@ VALUE cumo_na_aref_md_protected(VALUE data_value)
     cumo_na_index_arg_t *q = data->q;
     cumo_narray_t *na1 = data->na1;
     int keep_dim = data->keep_dim;
+    int at_mode = data->at_mode;

     int ndim_new;
     VALUE view;
@@ -624,10 +808,14 @@ VALUE cumo_na_aref_md_protected(VALUE data_value)

     if (cumo_na_debug_flag) print_index_arg(q,ndim);

-    if (
-        ndim_new =
+    if (at_mode) {
+        ndim_new = 1;
     } else {
-
+        if (keep_dim) {
+            ndim_new = ndim;
+        } else {
+            ndim_new = cumo_na_ndim_new_narray(ndim, q);
+        }
     }
     view = cumo_na_s_allocate_view(rb_obj_class(self));

@@ -636,7 +824,7 @@ VALUE cumo_na_aref_md_protected(VALUE data_value)

     cumo_na_alloc_shape((cumo_narray_t*)na2, ndim_new);

-    na2->stridx =
+    na2->stridx = ZALLOC_N(cumo_stridx_t,ndim_new);

     elmsz = cumo_na_element_stride(self);

@@ -647,7 +835,11 @@ VALUE cumo_na_aref_md_protected(VALUE data_value)
         na2->offset = data->pos;
         na2->base.size = 1;
     } else {
-
+        if (at_mode) {
+            cumo_na_index_at_nadata((cumo_narray_data_t *)na1,na2,q,elmsz,ndim,keep_dim);
+        } else {
+            cumo_na_index_aref_nadata((cumo_narray_data_t *)na1,na2,q,elmsz,ndim,keep_dim);
+        }
     }
     na2->data = self;
     break;
@@ -659,7 +851,11 @@ VALUE cumo_na_aref_md_protected(VALUE data_value)
     } else {
         na2->offset = ((cumo_narray_view_t *)na1)->offset;
         na2->data = ((cumo_narray_view_t *)na1)->data;
-
+        if (at_mode) {
+            cumo_na_index_at_naview((cumo_narray_view_t *)na1,na2,q,elmsz,ndim,keep_dim);
+        } else {
+            cumo_na_index_aref_naview((cumo_narray_view_t *)na1,na2,q,elmsz,ndim,keep_dim);
+        }
     }
     break;
 }
@@ -684,7 +880,7 @@ cumo_na_aref_md_ensure(VALUE data_value)
 }

 static VALUE
-cumo_na_aref_md(int argc, VALUE *argv, VALUE self, int keep_dim, int result_nd, size_t pos)
+cumo_na_aref_md(int argc, VALUE *argv, VALUE self, int keep_dim, int result_nd, size_t pos, int at_mode)
 {
     VALUE args; // should be GC protected
     cumo_narray_t *na1;
@@ -696,6 +892,9 @@ cumo_na_aref_md(int argc, VALUE *argv, VALUE self, int keep_dim, int result_nd,
     CumoGetNArray(self,na1);

     args = rb_ary_new4(argc,argv);
+    if (at_mode && na1->ndim == 0) {
+        rb_raise(cumo_na_eDimensionError,"argument length does not match dimension size");
+    }

     if (argc == 1 && result_nd == 1) {
         idx = argv[0];
@@ -724,6 +923,7 @@ cumo_na_aref_md(int argc, VALUE *argv, VALUE self, int keep_dim, int result_nd,
     data.q = cumo_na_allocate_index_args(result_nd);
     data.na1 = na1;
     data.keep_dim = keep_dim;
+    data.at_mode = at_mode;

     switch(na1->type) {
     case CUMO_NARRAY_DATA_T:
@@ -760,7 +960,15 @@ cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd,
             return rb_funcall(*idx,cumo_id_mask,1,self);
         }
     }
-    return cumo_na_aref_md(nidx, idx, self, keep_dim, result_nd, pos);
+    return cumo_na_aref_md(nidx, idx, self, keep_dim, result_nd, pos, 0);
+}
+
+/* method: at([idx1,idx2,...,idxN], [idx1,idx2,...,idxN]) */
+VALUE
+cumo_na_at_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos)
+{
+    cumo_na_index_arg_to_internal_order(nidx, idx, self);
+    return cumo_na_aref_md(nidx, idx, self, keep_dim, result_nd, pos, 1);
 }


@@ -782,16 +990,18 @@ check_index_count(int argc, int cumo_na_ndim, int count_new, int count_rest)

     switch(count_rest) {
       case 0:
-        if (
+        if (argc == 1 && count_new == 0) return 1;
         if (argc == result_nd) return result_nd;
         rb_raise(rb_eIndexError,"# of index(=%i) should be "
-                 "equal to ndim(=%i)",argc,cumo_na_ndim);
+                 "equal to ndim(=%i) or 1", argc,cumo_na_ndim);
         break;
       case 1:
         if (argc-1 <= result_nd) return result_nd;
         rb_raise(rb_eIndexError,"# of index(=%i) > ndim(=%i) with :rest",
                  argc,cumo_na_ndim);
         break;
+      default:
+        rb_raise(rb_eIndexError,"multiple rest-dimension is not allowd");
     }
     return -1;
 }
@@ -802,7 +1012,6 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
     int i, j;
     int count_new=0;
     int count_rest=0;
-    int count_else=0;
     ssize_t x, s, m, pos, *idx;
     cumo_narray_t *na;
     cumo_narray_view_t *nv;
@@ -811,8 +1020,7 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,

     CumoGetNArray(self,na);
     if (na->size == 0) {
-        rb_raise(
-        return -1;
+        rb_raise(cumo_na_eShapeError, "cannot get element of empty array");
     }
     idx = ALLOCA_N(ssize_t, argc);
     for (i=j=0; i<argc; i++) {
@@ -835,16 +1043,10 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
             argv[i] = cumo_sym_new;
             count_new++;
         }
-        // not break
-      default:
-        count_else++;
         }
     }

-    if (
-        rb_raise(rb_eIndexError,"multiple rest-dimension is not allowd");
-    }
-    if (count_else != 0) {
+    if (j != argc) {
         return check_index_count(argc, na->ndim, count_new, count_rest);
     }

@@ -865,8 +1067,9 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
             }
         }
         *pos_idx = pos;
+        return 0;
     }
-
+    if (j == 1) {
         x = cumo_na_range_check(idx[0], na->size, 0);
         for (i=na->ndim-1; i>=0; i--) {
             s = na->shape[i];
@@ -882,19 +1085,19 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
             }
         }
         *pos_idx = pos;
-
-        return check_index_count(argc, na->ndim, count_new, count_rest);
+        return 0;
     }
     break;
   default:
     if (!stride) {
         stride = cumo_na_element_stride(self);
     }
-    if (
+    if (j == 1) {
         x = cumo_na_range_check(idx[0], na->size, 0);
         *pos_idx = stride * x;
+        return 0;
     }
-
+    if (j == na->ndim) {
         pos = 0;
         for (i=j-1; i>=0; i--) {
             x = cumo_na_range_check(idx[i], na->shape[i], i);
@@ -902,11 +1105,12 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
             stride *= na->shape[i];
         }
         *pos_idx = pos;
-
-        return check_index_count(argc, na->ndim, count_new, count_rest);
+        return 0;
     }
     }
-
+    rb_raise(rb_eIndexError,"# of index(=%i) should be "
+             "equal to ndim(=%i) or 1", argc,na->ndim);
+    return -1;
 }


data/ext/cumo/narray/index_kernel.cu CHANGED
@@ -42,6 +42,48 @@ __global__ void cumo_na_index_aref_naview_index_index_beg_step_kernel(size_t *id
     }
 }

+__global__ void cumo_na_index_at_nadata_index_beg_step_stride_kernel(size_t *idx, size_t beg, ssize_t step, ssize_t s1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] = (beg + step * i) * s1;
+    }
+}
+
+__global__ void cumo_na_index_at_nadata_index_beg_step_stride_add_kernel(size_t *idx, size_t beg, ssize_t step, ssize_t s1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] += (beg + step * i) * s1;
+    }
+}
+
+__global__ void cumo_na_index_at_nadata_index_stride_add_kernel(size_t *idx, size_t *idx1, ssize_t s1, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] += idx1[i] * s1;
+    }
+}
+
+__global__ void cumo_na_index_at_naview_index_index_index_add_kernel(size_t *idx, size_t *idx1, size_t *idx2, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] += idx1[idx2[i]];
+    }
+}
+
+__global__ void cumo_na_index_at_naview_index_index_beg_step_add_kernel(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] += idx1[beg + step * i];
+    }
+}
+
+__global__ void cumo_na_index_at_naview_index_stride_last_add_kernel(size_t *idx, ssize_t s1, size_t last, uint64_t n)
+{
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+        idx[i] += (last - idx[i]) * s1;
+    }
+}
+
 void cumo_na_index_aref_nadata_index_stride_kernel_launch(size_t *idx, ssize_t s1, uint64_t n)
 {
     size_t grid_dim = cumo_get_grid_dim(n);
@@ -77,6 +119,48 @@ void cumo_na_index_aref_naview_index_index_beg_step_kernel_launch(size_t *idx, s
     cumo_na_index_aref_naview_index_index_beg_step_kernel<<<grid_dim, block_dim>>>(idx, idx1, beg, step, n);
 }

+void cumo_na_index_at_nadata_index_stride_add_kernel_launch(size_t *idx, size_t *idx1, ssize_t s1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_at_nadata_index_stride_add_kernel<<<grid_dim, block_dim>>>(idx, idx1, s1, n);
+}
+
+void cumo_na_index_at_nadata_index_beg_step_stride_kernel_launch(size_t *idx, size_t beg, ssize_t step, ssize_t s1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_at_nadata_index_beg_step_stride_kernel<<<grid_dim, block_dim>>>(idx, beg, step, s1, n);
+}
+
+void cumo_na_index_at_nadata_index_beg_step_stride_add_kernel_launch(size_t *idx, size_t beg, ssize_t step, ssize_t s1, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_at_nadata_index_beg_step_stride_add_kernel<<<grid_dim, block_dim>>>(idx, beg, step, s1, n);
+}
+
+void cumo_na_index_at_naview_index_index_index_add_kernel_launch(size_t *idx, size_t *idx1, size_t *idx2, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_at_naview_index_index_index_add_kernel<<<grid_dim, block_dim>>>(idx, idx1, idx2, n);
+}
+
+void cumo_na_index_at_naview_index_index_beg_step_add_kernel_launch(size_t *idx, size_t *idx1, size_t beg, ssize_t step, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_at_naview_index_index_beg_step_add_kernel<<<grid_dim, block_dim>>>(idx, idx1, beg, step, n);
+}
+
+void cumo_na_index_at_naview_index_stride_last_add_kernel_launch(size_t *idx, ssize_t s1, size_t last, uint64_t n)
+{
+    size_t grid_dim = cumo_get_grid_dim(n);
+    size_t block_dim = cumo_get_block_dim(n);
+    cumo_na_index_at_naview_index_stride_last_add_kernel<<<grid_dim, block_dim>>>(idx, s1, last, n);
+}
+
 #if defined(__cplusplus)
 #if 0
 { /* satisfy cc-mode */
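Each launch wrapper above sizes the launch with cumo_get_grid_dim/cumo_get_block_dim and relies on the kernels' grid-stride loops (i += blockDim.x * gridDim.x), so any n is covered even when the grid is capped. A generic sketch of that sizing scheme, using assumed cap values rather than cumo's actual ones (the real helpers may differ):

    #include <stdint.h>

    /* Illustrative only: pick a block size, cap the grid, and let the
     * kernel's grid-stride loop cover whatever the capped grid leaves over. */
    static size_t sketch_block_dim(uint64_t n)
    {
        const uint64_t max_block = 128;   /* assumed cap, not cumo's value */
        if (n == 0) return 1;
        return (size_t)(n < max_block ? n : max_block);
    }

    static size_t sketch_grid_dim(uint64_t n)
    {
        const uint64_t max_grid = 2048;   /* assumed cap, not cumo's value */
        uint64_t block = sketch_block_dim(n);
        uint64_t grid = (n + block - 1) / block;
        if (grid == 0) grid = 1;
        return (size_t)(grid < max_grid ? grid : max_grid);
    }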
data/ext/cumo/narray/narray.c CHANGED
@@ -889,6 +889,39 @@ cumo_na_check_contiguous(VALUE self)
     return Qfalse;
 }

+VALUE
+cumo_na_check_fortran_contiguous(VALUE self)
+{
+    int i;
+    ssize_t st0;
+    cumo_narray_t *na;
+
+    switch(CUMO_RNARRAY_TYPE(self)) {
+    case CUMO_NARRAY_DATA_T:
+    case CUMO_NARRAY_FILEMAP_T:
+        return Qfalse;
+    case CUMO_NARRAY_VIEW_T:
+        CumoGetNArray(self,na);
+
+        // not contiguous if it has index
+        for (i=0; i < CUMO_NA_NDIM(na); i++) {
+            if (CUMO_NA_IS_INDEX_AT(na,i))
+                return Qfalse;
+        }
+
+        // check f-contiguous
+        st0 = cumo_na_element_stride(self); // elmsz
+        for (i=0; i < CUMO_NA_NDIM(na); i++) {
+            if (CUMO_NA_SHAPE(na)[i] == 1)
+                continue;
+            if (CUMO_NA_STRIDE_AT(na, i) != st0)
+                return Qfalse;
+            st0 *= CUMO_NA_SHAPE(na)[i];
+        }
+    }
+    return Qtrue;
+}
+
 VALUE
 cumo_na_as_contiguous_array(VALUE a)
 {
@@ -1388,7 +1421,7 @@ static VALUE cumo_na_inplace( VALUE self );
 /*
   Load marshal data.
   @overload marshal_load(data)
-  @
+  @param [Array] Array containing marshal data.
   @return [nil]
 */
 static VALUE
@@ -1833,6 +1866,9 @@ cumo_na_equal(VALUE self, volatile VALUE other)
             return Qfalse;
         }
     }
+    if (na1->size == 0) {
+        return Qtrue;
+    }
     vbool = rb_funcall(self, cumo_id_eq, 1, other);
     return (rb_funcall(vbool, cumo_id_count_false_cpu, 0)==INT2FIX(0)) ? Qtrue : Qfalse;
 }
@@ -1929,6 +1965,7 @@ Init_cumo_narray()
     rb_define_method(cNArray, "debug_info", cumo_na_debug_info, 0);

     rb_define_method(cNArray, "contiguous?", cumo_na_check_contiguous, 0);
+    rb_define_method(cNArray, "fortran_contiguous?", cumo_na_check_fortran_contiguous, 0);

     rb_define_method(cNArray, "view", cumo_na_make_view, 0);
     rb_define_method(cNArray, "expand_dims", cumo_na_expand_dims, 1);
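The cumo_na_check_fortran_contiguous function added in narray.c above accepts a view as Fortran-contiguous when, walking dimensions in storage order, each stride equals the element size times the product of the preceding extents, with extent-1 dimensions skipped. A minimal standalone sketch of the same test on plain arrays (illustrative names, not the narray API):

    #include <stdbool.h>
    #include <stddef.h>

    /* Column-major (Fortran) contiguity: stride of dim i must equal
     * elmsz * shape[0] * ... * shape[i-1]; dims of extent 1 are ignored
     * because their stride never affects addressing. */
    static bool is_f_contiguous(const size_t *shape, const ptrdiff_t *stride,
                                int ndim, ptrdiff_t elmsz)
    {
        ptrdiff_t expected = elmsz;
        for (int i = 0; i < ndim; i++) {
            if (shape[i] == 1) continue;
            if (stride[i] != expected) return false;
            expected *= (ptrdiff_t)shape[i];
        }
        return true;
    }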
data/ext/cumo/narray/ndloop.c CHANGED
@@ -56,7 +56,7 @@ typedef struct CUMO_NA_MD_LOOP {
     // [2,3,4], 111b for sum(), 010b for sum(axis: 1), 110b for sum(axis: [1,2])
     VALUE loop_opt;
     cumo_ndfunc_t *ndfunc;
-    void (*loop_func)();
+    void (*loop_func)(cumo_ndfunc_t *, struct CUMO_NA_MD_LOOP *);
 } cumo_na_md_loop_t;

 #define LARG(lp,iarg) ((lp)->user.args[iarg])
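The ndloop.c change replaces the unprototyped pointer void (*loop_func)() with a fully prototyped one, so call sites are type-checked by the compiler instead of accepting any argument list. A small illustration with stand-in types (not the real cumo declarations):

    /* Stand-ins for the real cumo types, for illustration only. */
    struct cumo_ndfunc;
    struct cumo_md_loop;

    /* Old style: empty parameter list, the compiler checks nothing at calls. */
    typedef void (*loop_func_old_t)();

    /* New style: full prototype, mismatched calls are rejected at compile time. */
    typedef void (*loop_func_new_t)(struct cumo_ndfunc *, struct cumo_md_loop *);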