cumo 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +5 -4
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +24 -24
- data/CHANGELOG.md +12 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +1 -1
- data/ext/cumo/include/cumo/narray.h +2 -0
- data/ext/cumo/include/cumo/types/complex.h +2 -2
- data/ext/cumo/include/cumo.h +1 -1
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +4 -1
- data/ext/cumo/narray/gen/tmpl/allocate.c +1 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +1 -0
- data/ext/cumo/narray/narray.c +97 -3
- data/test/narray_test.rb +80 -0
- data/test/ractor_test.rb +5 -3
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 17cb9dfdf9be41292bcd0204a67c5f919da60588d005d4441ad632767dce504c
|
|
4
|
+
data.tar.gz: 4c6e388cdb5b3b9f99d45a989a610d63e493fe53b3baad570c7bb2656f72d86c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 917adaa087836d673a143364f88fb9ddf91ad84cbcc064b115258e32f4b22e70a63c45f3e655a2e56ad58e70d6a4330f3e63ce177b5ad9e0a0c9c11685b39503
|
|
7
|
+
data.tar.gz: c5ec5a4179266a1cf4c5f15b5bc0d7e8b0b01a5a69bb7a75ad3c1387e861666da8b571156bc90295f2cf702558f18e332ce7fd68823330f834ea8d9c4f8b6419
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on
|
|
3
|
+
# on 2026-01-09 18:33:26 UTC using RuboCop version 1.82.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -371,7 +371,7 @@ Lint/ConstantDefinitionInBlock:
|
|
|
371
371
|
Exclude:
|
|
372
372
|
- 'test/cuda/compiler_test.rb'
|
|
373
373
|
|
|
374
|
-
# Offense count:
|
|
374
|
+
# Offense count: 665
|
|
375
375
|
# Configuration parameters: Only, Ignore.
|
|
376
376
|
Lint/ConstantResolution:
|
|
377
377
|
Enabled: false
|
|
@@ -388,7 +388,7 @@ Lint/ErbNewArguments:
|
|
|
388
388
|
Exclude:
|
|
389
389
|
- 'ext/cumo/narray/gen/erbpp2.rb'
|
|
390
390
|
|
|
391
|
-
# Offense count:
|
|
391
|
+
# Offense count: 15
|
|
392
392
|
Lint/FloatComparison:
|
|
393
393
|
Exclude:
|
|
394
394
|
- 'test/narray_test.rb'
|
|
@@ -411,7 +411,7 @@ Lint/NonAtomicFileOperation:
|
|
|
411
411
|
Exclude:
|
|
412
412
|
- 'lib/cumo/cuda/compiler.rb'
|
|
413
413
|
|
|
414
|
-
# Offense count:
|
|
414
|
+
# Offense count: 34
|
|
415
415
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
416
416
|
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredClasses.
|
|
417
417
|
# IgnoredClasses: Time, DateTime
|
|
@@ -424,6 +424,7 @@ Lint/NumberConversion:
|
|
|
424
424
|
- 'ext/cumo/narray/gen/erbln.rb'
|
|
425
425
|
- 'lib/cumo/narray/extra.rb'
|
|
426
426
|
- 'test/cudnn_test.rb'
|
|
427
|
+
- 'test/narray_test.rb'
|
|
427
428
|
|
|
428
429
|
# Offense count: 2
|
|
429
430
|
# This cop supports safe autocorrection (--autocorrect).
|
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb
CHANGED
|
@@ -38,33 +38,33 @@ module MakeMakefileCuda
|
|
|
38
38
|
cmd = "nvcc #{s}"
|
|
39
39
|
if ENV['CUMO_NVCC_GENERATE_CODE']
|
|
40
40
|
cmd << " --generate-code=#{ENV['CUMO_NVCC_GENERATE_CODE']}"
|
|
41
|
-
elsif ENV['DEBUG']
|
|
42
|
-
cmd << " -arch=sm_35"
|
|
43
41
|
else
|
|
44
|
-
|
|
45
|
-
if cuda_version >= Gem::Version.new("13.0")
|
|
46
|
-
# CUDA 13.0
|
|
47
|
-
capability = [75, 87, 89, 90, 121]
|
|
48
|
-
elsif cuda_version >= Gem::Version.new("12.9")
|
|
49
|
-
# CUDA 12.9
|
|
50
|
-
capability = [50, 60, 70, 75, 87, 89, 90, 121]
|
|
51
|
-
elsif cuda_version >= Gem::Version.new("12.8")
|
|
52
|
-
# CUDA 12.8
|
|
53
|
-
capability = [50, 60, 70, 75, 87, 89, 90, 120]
|
|
54
|
-
elsif cuda_version >= Gem::Version.new("12.0")
|
|
55
|
-
# CUDA 12.0 – 12.6
|
|
56
|
-
capability = [50, 60, 70, 75, 87, 89, 90]
|
|
57
|
-
elsif cuda_version >= Gem::Version.new("11.8")
|
|
58
|
-
# CUDA 11.8
|
|
59
|
-
capability = [35, 50, 60, 70, 75, 87, 89, 90]
|
|
60
|
-
else
|
|
61
|
-
# CUDA 11.0
|
|
62
|
-
capability = [35, 50, 60, 70, 75, 80]
|
|
63
|
-
end
|
|
64
|
-
|
|
42
|
+
capability = nil
|
|
65
43
|
if find_executable('nvidia-smi')
|
|
66
44
|
arch_version = `nvidia-smi --query-gpu=compute_cap --format=csv,noheader`.strip
|
|
67
|
-
capability
|
|
45
|
+
capability = [(arch_version.to_f * 10).to_i] unless arch_version.empty?
|
|
46
|
+
end
|
|
47
|
+
unless capability
|
|
48
|
+
# Ref. https://en.wikipedia.org/wiki/CUDA
|
|
49
|
+
if cuda_version >= Gem::Version.new("13.0")
|
|
50
|
+
# CUDA 13.0
|
|
51
|
+
capability = [75, 80, 86, 87, 89, 90, 100, 103, 110, 120, 121]
|
|
52
|
+
elsif cuda_version >= Gem::Version.new("12.9")
|
|
53
|
+
# CUDA 12.9
|
|
54
|
+
capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100, 103, 110, 120, 121]
|
|
55
|
+
elsif cuda_version >= Gem::Version.new("12.8")
|
|
56
|
+
# CUDA 12.8
|
|
57
|
+
capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100, 103, 110, 120]
|
|
58
|
+
elsif cuda_version >= Gem::Version.new("12.0")
|
|
59
|
+
# CUDA 12.0 – 12.6
|
|
60
|
+
capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90]
|
|
61
|
+
elsif cuda_version >= Gem::Version.new("11.8")
|
|
62
|
+
# CUDA 11.8
|
|
63
|
+
capability = [35, 50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90]
|
|
64
|
+
else
|
|
65
|
+
# CUDA 11.0
|
|
66
|
+
capability = [35, 50, 60, 61, 62, 70, 72, 75, 80]
|
|
67
|
+
end
|
|
68
68
|
end
|
|
69
69
|
|
|
70
70
|
capability.each do |arch|
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
# 0.5.2 (2026/01/25)
|
|
2
|
+
|
|
3
|
+
Fixes:
|
|
4
|
+
|
|
5
|
+
* Backport: Add support for copy on write with store_binary and frozen string
|
|
6
|
+
* Remove unnecessary debug code
|
|
7
|
+
* Fix capability list
|
|
8
|
+
* Build only with supported capabilities to reduce compilation time
|
|
9
|
+
* Fix SEGV when calling {mean, var, stddev, rms} on a single-element array (#154)
|
|
10
|
+
* Suppress warning message for deprecated declarations
|
|
11
|
+
* Fix variable typo in complex log2 and log10 functions (#152)
|
|
12
|
+
|
|
1
13
|
# 0.5.1 (2025/12/30)
|
|
2
14
|
|
|
3
15
|
Enhancements:
|
data/ext/cumo/include/cumo/narray.h
CHANGED
|
@@ -226,6 +226,7 @@ typedef struct {
|
|
|
226
226
|
typedef struct {
|
|
227
227
|
cumo_narray_t base;
|
|
228
228
|
char *ptr;
|
|
229
|
+
bool owned;
|
|
229
230
|
} cumo_narray_data_t;
|
|
230
231
|
|
|
231
232
|
|
|
@@ -360,6 +361,7 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
|
|
|
360
361
|
#define CUMO_NA_DATA(na) ((cumo_narray_data_t*)(na))
|
|
361
362
|
#define CUMO_NA_VIEW(na) ((cumo_narray_view_t*)(na))
|
|
362
363
|
#define CUMO_NA_DATA_PTR(na) (CUMO_NA_DATA(na)->ptr)
|
|
364
|
+
#define CUMO_NA_DATA_OWNED(na) (CUMO_NA_DATA(na)->owned)
|
|
363
365
|
#define CUMO_NA_VIEW_DATA(na) (CUMO_NA_VIEW(na)->data)
|
|
364
366
|
#define CUMO_NA_VIEW_OFFSET(na) (CUMO_NA_VIEW(na)->offset)
|
|
365
367
|
#define CUMO_NA_VIEW_STRIDX(na) (CUMO_NA_VIEW(na)->stridx)
|
data/ext/cumo/include/cumo/types/complex.h
CHANGED
|
@@ -166,14 +166,14 @@ static inline dtype c_log(dtype x) {
|
|
|
166
166
|
static inline dtype c_log2(dtype x) {
|
|
167
167
|
dtype z;
|
|
168
168
|
z = c_log(x);
|
|
169
|
-
z = c_mul_r(x,M_LOG2E);
|
|
169
|
+
z = c_mul_r(z,M_LOG2E);
|
|
170
170
|
return z;
|
|
171
171
|
}
|
|
172
172
|
|
|
173
173
|
static inline dtype c_log10(dtype x) {
|
|
174
174
|
dtype z;
|
|
175
175
|
z = c_log(x);
|
|
176
|
-
z = c_mul_r(x,M_LOG10E);
|
|
176
|
+
z = c_mul_r(z,M_LOG10E);
|
|
177
177
|
return z;
|
|
178
178
|
}
|
|
179
179
|
|
data/ext/cumo/include/cumo.h
CHANGED
|
data/ext/cumo/narray/gen/tmpl/alloc_func.c
CHANGED
|
@@ -29,7 +29,9 @@ static void
|
|
|
29
29
|
assert(na->base.type == CUMO_NARRAY_DATA_T);
|
|
30
30
|
|
|
31
31
|
if (na->ptr != NULL) {
|
|
32
|
-
|
|
32
|
+
if (na->owned) {
|
|
33
|
+
cumo_cuda_runtime_free(na->ptr);
|
|
34
|
+
}
|
|
33
35
|
na->ptr = NULL;
|
|
34
36
|
}
|
|
35
37
|
if (na->base.size > 0) {
|
|
@@ -103,5 +105,6 @@ static VALUE
|
|
|
103
105
|
na->base.shape = NULL;
|
|
104
106
|
na->base.reduce = INT2FIX(0);
|
|
105
107
|
na->ptr = NULL;
|
|
108
|
+
na->owned = FALSE;
|
|
106
109
|
return TypedData_Wrap_Struct(klass, &<%=type_name%>_data_type, (void*)na);
|
|
107
110
|
}
|
data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu
CHANGED
|
@@ -79,10 +79,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
|
|
|
79
79
|
{
|
|
80
80
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
81
81
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (dtype*)p2, n);
|
|
82
|
+
if (s1_idx == 1 || n == 1) {
|
|
83
|
+
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (dtype*)p2, n);
|
|
85
84
|
} else {
|
|
85
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
86
86
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
87
87
|
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (dtype*)p2, n);
|
|
88
88
|
}
|
|
@@ -92,10 +92,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
92
92
|
{
|
|
93
93
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
94
94
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
|
|
95
|
+
if (s1_idx == 1 || n == 1) {
|
|
96
|
+
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
|
|
98
97
|
} else {
|
|
98
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
99
99
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
100
100
|
cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
|
|
101
101
|
}
|
|
@@ -105,10 +105,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
|
|
|
105
105
|
{
|
|
106
106
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
107
107
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
|
|
108
|
+
if (s1_idx == 1 || n == 1) {
|
|
109
|
+
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
|
|
111
110
|
} else {
|
|
111
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
112
112
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
113
113
|
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
|
|
114
114
|
}
|
|
@@ -118,10 +118,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
118
118
|
{
|
|
119
119
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
120
120
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2, n);
|
|
121
|
+
if (s1_idx == 1 || n == 1) {
|
|
122
|
+
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2, n);
|
|
124
123
|
} else {
|
|
124
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
125
125
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
126
126
|
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2, n);
|
|
127
127
|
}
|
data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu
CHANGED
|
@@ -57,10 +57,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
|
|
|
57
57
|
{
|
|
58
58
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
59
59
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
|
|
60
|
+
if (s1_idx == 1 || n == 1) {
|
|
61
|
+
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
|
|
63
62
|
} else {
|
|
63
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
64
64
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
65
65
|
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
|
|
66
66
|
}
|
|
@@ -70,10 +70,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
70
70
|
{
|
|
71
71
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
72
72
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
|
|
73
|
+
if (s1_idx == 1 || n == 1) {
|
|
74
|
+
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
|
|
76
75
|
} else {
|
|
76
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
77
77
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
78
78
|
cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
|
|
79
79
|
}
|
|
@@ -83,10 +83,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
|
|
|
83
83
|
{
|
|
84
84
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
85
85
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
|
|
86
|
+
if (s1_idx == 1 || n == 1) {
|
|
87
|
+
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
|
|
89
88
|
} else {
|
|
89
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
90
90
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
91
91
|
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
|
|
92
92
|
}
|
|
@@ -96,10 +96,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
96
96
|
{
|
|
97
97
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
98
98
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
|
|
99
|
+
if (s1_idx == 1 || n == 1) {
|
|
100
|
+
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
|
|
102
101
|
} else {
|
|
102
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
103
103
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
104
104
|
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
|
|
105
105
|
}
|
data/ext/cumo/narray/narray.c
CHANGED
|
@@ -34,6 +34,7 @@ static ID cumo_id_count_false_cpu;
|
|
|
34
34
|
static ID cumo_id_axis;
|
|
35
35
|
static ID cumo_id_nan;
|
|
36
36
|
static ID cumo_id_keepdims;
|
|
37
|
+
static ID cumo_id_source;
|
|
37
38
|
|
|
38
39
|
VALUE cumo_sym_reduce;
|
|
39
40
|
VALUE cumo_sym_option;
|
|
@@ -577,6 +578,87 @@ cumo_na_s_eye(int argc, VALUE *argv, VALUE klass)
|
|
|
577
578
|
#define READ 1
|
|
578
579
|
#define WRITE 2
|
|
579
580
|
|
|
581
|
+
static void
|
|
582
|
+
cumo_na_set_pointer(VALUE self, char *ptr, size_t byte_size)
|
|
583
|
+
{
|
|
584
|
+
VALUE obj;
|
|
585
|
+
cumo_narray_t *na;
|
|
586
|
+
|
|
587
|
+
if (OBJ_FROZEN(self)) {
|
|
588
|
+
rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
CumoGetNArray(self,na);
|
|
592
|
+
|
|
593
|
+
switch(CUMO_NA_TYPE(na)) {
|
|
594
|
+
case CUMO_NARRAY_DATA_T:
|
|
595
|
+
if (CUMO_NA_SIZE(na) > 0) {
|
|
596
|
+
if (CUMO_NA_DATA_PTR(na) != NULL && CUMO_NA_DATA_OWNED(na)) {
|
|
597
|
+
xfree(CUMO_NA_DATA_PTR(na));
|
|
598
|
+
}
|
|
599
|
+
CUMO_NA_DATA_PTR(na) = ptr;
|
|
600
|
+
CUMO_NA_DATA_OWNED(na) = FALSE;
|
|
601
|
+
}
|
|
602
|
+
return;
|
|
603
|
+
case CUMO_NARRAY_VIEW_T:
|
|
604
|
+
obj = CUMO_NA_VIEW_DATA(na);
|
|
605
|
+
if (OBJ_FROZEN(obj)) {
|
|
606
|
+
rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
|
|
607
|
+
}
|
|
608
|
+
CumoGetNArray(obj,na);
|
|
609
|
+
switch(CUMO_NA_TYPE(na)) {
|
|
610
|
+
case CUMO_NARRAY_DATA_T:
|
|
611
|
+
if (CUMO_NA_SIZE(na) > 0) {
|
|
612
|
+
if (CUMO_NA_DATA_PTR(na) != NULL && CUMO_NA_DATA_OWNED(na)) {
|
|
613
|
+
xfree(CUMO_NA_DATA_PTR(na));
|
|
614
|
+
}
|
|
615
|
+
CUMO_NA_DATA_PTR(na) = ptr;
|
|
616
|
+
CUMO_NA_DATA_OWNED(na) = FALSE;
|
|
617
|
+
}
|
|
618
|
+
return;
|
|
619
|
+
default:
|
|
620
|
+
rb_raise(rb_eRuntimeError,"invalid NA_TYPE of view: %d",CUMO_NA_TYPE(na));
|
|
621
|
+
}
|
|
622
|
+
default:
|
|
623
|
+
rb_raise(rb_eRuntimeError,"invalid NA_TYPE: %d",CUMO_NA_TYPE(na));
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
static void
|
|
628
|
+
cumo_na_pointer_copy_on_write(VALUE self)
|
|
629
|
+
{
|
|
630
|
+
cumo_narray_t *na;
|
|
631
|
+
void *ptr;
|
|
632
|
+
VALUE velmsz;
|
|
633
|
+
size_t byte_size;
|
|
634
|
+
|
|
635
|
+
CumoGetNArray(self,na);
|
|
636
|
+
if (CUMO_NA_TYPE(na) == CUMO_NARRAY_VIEW_T) {
|
|
637
|
+
self = CUMO_NA_VIEW_DATA(na);
|
|
638
|
+
CumoGetNArray(self,na);
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
ptr = CUMO_NA_DATA_PTR(na);
|
|
642
|
+
if (ptr == NULL) {
|
|
643
|
+
return;
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
if (CUMO_NA_DATA_OWNED(na)) {
|
|
647
|
+
return;
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
velmsz = rb_const_get(rb_obj_class(self), cumo_id_element_byte_size);
|
|
651
|
+
if (FIXNUM_P(velmsz)) {
|
|
652
|
+
byte_size = CUMO_NA_SIZE(na) * NUM2SIZET(velmsz);
|
|
653
|
+
} else {
|
|
654
|
+
byte_size = ceil(CUMO_NA_SIZE(na) * NUM2DBL(velmsz));
|
|
655
|
+
}
|
|
656
|
+
CUMO_NA_DATA_PTR(na) = NULL;
|
|
657
|
+
rb_funcall(self, cumo_id_allocate, 0);
|
|
658
|
+
memcpy(CUMO_NA_DATA_PTR(na), ptr, byte_size);
|
|
659
|
+
rb_ivar_set(self, cumo_id_source, Qnil);
|
|
660
|
+
}
|
|
661
|
+
|
|
580
662
|
static char *
|
|
581
663
|
cumo_na_get_pointer_for_rw(VALUE self, int flag)
|
|
582
664
|
{
|
|
@@ -592,6 +674,9 @@ cumo_na_get_pointer_for_rw(VALUE self, int flag)
|
|
|
592
674
|
|
|
593
675
|
switch(CUMO_NA_TYPE(na)) {
|
|
594
676
|
case CUMO_NARRAY_DATA_T:
|
|
677
|
+
if (flag & WRITE) {
|
|
678
|
+
cumo_na_pointer_copy_on_write(self);
|
|
679
|
+
}
|
|
595
680
|
ptr = CUMO_NA_DATA_PTR(na);
|
|
596
681
|
if (CUMO_NA_SIZE(na) > 0 && ptr == NULL) {
|
|
597
682
|
if (flag & READ) {
|
|
@@ -608,6 +693,9 @@ cumo_na_get_pointer_for_rw(VALUE self, int flag)
|
|
|
608
693
|
if ((flag & WRITE) && OBJ_FROZEN(obj)) {
|
|
609
694
|
rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
|
|
610
695
|
}
|
|
696
|
+
if (flag & WRITE) {
|
|
697
|
+
cumo_na_pointer_copy_on_write(self);
|
|
698
|
+
}
|
|
611
699
|
CumoGetNArray(obj,na);
|
|
612
700
|
switch(CUMO_NA_TYPE(na)) {
|
|
613
701
|
case CUMO_NARRAY_DATA_T:
|
|
@@ -1313,7 +1401,6 @@ static VALUE
|
|
|
1313
1401
|
cumo_na_store_binary(int argc, VALUE *argv, VALUE self)
|
|
1314
1402
|
{
|
|
1315
1403
|
size_t size, str_len, byte_size, offset;
|
|
1316
|
-
char *ptr;
|
|
1317
1404
|
int narg;
|
|
1318
1405
|
VALUE vstr, voffset;
|
|
1319
1406
|
VALUE velmsz;
|
|
@@ -1343,8 +1430,13 @@ cumo_na_store_binary(int argc, VALUE *argv, VALUE self)
|
|
|
1343
1430
|
rb_raise(rb_eArgError, "string is too short to store");
|
|
1344
1431
|
}
|
|
1345
1432
|
|
|
1346
|
-
|
|
1347
|
-
|
|
1433
|
+
if (OBJ_FROZEN(vstr)) {
|
|
1434
|
+
cumo_na_set_pointer(self, RSTRING_PTR(vstr)+offset, byte_size);
|
|
1435
|
+
rb_ivar_set(self, cumo_id_source, vstr);
|
|
1436
|
+
} else {
|
|
1437
|
+
void *ptr = cumo_na_get_pointer_for_write(self);
|
|
1438
|
+
memcpy(ptr, RSTRING_PTR(vstr)+offset, byte_size);
|
|
1439
|
+
}
|
|
1348
1440
|
|
|
1349
1441
|
return SIZET2NUM(byte_size);
|
|
1350
1442
|
}
|
|
@@ -1456,6 +1548,7 @@ cumo_na_marshal_load(VALUE self, VALUE a)
|
|
|
1456
1548
|
ptr = cumo_na_get_pointer_for_write(self);
|
|
1457
1549
|
memcpy(ptr, RARRAY_PTR(v), CUMO_NA_SIZE(na)*sizeof(VALUE));
|
|
1458
1550
|
} else {
|
|
1551
|
+
rb_str_freeze(v);
|
|
1459
1552
|
cumo_na_store_binary(1,&v,self);
|
|
1460
1553
|
if (CUMO_TEST_BYTE_SWAPPED(self)) {
|
|
1461
1554
|
rb_funcall(cumo_na_inplace(self),cumo_id_to_host,0);
|
|
@@ -2023,6 +2116,7 @@ Init_cumo_narray()
|
|
|
2023
2116
|
cumo_id_axis = rb_intern("axis");
|
|
2024
2117
|
cumo_id_nan = rb_intern("nan");
|
|
2025
2118
|
cumo_id_keepdims = rb_intern("keepdims");
|
|
2119
|
+
cumo_id_source = rb_intern("source");
|
|
2026
2120
|
|
|
2027
2121
|
cumo_sym_reduce = ID2SYM(rb_intern("reduce"));
|
|
2028
2122
|
cumo_sym_option = ID2SYM(rb_intern("option"));
|
data/test/narray_test.rb
CHANGED
|
@@ -736,10 +736,90 @@ class NArrayTest < Test::Unit::TestCase
|
|
|
736
736
|
at.at([0, 1], [0, 1]).inplace - 1
|
|
737
737
|
assert { at == [[0, 2, 3], [4, 4, 6]] }
|
|
738
738
|
end
|
|
739
|
+
|
|
740
|
+
sub_test_case "#{dtype}.from_binary" do
|
|
741
|
+
test "frozen string" do
|
|
742
|
+
shape = [2, 5]
|
|
743
|
+
a = dtype.new(*shape)
|
|
744
|
+
a.rand(0, 10)
|
|
745
|
+
original_data = a.to_binary
|
|
746
|
+
data = original_data.dup.freeze
|
|
747
|
+
restored_a = dtype.from_binary(data, shape)
|
|
748
|
+
assert { restored_a == a }
|
|
749
|
+
restored_a[0, 0] += 1
|
|
750
|
+
assert { restored_a != a }
|
|
751
|
+
assert { data == original_data }
|
|
752
|
+
end
|
|
753
|
+
|
|
754
|
+
test "not frozen string" do
|
|
755
|
+
shape = [2, 5]
|
|
756
|
+
a = dtype.new(*shape)
|
|
757
|
+
a.rand(0, 10)
|
|
758
|
+
original_data = a.to_binary
|
|
759
|
+
data = original_data.dup
|
|
760
|
+
restored_a = dtype.from_binary(data, shape)
|
|
761
|
+
assert { restored_a == a }
|
|
762
|
+
restored_a[0, 0] += 1
|
|
763
|
+
assert { restored_a != a }
|
|
764
|
+
assert { data == original_data }
|
|
765
|
+
end
|
|
766
|
+
end
|
|
767
|
+
|
|
768
|
+
sub_test_case "#{dtype}#store_binary" do
|
|
769
|
+
test "frozen string" do
|
|
770
|
+
shape = [2, 5]
|
|
771
|
+
a = dtype.new(*shape)
|
|
772
|
+
a.rand(0, 10)
|
|
773
|
+
original_data = a.to_binary
|
|
774
|
+
data = original_data.dup.freeze
|
|
775
|
+
restored_a = dtype.new(*shape)
|
|
776
|
+
restored_a.store_binary(data)
|
|
777
|
+
assert { restored_a == a }
|
|
778
|
+
restored_a[0, 0] += 1
|
|
779
|
+
assert { restored_a != a }
|
|
780
|
+
assert { data == original_data }
|
|
781
|
+
end
|
|
782
|
+
|
|
783
|
+
test "not frozen string" do
|
|
784
|
+
shape = [2, 5]
|
|
785
|
+
a = dtype.new(*shape)
|
|
786
|
+
a.rand(0, 10)
|
|
787
|
+
original_data = a.to_binary
|
|
788
|
+
data = original_data.dup
|
|
789
|
+
restored_a = dtype.new(*shape)
|
|
790
|
+
restored_a.store_binary(data)
|
|
791
|
+
assert { restored_a == a }
|
|
792
|
+
restored_a[0, 0] += 1
|
|
793
|
+
assert { restored_a != a }
|
|
794
|
+
assert { data == original_data }
|
|
795
|
+
end
|
|
796
|
+
end
|
|
739
797
|
end
|
|
740
798
|
|
|
741
799
|
test "Cumo::DFloat.cast(Cumo::RObject[1, nil, 3])" do
|
|
742
800
|
assert_equal(Cumo::DFloat[1, Float::NAN, 3].format_to_a,
|
|
743
801
|
Cumo::DFloat.cast(Cumo::RObject[1, nil, 3]).format_to_a)
|
|
744
802
|
end
|
|
803
|
+
|
|
804
|
+
test "single element array" do
|
|
805
|
+
assert { Cumo::SFloat[1].mean == 1.0 }
|
|
806
|
+
assert { Cumo::DFloat[1].mean == 1.0 }
|
|
807
|
+
assert { Cumo::SComplex[1].mean == 1.0 }
|
|
808
|
+
assert { Cumo::DComplex[1].mean == 1.0 }
|
|
809
|
+
|
|
810
|
+
assert { Cumo::SFloat[1].var.to_f.nan? }
|
|
811
|
+
assert { Cumo::DFloat[1].var.to_f.nan? }
|
|
812
|
+
assert { Cumo::SComplex[1].var.to_f.nan? }
|
|
813
|
+
assert { Cumo::DComplex[1].var.to_f.nan? }
|
|
814
|
+
|
|
815
|
+
assert { Cumo::SFloat[1].stddev.to_f.nan? }
|
|
816
|
+
assert { Cumo::DFloat[1].stddev.to_f.nan? }
|
|
817
|
+
assert { Cumo::SComplex[1].stddev.to_f.nan? }
|
|
818
|
+
assert { Cumo::DComplex[1].stddev.to_f.nan? }
|
|
819
|
+
|
|
820
|
+
assert { Cumo::SFloat[1].rms == 1.0 }
|
|
821
|
+
assert { Cumo::DFloat[1].rms == 1.0 }
|
|
822
|
+
assert { Cumo::SComplex[1].rms == 1.0 }
|
|
823
|
+
assert { Cumo::DComplex[1].rms == 1.0 }
|
|
824
|
+
end
|
|
745
825
|
end
|
data/test/ractor_test.rb
CHANGED
|
@@ -10,7 +10,7 @@ class NArrayRactorTest < CumoTestBase
|
|
|
10
10
|
dtype = data.fetch(:dtype)
|
|
11
11
|
ary = random_array(dtype)
|
|
12
12
|
r = Ractor.new(ary) { |x| x }
|
|
13
|
-
ary2 = r.take
|
|
13
|
+
ary2 = r.respond_to?(:take) ? r.take : r.value
|
|
14
14
|
assert_equal(ary, ary2)
|
|
15
15
|
assert_not_same(ary, ary2)
|
|
16
16
|
end
|
|
@@ -22,7 +22,7 @@ class NArrayRactorTest < CumoTestBase
|
|
|
22
22
|
r = Ractor.new(ary1) do |ary2|
|
|
23
23
|
[ary2, ary2 * 10]
|
|
24
24
|
end
|
|
25
|
-
ary2, res = r.take
|
|
25
|
+
ary2, res = r.respond_to?(:take) ? r.take : r.value
|
|
26
26
|
assert_equal((dtype != Cumo::RObject),
|
|
27
27
|
ary1.equal?(ary2))
|
|
28
28
|
assert_equal(ary1 * 10, res)
|
|
@@ -37,7 +37,9 @@ class NArrayRactorTest < CumoTestBase
|
|
|
37
37
|
r2 = Ractor.new(ary1) do |ary4|
|
|
38
38
|
ary4 * 10
|
|
39
39
|
end
|
|
40
|
-
|
|
40
|
+
result1 = r1.respond_to?(:take) ? r1.take : r1.value
|
|
41
|
+
result2 = r2.respond_to?(:take) ? r2.take : r2.value
|
|
42
|
+
assert_equal(result1, result2)
|
|
41
43
|
end
|
|
42
44
|
|
|
43
45
|
def random_array(dtype, n=1000)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cumo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.1
|
|
4
|
+
version: 0.5.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Naotoshi Seo
|
|
@@ -331,7 +331,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
331
331
|
- !ruby/object:Gem::Version
|
|
332
332
|
version: '0'
|
|
333
333
|
requirements: []
|
|
334
|
-
rubygems_version:
|
|
334
|
+
rubygems_version: 4.0.4
|
|
335
335
|
specification_version: 4
|
|
336
336
|
summary: Cumo is CUDA aware numerical library whose interface is highly compatible
|
|
337
337
|
with Ruby Numo
|