cumo 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 93c1ecf4d6098da90d957600dc7254e02072999fa33374951809cb8c4f5645ee
4
- data.tar.gz: f8961f11f4b8feed097fbfbe3fe0603e270f8f1b44121c112c506e42cefc2bf1
3
+ metadata.gz: 17cb9dfdf9be41292bcd0204a67c5f919da60588d005d4441ad632767dce504c
4
+ data.tar.gz: 4c6e388cdb5b3b9f99d45a989a610d63e493fe53b3baad570c7bb2656f72d86c
5
5
  SHA512:
6
- metadata.gz: df0b42ff21e2158657e0d8a86872f9e85a6af7ab6ae09c7dfc4368d71001846c7f087633bfa2f6071bdd43f910da041470a43694c2aa2c37c74b5ff684e85c88
7
- data.tar.gz: 95572510fbc31633f423db010c9135271c5ded4bfda28c5f07734b90d76e9fd36fa8c2af0bdd1d03151df2eba93aa3f07c61d6d39aa2f8c7d011364a7ee99615
6
+ metadata.gz: 917adaa087836d673a143364f88fb9ddf91ad84cbcc064b115258e32f4b22e70a63c45f3e655a2e56ad58e70d6a4330f3e63ce177b5ad9e0a0c9c11685b39503
7
+ data.tar.gz: c5ec5a4179266a1cf4c5f15b5bc0d7e8b0b01a5a69bb7a75ad3c1387e861666da8b571156bc90295f2cf702558f18e332ce7fd68823330f834ea8d9c4f8b6419
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-12-29 17:31:25 UTC using RuboCop version 1.82.1.
3
+ # on 2026-01-09 18:33:26 UTC using RuboCop version 1.82.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -371,7 +371,7 @@ Lint/ConstantDefinitionInBlock:
371
371
  Exclude:
372
372
  - 'test/cuda/compiler_test.rb'
373
373
 
374
- # Offense count: 650
374
+ # Offense count: 665
375
375
  # Configuration parameters: Only, Ignore.
376
376
  Lint/ConstantResolution:
377
377
  Enabled: false
@@ -388,7 +388,7 @@ Lint/ErbNewArguments:
388
388
  Exclude:
389
389
  - 'ext/cumo/narray/gen/erbpp2.rb'
390
390
 
391
- # Offense count: 7
391
+ # Offense count: 15
392
392
  Lint/FloatComparison:
393
393
  Exclude:
394
394
  - 'test/narray_test.rb'
@@ -411,7 +411,7 @@ Lint/NonAtomicFileOperation:
411
411
  Exclude:
412
412
  - 'lib/cumo/cuda/compiler.rb'
413
413
 
414
- # Offense count: 26
414
+ # Offense count: 34
415
415
  # This cop supports unsafe autocorrection (--autocorrect-all).
416
416
  # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredClasses.
417
417
  # IgnoredClasses: Time, DateTime
@@ -424,6 +424,7 @@ Lint/NumberConversion:
424
424
  - 'ext/cumo/narray/gen/erbln.rb'
425
425
  - 'lib/cumo/narray/extra.rb'
426
426
  - 'test/cudnn_test.rb'
427
+ - 'test/narray_test.rb'
427
428
 
428
429
  # Offense count: 2
429
430
  # This cop supports safe autocorrection (--autocorrect).
@@ -38,33 +38,33 @@ module MakeMakefileCuda
38
38
  cmd = "nvcc #{s}"
39
39
  if ENV['CUMO_NVCC_GENERATE_CODE']
40
40
  cmd << " --generate-code=#{ENV['CUMO_NVCC_GENERATE_CODE']}"
41
- elsif ENV['DEBUG']
42
- cmd << " -arch=sm_35"
43
41
  else
44
- # Ref. https://en.wikipedia.org/wiki/CUDA
45
- if cuda_version >= Gem::Version.new("13.0")
46
- # CUDA 13.0
47
- capability = [75, 87, 89, 90, 121]
48
- elsif cuda_version >= Gem::Version.new("12.9")
49
- # CUDA 12.9
50
- capability = [50, 60, 70, 75, 87, 89, 90, 121]
51
- elsif cuda_version >= Gem::Version.new("12.8")
52
- # CUDA 12.8
53
- capability = [50, 60, 70, 75, 87, 89, 90, 120]
54
- elsif cuda_version >= Gem::Version.new("12.0")
55
- # CUDA 12.0 – 12.6
56
- capability = [50, 60, 70, 75, 87, 89, 90]
57
- elsif cuda_version >= Gem::Version.new("11.8")
58
- # CUDA 11.8
59
- capability = [35, 50, 60, 70, 75, 87, 89, 90]
60
- else
61
- # CUDA 11.0
62
- capability = [35, 50, 60, 70, 75, 80]
63
- end
64
-
42
+ capability = nil
65
43
  if find_executable('nvidia-smi')
66
44
  arch_version = `nvidia-smi --query-gpu=compute_cap --format=csv,noheader`.strip
67
- capability << (arch_version.to_f * 10).to_i unless arch_version.empty?
45
+ capability = [(arch_version.to_f * 10).to_i] unless arch_version.empty?
46
+ end
47
+ unless capability
48
+ # Ref. https://en.wikipedia.org/wiki/CUDA
49
+ if cuda_version >= Gem::Version.new("13.0")
50
+ # CUDA 13.0
51
+ capability = [75, 80, 86, 87, 89, 90, 100, 103, 110, 120, 121]
52
+ elsif cuda_version >= Gem::Version.new("12.9")
53
+ # CUDA 12.9
54
+ capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100, 103, 110, 120, 121]
55
+ elsif cuda_version >= Gem::Version.new("12.8")
56
+ # CUDA 12.8
57
+ capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100, 103, 110, 120]
58
+ elsif cuda_version >= Gem::Version.new("12.0")
59
+ # CUDA 12.0 – 12.6
60
+ capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90]
61
+ elsif cuda_version >= Gem::Version.new("11.8")
62
+ # CUDA 11.8
63
+ capability = [35, 50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90]
64
+ else
65
+ # CUDA 11.0
66
+ capability = [35, 50, 60, 61, 62, 70, 72, 75, 80]
67
+ end
68
68
  end
69
69
 
70
70
  capability.each do |arch|
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ # 0.5.2 (2026/01/25)
2
+
3
+ Fixes:
4
+
5
+ * Backport: Add support for copy on write with store_binary and frozen string
6
+ * Remove unnecessary debug code
7
+ * Fix capability list
8
+ * Build only with supported capabilities to reduce compilation time
9
+ * Fix SEGV when calling {mean, var, stddev, rms} on a single-element array (#154)
10
+ * Suppress warning message for deprecated declarations
11
+ * Fix variable typo in complex log2 and log10 functions (#152)
12
+
1
13
  # 0.5.1 (2025/12/30)
2
14
 
3
15
  Enhancements:
@@ -26,7 +26,7 @@ class cumo_thrust_strided_range
26
26
  {
27
27
  public:
28
28
 
29
- typedef typename thrust::iterator_difference<Iterator>::type difference_type;
29
+ typedef typename thrust::iterator_traits<Iterator>::difference_type difference_type;
30
30
 
31
31
  struct stride_functor
32
32
  {
@@ -226,6 +226,7 @@ typedef struct {
226
226
  typedef struct {
227
227
  cumo_narray_t base;
228
228
  char *ptr;
229
+ bool owned;
229
230
  } cumo_narray_data_t;
230
231
 
231
232
 
@@ -360,6 +361,7 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
360
361
  #define CUMO_NA_DATA(na) ((cumo_narray_data_t*)(na))
361
362
  #define CUMO_NA_VIEW(na) ((cumo_narray_view_t*)(na))
362
363
  #define CUMO_NA_DATA_PTR(na) (CUMO_NA_DATA(na)->ptr)
364
+ #define CUMO_NA_DATA_OWNED(na) (CUMO_NA_DATA(na)->owned)
363
365
  #define CUMO_NA_VIEW_DATA(na) (CUMO_NA_VIEW(na)->data)
364
366
  #define CUMO_NA_VIEW_OFFSET(na) (CUMO_NA_VIEW(na)->offset)
365
367
  #define CUMO_NA_VIEW_STRIDX(na) (CUMO_NA_VIEW(na)->stridx)
@@ -166,14 +166,14 @@ static inline dtype c_log(dtype x) {
166
166
  static inline dtype c_log2(dtype x) {
167
167
  dtype z;
168
168
  z = c_log(x);
169
- z = c_mul_r(x,M_LOG2E);
169
+ z = c_mul_r(z,M_LOG2E);
170
170
  return z;
171
171
  }
172
172
 
173
173
  static inline dtype c_log10(dtype x) {
174
174
  dtype z;
175
175
  z = c_log(x);
176
- z = c_mul_r(x,M_LOG10E);
176
+ z = c_mul_r(z,M_LOG10E);
177
177
  return z;
178
178
  }
179
179
 
@@ -10,7 +10,7 @@ extern "C" {
10
10
  #endif
11
11
  #endif
12
12
 
13
- #define CUMO_VERSION "0.5.1"
13
+ #define CUMO_VERSION "0.5.2"
14
14
  #define CUMO_VERSION_CODE 51
15
15
 
16
16
  bool cumo_compatible_mode_enabled_p();
@@ -29,7 +29,9 @@ static void
29
29
  assert(na->base.type == CUMO_NARRAY_DATA_T);
30
30
 
31
31
  if (na->ptr != NULL) {
32
- cumo_cuda_runtime_free(na->ptr);
32
+ if (na->owned) {
33
+ cumo_cuda_runtime_free(na->ptr);
34
+ }
33
35
  na->ptr = NULL;
34
36
  }
35
37
  if (na->base.size > 0) {
@@ -103,5 +105,6 @@ static VALUE
103
105
  na->base.shape = NULL;
104
106
  na->base.reduce = INT2FIX(0);
105
107
  na->ptr = NULL;
108
+ na->owned = FALSE;
106
109
  return TypedData_Wrap_Struct(klass, &<%=type_name%>_data_type, (void*)na);
107
110
  }
@@ -22,6 +22,7 @@ static VALUE
22
22
  ptr = cumo_cuda_runtime_malloc(sizeof(dtype) * na->size);
23
23
  <% end %>
24
24
  CUMO_NA_DATA_PTR(na) = ptr;
25
+ CUMO_NA_DATA_OWNED(na) = TRUE;
25
26
  }
26
27
  break;
27
28
  case CUMO_NARRAY_VIEW_T:
@@ -79,10 +79,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
79
79
  {
80
80
  ssize_t s1_idx = s1 / sizeof(dtype);
81
81
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
82
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
83
- if (s1_idx == 1) {
84
- cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (dtype*)p2, n);
82
+ if (s1_idx == 1 || n == 1) {
83
+ cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (dtype*)p2, n);
85
84
  } else {
85
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
86
86
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
87
87
  cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (dtype*)p2, n);
88
88
  }
@@ -92,10 +92,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
92
92
  {
93
93
  ssize_t s1_idx = s1 / sizeof(dtype);
94
94
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
95
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
96
- if (s1_idx == 1) {
97
- cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
95
+ if (s1_idx == 1 || n == 1) {
96
+ cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
98
97
  } else {
98
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
99
99
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
100
100
  cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
101
101
  }
@@ -105,10 +105,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
105
105
  {
106
106
  ssize_t s1_idx = s1 / sizeof(dtype);
107
107
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
108
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
109
- if (s1_idx == 1) {
110
- cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
108
+ if (s1_idx == 1 || n == 1) {
109
+ cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
111
110
  } else {
111
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
112
112
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
113
113
  cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
114
114
  }
@@ -118,10 +118,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
118
118
  {
119
119
  ssize_t s1_idx = s1 / sizeof(dtype);
120
120
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
121
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
122
- if (s1_idx == 1) {
123
- cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2, n);
121
+ if (s1_idx == 1 || n == 1) {
122
+ cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2, n);
124
123
  } else {
124
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
125
125
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
126
126
  cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2, n);
127
127
  }
@@ -57,10 +57,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
57
57
  {
58
58
  ssize_t s1_idx = s1 / sizeof(dtype);
59
59
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
60
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
61
- if (s1_idx == 1) {
62
- cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
60
+ if (s1_idx == 1 || n == 1) {
61
+ cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
63
62
  } else {
63
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
64
64
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
65
65
  cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
66
66
  }
@@ -70,10 +70,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
70
70
  {
71
71
  ssize_t s1_idx = s1 / sizeof(dtype);
72
72
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
73
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
74
- if (s1_idx == 1) {
75
- cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
73
+ if (s1_idx == 1 || n == 1) {
74
+ cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
76
75
  } else {
76
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
77
77
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
78
78
  cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
79
79
  }
@@ -83,10 +83,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
83
83
  {
84
84
  ssize_t s1_idx = s1 / sizeof(dtype);
85
85
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
86
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
87
- if (s1_idx == 1) {
88
- cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
86
+ if (s1_idx == 1 || n == 1) {
87
+ cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
89
88
  } else {
89
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
90
90
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
91
91
  cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
92
92
  }
@@ -96,10 +96,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
96
96
  {
97
97
  ssize_t s1_idx = s1 / sizeof(dtype);
98
98
  thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
99
- thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
100
- if (s1_idx == 1) {
101
- cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
99
+ if (s1_idx == 1 || n == 1) {
100
+ cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
102
101
  } else {
102
+ thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
103
103
  cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
104
104
  cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
105
105
  }
@@ -12,6 +12,7 @@ static VALUE
12
12
  if (na->size > 0 && ptr == NULL) {
13
13
  ptr = cumo_cuda_runtime_malloc(((na->size-1)/8/sizeof(CUMO_BIT_DIGIT)+1)*sizeof(CUMO_BIT_DIGIT));
14
14
  CUMO_NA_DATA_PTR(na) = ptr;
15
+ CUMO_NA_DATA_OWNED(na) = TRUE;
15
16
  }
16
17
  break;
17
18
  case CUMO_NARRAY_VIEW_T:
@@ -34,6 +34,7 @@ static ID cumo_id_count_false_cpu;
34
34
  static ID cumo_id_axis;
35
35
  static ID cumo_id_nan;
36
36
  static ID cumo_id_keepdims;
37
+ static ID cumo_id_source;
37
38
 
38
39
  VALUE cumo_sym_reduce;
39
40
  VALUE cumo_sym_option;
@@ -577,6 +578,87 @@ cumo_na_s_eye(int argc, VALUE *argv, VALUE klass)
577
578
  #define READ 1
578
579
  #define WRITE 2
579
580
 
581
+ static void
582
+ cumo_na_set_pointer(VALUE self, char *ptr, size_t byte_size)
583
+ {
584
+ VALUE obj;
585
+ cumo_narray_t *na;
586
+
587
+ if (OBJ_FROZEN(self)) {
588
+ rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
589
+ }
590
+
591
+ CumoGetNArray(self,na);
592
+
593
+ switch(CUMO_NA_TYPE(na)) {
594
+ case CUMO_NARRAY_DATA_T:
595
+ if (CUMO_NA_SIZE(na) > 0) {
596
+ if (CUMO_NA_DATA_PTR(na) != NULL && CUMO_NA_DATA_OWNED(na)) {
597
+ xfree(CUMO_NA_DATA_PTR(na));
598
+ }
599
+ CUMO_NA_DATA_PTR(na) = ptr;
600
+ CUMO_NA_DATA_OWNED(na) = FALSE;
601
+ }
602
+ return;
603
+ case CUMO_NARRAY_VIEW_T:
604
+ obj = CUMO_NA_VIEW_DATA(na);
605
+ if (OBJ_FROZEN(obj)) {
606
+ rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
607
+ }
608
+ CumoGetNArray(obj,na);
609
+ switch(CUMO_NA_TYPE(na)) {
610
+ case CUMO_NARRAY_DATA_T:
611
+ if (CUMO_NA_SIZE(na) > 0) {
612
+ if (CUMO_NA_DATA_PTR(na) != NULL && CUMO_NA_DATA_OWNED(na)) {
613
+ xfree(CUMO_NA_DATA_PTR(na));
614
+ }
615
+ CUMO_NA_DATA_PTR(na) = ptr;
616
+ CUMO_NA_DATA_OWNED(na) = FALSE;
617
+ }
618
+ return;
619
+ default:
620
+ rb_raise(rb_eRuntimeError,"invalid NA_TYPE of view: %d",CUMO_NA_TYPE(na));
621
+ }
622
+ default:
623
+ rb_raise(rb_eRuntimeError,"invalid NA_TYPE: %d",CUMO_NA_TYPE(na));
624
+ }
625
+ }
626
+
627
+ static void
628
+ cumo_na_pointer_copy_on_write(VALUE self)
629
+ {
630
+ cumo_narray_t *na;
631
+ void *ptr;
632
+ VALUE velmsz;
633
+ size_t byte_size;
634
+
635
+ CumoGetNArray(self,na);
636
+ if (CUMO_NA_TYPE(na) == CUMO_NARRAY_VIEW_T) {
637
+ self = CUMO_NA_VIEW_DATA(na);
638
+ CumoGetNArray(self,na);
639
+ }
640
+
641
+ ptr = CUMO_NA_DATA_PTR(na);
642
+ if (ptr == NULL) {
643
+ return;
644
+ }
645
+
646
+ if (CUMO_NA_DATA_OWNED(na)) {
647
+ return;
648
+ }
649
+
650
+ velmsz = rb_const_get(rb_obj_class(self), cumo_id_element_byte_size);
651
+ if (FIXNUM_P(velmsz)) {
652
+ byte_size = CUMO_NA_SIZE(na) * NUM2SIZET(velmsz);
653
+ } else {
654
+ byte_size = ceil(CUMO_NA_SIZE(na) * NUM2DBL(velmsz));
655
+ }
656
+ CUMO_NA_DATA_PTR(na) = NULL;
657
+ rb_funcall(self, cumo_id_allocate, 0);
658
+ memcpy(CUMO_NA_DATA_PTR(na), ptr, byte_size);
659
+ rb_ivar_set(self, cumo_id_source, Qnil);
660
+ }
661
+
580
662
  static char *
581
663
  cumo_na_get_pointer_for_rw(VALUE self, int flag)
582
664
  {
@@ -592,6 +674,9 @@ cumo_na_get_pointer_for_rw(VALUE self, int flag)
592
674
 
593
675
  switch(CUMO_NA_TYPE(na)) {
594
676
  case CUMO_NARRAY_DATA_T:
677
+ if (flag & WRITE) {
678
+ cumo_na_pointer_copy_on_write(self);
679
+ }
595
680
  ptr = CUMO_NA_DATA_PTR(na);
596
681
  if (CUMO_NA_SIZE(na) > 0 && ptr == NULL) {
597
682
  if (flag & READ) {
@@ -608,6 +693,9 @@ cumo_na_get_pointer_for_rw(VALUE self, int flag)
608
693
  if ((flag & WRITE) && OBJ_FROZEN(obj)) {
609
694
  rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
610
695
  }
696
+ if (flag & WRITE) {
697
+ cumo_na_pointer_copy_on_write(self);
698
+ }
611
699
  CumoGetNArray(obj,na);
612
700
  switch(CUMO_NA_TYPE(na)) {
613
701
  case CUMO_NARRAY_DATA_T:
@@ -1313,7 +1401,6 @@ static VALUE
1313
1401
  cumo_na_store_binary(int argc, VALUE *argv, VALUE self)
1314
1402
  {
1315
1403
  size_t size, str_len, byte_size, offset;
1316
- char *ptr;
1317
1404
  int narg;
1318
1405
  VALUE vstr, voffset;
1319
1406
  VALUE velmsz;
@@ -1343,8 +1430,13 @@ cumo_na_store_binary(int argc, VALUE *argv, VALUE self)
1343
1430
  rb_raise(rb_eArgError, "string is too short to store");
1344
1431
  }
1345
1432
 
1346
- ptr = cumo_na_get_pointer_for_write(self);
1347
- memcpy(ptr, RSTRING_PTR(vstr)+offset, byte_size);
1433
+ if (OBJ_FROZEN(vstr)) {
1434
+ cumo_na_set_pointer(self, RSTRING_PTR(vstr)+offset, byte_size);
1435
+ rb_ivar_set(self, cumo_id_source, vstr);
1436
+ } else {
1437
+ void *ptr = cumo_na_get_pointer_for_write(self);
1438
+ memcpy(ptr, RSTRING_PTR(vstr)+offset, byte_size);
1439
+ }
1348
1440
 
1349
1441
  return SIZET2NUM(byte_size);
1350
1442
  }
@@ -1456,6 +1548,7 @@ cumo_na_marshal_load(VALUE self, VALUE a)
1456
1548
  ptr = cumo_na_get_pointer_for_write(self);
1457
1549
  memcpy(ptr, RARRAY_PTR(v), CUMO_NA_SIZE(na)*sizeof(VALUE));
1458
1550
  } else {
1551
+ rb_str_freeze(v);
1459
1552
  cumo_na_store_binary(1,&v,self);
1460
1553
  if (CUMO_TEST_BYTE_SWAPPED(self)) {
1461
1554
  rb_funcall(cumo_na_inplace(self),cumo_id_to_host,0);
@@ -2023,6 +2116,7 @@ Init_cumo_narray()
2023
2116
  cumo_id_axis = rb_intern("axis");
2024
2117
  cumo_id_nan = rb_intern("nan");
2025
2118
  cumo_id_keepdims = rb_intern("keepdims");
2119
+ cumo_id_source = rb_intern("source");
2026
2120
 
2027
2121
  cumo_sym_reduce = ID2SYM(rb_intern("reduce"));
2028
2122
  cumo_sym_option = ID2SYM(rb_intern("option"));
data/test/narray_test.rb CHANGED
@@ -736,10 +736,90 @@ class NArrayTest < Test::Unit::TestCase
736
736
  at.at([0, 1], [0, 1]).inplace - 1
737
737
  assert { at == [[0, 2, 3], [4, 4, 6]] }
738
738
  end
739
+
740
+ sub_test_case "#{dtype}.from_binary" do
741
+ test "frozen string" do
742
+ shape = [2, 5]
743
+ a = dtype.new(*shape)
744
+ a.rand(0, 10)
745
+ original_data = a.to_binary
746
+ data = original_data.dup.freeze
747
+ restored_a = dtype.from_binary(data, shape)
748
+ assert { restored_a == a }
749
+ restored_a[0, 0] += 1
750
+ assert { restored_a != a }
751
+ assert { data == original_data }
752
+ end
753
+
754
+ test "not frozen string" do
755
+ shape = [2, 5]
756
+ a = dtype.new(*shape)
757
+ a.rand(0, 10)
758
+ original_data = a.to_binary
759
+ data = original_data.dup
760
+ restored_a = dtype.from_binary(data, shape)
761
+ assert { restored_a == a }
762
+ restored_a[0, 0] += 1
763
+ assert { restored_a != a }
764
+ assert { data == original_data }
765
+ end
766
+ end
767
+
768
+ sub_test_case "#{dtype}#store_binary" do
769
+ test "frozen string" do
770
+ shape = [2, 5]
771
+ a = dtype.new(*shape)
772
+ a.rand(0, 10)
773
+ original_data = a.to_binary
774
+ data = original_data.dup.freeze
775
+ restored_a = dtype.new(*shape)
776
+ restored_a.store_binary(data)
777
+ assert { restored_a == a }
778
+ restored_a[0, 0] += 1
779
+ assert { restored_a != a }
780
+ assert { data == original_data }
781
+ end
782
+
783
+ test "not frozen string" do
784
+ shape = [2, 5]
785
+ a = dtype.new(*shape)
786
+ a.rand(0, 10)
787
+ original_data = a.to_binary
788
+ data = original_data.dup
789
+ restored_a = dtype.new(*shape)
790
+ restored_a.store_binary(data)
791
+ assert { restored_a == a }
792
+ restored_a[0, 0] += 1
793
+ assert { restored_a != a }
794
+ assert { data == original_data }
795
+ end
796
+ end
739
797
  end
740
798
 
741
799
  test "Cumo::DFloat.cast(Cumo::RObject[1, nil, 3])" do
742
800
  assert_equal(Cumo::DFloat[1, Float::NAN, 3].format_to_a,
743
801
  Cumo::DFloat.cast(Cumo::RObject[1, nil, 3]).format_to_a)
744
802
  end
803
+
804
+ test "single element array" do
805
+ assert { Cumo::SFloat[1].mean == 1.0 }
806
+ assert { Cumo::DFloat[1].mean == 1.0 }
807
+ assert { Cumo::SComplex[1].mean == 1.0 }
808
+ assert { Cumo::DComplex[1].mean == 1.0 }
809
+
810
+ assert { Cumo::SFloat[1].var.to_f.nan? }
811
+ assert { Cumo::DFloat[1].var.to_f.nan? }
812
+ assert { Cumo::SComplex[1].var.to_f.nan? }
813
+ assert { Cumo::DComplex[1].var.to_f.nan? }
814
+
815
+ assert { Cumo::SFloat[1].stddev.to_f.nan? }
816
+ assert { Cumo::DFloat[1].stddev.to_f.nan? }
817
+ assert { Cumo::SComplex[1].stddev.to_f.nan? }
818
+ assert { Cumo::DComplex[1].stddev.to_f.nan? }
819
+
820
+ assert { Cumo::SFloat[1].rms == 1.0 }
821
+ assert { Cumo::DFloat[1].rms == 1.0 }
822
+ assert { Cumo::SComplex[1].rms == 1.0 }
823
+ assert { Cumo::DComplex[1].rms == 1.0 }
824
+ end
745
825
  end
data/test/ractor_test.rb CHANGED
@@ -10,7 +10,7 @@ class NArrayRactorTest < CumoTestBase
10
10
  dtype = data.fetch(:dtype)
11
11
  ary = random_array(dtype)
12
12
  r = Ractor.new(ary) { |x| x }
13
- ary2 = r.take
13
+ ary2 = r.respond_to?(:take) ? r.take : r.value
14
14
  assert_equal(ary, ary2)
15
15
  assert_not_same(ary, ary2)
16
16
  end
@@ -22,7 +22,7 @@ class NArrayRactorTest < CumoTestBase
22
22
  r = Ractor.new(ary1) do |ary2|
23
23
  [ary2, ary2 * 10]
24
24
  end
25
- ary2, res = r.take
25
+ ary2, res = r.respond_to?(:take) ? r.take : r.value
26
26
  assert_equal((dtype != Cumo::RObject),
27
27
  ary1.equal?(ary2))
28
28
  assert_equal(ary1 * 10, res)
@@ -37,7 +37,9 @@ class NArrayRactorTest < CumoTestBase
37
37
  r2 = Ractor.new(ary1) do |ary4|
38
38
  ary4 * 10
39
39
  end
40
- assert_equal(r1.take, r2.take)
40
+ result1 = r1.respond_to?(:take) ? r1.take : r1.value
41
+ result2 = r2.respond_to?(:take) ? r2.take : r2.value
42
+ assert_equal(result1, result2)
41
43
  end
42
44
 
43
45
  def random_array(dtype, n=1000)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cumo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
@@ -331,7 +331,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
331
331
  - !ruby/object:Gem::Version
332
332
  version: '0'
333
333
  requirements: []
334
- rubygems_version: 3.6.9
334
+ rubygems_version: 4.0.4
335
335
  specification_version: 4
336
336
  summary: Cumo is CUDA aware numerical library whose interface is highly compatible
337
337
  with Ruby Numo