cumo 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +15 -0
  4. data/.rubocop_todo.yml +1252 -0
  5. data/3rd_party/mkmf-cu/Gemfile +2 -0
  6. data/3rd_party/mkmf-cu/Rakefile +2 -1
  7. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
  8. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
  9. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
  11. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
  12. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
  13. data/CHANGELOG.md +85 -0
  14. data/Dockerfile +34 -0
  15. data/Gemfile +6 -1
  16. data/README.md +2 -10
  17. data/Rakefile +8 -11
  18. data/bench/broadcast_fp32.rb +28 -26
  19. data/bench/cumo_bench.rb +18 -16
  20. data/bench/numo_bench.rb +18 -16
  21. data/bench/reduction_fp32.rb +14 -12
  22. data/bin/console +1 -0
  23. data/cumo.gemspec +6 -9
  24. data/docker-build.sh +4 -0
  25. data/docker-launch.sh +4 -0
  26. data/docs/src-tree.md +1 -1
  27. data/ext/cumo/cuda/cudnn.c +2 -2
  28. data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
  29. data/ext/cumo/cuda/driver.c +8 -0
  30. data/ext/cumo/cumo.c +7 -3
  31. data/ext/cumo/depend.erb +15 -13
  32. data/ext/cumo/extconf.rb +33 -47
  33. data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
  34. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
  35. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
  36. data/ext/cumo/include/cumo/intern.h +1 -0
  37. data/ext/cumo/include/cumo/narray.h +13 -1
  38. data/ext/cumo/include/cumo/template.h +2 -4
  39. data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
  40. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
  41. data/ext/cumo/include/cumo/types/float_macro.h +2 -2
  42. data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
  43. data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
  44. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
  45. data/ext/cumo/include/cumo.h +2 -2
  46. data/ext/cumo/narray/array.c +8 -6
  47. data/ext/cumo/narray/data.c +48 -28
  48. data/ext/cumo/narray/gen/cogen.rb +8 -7
  49. data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
  50. data/ext/cumo/narray/gen/def/bit.rb +3 -1
  51. data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
  52. data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
  53. data/ext/cumo/narray/gen/def/int16.rb +2 -0
  54. data/ext/cumo/narray/gen/def/int32.rb +2 -0
  55. data/ext/cumo/narray/gen/def/int64.rb +2 -0
  56. data/ext/cumo/narray/gen/def/int8.rb +2 -0
  57. data/ext/cumo/narray/gen/def/robject.rb +2 -0
  58. data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
  59. data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
  60. data/ext/cumo/narray/gen/def/uint16.rb +2 -0
  61. data/ext/cumo/narray/gen/def/uint32.rb +2 -0
  62. data/ext/cumo/narray/gen/def/uint64.rb +2 -0
  63. data/ext/cumo/narray/gen/def/uint8.rb +2 -0
  64. data/ext/cumo/narray/gen/erbln.rb +9 -7
  65. data/ext/cumo/narray/gen/erbpp2.rb +26 -24
  66. data/ext/cumo/narray/gen/narray_def.rb +13 -11
  67. data/ext/cumo/narray/gen/spec.rb +58 -55
  68. data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
  69. data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
  70. data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
  71. data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
  72. data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
  73. data/ext/cumo/narray/gen/tmpl/at.c +34 -0
  74. data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
  75. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
  76. data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
  77. data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
  78. data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
  79. data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
  80. data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
  81. data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
  82. data/ext/cumo/narray/gen/tmpl/each.c +4 -2
  83. data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
  84. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
  85. data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
  86. data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
  87. data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
  88. data/ext/cumo/narray/gen/tmpl/median.c +2 -2
  89. data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
  91. data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
  92. data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
  93. data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
  94. data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
  95. data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
  96. data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
  97. data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
  98. data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
  99. data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
  100. data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
  101. data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
  102. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
  103. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
  104. data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
  105. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
  106. data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
  107. data/ext/cumo/narray/index.c +244 -40
  108. data/ext/cumo/narray/index_kernel.cu +84 -0
  109. data/ext/cumo/narray/narray.c +57 -19
  110. data/ext/cumo/narray/ndloop.c +1 -1
  111. data/ext/cumo/narray/struct.c +1 -1
  112. data/lib/cumo/cuda/compile_error.rb +1 -1
  113. data/lib/cumo/cuda/compiler.rb +23 -22
  114. data/lib/cumo/cuda/cudnn.rb +1 -1
  115. data/lib/cumo/cuda/device.rb +1 -1
  116. data/lib/cumo/cuda/link_state.rb +2 -2
  117. data/lib/cumo/cuda/module.rb +1 -2
  118. data/lib/cumo/cuda/nvrtc_program.rb +3 -2
  119. data/lib/cumo/cuda.rb +2 -0
  120. data/lib/cumo/linalg.rb +2 -0
  121. data/lib/cumo/narray/extra.rb +297 -341
  122. data/lib/cumo/narray.rb +2 -0
  123. data/lib/cumo.rb +3 -1
  124. data/test/bit_test.rb +157 -0
  125. data/test/cuda/compiler_test.rb +69 -0
  126. data/test/cuda/device_test.rb +31 -0
  127. data/test/cuda/memory_pool_test.rb +45 -0
  128. data/test/cuda/nvrtc_test.rb +51 -0
  129. data/test/cuda/runtime_test.rb +28 -0
  130. data/test/cudnn_test.rb +498 -0
  131. data/test/cumo_test.rb +27 -0
  132. data/test/narray_test.rb +745 -0
  133. data/test/ractor_test.rb +52 -0
  134. data/test/test_helper.rb +31 -0
  135. metadata +34 -54
  136. data/.travis.yml +0 -5
  137. data/numo-narray-version +0 -1
data/ext/cumo/depend.erb CHANGED
@@ -1,3 +1,5 @@
1
+ MAKEFLAGS = <%= ENV.fetch('MAKEFLAGS', "-j#{Etc.nprocessors}") %>
2
+
1
3
  TAGSRC = \
2
4
  ../../ruby/include/ruby/*.h \
3
5
  ../../ruby/*.c \
@@ -11,17 +13,17 @@ tags : TAGS
11
13
  TAGS : $(TAGSRC)
12
14
  etags $(TAGSRC)
13
15
 
14
- C_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.c").join(" ")%>
15
- CU_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.cu").join(" ")%>
16
+ C_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.c").join(" ")%>
17
+ CU_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.cu").join(" ")%>
16
18
 
17
- C_COGEN = narray/gen/cogen.rb
18
- CU_COGEN = narray/gen/cogen_kernel.rb
19
- C_DEPENDS = $(C_TMPL) narray/gen/*.rb
20
- CU_DEPENDS = $(CU_TMPL) narray/gen/*.rb
19
+ C_COGEN = <%= __dir__ %>/narray/gen/cogen.rb
20
+ CU_COGEN = <%= __dir__ %>/narray/gen/cogen_kernel.rb
21
+ C_DEPENDS = $(C_TMPL) <%= __dir__ %>/narray/gen/*.rb
22
+ CU_DEPENDS = $(CU_TMPL) <%= __dir__ %>/narray/gen/*.rb
21
23
 
22
24
  <%
23
25
  list_type_c = []
24
- list_type_rb = Dir.glob("narray/gen/def/*.rb")
26
+ list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
25
27
  list_type_rb.each do |type_rb|
26
28
  type_name = File.basename(type_rb, ".rb")
27
29
  next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
@@ -34,7 +36,7 @@ list_type_rb.each do |type_rb|
34
36
 
35
37
  <%
36
38
  list_type_cu = []
37
- list_type_rb = Dir.glob("narray/gen/def/*.rb")
39
+ list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
38
40
  list_type_rb.each do |type_rb|
39
41
  type_name = File.basename(type_rb, ".rb")
40
42
  next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
@@ -47,12 +49,12 @@ list_type_rb.each do |type_rb|
47
49
 
48
50
  src : <%= list_type_cu.join(" ") %> <%= list_type_c.join(" ") %>
49
51
 
50
- build-ctest : cuda/memory_pool_impl_test.exe
52
+ build-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
51
53
 
52
- run-ctest : cuda/memory_pool_impl_test.exe
54
+ run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
53
55
  ./$<
54
56
 
55
- cuda/memory_pool_impl_test.exe: cuda/memory_pool_impl_test.cpp cuda/memory_pool_impl.cpp cuda/memory_pool_impl.hpp
56
- nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< cuda/memory_pool_impl.cpp
57
+ <%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
58
+ nvcc -std=c++17 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
57
59
 
58
- CLEANOBJS = *.o */*.o */*/*.o *.bak narray/types/*.c narray/types/*_kernel.cu *.exe */*.exe
60
+ CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe
data/ext/cumo/extconf.rb CHANGED
@@ -1,39 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rbconfig.rb'
4
+ require 'fileutils'
2
5
  require "erb"
6
+ require 'etc'
3
7
  require_relative '../../3rd_party/mkmf-cu/lib/mkmf-cu'
4
8
 
5
- if RUBY_VERSION < "2.0.0"
6
- puts "Cumo::NArray requires Ruby version 2.0 or later."
7
- exit(1)
8
- end
9
-
10
- def have_numo_narray!
11
- version_path = File.join(__dir__, "..", "..", "numo-narray-version")
12
- version = File.read(version_path).strip
13
- gem_spec = Gem::Specification.find_by_name("numo-narray", version)
14
-
15
- $INCFLAGS += " -I#{gem_spec.gem_dir}/ext/numo/narray"
16
- if !have_header("numo/narray.h")
17
- puts "
18
- Header numo/narray.h was not found. Give pathname as follows:
19
- % ruby extconf.rb --with-narray-include=narray_h_dir"
20
- exit(1)
21
- end
22
-
23
- if RUBY_PLATFORM =~ /cygwin|mingw/
24
- $LDFLAGS += " -L#{gem_spec.gem_dir}/ext/numo"
25
- unless have_library("narray","nary_new")
26
- puts "libnarray.a not found"
27
- exit(1)
28
- end
29
- end
9
+ def d(file)
10
+ File.join(__dir__, file)
30
11
  end
31
12
 
32
13
  def create_depend
33
14
  message "creating depend\n"
34
- depend_path = File.join(__dir__, "depend")
35
- File.open(depend_path, "w") do |depend|
36
- depend_erb_path = File.join(__dir__, "depend.erb")
15
+ File.open(d("depend"), "w") do |depend|
16
+ depend_erb_path = d("depend.erb")
37
17
  File.open(depend_erb_path, "r") do |depend_erb|
38
18
  erb = ERB.new(depend_erb.read)
39
19
  erb.filename = depend_erb_path
@@ -42,20 +22,20 @@ def create_depend
42
22
  end
43
23
  end
44
24
 
45
- rm_f 'include/cumo/extconf.h'
25
+ rm_f d('include/cumo/extconf.h')
46
26
 
47
27
  MakeMakefileCuda.install!(cxx: true)
48
28
 
49
29
  if ENV['DEBUG']
50
30
  $CFLAGS << " -g -O0 -Wall"
51
31
  end
52
- $CXXFLAGS << " -std=c++14"
32
+ $CXXFLAGS << " -std=c++17"
53
33
  #$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
54
34
  #$CFLAGS=" $(cflags) -O3"
55
- $INCFLAGS = "-Iinclude -Inarray -Icuda #{$INCFLAGS}"
35
+ $INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
56
36
 
57
- $INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map{|x| [x,'$(archdir)'] }
58
- $INSTALLFILES << ['include/cumo/extconf.h','$(archdir)']
37
+ $INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map { |x| [x, '$(archdir)'] }
38
+ $INSTALLFILES << ['include/cumo/extconf.h', '$(archdir)']
59
39
  if /cygwin|mingw/ =~ RUBY_PLATFORM
60
40
  $INSTALLFILES << ['libcumo.a', '$(archdir)']
61
41
  end
@@ -113,17 +93,10 @@ cuda/cudnn
113
93
  cuda/cudnn_impl
114
94
  )
115
95
 
116
- if RUBY_VERSION[0..3] == "2.1."
117
- puts "add kwargs"
118
- srcs << "kwargs"
119
- end
120
-
121
- $objs = srcs.map {|src| "#{src}.o" }
96
+ $objs = srcs.map { |src| "#{src}.o" }
122
97
 
123
98
  dir_config("narray")
124
99
 
125
- have_numo_narray!
126
-
127
100
  if have_header("dlfcn.h")
128
101
  exit(1) unless have_library("dl")
129
102
  exit(1) unless have_func("dlopen")
@@ -147,14 +120,14 @@ end
147
120
 
148
121
  have_type("bool", stdbool)
149
122
  unless have_type("u_int8_t", stdint)
150
- have_type("uint8_t",stdint)
123
+ have_type("uint8_t", stdint)
151
124
  end
152
125
  unless have_type("u_int16_t", stdint)
153
- have_type("uint16_t",stdint)
126
+ have_type("uint16_t", stdint)
154
127
  end
155
128
  have_type("int32_t", stdint)
156
129
  unless have_type("u_int32_t", stdint)
157
- have_type("uint32_t",stdint)
130
+ have_type("uint32_t", stdint)
158
131
  end
159
132
  have_type("int64_t", stdint)
160
133
  unless have_type("u_int64_t", stdint)
@@ -162,17 +135,22 @@ unless have_type("u_int64_t", stdint)
162
135
  end
163
136
  have_func("exp10")
164
137
  have_func("rb_arithmetic_sequence_extract")
138
+ have_func("RTYPEDDATA_GET_DATA")
165
139
 
166
140
  have_var("rb_cComplex")
167
141
  have_func("rb_thread_call_without_gvl")
168
142
 
169
- create_header('include/cumo/extconf.h')
143
+ create_header d('include/cumo/extconf.h')
170
144
  $extconf_h = nil # nvcc does not support #include RUBY_EXTCONF_H
171
145
 
146
+ # Create *.o directories
147
+ FileUtils.mkdir_p('narray')
148
+ FileUtils.mkdir_p('cuda')
149
+
172
150
  create_depend
173
151
 
174
- HEADER_DIRS = (ENV['CPATH'] || '').split(':')
175
- LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(':')
152
+ HEADER_DIRS = (ENV['CPATH'] || '').split(File::PATH_SEPARATOR)
153
+ LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(File::PATH_SEPARATOR)
176
154
  dir_config('cumo', HEADER_DIRS, LIB_DIRS)
177
155
 
178
156
  have_library('cuda')
@@ -186,4 +164,12 @@ if have_library('cudnn') # TODO(sonots): cuDNN version check
186
164
  $CXXFLAGS << " -DCUDNN_FOUND"
187
165
  end
188
166
 
167
+ have_library('stdc++')
168
+
189
169
  create_makefile('cumo')
170
+
171
+ begin
172
+ require 'extconf_compile_commands_json'
173
+ ExtconfCompileCommandsJson.generate!
174
+ rescue LoadError
175
+ end
@@ -14,9 +14,11 @@ extern "C" {
14
14
  #endif
15
15
  #endif
16
16
 
17
+ extern VALUE cumo_cuda_eCUDNNError;
18
+
17
19
  #ifdef CUDNN_FOUND
18
20
 
19
- VALUE cumo_na_eShapeError;
21
+ extern VALUE cumo_na_eShapeError;
20
22
 
21
23
  #define CUMO_CUDA_CUDNN_DEFAULT_MAX_WORKSPACE_SIZE 8 * 1024 * 1024
22
24
 
@@ -28,8 +28,10 @@ class cumo_thrust_strided_range
28
28
 
29
29
  typedef typename thrust::iterator_difference<Iterator>::type difference_type;
30
30
 
31
- struct stride_functor : public thrust::unary_function<difference_type,difference_type>
31
+ struct stride_functor
32
32
  {
33
+ using argument_type = difference_type;
34
+ using result_type = difference_type;
33
35
  difference_type stride;
34
36
 
35
37
  stride_functor(difference_type stride)
@@ -86,8 +88,10 @@ struct cumo_thrust_minmax_pair
86
88
  // returns a cumo_thrust_minmax_pair whose minimum and maximum values
87
89
  // are initialized to x.
88
90
  template <typename T>
89
- struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thrust_minmax_pair<T> >
91
+ struct cumo_thrust_minmax_unary_op
90
92
  {
93
+ using argument_type = T;
94
+ using result_type = cumo_thrust_minmax_pair<T>;
91
95
  __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const T& x) const
92
96
  {
93
97
  cumo_thrust_minmax_pair<T> result;
@@ -102,8 +106,11 @@ struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thru
102
106
  // maximum values are the min() and max() respectively of
103
107
  // the minimums and maximums of the input pairs
104
108
  template <typename T>
105
- struct cumo_thrust_minmax_binary_op : public thrust::binary_function< cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T> >
109
+ struct cumo_thrust_minmax_binary_op
106
110
  {
111
+ using first_argument_type = cumo_thrust_minmax_pair<T>;
112
+ using second_argument_type = cumo_thrust_minmax_pair<T>;
113
+ using result_type = cumo_thrust_minmax_pair<T>;
107
114
  __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const cumo_thrust_minmax_pair<T>& x, const cumo_thrust_minmax_pair<T>& y) const
108
115
  {
109
116
  cumo_thrust_minmax_pair<T> result;
@@ -157,10 +164,10 @@ struct cumo_thrust_variance_unary_op
157
164
  // all values that have been aggregated so far
158
165
  template <typename T>
159
166
  struct cumo_thrust_variance_binary_op
160
- : public thrust::binary_function<const cumo_thrust_variance_data<T>&,
161
- const cumo_thrust_variance_data<T>&,
162
- cumo_thrust_variance_data<T> >
163
167
  {
168
+ using first_argument_type = const cumo_thrust_variance_data<T>&;
169
+ using second_argument_type = const cumo_thrust_variance_data<T>&;
170
+ using result_type = cumo_thrust_variance_data<T>;
164
171
  __host__ __device__
165
172
  cumo_thrust_variance_data<T> operator()(const cumo_thrust_variance_data<T>& x, const cumo_thrust_variance_data <T>& y) const
166
173
  {
@@ -49,10 +49,10 @@ struct cumo_thrust_complex_variance_unary_op
49
49
  // all values that have been aggregated so far
50
50
  template <typename T, typename R>
51
51
  struct cumo_thrust_complex_variance_binary_op
52
- : public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
53
- const cumo_thrust_complex_variance_data<T,R>&,
54
- cumo_thrust_complex_variance_data<T,R> >
55
52
  {
53
+ using first_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
54
+ using second_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
55
+ using result_type = cumo_thrust_complex_variance_data<T,R>;
56
56
  __host__ __device__
57
57
  cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
58
58
  {
@@ -79,6 +79,7 @@ void cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep);
79
79
  // used in aref, aset
80
80
  int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
81
81
  VALUE cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
82
+ VALUE cumo_na_at_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
82
83
 
83
84
  // defined in array, used in math
84
85
  VALUE cumo_na_ary_composition_dtype(VALUE ary);
@@ -141,7 +141,7 @@ extern "C" {
141
141
  # endif
142
142
  #endif
143
143
 
144
- #if SIZEOF_VALUE > 4
144
+ #if SIZEOF_LONG > 4
145
145
  # undef INT322NUM
146
146
  # undef UINT322NUM
147
147
  # define INT322NUM(x) INT2FIX(x)
@@ -329,6 +329,12 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
329
329
  #define CUMO_RNARRAY_VIEW(val) ((cumo_narray_view_t*)DATA_PTR(val))
330
330
  #define CUMO_RNARRAY_FILEMAP(val) ((cumo_narray_filemap_t*)DATA_PTR(val))
331
331
 
332
+ #ifdef HAVE_RTYPEDDATA_GET_DATA
333
+ #define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)RTYPEDDATA_GET_DATA(ptr))
334
+ #else
335
+ #define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)DATA_PTR(ptr))
336
+ #endif
337
+
332
338
  #define CUMO_RNARRAY_NDIM(val) (CUMO_RNARRAY(val)->ndim)
333
339
  #define CUMO_RNARRAY_TYPE(val) (CUMO_RNARRAY(val)->type)
334
340
  #define CUMO_RNARRAY_FLAG(val) (CUMO_RNARRAY(val)->flag)
@@ -483,6 +489,12 @@ typedef unsigned int CUMO_BIT_DIGIT;
483
489
  #include "cumo/ndloop.h"
484
490
  #include "cumo/intern.h"
485
491
 
492
+ // for Ractor support code
493
+ #ifndef HAVE_RB_EXT_RACTOR_SAFE
494
+ # undef RUBY_TYPED_FROZEN_SHAREABLE
495
+ # define RUBY_TYPED_FROZEN_SHAREABLE 0
496
+ #endif
497
+
486
498
  #if defined(__cplusplus)
487
499
  #if 0
488
500
  { /* satisfy cc-mode */
@@ -112,9 +112,8 @@
112
112
  size_t dig = (pos) / CUMO_NB; \
113
113
  int bit = (pos) % CUMO_NB; \
114
114
  ((CUMO_BIT_DIGIT*)(adr))[dig] = \
115
- (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
115
+ (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
116
116
  }
117
- // val -> val&1 ??
118
117
 
119
118
  #define CUMO_STORE_BIT_STEP( adr, pos, step, idx, val )\
120
119
  { \
@@ -129,9 +128,8 @@
129
128
  pos += step; \
130
129
  } \
131
130
  ((CUMO_BIT_DIGIT*)(adr))[dig] = \
132
- (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
131
+ (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
133
132
  }
134
- // val -> val&1 ??
135
133
 
136
134
  static inline int
137
135
  cumo_is_aligned(const void *ptr, const size_t alignment)
@@ -117,7 +117,7 @@ static inline dtype c_from_dcomplex(cumo_dcomplex x) {
117
117
  #define m_acosh(x) c_acosh(x)
118
118
  #define m_atanh(x) c_atanh(x)
119
119
  #define m_hypot(x,y) c_hypot(x,y)
120
- #define m_sinc(x) c_div(c_sin(x),x)
120
+ #define m_sinc(x) ((REAL(x)==0 && IMAG(x)==0) ? (c_new(1,0)):(c_div(c_sin(x),x)))
121
121
 
122
122
  #define m_sum_init INT2FIX(0)
123
123
  #define m_mulsum_init INT2FIX(0)
@@ -157,18 +157,27 @@ __host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
157
157
  /* --------- thrust ----------------- */
158
158
  #include "cumo/cuda/cumo_thrust_complex.hpp"
159
159
 
160
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
160
+ struct cumo_thrust_plus
161
161
  {
162
+ using first_argument_type = dtype;
163
+ using second_argument_type = dtype;
164
+ using result_type = dtype;
162
165
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
163
166
  };
164
167
 
165
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
168
+ struct cumo_thrust_multiplies
166
169
  {
170
+ using first_argument_type = dtype;
171
+ using second_argument_type = dtype;
172
+ using result_type = dtype;
167
173
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
168
174
  };
169
175
 
170
- struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
176
+ struct cumo_thrust_multiplies_mulsum_nan
171
177
  {
178
+ using first_argument_type = dtype;
179
+ using second_argument_type = dtype;
180
+ using result_type = dtype;
172
181
  __host__ __device__ dtype operator()(dtype x, dtype y) {
173
182
  if (not_nan(x) && not_nan(y)) {
174
183
  return m_mul(x, y);
@@ -178,8 +187,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
178
187
  }
179
188
  };
180
189
 
181
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
190
+ struct cumo_thrust_square
182
191
  {
192
+ using argument_type = dtype;
193
+ using result_type = dtype;
183
194
  __host__ __device__ rtype operator()(const dtype& x) const { return c_abs_square(x); }
184
195
  };
185
196
 
@@ -12,7 +12,7 @@ extern double pow(double, double);
12
12
  #define m_zero 0.0
13
13
  #define m_one 1.0
14
14
 
15
- #define m_num_to_data(x) NUM2DBL(x)
15
+ #define m_num_to_data(x) (NIL_P(x) ? nan("") : NUM2DBL(x))
16
16
  #define m_data_to_num(x) rb_float_new(x)
17
17
 
18
18
  #define m_from_double(x) (x)
@@ -110,7 +110,7 @@ extern double pow(double, double);
110
110
  #define m_atanh(x) atanh(x)
111
111
  #define m_atan2(x,y) atan2(x,y)
112
112
  #define m_hypot(x,y) hypot(x,y)
113
- #define m_sinc(x) (sin(x)/(x))
113
+ #define m_sinc(x) (((x)==0) ? 1.0:(sin(x)/(x)))
114
114
 
115
115
  #define m_erf(x) erf(x)
116
116
  #define m_erfc(x) erfc(x)
@@ -72,18 +72,27 @@ __host__ __device__ static inline dtype f_minimum_nan(dtype x, dtype y)
72
72
  /* --------- thrust ----------------- */
73
73
  #include "cumo/cuda/cumo_thrust.hpp"
74
74
 
75
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
75
+ struct cumo_thrust_plus
76
76
  {
77
+ using first_argument_type = dtype;
78
+ using second_argument_type = dtype;
79
+ using result_type = dtype;
77
80
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
78
81
  };
79
82
 
80
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
83
+ struct cumo_thrust_multiplies
81
84
  {
85
+ using first_argument_type = dtype;
86
+ using second_argument_type = dtype;
87
+ using result_type = dtype;
82
88
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
83
89
  };
84
90
 
85
- struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
91
+ struct cumo_thrust_multiplies_mulsum_nan
86
92
  {
93
+ using first_argument_type = dtype;
94
+ using second_argument_type = dtype;
95
+ using result_type = dtype;
87
96
  __host__ __device__ dtype operator()(dtype x, dtype y) {
88
97
  if (not_nan(x) && not_nan(y)) {
89
98
  return m_mul(x, y);
@@ -93,8 +102,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
93
102
  }
94
103
  };
95
104
 
96
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
105
+ struct cumo_thrust_square
97
106
  {
107
+ using argument_type = dtype;
108
+ using result_type = dtype;
98
109
  __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
99
110
  };
100
111
 
@@ -1,8 +1,9 @@
1
1
  #define m_zero 0
2
2
  #define m_one 1
3
3
 
4
- #define m_from_double(x) (x)
5
- #define m_from_real(x) (x)
4
+ /* Handle negative values consistently across platforms for unsigned integer types */
5
+ #define m_from_double(x) ((x) < 0 ? (dtype)((long long)(x)) : (dtype)(x))
6
+ #define m_from_real(x) ((x) < 0 ? (dtype)((long long)(x)) : (dtype)(x))
6
7
  #define m_from_sint(x) (x)
7
8
  #define m_from_int32(x) (x)
8
9
  #define m_from_int64(x) (x)
@@ -70,18 +70,26 @@ __host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
70
70
  /* --------- thrust ----------------- */
71
71
  #include "cumo/cuda/cumo_thrust.hpp"
72
72
 
73
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
73
+ struct cumo_thrust_plus
74
74
  {
75
+ using first_argument_type = dtype;
76
+ using second_argument_type = dtype;
77
+ using result_type = dtype;
75
78
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
76
79
  };
77
80
 
78
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
81
+ struct cumo_thrust_multiplies
79
82
  {
83
+ using first_argument_type = dtype;
84
+ using second_argument_type = dtype;
85
+ using result_type = dtype;
80
86
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
81
87
  };
82
88
 
83
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
89
+ struct cumo_thrust_square
84
90
  {
91
+ using argument_type = dtype;
92
+ using result_type = dtype;
85
93
  __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
86
94
  };
87
95
 
@@ -10,8 +10,8 @@ extern "C" {
10
10
  #endif
11
11
  #endif
12
12
 
13
- #define CUMO_VERSION "0.4.3"
14
- #define CUMO_VERSION_CODE 43
13
+ #define CUMO_VERSION "0.5.1"
14
+ #define CUMO_VERSION_CODE 51
15
15
 
16
16
  bool cumo_compatible_mode_enabled_p();
17
17
  bool cumo_show_warning_enabled_p();
@@ -366,7 +366,7 @@ cumo_na_composition3_ary(VALUE ary, VALUE *ptype, VALUE *pshape, VALUE *pnary)
366
366
  VALUE dtype, dshape;
367
367
 
368
368
  mdai = cumo_na_mdai_alloc(ary);
369
- vmdai = TypedData_Wrap_Struct(rb_cData, &mdai_data_type, (void*)mdai);
369
+ vmdai = TypedData_Wrap_Struct(rb_cObject, &mdai_data_type, (void*)mdai);
370
370
  if ( cumo_na_mdai_investigate(mdai, 1) ) {
371
371
  // empty
372
372
  dtype = update_type(ptype, cumo_cInt32);
@@ -466,11 +466,13 @@ cumo_na_s_array_shape(VALUE mod, VALUE ary)
466
466
  @return [Cumo::NArray]
467
467
  @example
468
468
  Cumo::NArray.new_like([[1,2,3],[4,5,6]])
469
- => Cumo::Int32#shape=[2,3](empty)
469
+ # => Cumo::Int32#shape=[2,3](empty)
470
+
470
471
  Cumo::DFloat.new_like([[1,2],[3,4]])
471
- => Cumo::DFloat#shape=[2,2](empty)
472
+ # => Cumo::DFloat#shape=[2,2](empty)
473
+
472
474
  Cumo::NArray.new_like([1,2i,3])
473
- => Cumo::DComplex#shape=[3](empty)
475
+ # => Cumo::DComplex#shape=[3](empty)
474
476
  */
475
477
  VALUE
476
478
  cumo_na_s_new_like(VALUE type, VALUE obj)
@@ -612,7 +614,7 @@ cumo_na_ary_composition_for_struct(VALUE nstruct, VALUE ary)
612
614
 
613
615
  mdai = cumo_na_mdai_alloc(ary);
614
616
  mdai->na_type = nstruct;
615
- vmdai = TypedData_Wrap_Struct(rb_cData, &mdai_data_type, (void*)mdai);
617
+ vmdai = TypedData_Wrap_Struct(rb_cObject, &mdai_data_type, (void*)mdai);
616
618
  cumo_na_mdai_for_struct(mdai, 0);
617
619
  nc = cumo_na_compose_alloc();
618
620
  vnc = WrapCompose(nc);
@@ -626,7 +628,7 @@ cumo_na_ary_composition_for_struct(VALUE nstruct, VALUE ary)
626
628
 
627
629
 
628
630
  void
629
- Init_cumo_na_array()
631
+ Init_cumo_na_array(void)
630
632
  {
631
633
  rb_define_singleton_method(cNArray, "array_shape", cumo_na_s_array_shape, 1);
632
634
  rb_define_singleton_method(cNArray, "array_type", cumo_na_s_array_type, 1);