RubyGems - cumo - Versions diffs - 0.4.3 → 0.5.1 - Mend

cumo 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

checksums.yaml +4 -4
data/.gitignore +3 -0
data/.rubocop.yml +15 -0
data/.rubocop_todo.yml +1252 -0
data/3rd_party/mkmf-cu/Gemfile +2 -0
data/3rd_party/mkmf-cu/Rakefile +2 -1
data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
data/CHANGELOG.md +85 -0
data/Dockerfile +34 -0
data/Gemfile +6 -1
data/README.md +2 -10
data/Rakefile +8 -11
data/bench/broadcast_fp32.rb +28 -26
data/bench/cumo_bench.rb +18 -16
data/bench/numo_bench.rb +18 -16
data/bench/reduction_fp32.rb +14 -12
data/bin/console +1 -0
data/cumo.gemspec +6 -9
data/docker-build.sh +4 -0
data/docker-launch.sh +4 -0
data/docs/src-tree.md +1 -1
data/ext/cumo/cuda/cudnn.c +2 -2
data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
data/ext/cumo/cuda/driver.c +8 -0
data/ext/cumo/cumo.c +7 -3
data/ext/cumo/depend.erb +15 -13
data/ext/cumo/extconf.rb +33 -47
data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
data/ext/cumo/include/cumo/intern.h +1 -0
data/ext/cumo/include/cumo/narray.h +13 -1
data/ext/cumo/include/cumo/template.h +2 -4
data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
data/ext/cumo/include/cumo/types/float_macro.h +2 -2
data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
data/ext/cumo/include/cumo.h +2 -2
data/ext/cumo/narray/array.c +8 -6
data/ext/cumo/narray/data.c +48 -28
data/ext/cumo/narray/gen/cogen.rb +8 -7
data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
data/ext/cumo/narray/gen/def/bit.rb +3 -1
data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
data/ext/cumo/narray/gen/def/int16.rb +2 -0
data/ext/cumo/narray/gen/def/int32.rb +2 -0
data/ext/cumo/narray/gen/def/int64.rb +2 -0
data/ext/cumo/narray/gen/def/int8.rb +2 -0
data/ext/cumo/narray/gen/def/robject.rb +2 -0
data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
data/ext/cumo/narray/gen/def/uint16.rb +2 -0
data/ext/cumo/narray/gen/def/uint32.rb +2 -0
data/ext/cumo/narray/gen/def/uint64.rb +2 -0
data/ext/cumo/narray/gen/def/uint8.rb +2 -0
data/ext/cumo/narray/gen/erbln.rb +9 -7
data/ext/cumo/narray/gen/erbpp2.rb +26 -24
data/ext/cumo/narray/gen/narray_def.rb +13 -11
data/ext/cumo/narray/gen/spec.rb +58 -55
data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
data/ext/cumo/narray/gen/tmpl/at.c +34 -0
data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
data/ext/cumo/narray/gen/tmpl/each.c +4 -2
data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
data/ext/cumo/narray/gen/tmpl/median.c +2 -2
data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
data/ext/cumo/narray/index.c +244 -40
data/ext/cumo/narray/index_kernel.cu +84 -0
data/ext/cumo/narray/narray.c +57 -19
data/ext/cumo/narray/ndloop.c +1 -1
data/ext/cumo/narray/struct.c +1 -1
data/lib/cumo/cuda/compile_error.rb +1 -1
data/lib/cumo/cuda/compiler.rb +23 -22
data/lib/cumo/cuda/cudnn.rb +1 -1
data/lib/cumo/cuda/device.rb +1 -1
data/lib/cumo/cuda/link_state.rb +2 -2
data/lib/cumo/cuda/module.rb +1 -2
data/lib/cumo/cuda/nvrtc_program.rb +3 -2
data/lib/cumo/cuda.rb +2 -0
data/lib/cumo/linalg.rb +2 -0
data/lib/cumo/narray/extra.rb +297 -341
data/lib/cumo/narray.rb +2 -0
data/lib/cumo.rb +3 -1
data/test/bit_test.rb +157 -0
data/test/cuda/compiler_test.rb +69 -0
data/test/cuda/device_test.rb +31 -0
data/test/cuda/memory_pool_test.rb +45 -0
data/test/cuda/nvrtc_test.rb +51 -0
data/test/cuda/runtime_test.rb +28 -0
data/test/cudnn_test.rb +498 -0
data/test/cumo_test.rb +27 -0
data/test/narray_test.rb +745 -0
data/test/ractor_test.rb +52 -0
data/test/test_helper.rb +31 -0
metadata +34 -54
data/.travis.yml +0 -5
data/numo-narray-version +0 -1

data/ext/cumo/depend.erb CHANGED Viewed

@@ -1,3 +1,5 @@
+MAKEFLAGS = <%= ENV.fetch('MAKEFLAGS', "-j#{Etc.nprocessors}") %>
 TAGSRC = \
  ../../ruby/include/ruby/*.h \
  ../../ruby/*.c \
@@ -11,17 +13,17 @@ tags : TAGS
 TAGS : $(TAGSRC)
 	etags $(TAGSRC)
-C_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.c").join(" ")%>
-CU_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.cu").join(" ")%>
+C_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.c").join(" ")%>
+CU_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.cu").join(" ")%>
-C_COGEN = narray/gen/cogen.rb
-CU_COGEN = narray/gen/cogen_kernel.rb
-C_DEPENDS = $(C_TMPL) narray/gen/*.rb
-CU_DEPENDS = $(CU_TMPL) narray/gen/*.rb
+C_COGEN = <%= __dir__ %>/narray/gen/cogen.rb
+CU_COGEN = <%= __dir__ %>/narray/gen/cogen_kernel.rb
+C_DEPENDS = $(C_TMPL) <%= __dir__ %>/narray/gen/*.rb
+CU_DEPENDS = $(CU_TMPL) <%= __dir__ %>/narray/gen/*.rb
 <%
 list_type_c = []
-list_type_rb = Dir.glob("narray/gen/def/*.rb")
+list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
 list_type_rb.each do |type_rb|
   type_name = File.basename(type_rb, ".rb")
   next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
@@ -34,7 +36,7 @@ list_type_rb.each do |type_rb|
 <%
 list_type_cu = []
-list_type_rb = Dir.glob("narray/gen/def/*.rb")
+list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
 list_type_rb.each do |type_rb|
   type_name = File.basename(type_rb, ".rb")
   next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
@@ -47,12 +49,12 @@ list_type_rb.each do |type_rb|
 src : <%= list_type_cu.join(" ") %> <%= list_type_c.join(" ") %>
-build-ctest : cuda/memory_pool_impl_test.exe
+build-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
-run-ctest : cuda/memory_pool_impl_test.exe
+run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
 	./$<
-cuda/memory_pool_impl_test.exe: cuda/memory_pool_impl_test.cpp cuda/memory_pool_impl.cpp cuda/memory_pool_impl.hpp
-	nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< cuda/memory_pool_impl.cpp
+<%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
+	nvcc -std=c++17 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
-CLEANOBJS = *.o */*.o */*/*.o *.bak narray/types/*.c narray/types/*_kernel.cu *.exe */*.exe
+CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe

data/ext/cumo/extconf.rb CHANGED Viewed

@@ -1,39 +1,19 @@
+# frozen_string_literal: true
 require 'rbconfig.rb'
+require 'fileutils'
 require "erb"
+require 'etc'
 require_relative '../../3rd_party/mkmf-cu/lib/mkmf-cu'
-if RUBY_VERSION < "2.0.0"
-  puts "Cumo::NArray requires Ruby version 2.0 or later."
-  exit(1)
-end
-def have_numo_narray!
-  version_path = File.join(__dir__, "..", "..", "numo-narray-version")
-  version = File.read(version_path).strip
-  gem_spec = Gem::Specification.find_by_name("numo-narray", version)
-  $INCFLAGS += " -I#{gem_spec.gem_dir}/ext/numo/narray"
-  if !have_header("numo/narray.h")
-    puts "
-    Header numo/narray.h was not found. Give pathname as follows:
-    % ruby extconf.rb --with-narray-include=narray_h_dir"
-    exit(1)
-  end
-  if RUBY_PLATFORM =~ /cygwin|mingw/
-    $LDFLAGS += " -L#{gem_spec.gem_dir}/ext/numo"
-    unless have_library("narray","nary_new")
-      puts "libnarray.a not found"
-      exit(1)
-    end
-  end
+def d(file)
+  File.join(__dir__, file)
 end
 def create_depend
   message "creating depend\n"
-  depend_path = File.join(__dir__, "depend")
-  File.open(depend_path, "w") do |depend|
-    depend_erb_path = File.join(__dir__, "depend.erb")
+  File.open(d("depend"), "w") do |depend|
+    depend_erb_path = d("depend.erb")
     File.open(depend_erb_path, "r") do |depend_erb|
       erb = ERB.new(depend_erb.read)
       erb.filename = depend_erb_path
@@ -42,20 +22,20 @@ def create_depend
   end
 end
-rm_f 'include/cumo/extconf.h'
+rm_f d('include/cumo/extconf.h')
 MakeMakefileCuda.install!(cxx: true)
 if ENV['DEBUG']
   $CFLAGS << " -g -O0 -Wall"
 end
-$CXXFLAGS << " -std=c++14"
+$CXXFLAGS << " -std=c++17"
 #$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
 #$CFLAGS=" $(cflags) -O3"
-$INCFLAGS = "-Iinclude -Inarray -Icuda #{$INCFLAGS}"
+$INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
-$INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map{|x| [x,'$(archdir)'] }
-$INSTALLFILES << ['include/cumo/extconf.h','$(archdir)']
+$INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map { |x| [x, '$(archdir)'] }
+$INSTALLFILES << ['include/cumo/extconf.h', '$(archdir)']
 if /cygwin|mingw/ =~ RUBY_PLATFORM
   $INSTALLFILES << ['libcumo.a', '$(archdir)']
 end
@@ -113,17 +93,10 @@ cuda/cudnn
 cuda/cudnn_impl
 )
-if RUBY_VERSION[0..3] == "2.1."
-  puts "add kwargs"
-  srcs << "kwargs"
-end
-$objs = srcs.map {|src| "#{src}.o" }
+$objs = srcs.map { |src| "#{src}.o" }
 dir_config("narray")
-have_numo_narray!
 if have_header("dlfcn.h")
   exit(1) unless have_library("dl")
   exit(1) unless have_func("dlopen")
@@ -147,14 +120,14 @@ end
 have_type("bool", stdbool)
 unless have_type("u_int8_t", stdint)
-  have_type("uint8_t",stdint)
+  have_type("uint8_t", stdint)
 end
 unless have_type("u_int16_t", stdint)
-  have_type("uint16_t",stdint)
+  have_type("uint16_t", stdint)
 end
 have_type("int32_t", stdint)
 unless have_type("u_int32_t", stdint)
-  have_type("uint32_t",stdint)
+  have_type("uint32_t", stdint)
 end
 have_type("int64_t", stdint)
 unless have_type("u_int64_t", stdint)
@@ -162,17 +135,22 @@ unless have_type("u_int64_t", stdint)
 end
 have_func("exp10")
 have_func("rb_arithmetic_sequence_extract")
+have_func("RTYPEDDATA_GET_DATA")
 have_var("rb_cComplex")
 have_func("rb_thread_call_without_gvl")
-create_header('include/cumo/extconf.h')
+create_header d('include/cumo/extconf.h')
 $extconf_h = nil # nvcc does not support #include RUBY_EXTCONF_H
+# Create *.o directories
+FileUtils.mkdir_p('narray')
+FileUtils.mkdir_p('cuda')
 create_depend
-HEADER_DIRS = (ENV['CPATH'] || '').split(':')
-LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(':')
+HEADER_DIRS = (ENV['CPATH'] || '').split(File::PATH_SEPARATOR)
+LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(File::PATH_SEPARATOR)
 dir_config('cumo', HEADER_DIRS, LIB_DIRS)
 have_library('cuda')
@@ -186,4 +164,12 @@ if have_library('cudnn') # TODO(sonots): cuDNN version check
   $CXXFLAGS << " -DCUDNN_FOUND"
 end
+have_library('stdc++')
 create_makefile('cumo')
+begin
+  require 'extconf_compile_commands_json'
+  ExtconfCompileCommandsJson.generate!
+rescue LoadError
+end

data/ext/cumo/include/cumo/cuda/cudnn.h CHANGED Viewed

@@ -14,9 +14,11 @@ extern "C" {
 #endif
 #endif
+extern VALUE cumo_cuda_eCUDNNError;
 #ifdef CUDNN_FOUND
-VALUE cumo_na_eShapeError;
+extern VALUE cumo_na_eShapeError;
 #define CUMO_CUDA_CUDNN_DEFAULT_MAX_WORKSPACE_SIZE 8 * 1024 * 1024

data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp CHANGED Viewed

@@ -28,8 +28,10 @@ class cumo_thrust_strided_range
     typedef typename thrust::iterator_difference<Iterator>::type difference_type;
-    struct stride_functor : public thrust::unary_function<difference_type,difference_type>
+    struct stride_functor
     {
+        using argument_type = difference_type;
+        using result_type   = difference_type;
         difference_type stride;
         stride_functor(difference_type stride)
@@ -86,8 +88,10 @@ struct cumo_thrust_minmax_pair
 // returns a cumo_thrust_minmax_pair whose minimum and maximum values
 // are initialized to x.
 template <typename T>
-struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thrust_minmax_pair<T> >
+struct cumo_thrust_minmax_unary_op
 {
+    using argument_type = T;
+    using result_type   = cumo_thrust_minmax_pair<T>;
     __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const T& x) const
     {
         cumo_thrust_minmax_pair<T> result;
@@ -102,8 +106,11 @@ struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thru
 // maximum values are the min() and max() respectively of
 // the minimums and maximums of the input pairs
 template <typename T>
-struct cumo_thrust_minmax_binary_op : public thrust::binary_function< cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T> >
+struct cumo_thrust_minmax_binary_op
 {
+    using first_argument_type  = cumo_thrust_minmax_pair<T>;
+    using second_argument_type = cumo_thrust_minmax_pair<T>;
+    using result_type          = cumo_thrust_minmax_pair<T>;
     __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const cumo_thrust_minmax_pair<T>& x, const cumo_thrust_minmax_pair<T>& y) const
     {
         cumo_thrust_minmax_pair<T> result;
@@ -157,10 +164,10 @@ struct cumo_thrust_variance_unary_op
 // all values that have been agregated so far
 template <typename T>
 struct cumo_thrust_variance_binary_op
-    : public thrust::binary_function<const cumo_thrust_variance_data<T>&,
-                                     const cumo_thrust_variance_data<T>&,
-                                           cumo_thrust_variance_data<T> >
 {
+    using first_argument_type  = const cumo_thrust_variance_data<T>&;
+    using second_argument_type = const cumo_thrust_variance_data<T>&;
+    using result_type          = cumo_thrust_variance_data<T>;
     __host__ __device__
     cumo_thrust_variance_data<T> operator()(const cumo_thrust_variance_data<T>& x, const cumo_thrust_variance_data <T>& y) const
     {

data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp CHANGED Viewed

@@ -49,10 +49,10 @@ struct cumo_thrust_complex_variance_unary_op
 // all values that have been agregated so far
 template <typename T, typename R>
 struct cumo_thrust_complex_variance_binary_op
-    : public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
-                                     const cumo_thrust_complex_variance_data<T,R>&,
-                                           cumo_thrust_complex_variance_data<T,R> >
 {
+    using first_argument_type  = const cumo_thrust_complex_variance_data<T,R>&;
+    using second_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
+    using result_type          = cumo_thrust_complex_variance_data<T,R>;
     __host__ __device__
     cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
     {

data/ext/cumo/include/cumo/intern.h CHANGED Viewed

@@ -79,6 +79,7 @@ void cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep);
 // used in aref, aset
 int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
 VALUE cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
+VALUE cumo_na_at_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
 // defined in array, used in math
 VALUE cumo_na_ary_composition_dtype(VALUE ary);

data/ext/cumo/include/cumo/narray.h CHANGED Viewed

@@ -141,7 +141,7 @@ extern "C" {
 # endif
 #endif
-#if SIZEOF_VALUE > 4
+#if SIZEOF_LONG > 4
 # undef INT322NUM
 # undef UINT322NUM
 # define INT322NUM(x) INT2FIX(x)
@@ -329,6 +329,12 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
 #define CUMO_RNARRAY_VIEW(val)       ((cumo_narray_view_t*)DATA_PTR(val))
 #define CUMO_RNARRAY_FILEMAP(val)    ((cumo_narray_filemap_t*)DATA_PTR(val))
+#ifdef HAVE_RTYPEDDATA_GET_DATA
+#define CUMO_RENUMERATOR_PTR(ptr)    ((cumo_enumerator_t *)RTYPEDDATA_GET_DATA(ptr))
+#else
+#define CUMO_RENUMERATOR_PTR(ptr)    ((cumo_enumerator_t *)DATA_PTR(ptr))
+#endif
 #define CUMO_RNARRAY_NDIM(val)       (CUMO_RNARRAY(val)->ndim)
 #define CUMO_RNARRAY_TYPE(val)       (CUMO_RNARRAY(val)->type)
 #define CUMO_RNARRAY_FLAG(val)       (CUMO_RNARRAY(val)->flag)
@@ -483,6 +489,12 @@ typedef unsigned int CUMO_BIT_DIGIT;
 #include "cumo/ndloop.h"
 #include "cumo/intern.h"
+// for Ractor support code
+#ifndef HAVE_RB_EXT_RACTOR_SAFE
+#   undef RUBY_TYPED_FROZEN_SHAREABLE
+#   define RUBY_TYPED_FROZEN_SHAREABLE 0
+#endif
 #if defined(__cplusplus)
 #if 0
 { /* satisfy cc-mode */

data/ext/cumo/include/cumo/template.h CHANGED Viewed

@@ -112,9 +112,8 @@
         size_t dig = (pos) / CUMO_NB;                \
         int    bit = (pos) % CUMO_NB;                \
         ((CUMO_BIT_DIGIT*)(adr))[dig] =              \
-            (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
+            (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
     }
-// val -> val&1 ??
 #define CUMO_STORE_BIT_STEP( adr, pos, step, idx, val )\
     {                                           \
@@ -129,9 +128,8 @@
             pos += step;                        \
         }                                       \
         ((CUMO_BIT_DIGIT*)(adr))[dig] =              \
-            (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
+            (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
     }
-// val -> val&1 ??
 static inline int
 cumo_is_aligned(const void *ptr, const size_t alignment)

data/ext/cumo/include/cumo/types/complex_macro.h CHANGED Viewed

@@ -117,7 +117,7 @@ static inline dtype c_from_dcomplex(cumo_dcomplex x) {
 #define m_acosh(x)   c_acosh(x)
 #define m_atanh(x)   c_atanh(x)
 #define m_hypot(x,y) c_hypot(x,y)
-#define m_sinc(x)    c_div(c_sin(x),x)
+#define m_sinc(x)    ((REAL(x)==0 && IMAG(x)==0) ? (c_new(1,0)):(c_div(c_sin(x),x)))
 #define m_sum_init INT2FIX(0)
 #define m_mulsum_init INT2FIX(0)

data/ext/cumo/include/cumo/types/complex_macro_kernel.h CHANGED Viewed

@@ -157,18 +157,27 @@ __host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
 /* --------- thrust ----------------- */
 #include "cumo/cuda/cumo_thrust_complex.hpp"
-struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_plus
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
 };
-struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_multiplies
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
 };
-struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_multiplies_mulsum_nan
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) {
         if (not_nan(x) && not_nan(y)) {
             return m_mul(x, y);
@@ -178,8 +187,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
     }
 };
-struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
+struct cumo_thrust_square
 {
+    using argument_type = dtype;
+    using result_type   = dtype;
     __host__ __device__ rtype operator()(const dtype& x) const { return c_abs_square(x); }
 };

data/ext/cumo/include/cumo/types/float_macro.h CHANGED Viewed

@@ -12,7 +12,7 @@ extern double pow(double, double);
 #define m_zero 0.0
 #define m_one  1.0
-#define m_num_to_data(x) NUM2DBL(x)
+#define m_num_to_data(x) (NIL_P(x) ? nan("") : NUM2DBL(x))
 #define m_data_to_num(x) rb_float_new(x)
 #define m_from_double(x) (x)
@@ -110,7 +110,7 @@ extern double pow(double, double);
 #define m_atanh(x)   atanh(x)
 #define m_atan2(x,y) atan2(x,y)
 #define m_hypot(x,y) hypot(x,y)
-#define m_sinc(x)    (sin(x)/(x))
+#define m_sinc(x)    (((x)==0) ? 1.0:(sin(x)/(x)))
 #define m_erf(x)     erf(x)
 #define m_erfc(x)    erfc(x)

data/ext/cumo/include/cumo/types/real_accum_kernel.h CHANGED Viewed

@@ -72,18 +72,27 @@ __host__ __device__ static inline dtype f_minimum_nan(dtype x, dtype y)
 /* --------- thrust ----------------- */
 #include "cumo/cuda/cumo_thrust.hpp"
-struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_plus
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
 };
-struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_multiplies
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
 };
-struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_multiplies_mulsum_nan
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) {
         if (not_nan(x) && not_nan(y)) {
             return m_mul(x, y);
@@ -93,8 +102,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
     }
 };
-struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
+struct cumo_thrust_square
 {
+    using argument_type = dtype;
+    using result_type   = dtype;
     __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
 };

data/ext/cumo/include/cumo/types/xint_macro.h CHANGED Viewed

@@ -1,8 +1,9 @@
 #define m_zero 0
 #define m_one  1
-#define m_from_double(x) (x)
-#define m_from_real(x) (x)
+/* Handle negative values consistently across platforms for unsigned integer types */
+#define m_from_double(x) ((x) < 0 ? (dtype)((long long)(x)) : (dtype)(x))
+#define m_from_real(x) ((x) < 0 ? (dtype)((long long)(x)) : (dtype)(x))
 #define m_from_sint(x) (x)
 #define m_from_int32(x) (x)
 #define m_from_int64(x) (x)

data/ext/cumo/include/cumo/types/xint_macro_kernel.h CHANGED Viewed

@@ -70,18 +70,26 @@ __host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
 /* --------- thrust ----------------- */
 #include "cumo/cuda/cumo_thrust.hpp"
-struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_plus
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
 };
-struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_multiplies
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
 };
-struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
+struct cumo_thrust_square
 {
+    using argument_type = dtype;
+    using result_type   = dtype;
     __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
 };

data/ext/cumo/include/cumo.h CHANGED Viewed

@@ -10,8 +10,8 @@ extern "C" {
 #endif
 #endif
-#define CUMO_VERSION "0.4.3"
-#define CUMO_VERSION_CODE 43
+#define CUMO_VERSION "0.5.1"
+#define CUMO_VERSION_CODE 51
 bool cumo_compatible_mode_enabled_p();
 bool cumo_show_warning_enabled_p();

data/ext/cumo/narray/array.c CHANGED Viewed

@@ -366,7 +366,7 @@ cumo_na_composition3_ary(VALUE ary, VALUE *ptype, VALUE *pshape, VALUE *pnary)
     VALUE dtype, dshape;
     mdai = cumo_na_mdai_alloc(ary);
-    vmdai = TypedData_Wrap_Struct(rb_cData, &mdai_data_type, (void*)mdai);
+    vmdai = TypedData_Wrap_Struct(rb_cObject, &mdai_data_type, (void*)mdai);
     if ( cumo_na_mdai_investigate(mdai, 1) ) {
         // empty
         dtype = update_type(ptype, cumo_cInt32);
@@ -466,11 +466,13 @@ cumo_na_s_array_shape(VALUE mod, VALUE ary)
   @return [Cumo::NArray]
   @example
     Cumo::NArray.new_like([[1,2,3],[4,5,6]])
-    => Cumo::Int32#shape=[2,3](empty)
+    # => Cumo::Int32#shape=[2,3](empty)
     Cumo::DFloat.new_like([[1,2],[3,4]])
-    => Cumo::DFloat#shape=[2,2](empty)
+    # => Cumo::DFloat#shape=[2,2](empty)
     Cumo::NArray.new_like([1,2i,3])
-    => Cumo::DComplex#shape=[3](empty)
+    # => Cumo::DComplex#shape=[3](empty)
 */
 VALUE
 cumo_na_s_new_like(VALUE type, VALUE obj)
@@ -612,7 +614,7 @@ cumo_na_ary_composition_for_struct(VALUE nstruct, VALUE ary)
     mdai = cumo_na_mdai_alloc(ary);
     mdai->na_type = nstruct;
-    vmdai = TypedData_Wrap_Struct(rb_cData, &mdai_data_type, (void*)mdai);
+    vmdai = TypedData_Wrap_Struct(rb_cObject, &mdai_data_type, (void*)mdai);
     cumo_na_mdai_for_struct(mdai, 0);
     nc = cumo_na_compose_alloc();
     vnc = WrapCompose(nc);
@@ -626,7 +628,7 @@ cumo_na_ary_composition_for_struct(VALUE nstruct, VALUE ary)
 void
-Init_cumo_na_array()
+Init_cumo_na_array(void)
 {
     rb_define_singleton_method(cNArray, "array_shape", cumo_na_s_array_shape, 1);
     rb_define_singleton_method(cNArray, "array_type", cumo_na_s_array_type, 1);