cumo 0.4.3 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +15 -0
- data/.rubocop_todo.yml +1252 -0
- data/3rd_party/mkmf-cu/Gemfile +2 -0
- data/3rd_party/mkmf-cu/Rakefile +2 -1
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
- data/CHANGELOG.md +85 -0
- data/Dockerfile +34 -0
- data/Gemfile +6 -1
- data/README.md +2 -10
- data/Rakefile +8 -11
- data/bench/broadcast_fp32.rb +28 -26
- data/bench/cumo_bench.rb +18 -16
- data/bench/numo_bench.rb +18 -16
- data/bench/reduction_fp32.rb +14 -12
- data/bin/console +1 -0
- data/cumo.gemspec +6 -9
- data/docker-build.sh +4 -0
- data/docker-launch.sh +4 -0
- data/docs/src-tree.md +1 -1
- data/ext/cumo/cuda/cudnn.c +2 -2
- data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
- data/ext/cumo/cuda/driver.c +8 -0
- data/ext/cumo/cumo.c +7 -3
- data/ext/cumo/depend.erb +15 -13
- data/ext/cumo/extconf.rb +33 -47
- data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +13 -1
- data/ext/cumo/include/cumo/template.h +2 -4
- data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/float_macro.h +2 -2
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +8 -6
- data/ext/cumo/narray/data.c +48 -28
- data/ext/cumo/narray/gen/cogen.rb +8 -7
- data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
- data/ext/cumo/narray/gen/def/bit.rb +3 -1
- data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/int16.rb +2 -0
- data/ext/cumo/narray/gen/def/int32.rb +2 -0
- data/ext/cumo/narray/gen/def/int64.rb +2 -0
- data/ext/cumo/narray/gen/def/int8.rb +2 -0
- data/ext/cumo/narray/gen/def/robject.rb +2 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/uint16.rb +2 -0
- data/ext/cumo/narray/gen/def/uint32.rb +2 -0
- data/ext/cumo/narray/gen/def/uint64.rb +2 -0
- data/ext/cumo/narray/gen/def/uint8.rb +2 -0
- data/ext/cumo/narray/gen/erbln.rb +9 -7
- data/ext/cumo/narray/gen/erbpp2.rb +26 -24
- data/ext/cumo/narray/gen/narray_def.rb +13 -11
- data/ext/cumo/narray/gen/spec.rb +58 -55
- data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
- data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
- data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
- data/ext/cumo/narray/gen/tmpl/at.c +34 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
- data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
- data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
- data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
- data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
- data/ext/cumo/narray/gen/tmpl/each.c +4 -2
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
- data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
- data/ext/cumo/narray/gen/tmpl/median.c +2 -2
- data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
- data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
- data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
- data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
- data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
- data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
- data/ext/cumo/narray/index.c +244 -40
- data/ext/cumo/narray/index_kernel.cu +84 -0
- data/ext/cumo/narray/narray.c +57 -19
- data/ext/cumo/narray/ndloop.c +1 -1
- data/ext/cumo/narray/struct.c +1 -1
- data/lib/cumo/cuda/compile_error.rb +1 -1
- data/lib/cumo/cuda/compiler.rb +23 -22
- data/lib/cumo/cuda/cudnn.rb +1 -1
- data/lib/cumo/cuda/device.rb +1 -1
- data/lib/cumo/cuda/link_state.rb +2 -2
- data/lib/cumo/cuda/module.rb +1 -2
- data/lib/cumo/cuda/nvrtc_program.rb +3 -2
- data/lib/cumo/cuda.rb +2 -0
- data/lib/cumo/linalg.rb +2 -0
- data/lib/cumo/narray/extra.rb +297 -341
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo.rb +3 -1
- data/test/bit_test.rb +157 -0
- data/test/cuda/compiler_test.rb +69 -0
- data/test/cuda/device_test.rb +31 -0
- data/test/cuda/memory_pool_test.rb +45 -0
- data/test/cuda/nvrtc_test.rb +51 -0
- data/test/cuda/runtime_test.rb +28 -0
- data/test/cudnn_test.rb +498 -0
- data/test/cumo_test.rb +27 -0
- data/test/narray_test.rb +745 -0
- data/test/ractor_test.rb +52 -0
- data/test/test_helper.rb +31 -0
- metadata +34 -54
- data/.travis.yml +0 -5
- data/numo-narray-version +0 -1
data/ext/cumo/depend.erb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
MAKEFLAGS = <%= ENV.fetch('MAKEFLAGS', "-j#{Etc.nprocessors}") %>
|
|
2
|
+
|
|
1
3
|
TAGSRC = \
|
|
2
4
|
../../ruby/include/ruby/*.h \
|
|
3
5
|
../../ruby/*.c \
|
|
@@ -11,17 +13,17 @@ tags : TAGS
|
|
|
11
13
|
TAGS : $(TAGSRC)
|
|
12
14
|
etags $(TAGSRC)
|
|
13
15
|
|
|
14
|
-
C_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.c").join(" ")%>
|
|
15
|
-
CU_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.cu").join(" ")%>
|
|
16
|
+
C_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.c").join(" ")%>
|
|
17
|
+
CU_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.cu").join(" ")%>
|
|
16
18
|
|
|
17
|
-
C_COGEN = narray/gen/cogen.rb
|
|
18
|
-
CU_COGEN = narray/gen/cogen_kernel.rb
|
|
19
|
-
C_DEPENDS = $(C_TMPL) narray/gen/*.rb
|
|
20
|
-
CU_DEPENDS = $(CU_TMPL) narray/gen/*.rb
|
|
19
|
+
C_COGEN = <%= __dir__ %>/narray/gen/cogen.rb
|
|
20
|
+
CU_COGEN = <%= __dir__ %>/narray/gen/cogen_kernel.rb
|
|
21
|
+
C_DEPENDS = $(C_TMPL) <%= __dir__ %>/narray/gen/*.rb
|
|
22
|
+
CU_DEPENDS = $(CU_TMPL) <%= __dir__ %>/narray/gen/*.rb
|
|
21
23
|
|
|
22
24
|
<%
|
|
23
25
|
list_type_c = []
|
|
24
|
-
list_type_rb = Dir.glob("narray/gen/def/*.rb")
|
|
26
|
+
list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
|
|
25
27
|
list_type_rb.each do |type_rb|
|
|
26
28
|
type_name = File.basename(type_rb, ".rb")
|
|
27
29
|
next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
|
|
@@ -34,7 +36,7 @@ list_type_rb.each do |type_rb|
|
|
|
34
36
|
|
|
35
37
|
<%
|
|
36
38
|
list_type_cu = []
|
|
37
|
-
list_type_rb = Dir.glob("narray/gen/def/*.rb")
|
|
39
|
+
list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
|
|
38
40
|
list_type_rb.each do |type_rb|
|
|
39
41
|
type_name = File.basename(type_rb, ".rb")
|
|
40
42
|
next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
|
|
@@ -47,12 +49,12 @@ list_type_rb.each do |type_rb|
|
|
|
47
49
|
|
|
48
50
|
src : <%= list_type_cu.join(" ") %> <%= list_type_c.join(" ") %>
|
|
49
51
|
|
|
50
|
-
build-ctest : cuda/memory_pool_impl_test.exe
|
|
52
|
+
build-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
|
|
51
53
|
|
|
52
|
-
run-ctest : cuda/memory_pool_impl_test.exe
|
|
54
|
+
run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
|
|
53
55
|
./$<
|
|
54
56
|
|
|
55
|
-
cuda/memory_pool_impl_test.exe: cuda/memory_pool_impl_test.cpp cuda/memory_pool_impl.cpp cuda/memory_pool_impl.hpp
|
|
56
|
-
nvcc -std=c++
|
|
57
|
+
<%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
|
|
58
|
+
nvcc -std=c++17 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
|
|
57
59
|
|
|
58
|
-
CLEANOBJS =
|
|
60
|
+
CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe
|
data/ext/cumo/extconf.rb
CHANGED
|
@@ -1,39 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'rbconfig.rb'
|
|
4
|
+
require 'fileutils'
|
|
2
5
|
require "erb"
|
|
6
|
+
require 'etc'
|
|
3
7
|
require_relative '../../3rd_party/mkmf-cu/lib/mkmf-cu'
|
|
4
8
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
exit(1)
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
def have_numo_narray!
|
|
11
|
-
version_path = File.join(__dir__, "..", "..", "numo-narray-version")
|
|
12
|
-
version = File.read(version_path).strip
|
|
13
|
-
gem_spec = Gem::Specification.find_by_name("numo-narray", version)
|
|
14
|
-
|
|
15
|
-
$INCFLAGS += " -I#{gem_spec.gem_dir}/ext/numo/narray"
|
|
16
|
-
if !have_header("numo/narray.h")
|
|
17
|
-
puts "
|
|
18
|
-
Header numo/narray.h was not found. Give pathname as follows:
|
|
19
|
-
% ruby extconf.rb --with-narray-include=narray_h_dir"
|
|
20
|
-
exit(1)
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
if RUBY_PLATFORM =~ /cygwin|mingw/
|
|
24
|
-
$LDFLAGS += " -L#{gem_spec.gem_dir}/ext/numo"
|
|
25
|
-
unless have_library("narray","nary_new")
|
|
26
|
-
puts "libnarray.a not found"
|
|
27
|
-
exit(1)
|
|
28
|
-
end
|
|
29
|
-
end
|
|
9
|
+
def d(file)
|
|
10
|
+
File.join(__dir__, file)
|
|
30
11
|
end
|
|
31
12
|
|
|
32
13
|
def create_depend
|
|
33
14
|
message "creating depend\n"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
depend_erb_path = File.join(__dir__, "depend.erb")
|
|
15
|
+
File.open(d("depend"), "w") do |depend|
|
|
16
|
+
depend_erb_path = d("depend.erb")
|
|
37
17
|
File.open(depend_erb_path, "r") do |depend_erb|
|
|
38
18
|
erb = ERB.new(depend_erb.read)
|
|
39
19
|
erb.filename = depend_erb_path
|
|
@@ -42,20 +22,20 @@ def create_depend
|
|
|
42
22
|
end
|
|
43
23
|
end
|
|
44
24
|
|
|
45
|
-
rm_f 'include/cumo/extconf.h'
|
|
25
|
+
rm_f d('include/cumo/extconf.h')
|
|
46
26
|
|
|
47
27
|
MakeMakefileCuda.install!(cxx: true)
|
|
48
28
|
|
|
49
29
|
if ENV['DEBUG']
|
|
50
30
|
$CFLAGS << " -g -O0 -Wall"
|
|
51
31
|
end
|
|
52
|
-
$CXXFLAGS << " -std=c++
|
|
32
|
+
$CXXFLAGS << " -std=c++17"
|
|
53
33
|
#$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
|
|
54
34
|
#$CFLAGS=" $(cflags) -O3"
|
|
55
|
-
$INCFLAGS = "-
|
|
35
|
+
$INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
|
|
56
36
|
|
|
57
|
-
$INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map{|x| [x,'$(archdir)'] }
|
|
58
|
-
$INSTALLFILES << ['include/cumo/extconf.h','$(archdir)']
|
|
37
|
+
$INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map { |x| [x, '$(archdir)'] }
|
|
38
|
+
$INSTALLFILES << ['include/cumo/extconf.h', '$(archdir)']
|
|
59
39
|
if /cygwin|mingw/ =~ RUBY_PLATFORM
|
|
60
40
|
$INSTALLFILES << ['libcumo.a', '$(archdir)']
|
|
61
41
|
end
|
|
@@ -113,17 +93,10 @@ cuda/cudnn
|
|
|
113
93
|
cuda/cudnn_impl
|
|
114
94
|
)
|
|
115
95
|
|
|
116
|
-
|
|
117
|
-
puts "add kwargs"
|
|
118
|
-
srcs << "kwargs"
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
$objs = srcs.map {|src| "#{src}.o" }
|
|
96
|
+
$objs = srcs.map { |src| "#{src}.o" }
|
|
122
97
|
|
|
123
98
|
dir_config("narray")
|
|
124
99
|
|
|
125
|
-
have_numo_narray!
|
|
126
|
-
|
|
127
100
|
if have_header("dlfcn.h")
|
|
128
101
|
exit(1) unless have_library("dl")
|
|
129
102
|
exit(1) unless have_func("dlopen")
|
|
@@ -147,14 +120,14 @@ end
|
|
|
147
120
|
|
|
148
121
|
have_type("bool", stdbool)
|
|
149
122
|
unless have_type("u_int8_t", stdint)
|
|
150
|
-
have_type("uint8_t",stdint)
|
|
123
|
+
have_type("uint8_t", stdint)
|
|
151
124
|
end
|
|
152
125
|
unless have_type("u_int16_t", stdint)
|
|
153
|
-
have_type("uint16_t",stdint)
|
|
126
|
+
have_type("uint16_t", stdint)
|
|
154
127
|
end
|
|
155
128
|
have_type("int32_t", stdint)
|
|
156
129
|
unless have_type("u_int32_t", stdint)
|
|
157
|
-
have_type("uint32_t",stdint)
|
|
130
|
+
have_type("uint32_t", stdint)
|
|
158
131
|
end
|
|
159
132
|
have_type("int64_t", stdint)
|
|
160
133
|
unless have_type("u_int64_t", stdint)
|
|
@@ -162,17 +135,22 @@ unless have_type("u_int64_t", stdint)
|
|
|
162
135
|
end
|
|
163
136
|
have_func("exp10")
|
|
164
137
|
have_func("rb_arithmetic_sequence_extract")
|
|
138
|
+
have_func("RTYPEDDATA_GET_DATA")
|
|
165
139
|
|
|
166
140
|
have_var("rb_cComplex")
|
|
167
141
|
have_func("rb_thread_call_without_gvl")
|
|
168
142
|
|
|
169
|
-
create_header('include/cumo/extconf.h')
|
|
143
|
+
create_header d('include/cumo/extconf.h')
|
|
170
144
|
$extconf_h = nil # nvcc does not support #include RUBY_EXTCONF_H
|
|
171
145
|
|
|
146
|
+
# Create *.o directories
|
|
147
|
+
FileUtils.mkdir_p('narray')
|
|
148
|
+
FileUtils.mkdir_p('cuda')
|
|
149
|
+
|
|
172
150
|
create_depend
|
|
173
151
|
|
|
174
|
-
HEADER_DIRS = (ENV['CPATH'] || '').split(
|
|
175
|
-
LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(
|
|
152
|
+
HEADER_DIRS = (ENV['CPATH'] || '').split(File::PATH_SEPARATOR)
|
|
153
|
+
LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(File::PATH_SEPARATOR)
|
|
176
154
|
dir_config('cumo', HEADER_DIRS, LIB_DIRS)
|
|
177
155
|
|
|
178
156
|
have_library('cuda')
|
|
@@ -186,4 +164,12 @@ if have_library('cudnn') # TODO(sonots): cuDNN version check
|
|
|
186
164
|
$CXXFLAGS << " -DCUDNN_FOUND"
|
|
187
165
|
end
|
|
188
166
|
|
|
167
|
+
have_library('stdc++')
|
|
168
|
+
|
|
189
169
|
create_makefile('cumo')
|
|
170
|
+
|
|
171
|
+
begin
|
|
172
|
+
require 'extconf_compile_commands_json'
|
|
173
|
+
ExtconfCompileCommandsJson.generate!
|
|
174
|
+
rescue LoadError
|
|
175
|
+
end
|
|
@@ -28,8 +28,10 @@ class cumo_thrust_strided_range
|
|
|
28
28
|
|
|
29
29
|
typedef typename thrust::iterator_difference<Iterator>::type difference_type;
|
|
30
30
|
|
|
31
|
-
struct stride_functor
|
|
31
|
+
struct stride_functor
|
|
32
32
|
{
|
|
33
|
+
using argument_type = difference_type;
|
|
34
|
+
using result_type = difference_type;
|
|
33
35
|
difference_type stride;
|
|
34
36
|
|
|
35
37
|
stride_functor(difference_type stride)
|
|
@@ -86,8 +88,10 @@ struct cumo_thrust_minmax_pair
|
|
|
86
88
|
// returns a cumo_thrust_minmax_pair whose minimum and maximum values
|
|
87
89
|
// are initialized to x.
|
|
88
90
|
template <typename T>
|
|
89
|
-
struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thrust_minmax_pair<T> >
|
|
91
|
+
struct cumo_thrust_minmax_unary_op
|
|
90
92
|
{
|
|
93
|
+
using argument_type = T;
|
|
94
|
+
using result_type = cumo_thrust_minmax_pair<T>;
|
|
91
95
|
__host__ __device__ cumo_thrust_minmax_pair<T> operator()(const T& x) const
|
|
92
96
|
{
|
|
93
97
|
cumo_thrust_minmax_pair<T> result;
|
|
@@ -102,8 +106,11 @@ struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thru
|
|
|
102
106
|
// maximum values are the min() and max() respectively of
|
|
103
107
|
// the minimums and maximums of the input pairs
|
|
104
108
|
template <typename T>
|
|
105
|
-
struct cumo_thrust_minmax_binary_op
|
|
109
|
+
struct cumo_thrust_minmax_binary_op
|
|
106
110
|
{
|
|
111
|
+
using first_argument_type = cumo_thrust_minmax_pair<T>;
|
|
112
|
+
using second_argument_type = cumo_thrust_minmax_pair<T>;
|
|
113
|
+
using result_type = cumo_thrust_minmax_pair<T>;
|
|
107
114
|
__host__ __device__ cumo_thrust_minmax_pair<T> operator()(const cumo_thrust_minmax_pair<T>& x, const cumo_thrust_minmax_pair<T>& y) const
|
|
108
115
|
{
|
|
109
116
|
cumo_thrust_minmax_pair<T> result;
|
|
@@ -157,10 +164,10 @@ struct cumo_thrust_variance_unary_op
|
|
|
157
164
|
// all values that have been agregated so far
|
|
158
165
|
template <typename T>
|
|
159
166
|
struct cumo_thrust_variance_binary_op
|
|
160
|
-
: public thrust::binary_function<const cumo_thrust_variance_data<T>&,
|
|
161
|
-
const cumo_thrust_variance_data<T>&,
|
|
162
|
-
cumo_thrust_variance_data<T> >
|
|
163
167
|
{
|
|
168
|
+
using first_argument_type = const cumo_thrust_variance_data<T>&;
|
|
169
|
+
using second_argument_type = const cumo_thrust_variance_data<T>&;
|
|
170
|
+
using result_type = cumo_thrust_variance_data<T>;
|
|
164
171
|
__host__ __device__
|
|
165
172
|
cumo_thrust_variance_data<T> operator()(const cumo_thrust_variance_data<T>& x, const cumo_thrust_variance_data <T>& y) const
|
|
166
173
|
{
|
|
@@ -49,10 +49,10 @@ struct cumo_thrust_complex_variance_unary_op
|
|
|
49
49
|
// all values that have been agregated so far
|
|
50
50
|
template <typename T, typename R>
|
|
51
51
|
struct cumo_thrust_complex_variance_binary_op
|
|
52
|
-
: public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
|
|
53
|
-
const cumo_thrust_complex_variance_data<T,R>&,
|
|
54
|
-
cumo_thrust_complex_variance_data<T,R> >
|
|
55
52
|
{
|
|
53
|
+
using first_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
|
|
54
|
+
using second_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
|
|
55
|
+
using result_type = cumo_thrust_complex_variance_data<T,R>;
|
|
56
56
|
__host__ __device__
|
|
57
57
|
cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
|
|
58
58
|
{
|
|
@@ -79,6 +79,7 @@ void cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep);
|
|
|
79
79
|
// used in aref, aset
|
|
80
80
|
int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
|
|
81
81
|
VALUE cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
|
82
|
+
VALUE cumo_na_at_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
|
82
83
|
|
|
83
84
|
// defined in array, used in math
|
|
84
85
|
VALUE cumo_na_ary_composition_dtype(VALUE ary);
|
|
@@ -141,7 +141,7 @@ extern "C" {
|
|
|
141
141
|
# endif
|
|
142
142
|
#endif
|
|
143
143
|
|
|
144
|
-
#if
|
|
144
|
+
#if SIZEOF_LONG > 4
|
|
145
145
|
# undef INT322NUM
|
|
146
146
|
# undef UINT322NUM
|
|
147
147
|
# define INT322NUM(x) INT2FIX(x)
|
|
@@ -329,6 +329,12 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
|
|
|
329
329
|
#define CUMO_RNARRAY_VIEW(val) ((cumo_narray_view_t*)DATA_PTR(val))
|
|
330
330
|
#define CUMO_RNARRAY_FILEMAP(val) ((cumo_narray_filemap_t*)DATA_PTR(val))
|
|
331
331
|
|
|
332
|
+
#ifdef HAVE_RTYPEDDATA_GET_DATA
|
|
333
|
+
#define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)RTYPEDDATA_GET_DATA(ptr))
|
|
334
|
+
#else
|
|
335
|
+
#define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)DATA_PTR(ptr))
|
|
336
|
+
#endif
|
|
337
|
+
|
|
332
338
|
#define CUMO_RNARRAY_NDIM(val) (CUMO_RNARRAY(val)->ndim)
|
|
333
339
|
#define CUMO_RNARRAY_TYPE(val) (CUMO_RNARRAY(val)->type)
|
|
334
340
|
#define CUMO_RNARRAY_FLAG(val) (CUMO_RNARRAY(val)->flag)
|
|
@@ -483,6 +489,12 @@ typedef unsigned int CUMO_BIT_DIGIT;
|
|
|
483
489
|
#include "cumo/ndloop.h"
|
|
484
490
|
#include "cumo/intern.h"
|
|
485
491
|
|
|
492
|
+
// for Ractor support code
|
|
493
|
+
#ifndef HAVE_RB_EXT_RACTOR_SAFE
|
|
494
|
+
# undef RUBY_TYPED_FROZEN_SHAREABLE
|
|
495
|
+
# define RUBY_TYPED_FROZEN_SHAREABLE 0
|
|
496
|
+
#endif
|
|
497
|
+
|
|
486
498
|
#if defined(__cplusplus)
|
|
487
499
|
#if 0
|
|
488
500
|
{ /* satisfy cc-mode */
|
|
@@ -112,9 +112,8 @@
|
|
|
112
112
|
size_t dig = (pos) / CUMO_NB; \
|
|
113
113
|
int bit = (pos) % CUMO_NB; \
|
|
114
114
|
((CUMO_BIT_DIGIT*)(adr))[dig] = \
|
|
115
|
-
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
|
|
115
|
+
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
|
|
116
116
|
}
|
|
117
|
-
// val -> val&1 ??
|
|
118
117
|
|
|
119
118
|
#define CUMO_STORE_BIT_STEP( adr, pos, step, idx, val )\
|
|
120
119
|
{ \
|
|
@@ -129,9 +128,8 @@
|
|
|
129
128
|
pos += step; \
|
|
130
129
|
} \
|
|
131
130
|
((CUMO_BIT_DIGIT*)(adr))[dig] = \
|
|
132
|
-
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
|
|
131
|
+
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
|
|
133
132
|
}
|
|
134
|
-
// val -> val&1 ??
|
|
135
133
|
|
|
136
134
|
static inline int
|
|
137
135
|
cumo_is_aligned(const void *ptr, const size_t alignment)
|
|
@@ -117,7 +117,7 @@ static inline dtype c_from_dcomplex(cumo_dcomplex x) {
|
|
|
117
117
|
#define m_acosh(x) c_acosh(x)
|
|
118
118
|
#define m_atanh(x) c_atanh(x)
|
|
119
119
|
#define m_hypot(x,y) c_hypot(x,y)
|
|
120
|
-
#define m_sinc(x) c_div(c_sin(x),x)
|
|
120
|
+
#define m_sinc(x) ((REAL(x)==0 && IMAG(x)==0) ? (c_new(1,0)):(c_div(c_sin(x),x)))
|
|
121
121
|
|
|
122
122
|
#define m_sum_init INT2FIX(0)
|
|
123
123
|
#define m_mulsum_init INT2FIX(0)
|
|
@@ -157,18 +157,27 @@ __host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
|
|
|
157
157
|
/* --------- thrust ----------------- */
|
|
158
158
|
#include "cumo/cuda/cumo_thrust_complex.hpp"
|
|
159
159
|
|
|
160
|
-
struct cumo_thrust_plus
|
|
160
|
+
struct cumo_thrust_plus
|
|
161
161
|
{
|
|
162
|
+
using first_argument_type = dtype;
|
|
163
|
+
using second_argument_type = dtype;
|
|
164
|
+
using result_type = dtype;
|
|
162
165
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
163
166
|
};
|
|
164
167
|
|
|
165
|
-
struct cumo_thrust_multiplies
|
|
168
|
+
struct cumo_thrust_multiplies
|
|
166
169
|
{
|
|
170
|
+
using first_argument_type = dtype;
|
|
171
|
+
using second_argument_type = dtype;
|
|
172
|
+
using result_type = dtype;
|
|
167
173
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
168
174
|
};
|
|
169
175
|
|
|
170
|
-
struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
|
|
176
|
+
struct cumo_thrust_multiplies_mulsum_nan
|
|
171
177
|
{
|
|
178
|
+
using first_argument_type = dtype;
|
|
179
|
+
using second_argument_type = dtype;
|
|
180
|
+
using result_type = dtype;
|
|
172
181
|
__host__ __device__ dtype operator()(dtype x, dtype y) {
|
|
173
182
|
if (not_nan(x) && not_nan(y)) {
|
|
174
183
|
return m_mul(x, y);
|
|
@@ -178,8 +187,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
|
|
|
178
187
|
}
|
|
179
188
|
};
|
|
180
189
|
|
|
181
|
-
struct cumo_thrust_square
|
|
190
|
+
struct cumo_thrust_square
|
|
182
191
|
{
|
|
192
|
+
using argument_type = dtype;
|
|
193
|
+
using result_type = dtype;
|
|
183
194
|
__host__ __device__ rtype operator()(const dtype& x) const { return c_abs_square(x); }
|
|
184
195
|
};
|
|
185
196
|
|
|
@@ -12,7 +12,7 @@ extern double pow(double, double);
|
|
|
12
12
|
#define m_zero 0.0
|
|
13
13
|
#define m_one 1.0
|
|
14
14
|
|
|
15
|
-
#define m_num_to_data(x) NUM2DBL(x)
|
|
15
|
+
#define m_num_to_data(x) (NIL_P(x) ? nan("") : NUM2DBL(x))
|
|
16
16
|
#define m_data_to_num(x) rb_float_new(x)
|
|
17
17
|
|
|
18
18
|
#define m_from_double(x) (x)
|
|
@@ -110,7 +110,7 @@ extern double pow(double, double);
|
|
|
110
110
|
#define m_atanh(x) atanh(x)
|
|
111
111
|
#define m_atan2(x,y) atan2(x,y)
|
|
112
112
|
#define m_hypot(x,y) hypot(x,y)
|
|
113
|
-
#define m_sinc(x) (sin(x)/(x))
|
|
113
|
+
#define m_sinc(x) (((x)==0) ? 1.0:(sin(x)/(x)))
|
|
114
114
|
|
|
115
115
|
#define m_erf(x) erf(x)
|
|
116
116
|
#define m_erfc(x) erfc(x)
|
|
@@ -72,18 +72,27 @@ __host__ __device__ static inline dtype f_minimum_nan(dtype x, dtype y)
|
|
|
72
72
|
/* --------- thrust ----------------- */
|
|
73
73
|
#include "cumo/cuda/cumo_thrust.hpp"
|
|
74
74
|
|
|
75
|
-
struct cumo_thrust_plus
|
|
75
|
+
struct cumo_thrust_plus
|
|
76
76
|
{
|
|
77
|
+
using first_argument_type = dtype;
|
|
78
|
+
using second_argument_type = dtype;
|
|
79
|
+
using result_type = dtype;
|
|
77
80
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
78
81
|
};
|
|
79
82
|
|
|
80
|
-
struct cumo_thrust_multiplies
|
|
83
|
+
struct cumo_thrust_multiplies
|
|
81
84
|
{
|
|
85
|
+
using first_argument_type = dtype;
|
|
86
|
+
using second_argument_type = dtype;
|
|
87
|
+
using result_type = dtype;
|
|
82
88
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
83
89
|
};
|
|
84
90
|
|
|
85
|
-
struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
|
|
91
|
+
struct cumo_thrust_multiplies_mulsum_nan
|
|
86
92
|
{
|
|
93
|
+
using first_argument_type = dtype;
|
|
94
|
+
using second_argument_type = dtype;
|
|
95
|
+
using result_type = dtype;
|
|
87
96
|
__host__ __device__ dtype operator()(dtype x, dtype y) {
|
|
88
97
|
if (not_nan(x) && not_nan(y)) {
|
|
89
98
|
return m_mul(x, y);
|
|
@@ -93,8 +102,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
|
|
|
93
102
|
}
|
|
94
103
|
};
|
|
95
104
|
|
|
96
|
-
struct cumo_thrust_square
|
|
105
|
+
struct cumo_thrust_square
|
|
97
106
|
{
|
|
107
|
+
using argument_type = dtype;
|
|
108
|
+
using result_type = dtype;
|
|
98
109
|
__host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
|
|
99
110
|
};
|
|
100
111
|
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
#define m_zero 0
|
|
2
2
|
#define m_one 1
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
#define
|
|
4
|
+
/* Handle negative values consistently across platforms for unsigned integer types */
|
|
5
|
+
#define m_from_double(x) ((x) < 0 ? (dtype)((long long)(x)) : (dtype)(x))
|
|
6
|
+
#define m_from_real(x) ((x) < 0 ? (dtype)((long long)(x)) : (dtype)(x))
|
|
6
7
|
#define m_from_sint(x) (x)
|
|
7
8
|
#define m_from_int32(x) (x)
|
|
8
9
|
#define m_from_int64(x) (x)
|
|
@@ -70,18 +70,26 @@ __host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
|
|
|
70
70
|
/* --------- thrust ----------------- */
|
|
71
71
|
#include "cumo/cuda/cumo_thrust.hpp"
|
|
72
72
|
|
|
73
|
-
struct cumo_thrust_plus
|
|
73
|
+
struct cumo_thrust_plus
|
|
74
74
|
{
|
|
75
|
+
using first_argument_type = dtype;
|
|
76
|
+
using second_argument_type = dtype;
|
|
77
|
+
using result_type = dtype;
|
|
75
78
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
76
79
|
};
|
|
77
80
|
|
|
78
|
-
struct cumo_thrust_multiplies
|
|
81
|
+
struct cumo_thrust_multiplies
|
|
79
82
|
{
|
|
83
|
+
using first_argument_type = dtype;
|
|
84
|
+
using second_argument_type = dtype;
|
|
85
|
+
using result_type = dtype;
|
|
80
86
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
81
87
|
};
|
|
82
88
|
|
|
83
|
-
struct cumo_thrust_square
|
|
89
|
+
struct cumo_thrust_square
|
|
84
90
|
{
|
|
91
|
+
using argument_type = dtype;
|
|
92
|
+
using result_type = dtype;
|
|
85
93
|
__host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
|
|
86
94
|
};
|
|
87
95
|
|
data/ext/cumo/include/cumo.h
CHANGED
data/ext/cumo/narray/array.c
CHANGED
|
@@ -366,7 +366,7 @@ cumo_na_composition3_ary(VALUE ary, VALUE *ptype, VALUE *pshape, VALUE *pnary)
|
|
|
366
366
|
VALUE dtype, dshape;
|
|
367
367
|
|
|
368
368
|
mdai = cumo_na_mdai_alloc(ary);
|
|
369
|
-
vmdai = TypedData_Wrap_Struct(
|
|
369
|
+
vmdai = TypedData_Wrap_Struct(rb_cObject, &mdai_data_type, (void*)mdai);
|
|
370
370
|
if ( cumo_na_mdai_investigate(mdai, 1) ) {
|
|
371
371
|
// empty
|
|
372
372
|
dtype = update_type(ptype, cumo_cInt32);
|
|
@@ -466,11 +466,13 @@ cumo_na_s_array_shape(VALUE mod, VALUE ary)
|
|
|
466
466
|
@return [Cumo::NArray]
|
|
467
467
|
@example
|
|
468
468
|
Cumo::NArray.new_like([[1,2,3],[4,5,6]])
|
|
469
|
-
=> Cumo::Int32#shape=[2,3](empty)
|
|
469
|
+
# => Cumo::Int32#shape=[2,3](empty)
|
|
470
|
+
|
|
470
471
|
Cumo::DFloat.new_like([[1,2],[3,4]])
|
|
471
|
-
=> Cumo::DFloat#shape=[2,2](empty)
|
|
472
|
+
# => Cumo::DFloat#shape=[2,2](empty)
|
|
473
|
+
|
|
472
474
|
Cumo::NArray.new_like([1,2i,3])
|
|
473
|
-
=> Cumo::DComplex#shape=[3](empty)
|
|
475
|
+
# => Cumo::DComplex#shape=[3](empty)
|
|
474
476
|
*/
|
|
475
477
|
VALUE
|
|
476
478
|
cumo_na_s_new_like(VALUE type, VALUE obj)
|
|
@@ -612,7 +614,7 @@ cumo_na_ary_composition_for_struct(VALUE nstruct, VALUE ary)
|
|
|
612
614
|
|
|
613
615
|
mdai = cumo_na_mdai_alloc(ary);
|
|
614
616
|
mdai->na_type = nstruct;
|
|
615
|
-
vmdai = TypedData_Wrap_Struct(
|
|
617
|
+
vmdai = TypedData_Wrap_Struct(rb_cObject, &mdai_data_type, (void*)mdai);
|
|
616
618
|
cumo_na_mdai_for_struct(mdai, 0);
|
|
617
619
|
nc = cumo_na_compose_alloc();
|
|
618
620
|
vnc = WrapCompose(nc);
|
|
@@ -626,7 +628,7 @@ cumo_na_ary_composition_for_struct(VALUE nstruct, VALUE ary)
|
|
|
626
628
|
|
|
627
629
|
|
|
628
630
|
void
|
|
629
|
-
Init_cumo_na_array()
|
|
631
|
+
Init_cumo_na_array(void)
|
|
630
632
|
{
|
|
631
633
|
rb_define_singleton_method(cNArray, "array_shape", cumo_na_s_array_shape, 1);
|
|
632
634
|
rb_define_singleton_method(cNArray, "array_type", cumo_na_s_array_type, 1);
|