cumo 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +15 -0
  4. data/.rubocop_todo.yml +1272 -0
  5. data/3rd_party/mkmf-cu/Gemfile +2 -0
  6. data/3rd_party/mkmf-cu/Rakefile +2 -1
  7. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
  8. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
  9. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
  11. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
  12. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
  13. data/CHANGELOG.md +69 -0
  14. data/Gemfile +6 -1
  15. data/README.md +2 -10
  16. data/Rakefile +8 -11
  17. data/bench/broadcast_fp32.rb +28 -26
  18. data/bench/cumo_bench.rb +18 -16
  19. data/bench/numo_bench.rb +18 -16
  20. data/bench/reduction_fp32.rb +14 -12
  21. data/bin/console +1 -0
  22. data/cumo.gemspec +5 -8
  23. data/ext/cumo/cuda/cudnn.c +2 -2
  24. data/ext/cumo/cumo.c +7 -3
  25. data/ext/cumo/depend.erb +15 -13
  26. data/ext/cumo/extconf.rb +32 -46
  27. data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
  28. data/ext/cumo/include/cumo/intern.h +1 -0
  29. data/ext/cumo/include/cumo/narray.h +13 -1
  30. data/ext/cumo/include/cumo/template.h +2 -4
  31. data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
  32. data/ext/cumo/include/cumo/types/float_macro.h +2 -2
  33. data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
  34. data/ext/cumo/include/cumo.h +2 -2
  35. data/ext/cumo/narray/array.c +3 -3
  36. data/ext/cumo/narray/data.c +23 -2
  37. data/ext/cumo/narray/gen/cogen.rb +8 -7
  38. data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
  39. data/ext/cumo/narray/gen/def/bit.rb +3 -1
  40. data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
  41. data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
  42. data/ext/cumo/narray/gen/def/int16.rb +2 -0
  43. data/ext/cumo/narray/gen/def/int32.rb +2 -0
  44. data/ext/cumo/narray/gen/def/int64.rb +2 -0
  45. data/ext/cumo/narray/gen/def/int8.rb +2 -0
  46. data/ext/cumo/narray/gen/def/robject.rb +2 -0
  47. data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
  48. data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
  49. data/ext/cumo/narray/gen/def/uint16.rb +2 -0
  50. data/ext/cumo/narray/gen/def/uint32.rb +2 -0
  51. data/ext/cumo/narray/gen/def/uint64.rb +2 -0
  52. data/ext/cumo/narray/gen/def/uint8.rb +2 -0
  53. data/ext/cumo/narray/gen/erbln.rb +9 -7
  54. data/ext/cumo/narray/gen/erbpp2.rb +26 -24
  55. data/ext/cumo/narray/gen/narray_def.rb +13 -11
  56. data/ext/cumo/narray/gen/spec.rb +58 -55
  57. data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
  58. data/ext/cumo/narray/gen/tmpl/at.c +34 -0
  59. data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
  60. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
  61. data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
  62. data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
  63. data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
  64. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
  65. data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
  66. data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
  67. data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
  68. data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
  69. data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
  70. data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
  71. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
  72. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
  73. data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
  74. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
  75. data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
  76. data/ext/cumo/narray/index.c +243 -39
  77. data/ext/cumo/narray/index_kernel.cu +84 -0
  78. data/ext/cumo/narray/narray.c +38 -1
  79. data/ext/cumo/narray/ndloop.c +1 -1
  80. data/ext/cumo/narray/struct.c +1 -1
  81. data/lib/cumo/cuda/compile_error.rb +1 -1
  82. data/lib/cumo/cuda/compiler.rb +23 -22
  83. data/lib/cumo/cuda/cudnn.rb +1 -1
  84. data/lib/cumo/cuda/device.rb +1 -1
  85. data/lib/cumo/cuda/link_state.rb +2 -2
  86. data/lib/cumo/cuda/module.rb +1 -2
  87. data/lib/cumo/cuda/nvrtc_program.rb +3 -2
  88. data/lib/cumo/cuda.rb +2 -0
  89. data/lib/cumo/linalg.rb +2 -0
  90. data/lib/cumo/narray/extra.rb +137 -185
  91. data/lib/cumo/narray.rb +2 -0
  92. data/lib/cumo.rb +3 -1
  93. data/test/bit_test.rb +157 -0
  94. data/test/cuda/compiler_test.rb +69 -0
  95. data/test/cuda/device_test.rb +30 -0
  96. data/test/cuda/memory_pool_test.rb +45 -0
  97. data/test/cuda/nvrtc_test.rb +51 -0
  98. data/test/cuda/runtime_test.rb +28 -0
  99. data/test/cudnn_test.rb +498 -0
  100. data/test/cumo_test.rb +27 -0
  101. data/test/narray_test.rb +745 -0
  102. data/test/ractor_test.rb +52 -0
  103. data/test/test_helper.rb +31 -0
  104. metadata +31 -54
  105. data/.travis.yml +0 -5
  106. data/numo-narray-version +0 -1
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "http://rubygems.org"
2
4
 
3
5
  gemspec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bundler/gem_tasks'
2
4
  require 'rake/testtask'
3
5
  require 'rake/clean'
@@ -8,4 +10,3 @@ end
8
10
  desc "Run tests"
9
11
 
10
12
  task :default => [:test]
11
-
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require_relative "../lib/mkmf-cu/cli"
3
5
 
4
6
  MakeMakefileCuda::CLI.new(ARGV).run
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "open3"
2
4
  require_relative "nvcc"
3
5
 
@@ -6,7 +8,7 @@ module MakeMakefileCuda
6
8
  attr_reader :argv
7
9
 
8
10
  def initialize(argv)
9
- @argv = argv.map{|e| e.dup }
11
+ @argv = argv.map { |e| e.dup }
10
12
  end
11
13
 
12
14
  def run
@@ -32,17 +34,35 @@ module MakeMakefileCuda
32
34
  # TODO(sonots): Make it possible to configure "nvcc" and additional arguments
33
35
  def nvcc_command
34
36
  s = MakeMakefileCuda::Nvcc.generate(argv)
35
- cmd = "nvcc " << s
37
+ cmd = "nvcc #{s}"
36
38
  if ENV['CUMO_NVCC_GENERATE_CODE']
37
39
  cmd << " --generate-code=#{ENV['CUMO_NVCC_GENERATE_CODE']}"
38
40
  elsif ENV['DEBUG']
39
41
  cmd << " -arch=sm_35"
40
42
  else
41
- cmd << " --generate-code=arch=compute_35,code=sm_35"
42
- cmd << " --generate-code=arch=compute_50,code=sm_50"
43
- cmd << " --generate-code=arch=compute_60,code=sm_60"
44
- cmd << " --generate-code=arch=compute_70,code=sm_70"
45
- cmd << " --generate-code=arch=compute_70,code=compute_70"
43
+ # Ref. https://en.wikipedia.org/wiki/CUDA
44
+ if cuda_version >= Gem::Version.new("13.0")
45
+ # CUDA 13.0
46
+ capability = [75, 87, 89, 90, 121]
47
+ elsif cuda_version >= Gem::Version.new("12.9")
48
+ # CUDA 12.9
49
+ capability = [50, 60, 70, 75, 87, 89, 90, 121]
50
+ elsif cuda_version >= Gem::Version.new("12.8")
51
+ # CUDA 12.8
52
+ capability = [50, 60, 70, 75, 87, 89, 90, 120]
53
+ elsif cuda_version >= Gem::Version.new("12.0")
54
+ # CUDA 12.0 – 12.6
55
+ capability = [50, 60, 70, 75, 87, 89, 90]
56
+ elsif cuda_version >= Gem::Version.new("11.8")
57
+ # CUDA 11.8
58
+ capability = [35, 50, 60, 70, 75, 87, 89, 90]
59
+ else
60
+ # CUDA 11.0
61
+ capability = [35, 50, 60, 70, 75, 80]
62
+ end
63
+ capability.each do |arch|
64
+ cmd << " --generate-code=arch=compute_#{arch},code=sm_#{arch}"
65
+ end
46
66
  end
47
67
  cmd
48
68
  end
@@ -88,5 +108,14 @@ module MakeMakefileCuda
88
108
  raise "#{color_code} is not supported" unless COLOR_CODES[code]
89
109
  "\e[#{COLOR_CODES[code]}m#{str}\e[0m"
90
110
  end
111
+
112
+ def cuda_version
113
+ @cuda_version ||= begin
114
+ output = `nvcc --version`
115
+ if output =~ /Cuda compilation tools, release ([^,]*),/
116
+ Gem::Version.new($1)
117
+ end
118
+ end
119
+ end
91
120
  end
92
121
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "optparse"
2
4
  require "rbconfig"
3
5
 
@@ -29,50 +31,50 @@ module MakeMakefileCuda
29
31
 
30
32
  def build_optparser
31
33
  opt = OptionParser.new
32
- opt_h = Hash.new{|h, k| h[k] = [] }
33
-
34
- opt.on("--arch arg") {|v| opt_h["-arch"] << v }
35
- opt.on("--std arg") {|v| opt_h["-std"] << v }
36
- opt.on("--stdlib arg") {|v| opt_h["-stdlib"] << v }
37
-
38
- opt.on("--Wl arg") {|v| opt_h["-Wl"] << v }
39
-
40
- opt.on('--profile') {|v| opt_h["-pg"] << "" }
41
- opt.on('-g') {|v| opt_h["-g"] << "" }
42
- opt.on('-G', "--device-debug") {|v| opt_h["-G"] << "" }
43
-
44
- opt.on('-I path') {|v| opt_h["-I"] << v }
45
- opt.on('-D flag') {|v| opt_h["-D"] << v }
46
- opt.on('-W flag') {|v| opt_h["-W"] << v }
47
- opt.on('-o output') {|v| opt_h["-o"] << v }
48
- opt.on('-c file') {|v| opt_h["-c"] << v }
49
- opt.on('-f flag') {|v| opt_h["-f"] << v }
50
- opt.on('-l file') {|v| opt_h["-l"] << v }
51
- opt.on('-L path') {|v| opt_h["-L"] << v }
52
- opt.on('-x pat', "--x pat") {|v| opt_h["-x"] << v }
53
- opt.on('-O num'){|v| opt_h["-O"] << v if /[0-9]/ =~ v }
54
- opt.on('--mkmf-cu-ext ext'){|v| opt_h["--mkmf-cu-ext"] << v}
34
+ opt_h = Hash.new { |h, k| h[k] = [] }
35
+
36
+ opt.on("--arch arg") { |v| opt_h["-arch"] << v }
37
+ opt.on("--std arg") { |v| opt_h["-std"] << v }
38
+ opt.on("--stdlib arg") { |v| opt_h["-stdlib"] << v }
39
+
40
+ opt.on("--Wl arg") { |v| opt_h["-Wl"] << v }
41
+
42
+ opt.on('--profile') { |v| opt_h["-pg"] << "" }
43
+ opt.on('-g') { |v| opt_h["-g"] << "" }
44
+ opt.on('-G', "--device-debug") { |v| opt_h["-G"] << "" }
45
+
46
+ opt.on('-I path') { |v| opt_h["-I"] << quote(v) }
47
+ opt.on('-D flag') { |v| opt_h["-D"] << v }
48
+ opt.on('-W flag') { |v| opt_h["-W"] << v }
49
+ opt.on('-o output') { |v| opt_h["-o"] << quote(v) }
50
+ opt.on('-c file') { |v| opt_h["-c"] << quote(v) }
51
+ opt.on('-f flag') { |v| opt_h["-f"] << v }
52
+ opt.on('-l file') { |v| opt_h["-l"] << quote(v) }
53
+ opt.on('-L path') { |v| opt_h["-L"] << quote(v) }
54
+ opt.on('-x pat', "--x pat") { |v| opt_h["-x"] << v }
55
+ opt.on('-O num') { |v| opt_h["-O"] << v if /[0-9]/ =~ v }
56
+ opt.on('--mkmf-cu-ext ext') { |v| opt_h["--mkmf-cu-ext"] << v }
55
57
 
56
58
  return [opt, opt_h]
57
59
  end
58
60
 
59
61
  def parse_ill_short(argv, opt_h)
60
- ["-shared", "-rdynamic", "-dynamic", "-bundle", "-pipe", "-pg", "-ggdb3"].each{|opt|
62
+ ["-shared", "-rdynamic", "-dynamic", "-bundle", "-pipe", "-pg", "-ggdb3"].each { |opt|
61
63
  if ind = argv.find_index(opt)
62
64
  opt_h[opt] << ""
63
65
  argv.delete_at(ind)
64
66
  end
65
67
  }
66
- ["-arch", "-std", "-stdlib"].each{|opt|
68
+ ["-arch", "-std", "-stdlib"].each { |opt|
67
69
  if ind = argv.find_index(opt)
68
70
  argv[ind] = "-" + opt
69
71
  end
70
72
  }
71
73
  end
72
74
 
73
- def parse_ill_short_with_arg(argv, opt_h)
74
- [/\A(\-stdlib)=(.*)/, /\A(\-std)=(.*)/, /\A(\-Wl),(.*)/].each{|reg|
75
- argv.each{|e|
75
+ def parse_ill_short_with_arg(argv, opt_h)
76
+ [/\A(\-stdlib)=(.*)/, /\A(\-std)=(.*)/, /\A(\-Wl),(.*)/].each { |reg|
77
+ argv.each { |e|
76
78
  if reg =~ e
77
79
  e[0..-1] = "-" + $1 + '=' + $2
78
80
  end
@@ -81,14 +83,14 @@ module MakeMakefileCuda
81
83
  end
82
84
 
83
85
  def compiler_option(opt_h)
84
- ret = ""
85
- ["-f", "-W", "-pipe"].each{|op|
86
- opt_h[op].each{|e|
86
+ ret = +""
87
+ ["-f", "-W", "-pipe"].each { |op|
88
+ opt_h[op].each { |e|
87
89
  ret << " --compiler-options " + "#{op}#{e}"
88
90
  }
89
91
  }
90
- ["-stdlib", "-std"].each{|op|
91
- opt_h[op].each{|e|
92
+ ["-stdlib", "-std"].each { |op|
93
+ opt_h[op].each { |e|
92
94
  ret << " --compiler-options " + "#{op}=#{e}"
93
95
  }
94
96
  }
@@ -96,13 +98,13 @@ module MakeMakefileCuda
96
98
  end
97
99
 
98
100
  def linker_option(opt_h)
99
- ret = " -shared "
100
- ["-dynamic", "-bundle"].each{|op|
101
- opt_h[op].each{|e|
101
+ ret = +" -shared "
102
+ ["-dynamic", "-bundle"].each { |op|
103
+ opt_h[op].each { |e|
102
104
  ret << " --linker-options " + op
103
105
  }
104
106
  }
105
- opt_h["-Wl"].each{|e|
107
+ opt_h["-Wl"].each { |e|
106
108
  ret << " --linker-options " + e
107
109
  }
108
110
  return ret
@@ -110,17 +112,17 @@ module MakeMakefileCuda
110
112
 
111
113
  def compiler_bin(opt_h)
112
114
  if opt_h["--mkmf-cu-ext"][0] == "c"
113
- " --compiler-bindir " + RbConfig::CONFIG["CC"]
115
+ " --compiler-bindir " + ENV.fetch("NVCC_CCBIN", RbConfig::CONFIG["CC"])
114
116
  elsif opt_h["--mkmf-cu-ext"][0] == "cxx"
115
- " --compiler-bindir " + RbConfig::CONFIG["CXX"]
117
+ " --compiler-bindir " + ENV.fetch("NVCC_CCBIN", RbConfig::CONFIG["CXX"])
116
118
  end
117
119
  end
118
120
 
119
121
  def generate_compiling_command_line(opt_h)
120
- s = ""
122
+ s = +""
121
123
  # options nvcc can uderstatnd
122
- ["-std", "-pg", "-g", "-G", "-x", "-I", "-D", "-o", "-c", "-O"].each{|op|
123
- opt_h[op].each{|e|
124
+ ["-std", "-pg", "-g", "-G", "-x", "-I", "-D", "-o", "-c", "-O"].each { |op|
125
+ opt_h[op].each { |e|
124
126
  case op
125
127
  when "-o", "-c", "-x", "-std"
126
128
  s << " #{op} #{e}"
@@ -136,9 +138,9 @@ module MakeMakefileCuda
136
138
  end
137
139
 
138
140
  def generate_linking_command_line(argv, opt_h)
139
- s = ""
140
- ["-L", "-l", "-o", "-c", "-O"].each{|op|
141
- opt_h[op].each{|e|
141
+ s = +""
142
+ ["-L", "-l", "-o", "-c", "-O"].each { |op|
143
+ opt_h[op].each { |e|
142
144
  case op
143
145
  when "-o", "-c"
144
146
  s << " #{op} #{e}"
@@ -153,5 +155,9 @@ module MakeMakefileCuda
153
155
  s << compiler_bin(opt_h)
154
156
  return s
155
157
  end
158
+
159
+ def quote(str)
160
+ "\"#{str}\""
161
+ end
156
162
  end
157
163
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "mkmf"
2
4
 
3
5
  module MakeMakefileCuda
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Gem::Specification.new do |s|
2
4
  s.name = 'mkmf-cu'
3
5
  s.version = '0.1.2'
4
6
  s.date = '2016-03-26'
5
7
  s.summary = "Write Ruby extension in C/C++ with NVIDIA CUDA."
6
8
  s.description =
7
- "Write Ruby extension in C/C++ with NVIDIA CUDA. A simple wrapper command for nvcc and a monkey patch for mkmf."
9
+ "Write Ruby extension in C/C++ with NVIDIA CUDA. A simple wrapper command for nvcc and a monkey patch for mkmf."
8
10
  s.authors = ["Takashi Tamura"]
9
11
  s.email = ''
10
12
  s.files = ["lib/mkmf-cu.rb", "lib/mkmf-cu/opt.rb", "LICENSE", "README.md"]
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "test/unit"
2
4
  require "mkmf-cu/opt"
3
5
  require "mkmf-cu"
@@ -23,7 +25,7 @@ class TestMkmfCuOpt < Test::Unit::TestCase
23
25
  end
24
26
 
25
27
  def test_compiler_option
26
- @opt_h.merge!({"-shared"=>[""], "-pipe"=>[""]})
28
+ @opt_h.merge!({"-shared" => [""], "-pipe" => [""]})
27
29
  assert_equal(" --compiler-options -pipe", compiler_option(@opt_h))
28
30
  end
29
31
 
@@ -37,7 +39,7 @@ class TestMkmfCuOpt < Test::Unit::TestCase
37
39
  end
38
40
 
39
41
  def test_linker_option
40
- @opt_h.merge!({"-Wl"=>["-a", "-b"]})
42
+ @opt_h.merge!({"-Wl" => ["-a", "-b"]})
41
43
  assert_equal(" --linker-options -a --linker-options -b",
42
44
  linker_option(@opt_h))
43
45
  end
@@ -50,7 +52,7 @@ class TestMkmfCuOpt < Test::Unit::TestCase
50
52
  end
51
53
 
52
54
  def test_compiler_bin
53
- h = Hash.new{|h, k| h[k] = [] }.merge({"-shared"=>[""], "-pipe"=>[""], "--mkmf-cu-ext"=>["c"]})
55
+ h = Hash.new { |h, k| h[k] = [] }.merge({"-shared" => [""], "-pipe" => [""], "--mkmf-cu-ext" => ["c"]})
54
56
  assert_equal(" --compiler-bindir " + RbConfig::CONFIG["CC"],
55
57
  compiler_bin(h))
56
58
  end
data/CHANGELOG.md CHANGED
@@ -1,3 +1,72 @@
1
+ # 0.5.0 (2025/11/01)
2
+
3
+ Fixes:
4
+
5
+ * Remove unnecessary numo-narray dependency
6
+ * Fix Errno::EXDEV for Invalid cross-device link
7
+ * Remove clobber from default task
8
+ * Enable parallel build by default
9
+ * Add magic comment for frozen_string_literal
10
+ * Backport: fix na_flatten_dim(): SEGV when flattening an empty narray view
11
+ * Backport: bug in reshape!: stridx in NArrayView should be reconstructed
12
+ * Backport: mask and masked arrays must have the same shape
13
+ * Backport: fix na_parse_range() to suppress warnings
14
+ * Backport: FIXNUM length is based on LONG, not VALUE
15
+ * Backport: fix bug in NArray#sort: qsort() does not support strided loop
16
+ * Backport: fix na_aref_md_protected(): na2->stridx should be zero-inizialized
17
+ * Backport: q[i].idx should be freed when i != ndim-1
18
+ * Backport: fix variable type
19
+ * Backport: add tests for Bit view arrays
20
+ * Backport: fix macro: STORE_BIT STORE_BIT_STEP: requires mask to leave the lowest bit
21
+ * Backport: fix NArray::Bit#any?,all?: empty array should return false
22
+ * Backport: fix NArray::Bit#count_true/false: empty array should return zero
23
+ * Backport: bug in NArray::Bit; fix bit operation in tmpl_bit/{store_bit,unary,binary}.c
24
+ * Fix typo
25
+ * Backport 135: Make all empty arrays equal
26
+ * Backport: minor fixes in na_get_result_dimension(), check_index_count()
27
+ * Backport 116: new method: NArray#fortran_contiguous?
28
+ * Backport 186: Fix NMath.sinc(0)
29
+ * Backport 188: Fix a typo
30
+ * Fix FrozenError
31
+ * Use add_dependency instead of add_runtime_dependency
32
+ * Remove unused variable
33
+ * Remove unused .travis.yml
34
+ * Remove unnecessary require to fix warnings of "loading in progress, circular require considered harmful"
35
+ * Remove unused variable
36
+ * Fix numo-narray library path
37
+ * Add extconf_compile_commands_json as development dependency
38
+ * Add extconf_compile_commands_json for clangd LSP
39
+ * Remove unnecessary loop if disable assert()
40
+ * Fix cross-platform negative value conversion for unsigned integer types
41
+ * Revert "Fix cross-platform negative value conversion for unsigned integer types"
42
+ * Remove unnecessary require
43
+ * Add Ractor support
44
+ * Update minimum CUDA version
45
+ * Update minimum ruby supported version
46
+ * Use rake-compiler
47
+ * Use absolute file path
48
+ * Allow convert nil to NaN in Numo::DFloat.cast
49
+ * Fix cross-platform negative value conversion for unsigned integer types
50
+ * Fix old-style function definitions
51
+ * Fix old-style function definition in qsort.c
52
+ * Add required_ruby_version in gemspec
53
+ * Use released version of power_assert gem
54
+ * Fix LoadError
55
+ * Quoted file path
56
+ * Add CUDA compute capability (#151)
57
+ * extconf.rb: Use File::PATH_SEPARATOR
58
+ * Fix build error with cuDNN features
59
+ * Link c++ library
60
+ * Fix link error with "multiple definition of `cumo_cuda_eCudnnError'"
61
+ * Fix failure with Ruby 3.3
62
+ * Fix keyword argument expansion
63
+ * Remove compute_35 because it was removed at CUDA 12
64
+ * Use NVCC_CCBIN env var to detect compiler for cuda code on GCC 15 environment
65
+ * Fix build error with GCC 15
66
+ * Use rb_cObject instead of rb_cData
67
+ * Remove unnecessary dependency
68
+ * at() method was rewritten in C.
69
+
1
70
  # 0.4.3 (2019-06-11)
2
71
 
3
72
  Fixes:
data/Gemfile CHANGED
@@ -1,8 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "https://rubygems.org"
2
4
 
3
5
  gemspec
4
6
 
7
+ gem 'extconf_compile_commands_json'
8
+ gem 'rake-compiler'
5
9
  gem 'test-unit'
6
10
  gem 'yard'
7
11
  gem 'pry-byebug'
8
- gem 'power_assert', git: 'https://github.com/k-tsj/power_assert'
12
+ gem 'power_assert'
13
+ gem 'rubocop'
data/README.md CHANGED
@@ -6,9 +6,9 @@ Cumo (pronounced "koomo") is a CUDA-aware, GPU-optimized numerical library that
6
6
 
7
7
  ## Requirements
8
8
 
9
- * Ruby 2.5 or later
9
+ * Ruby 3.0 or later
10
10
  * NVIDIA GPU Compute Capability 3.5 (Kepler) or later
11
- * CUDA 9.0 or later
11
+ * CUDA 11.0 or later
12
12
 
13
13
  ## Preparation
14
14
 
@@ -195,14 +195,6 @@ ln -sf "$HOME/opt/ccache/bin/ccache" "$HOME/opt/ccache/bin/g++"
195
195
  ln -sf "$HOME/opt/ccache/bin/ccache" "$HOME/opt/ccache/bin/nvcc"
196
196
  ```
197
197
 
198
- ### Build in parallel
199
-
200
- Set `MAKEFLAGS` to specify `make` command options. You can build in parallel as:
201
-
202
- ```
203
- bundle exec env MAKEFLAG=-j8 rake compile
204
- ```
205
-
206
198
  ### Specify nvcc --generate-code options
207
199
 
208
200
  ```
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
  require "rake/testtask"
3
5
 
@@ -7,22 +9,17 @@ Rake::TestTask.new(:test) do |t|
7
9
  t.test_files = FileList["test/**/*_test.rb"]
8
10
  end
9
11
 
10
- task :compile do
11
- sh 'cd ext/cumo && ruby extconf.rb && make && make build-ctest'
12
- end
12
+ require "rake/extensiontask"
13
+ Rake::ExtensionTask.new("cumo")
13
14
 
14
15
  task :ctest do
15
- sh 'cd ext/cumo && ruby extconf.rb && make run-ctest'
16
- end
17
-
18
- task :clean do
19
- sh 'cd ext/cumo && make clean'
16
+ sh 'cd ext/cumo && ruby extconf.rb && make && make build-ctest && make run-ctest'
20
17
  end
21
18
 
22
19
  task :docs do
23
20
  dir = "ext/cumo"
24
- srcs = %w[array.c data.c index.c math.c narray.c rand.c struct.c].map{|s| File.join(dir, "narray", s)}
25
- srcs += %w[cublas.c driver.c nvrtc.c runtime.c memory_pool.cpp].map{|s| File.join(dir, "cuda", s) }
21
+ srcs = %w[array.c data.c index.c math.c narray.c rand.c struct.c].map { |s| File.join(dir, "narray", s) }
22
+ srcs += %w[cublas.c driver.c nvrtc.c runtime.c memory_pool.cpp].map { |s| File.join(dir, "cuda", s) }
26
23
  srcs << File.join(dir, "narray", "types/*.c")
27
24
  srcs << "lib/cumo/narray/extra.rb"
28
25
  sh "cd ext/cumo; ruby extconf.rb; make src"
@@ -34,7 +31,7 @@ task :gdb do
34
31
  sh "gdb -x run.gdb --args ruby -I. ./test.rb"
35
32
  end
36
33
 
37
- task :default => [:clobber, :compile, :test]
34
+ task :default => [:compile, :test]
38
35
 
39
36
  desc 'Open an irb session preloaded with the gem library'
40
37
  task :console do
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'benchmark'
2
4
  require 'cumo/narray'
3
5
 
4
6
  num_iteration = 1000
5
7
 
6
8
  Benchmark.bm 20 do |r|
7
- x = Cumo::SFloat.ones([1000,784])
8
- y = Cumo::SFloat.ones([1000,784])
9
+ x = Cumo::SFloat.ones([1000, 784])
10
+ y = Cumo::SFloat.ones([1000, 784])
9
11
  r.report "x.inplace + y" do
10
12
  num_iteration.times do
11
13
  x.inplace + y
@@ -13,8 +15,8 @@ Benchmark.bm 20 do |r|
13
15
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
14
16
  end
15
17
 
16
- x = Cumo::SFloat.ones([1000,784])
17
- y = Cumo::SFloat.ones([1000,784])
18
+ x = Cumo::SFloat.ones([1000, 784])
19
+ y = Cumo::SFloat.ones([1000, 784])
18
20
  r.report "x + y" do
19
21
  num_iteration.times do
20
22
  (x + y).free
@@ -22,8 +24,8 @@ Benchmark.bm 20 do |r|
22
24
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
23
25
  end
24
26
 
25
- x = Cumo::SFloat.ones([1000,784])
26
- y = Cumo::SFloat.ones([1000,784])
27
+ x = Cumo::SFloat.ones([1000, 784])
28
+ y = Cumo::SFloat.ones([1000, 784])
27
29
  r.report "x.inplace + 1.0" do
28
30
  num_iteration.times do
29
31
  x.inplace + 1.0
@@ -31,8 +33,8 @@ Benchmark.bm 20 do |r|
31
33
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
32
34
  end
33
35
 
34
- x = Cumo::SFloat.ones([1000,784])
35
- z = Cumo::SFloat.ones([1000,1])
36
+ x = Cumo::SFloat.ones([1000, 784])
37
+ z = Cumo::SFloat.ones([1000, 1])
36
38
  r.report "x.inplace + z" do
37
39
  num_iteration.times do
38
40
  x.inplace + z
@@ -40,8 +42,8 @@ Benchmark.bm 20 do |r|
40
42
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
41
43
  end
42
44
 
43
- x = Cumo::SFloat.ones([1000,784])
44
- y = Cumo::SFloat.ones([1000,784])
45
+ x = Cumo::SFloat.ones([1000, 784])
46
+ y = Cumo::SFloat.ones([1000, 784])
45
47
  r.report "x.inplace - y" do
46
48
  num_iteration.times do
47
49
  x.inplace - y
@@ -49,8 +51,8 @@ Benchmark.bm 20 do |r|
49
51
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
50
52
  end
51
53
 
52
- x = Cumo::SFloat.ones([1000,784])
53
- y = Cumo::SFloat.ones([1000,784])
54
+ x = Cumo::SFloat.ones([1000, 784])
55
+ y = Cumo::SFloat.ones([1000, 784])
54
56
  r.report "x.inplace - 1.0" do
55
57
  num_iteration.times do
56
58
  x.inplace - 1.0
@@ -58,8 +60,8 @@ Benchmark.bm 20 do |r|
58
60
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
59
61
  end
60
62
 
61
- x = Cumo::SFloat.ones([1000,784])
62
- z = Cumo::SFloat.ones([1000,1])
63
+ x = Cumo::SFloat.ones([1000, 784])
64
+ z = Cumo::SFloat.ones([1000, 1])
63
65
  r.report "x.inplace - z" do
64
66
  num_iteration.times do
65
67
  x.inplace - z
@@ -67,8 +69,8 @@ Benchmark.bm 20 do |r|
67
69
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
68
70
  end
69
71
 
70
- x = Cumo::SFloat.ones([1000,784])
71
- y = Cumo::SFloat.ones([1000,784])
72
+ x = Cumo::SFloat.ones([1000, 784])
73
+ y = Cumo::SFloat.ones([1000, 784])
72
74
  r.report "x.inplace * y" do
73
75
  num_iteration.times do
74
76
  x.inplace * y
@@ -76,8 +78,8 @@ Benchmark.bm 20 do |r|
76
78
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
77
79
  end
78
80
 
79
- x = Cumo::SFloat.ones([1000,784])
80
- y = Cumo::SFloat.ones([1000,784])
81
+ x = Cumo::SFloat.ones([1000, 784])
82
+ y = Cumo::SFloat.ones([1000, 784])
81
83
  r.report "x.inplace * 1.0" do
82
84
  num_iteration.times do
83
85
  x.inplace * 1.0
@@ -85,8 +87,8 @@ Benchmark.bm 20 do |r|
85
87
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
86
88
  end
87
89
 
88
- x = Cumo::SFloat.ones([1000,784])
89
- z = Cumo::SFloat.ones([1000,1])
90
+ x = Cumo::SFloat.ones([1000, 784])
91
+ z = Cumo::SFloat.ones([1000, 1])
90
92
  r.report "x.inplace * z" do
91
93
  num_iteration.times do
92
94
  x.inplace * z
@@ -94,8 +96,8 @@ Benchmark.bm 20 do |r|
94
96
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
95
97
  end
96
98
 
97
- x = Cumo::SFloat.ones([1000,784])
98
- y = Cumo::SFloat.ones([1000,784])
99
+ x = Cumo::SFloat.ones([1000, 784])
100
+ y = Cumo::SFloat.ones([1000, 784])
99
101
  r.report "x.inplace / y" do
100
102
  num_iteration.times do
101
103
  x.inplace / y
@@ -103,8 +105,8 @@ Benchmark.bm 20 do |r|
103
105
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
104
106
  end
105
107
 
106
- x = Cumo::SFloat.ones([1000,784])
107
- y = Cumo::SFloat.ones([1000,784])
108
+ x = Cumo::SFloat.ones([1000, 784])
109
+ y = Cumo::SFloat.ones([1000, 784])
108
110
  r.report "x.inplace / 1.0" do
109
111
  num_iteration.times do
110
112
  x.inplace / 1.0
@@ -112,8 +114,8 @@ Benchmark.bm 20 do |r|
112
114
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
113
115
  end
114
116
 
115
- x = Cumo::SFloat.ones([1000,784])
116
- z = Cumo::SFloat.ones([1000,1])
117
+ x = Cumo::SFloat.ones([1000, 784])
118
+ z = Cumo::SFloat.ones([1000, 1])
117
119
  r.report "x.inplace / z" do
118
120
  num_iteration.times do
119
121
  x.inplace / z