cumo 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +15 -0
  4. data/.rubocop_todo.yml +1252 -0
  5. data/3rd_party/mkmf-cu/Gemfile +2 -0
  6. data/3rd_party/mkmf-cu/Rakefile +2 -1
  7. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
  8. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
  9. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
  11. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
  12. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
  13. data/CHANGELOG.md +85 -0
  14. data/Dockerfile +34 -0
  15. data/Gemfile +6 -1
  16. data/README.md +2 -10
  17. data/Rakefile +8 -11
  18. data/bench/broadcast_fp32.rb +28 -26
  19. data/bench/cumo_bench.rb +18 -16
  20. data/bench/numo_bench.rb +18 -16
  21. data/bench/reduction_fp32.rb +14 -12
  22. data/bin/console +1 -0
  23. data/cumo.gemspec +6 -9
  24. data/docker-build.sh +4 -0
  25. data/docker-launch.sh +4 -0
  26. data/docs/src-tree.md +1 -1
  27. data/ext/cumo/cuda/cudnn.c +2 -2
  28. data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
  29. data/ext/cumo/cuda/driver.c +8 -0
  30. data/ext/cumo/cumo.c +7 -3
  31. data/ext/cumo/depend.erb +15 -13
  32. data/ext/cumo/extconf.rb +33 -47
  33. data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
  34. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
  35. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
  36. data/ext/cumo/include/cumo/intern.h +1 -0
  37. data/ext/cumo/include/cumo/narray.h +13 -1
  38. data/ext/cumo/include/cumo/template.h +2 -4
  39. data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
  40. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
  41. data/ext/cumo/include/cumo/types/float_macro.h +2 -2
  42. data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
  43. data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
  44. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
  45. data/ext/cumo/include/cumo.h +2 -2
  46. data/ext/cumo/narray/array.c +8 -6
  47. data/ext/cumo/narray/data.c +48 -28
  48. data/ext/cumo/narray/gen/cogen.rb +8 -7
  49. data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
  50. data/ext/cumo/narray/gen/def/bit.rb +3 -1
  51. data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
  52. data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
  53. data/ext/cumo/narray/gen/def/int16.rb +2 -0
  54. data/ext/cumo/narray/gen/def/int32.rb +2 -0
  55. data/ext/cumo/narray/gen/def/int64.rb +2 -0
  56. data/ext/cumo/narray/gen/def/int8.rb +2 -0
  57. data/ext/cumo/narray/gen/def/robject.rb +2 -0
  58. data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
  59. data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
  60. data/ext/cumo/narray/gen/def/uint16.rb +2 -0
  61. data/ext/cumo/narray/gen/def/uint32.rb +2 -0
  62. data/ext/cumo/narray/gen/def/uint64.rb +2 -0
  63. data/ext/cumo/narray/gen/def/uint8.rb +2 -0
  64. data/ext/cumo/narray/gen/erbln.rb +9 -7
  65. data/ext/cumo/narray/gen/erbpp2.rb +26 -24
  66. data/ext/cumo/narray/gen/narray_def.rb +13 -11
  67. data/ext/cumo/narray/gen/spec.rb +58 -55
  68. data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
  69. data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
  70. data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
  71. data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
  72. data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
  73. data/ext/cumo/narray/gen/tmpl/at.c +34 -0
  74. data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
  75. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
  76. data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
  77. data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
  78. data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
  79. data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
  80. data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
  81. data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
  82. data/ext/cumo/narray/gen/tmpl/each.c +4 -2
  83. data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
  84. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
  85. data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
  86. data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
  87. data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
  88. data/ext/cumo/narray/gen/tmpl/median.c +2 -2
  89. data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
  91. data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
  92. data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
  93. data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
  94. data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
  95. data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
  96. data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
  97. data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
  98. data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
  99. data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
  100. data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
  101. data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
  102. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
  103. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
  104. data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
  105. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
  106. data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
  107. data/ext/cumo/narray/index.c +244 -40
  108. data/ext/cumo/narray/index_kernel.cu +84 -0
  109. data/ext/cumo/narray/narray.c +57 -19
  110. data/ext/cumo/narray/ndloop.c +1 -1
  111. data/ext/cumo/narray/struct.c +1 -1
  112. data/lib/cumo/cuda/compile_error.rb +1 -1
  113. data/lib/cumo/cuda/compiler.rb +23 -22
  114. data/lib/cumo/cuda/cudnn.rb +1 -1
  115. data/lib/cumo/cuda/device.rb +1 -1
  116. data/lib/cumo/cuda/link_state.rb +2 -2
  117. data/lib/cumo/cuda/module.rb +1 -2
  118. data/lib/cumo/cuda/nvrtc_program.rb +3 -2
  119. data/lib/cumo/cuda.rb +2 -0
  120. data/lib/cumo/linalg.rb +2 -0
  121. data/lib/cumo/narray/extra.rb +297 -341
  122. data/lib/cumo/narray.rb +2 -0
  123. data/lib/cumo.rb +3 -1
  124. data/test/bit_test.rb +157 -0
  125. data/test/cuda/compiler_test.rb +69 -0
  126. data/test/cuda/device_test.rb +31 -0
  127. data/test/cuda/memory_pool_test.rb +45 -0
  128. data/test/cuda/nvrtc_test.rb +51 -0
  129. data/test/cuda/runtime_test.rb +28 -0
  130. data/test/cudnn_test.rb +498 -0
  131. data/test/cumo_test.rb +27 -0
  132. data/test/narray_test.rb +745 -0
  133. data/test/ractor_test.rb +52 -0
  134. data/test/test_helper.rb +31 -0
  135. metadata +34 -54
  136. data/.travis.yml +0 -5
  137. data/numo-narray-version +0 -1
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "http://rubygems.org"
2
4
 
3
5
  gemspec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bundler/gem_tasks'
2
4
  require 'rake/testtask'
3
5
  require 'rake/clean'
@@ -8,4 +10,3 @@ end
8
10
  desc "Run tests"
9
11
 
10
12
  task :default => [:test]
11
-
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require_relative "../lib/mkmf-cu/cli"
3
5
 
4
6
  MakeMakefileCuda::CLI.new(ARGV).run
@@ -1,3 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
1
4
  require "open3"
2
5
  require_relative "nvcc"
3
6
 
@@ -6,7 +9,7 @@ module MakeMakefileCuda
6
9
  attr_reader :argv
7
10
 
8
11
  def initialize(argv)
9
- @argv = argv.map{|e| e.dup }
12
+ @argv = argv.map { |e| e.dup }
10
13
  end
11
14
 
12
15
  def run
@@ -32,17 +35,41 @@ module MakeMakefileCuda
32
35
  # TODO(sonots): Make it possible to configure "nvcc" and additional arguments
33
36
  def nvcc_command
34
37
  s = MakeMakefileCuda::Nvcc.generate(argv)
35
- cmd = "nvcc " << s
38
+ cmd = "nvcc #{s}"
36
39
  if ENV['CUMO_NVCC_GENERATE_CODE']
37
40
  cmd << " --generate-code=#{ENV['CUMO_NVCC_GENERATE_CODE']}"
38
41
  elsif ENV['DEBUG']
39
42
  cmd << " -arch=sm_35"
40
43
  else
41
- cmd << " --generate-code=arch=compute_35,code=sm_35"
42
- cmd << " --generate-code=arch=compute_50,code=sm_50"
43
- cmd << " --generate-code=arch=compute_60,code=sm_60"
44
- cmd << " --generate-code=arch=compute_70,code=sm_70"
45
- cmd << " --generate-code=arch=compute_70,code=compute_70"
44
+ # Ref. https://en.wikipedia.org/wiki/CUDA
45
+ if cuda_version >= Gem::Version.new("13.0")
46
+ # CUDA 13.0
47
+ capability = [75, 87, 89, 90, 121]
48
+ elsif cuda_version >= Gem::Version.new("12.9")
49
+ # CUDA 12.9
50
+ capability = [50, 60, 70, 75, 87, 89, 90, 121]
51
+ elsif cuda_version >= Gem::Version.new("12.8")
52
+ # CUDA 12.8
53
+ capability = [50, 60, 70, 75, 87, 89, 90, 120]
54
+ elsif cuda_version >= Gem::Version.new("12.0")
55
+ # CUDA 12.0 – 12.6
56
+ capability = [50, 60, 70, 75, 87, 89, 90]
57
+ elsif cuda_version >= Gem::Version.new("11.8")
58
+ # CUDA 11.8
59
+ capability = [35, 50, 60, 70, 75, 87, 89, 90]
60
+ else
61
+ # CUDA 11.0
62
+ capability = [35, 50, 60, 70, 75, 80]
63
+ end
64
+
65
+ if find_executable('nvidia-smi')
66
+ arch_version = `nvidia-smi --query-gpu=compute_cap --format=csv,noheader`.strip
67
+ capability << (arch_version.to_f * 10).to_i unless arch_version.empty?
68
+ end
69
+
70
+ capability.each do |arch|
71
+ cmd << " --generate-code=arch=compute_#{arch},code=sm_#{arch}"
72
+ end
46
73
  end
47
74
  cmd
48
75
  end
@@ -88,5 +115,14 @@ module MakeMakefileCuda
88
115
  raise "#{color_code} is not supported" unless COLOR_CODES[code]
89
116
  "\e[#{COLOR_CODES[code]}m#{str}\e[0m"
90
117
  end
118
+
119
+ def cuda_version
120
+ @cuda_version ||= begin
121
+ output = `nvcc --version`
122
+ if output =~ /Cuda compilation tools, release ([^,]*),/
123
+ Gem::Version.new($1)
124
+ end
125
+ end
126
+ end
91
127
  end
92
128
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "optparse"
2
4
  require "rbconfig"
3
5
 
@@ -29,50 +31,50 @@ module MakeMakefileCuda
29
31
 
30
32
  def build_optparser
31
33
  opt = OptionParser.new
32
- opt_h = Hash.new{|h, k| h[k] = [] }
33
-
34
- opt.on("--arch arg") {|v| opt_h["-arch"] << v }
35
- opt.on("--std arg") {|v| opt_h["-std"] << v }
36
- opt.on("--stdlib arg") {|v| opt_h["-stdlib"] << v }
37
-
38
- opt.on("--Wl arg") {|v| opt_h["-Wl"] << v }
39
-
40
- opt.on('--profile') {|v| opt_h["-pg"] << "" }
41
- opt.on('-g') {|v| opt_h["-g"] << "" }
42
- opt.on('-G', "--device-debug") {|v| opt_h["-G"] << "" }
43
-
44
- opt.on('-I path') {|v| opt_h["-I"] << v }
45
- opt.on('-D flag') {|v| opt_h["-D"] << v }
46
- opt.on('-W flag') {|v| opt_h["-W"] << v }
47
- opt.on('-o output') {|v| opt_h["-o"] << v }
48
- opt.on('-c file') {|v| opt_h["-c"] << v }
49
- opt.on('-f flag') {|v| opt_h["-f"] << v }
50
- opt.on('-l file') {|v| opt_h["-l"] << v }
51
- opt.on('-L path') {|v| opt_h["-L"] << v }
52
- opt.on('-x pat', "--x pat") {|v| opt_h["-x"] << v }
53
- opt.on('-O num'){|v| opt_h["-O"] << v if /[0-9]/ =~ v }
54
- opt.on('--mkmf-cu-ext ext'){|v| opt_h["--mkmf-cu-ext"] << v}
34
+ opt_h = Hash.new { |h, k| h[k] = [] }
35
+
36
+ opt.on("--arch arg") { |v| opt_h["-arch"] << v }
37
+ opt.on("--std arg") { |v| opt_h["-std"] << v }
38
+ opt.on("--stdlib arg") { |v| opt_h["-stdlib"] << v }
39
+
40
+ opt.on("--Wl arg") { |v| opt_h["-Wl"] << v }
41
+
42
+ opt.on('--profile') { |v| opt_h["-pg"] << "" }
43
+ opt.on('-g') { |v| opt_h["-g"] << "" }
44
+ opt.on('-G', "--device-debug") { |v| opt_h["-G"] << "" }
45
+
46
+ opt.on('-I path') { |v| opt_h["-I"] << quote(v) }
47
+ opt.on('-D flag') { |v| opt_h["-D"] << v }
48
+ opt.on('-W flag') { |v| opt_h["-W"] << v }
49
+ opt.on('-o output') { |v| opt_h["-o"] << quote(v) }
50
+ opt.on('-c file') { |v| opt_h["-c"] << quote(v) }
51
+ opt.on('-f flag') { |v| opt_h["-f"] << v }
52
+ opt.on('-l file') { |v| opt_h["-l"] << quote(v) }
53
+ opt.on('-L path') { |v| opt_h["-L"] << quote(v) }
54
+ opt.on('-x pat', "--x pat") { |v| opt_h["-x"] << v }
55
+ opt.on('-O num') { |v| opt_h["-O"] << v if /[0-9]/ =~ v }
56
+ opt.on('--mkmf-cu-ext ext') { |v| opt_h["--mkmf-cu-ext"] << v }
55
57
 
56
58
  return [opt, opt_h]
57
59
  end
58
60
 
59
61
  def parse_ill_short(argv, opt_h)
60
- ["-shared", "-rdynamic", "-dynamic", "-bundle", "-pipe", "-pg", "-ggdb3"].each{|opt|
62
+ ["-shared", "-rdynamic", "-dynamic", "-bundle", "-pipe", "-pg", "-ggdb3"].each { |opt|
61
63
  if ind = argv.find_index(opt)
62
64
  opt_h[opt] << ""
63
65
  argv.delete_at(ind)
64
66
  end
65
67
  }
66
- ["-arch", "-std", "-stdlib"].each{|opt|
68
+ ["-arch", "-std", "-stdlib"].each { |opt|
67
69
  if ind = argv.find_index(opt)
68
70
  argv[ind] = "-" + opt
69
71
  end
70
72
  }
71
73
  end
72
74
 
73
- def parse_ill_short_with_arg(argv, opt_h)
74
- [/\A(\-stdlib)=(.*)/, /\A(\-std)=(.*)/, /\A(\-Wl),(.*)/].each{|reg|
75
- argv.each{|e|
75
+ def parse_ill_short_with_arg(argv, opt_h)
76
+ [/\A(\-stdlib)=(.*)/, /\A(\-std)=(.*)/, /\A(\-Wl),(.*)/].each { |reg|
77
+ argv.each { |e|
76
78
  if reg =~ e
77
79
  e[0..-1] = "-" + $1 + '=' + $2
78
80
  end
@@ -81,14 +83,14 @@ module MakeMakefileCuda
81
83
  end
82
84
 
83
85
  def compiler_option(opt_h)
84
- ret = ""
85
- ["-f", "-W", "-pipe"].each{|op|
86
- opt_h[op].each{|e|
86
+ ret = +""
87
+ ["-f", "-W", "-pipe"].each { |op|
88
+ opt_h[op].each { |e|
87
89
  ret << " --compiler-options " + "#{op}#{e}"
88
90
  }
89
91
  }
90
- ["-stdlib", "-std"].each{|op|
91
- opt_h[op].each{|e|
92
+ ["-stdlib", "-std"].each { |op|
93
+ opt_h[op].each { |e|
92
94
  ret << " --compiler-options " + "#{op}=#{e}"
93
95
  }
94
96
  }
@@ -96,13 +98,13 @@ module MakeMakefileCuda
96
98
  end
97
99
 
98
100
  def linker_option(opt_h)
99
- ret = " -shared "
100
- ["-dynamic", "-bundle"].each{|op|
101
- opt_h[op].each{|e|
101
+ ret = +" -shared "
102
+ ["-dynamic", "-bundle"].each { |op|
103
+ opt_h[op].each { |e|
102
104
  ret << " --linker-options " + op
103
105
  }
104
106
  }
105
- opt_h["-Wl"].each{|e|
107
+ opt_h["-Wl"].each { |e|
106
108
  ret << " --linker-options " + e
107
109
  }
108
110
  return ret
@@ -110,17 +112,17 @@ module MakeMakefileCuda
110
112
 
111
113
  def compiler_bin(opt_h)
112
114
  if opt_h["--mkmf-cu-ext"][0] == "c"
113
- " --compiler-bindir " + RbConfig::CONFIG["CC"]
115
+ " --compiler-bindir " + ENV.fetch("NVCC_CCBIN", RbConfig::CONFIG["CC"])
114
116
  elsif opt_h["--mkmf-cu-ext"][0] == "cxx"
115
- " --compiler-bindir " + RbConfig::CONFIG["CXX"]
117
+ " --compiler-bindir " + ENV.fetch("NVCC_CCBIN", RbConfig::CONFIG["CXX"])
116
118
  end
117
119
  end
118
120
 
119
121
  def generate_compiling_command_line(opt_h)
120
- s = ""
122
+ s = +""
121
123
  # options nvcc can understand
122
- ["-std", "-pg", "-g", "-G", "-x", "-I", "-D", "-o", "-c", "-O"].each{|op|
123
- opt_h[op].each{|e|
124
+ ["-std", "-pg", "-g", "-G", "-x", "-I", "-D", "-o", "-c", "-O"].each { |op|
125
+ opt_h[op].each { |e|
124
126
  case op
125
127
  when "-o", "-c", "-x", "-std"
126
128
  s << " #{op} #{e}"
@@ -136,9 +138,9 @@ module MakeMakefileCuda
136
138
  end
137
139
 
138
140
  def generate_linking_command_line(argv, opt_h)
139
- s = ""
140
- ["-L", "-l", "-o", "-c", "-O"].each{|op|
141
- opt_h[op].each{|e|
141
+ s = +""
142
+ ["-L", "-l", "-o", "-c", "-O"].each { |op|
143
+ opt_h[op].each { |e|
142
144
  case op
143
145
  when "-o", "-c"
144
146
  s << " #{op} #{e}"
@@ -153,5 +155,9 @@ module MakeMakefileCuda
153
155
  s << compiler_bin(opt_h)
154
156
  return s
155
157
  end
158
+
159
+ def quote(str)
160
+ "\"#{str}\""
161
+ end
156
162
  end
157
163
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "mkmf"
2
4
 
3
5
  module MakeMakefileCuda
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Gem::Specification.new do |s|
2
4
  s.name = 'mkmf-cu'
3
5
  s.version = '0.1.2'
4
6
  s.date = '2016-03-26'
5
7
  s.summary = "Write Ruby extension in C/C++ with NVIDIA CUDA."
6
8
  s.description =
7
- "Write Ruby extension in C/C++ with NVIDIA CUDA. A simple wrapper command for nvcc and a monkey patch for mkmf."
9
+ "Write Ruby extension in C/C++ with NVIDIA CUDA. A simple wrapper command for nvcc and a monkey patch for mkmf."
8
10
  s.authors = ["Takashi Tamura"]
9
11
  s.email = ''
10
12
  s.files = ["lib/mkmf-cu.rb", "lib/mkmf-cu/opt.rb", "LICENSE", "README.md"]
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "test/unit"
2
4
  require "mkmf-cu/opt"
3
5
  require "mkmf-cu"
@@ -23,7 +25,7 @@ class TestMkmfCuOpt < Test::Unit::TestCase
23
25
  end
24
26
 
25
27
  def test_compiler_option
26
- @opt_h.merge!({"-shared"=>[""], "-pipe"=>[""]})
28
+ @opt_h.merge!({"-shared" => [""], "-pipe" => [""]})
27
29
  assert_equal(" --compiler-options -pipe", compiler_option(@opt_h))
28
30
  end
29
31
 
@@ -37,7 +39,7 @@ class TestMkmfCuOpt < Test::Unit::TestCase
37
39
  end
38
40
 
39
41
  def test_linker_option
40
- @opt_h.merge!({"-Wl"=>["-a", "-b"]})
42
+ @opt_h.merge!({"-Wl" => ["-a", "-b"]})
41
43
  assert_equal(" --linker-options -a --linker-options -b",
42
44
  linker_option(@opt_h))
43
45
  end
@@ -50,7 +52,7 @@ class TestMkmfCuOpt < Test::Unit::TestCase
50
52
  end
51
53
 
52
54
  def test_compiler_bin
53
- h = Hash.new{|h, k| h[k] = [] }.merge({"-shared"=>[""], "-pipe"=>[""], "--mkmf-cu-ext"=>["c"]})
55
+ h = Hash.new { |h, k| h[k] = [] }.merge({"-shared" => [""], "-pipe" => [""], "--mkmf-cu-ext" => ["c"]})
54
56
  assert_equal(" --compiler-bindir " + RbConfig::CONFIG["CC"],
55
57
  compiler_bin(h))
56
58
  end
data/CHANGELOG.md CHANGED
@@ -1,3 +1,88 @@
1
+ # 0.5.1 (2025/12/30)
2
+
3
+ Enhancements:
4
+
5
+ * Add CUDA 13 support (#153)
6
+ * Add cuDNN 9 support
7
+
8
+ Fixes:
9
+
10
+ * Backport: fix example code
11
+ * Backport: fix example code
12
+ * Backport: fix doc
13
+ * Backport: fix documents
14
+ * Backport: fix document of logseq
15
+ * Backport: trim comment out
16
+
17
+ # 0.5.0 (2025/11/01)
18
+
19
+ Fixes:
20
+
21
+ * Remove unnecessary numo-narray dependency
22
+ * Fix Errno::EXDEV for Invalid cross-device link
23
+ * Remove clobber from default task
24
+ * Enable parallel build by default
25
+ * Add magic comment for frozen_string_literal
26
+ * Backport: fix na_flatten_dim(): SEGV when flattening an empty narray view
27
+ * Backport: bug in reshape!: stridx in NArrayView should be reconstructed
28
+ * Backport: mask and masked arrays must have the same shape
29
+ * Backport: fix na_parse_range() to suppress warnings
30
+ * Backport: FIXNUM length is based on LONG, not VALUE
31
+ * Backport: fix bug in NArray#sort: qsort() does not support strided loop
32
+ * Backport: fix na_aref_md_protected(): na2->stridx should be zero-initialized
33
+ * Backport: q[i].idx should be freed when i != ndim-1
34
+ * Backport: fix variable type
35
+ * Backport: add tests for Bit view arrays
36
+ * Backport: fix macro: STORE_BIT STORE_BIT_STEP: requires mask to leave the lowest bit
37
+ * Backport: fix NArray::Bit#any?,all?: empty array should return false
38
+ * Backport: fix NArray::Bit#count_true/false: empty array should return zero
39
+ * Backport: bug in NArray::Bit; fix bit operation in tmpl_bit/{store_bit,unary,binary}.c
40
+ * Fix typo
41
+ * Backport 135: Make all empty arrays equal
42
+ * Backport: minor fixes in na_get_result_dimension(), check_index_count()
43
+ * Backport 116: new method: NArray#fortran_contiguous?
44
+ * Backport 186: Fix NMath.sinc(0)
45
+ * Backport 188: Fix a typo
46
+ * Fix FrozenError
47
+ * Use add_dependency instead of add_runtime_dependency
48
+ * Remove unused variable
49
+ * Remove unused .travis.yml
50
+ * Remove unnecessary require to fix warnings of "loading in progress, circular require considered harmful"
51
+ * Remove unused variable
52
+ * Fix numo-narray library path
53
+ * Add extconf_compile_commands_json as development dependency
54
+ * Add extconf_compile_commands_json for clangd LSP
55
+ * Remove unnecessary loop when assert() is disabled
56
+ * Fix cross-platform negative value conversion for unsigned integer types
57
+ * Revert "Fix cross-platform negative value conversion for unsigned integer types"
58
+ * Remove unnecessary require
59
+ * Add Ractor support
60
+ * Update minimum CUDA version
61
+ * Update minimum ruby supported version
62
+ * Use rake-compiler
63
+ * Use absolute file path
64
+ * Allow converting nil to NaN in Numo::DFloat.cast
65
+ * Fix cross-platform negative value conversion for unsigned integer types
66
+ * Fix old-style function definitions
67
+ * Fix old-style function definition in qsort.c
68
+ * Add required_ruby_version in gemspec
69
+ * Use released version of power_assert gem
70
+ * Fix LoadError
71
+ * Quoted file path
72
+ * Add CUDA compute capability (#151)
73
+ * extconf.rb: Use File::PATH_SEPARATOR
74
+ * Fix build error with cuDNN features
75
+ * Link c++ library
76
+ * Fix link error with "multiple definition of `cumo_cuda_eCudnnError'"
77
+ * Fix failure with Ruby 3.3
78
+ * Fix keyword argument expansion
79
+ * Remove compute_35 because it was removed at CUDA 12
80
+ * Use NVCC_CCBIN env var to detect compiler for cuda code on GCC 15 environment
81
+ * Fix build error with GCC 15
82
+ * Use rb_cObject instead of rb_cData
83
+ * Remove unnecessary dependency
84
+ * at() method was rewritten in C.
85
+
1
86
  # 0.4.3 (2019-06-11)
2
87
 
3
88
  Fixes:
data/Dockerfile ADDED
@@ -0,0 +1,34 @@
1
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
2
+
3
+ ARG RUBY_VERSION=3.4.7
4
+
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV RBENV_ROOT="/root/.rbenv"
7
+ ENV PATH="${RBENV_ROOT}/bin:${RBENV_ROOT}/shims:${PATH}"
8
+
9
+ ENV CUDA_PATH=/usr/local/cuda
10
+ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
11
+ ENV CPATH=/usr/local/cuda/include:${CPATH}
12
+ ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
13
+
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ git \
16
+ build-essential \
17
+ wget \
18
+ curl \
19
+ vim \
20
+ ca-certificates \
21
+ libssl-dev \
22
+ libreadline-dev \
23
+ zlib1g-dev \
24
+ libyaml-dev \
25
+ libffi-dev \
26
+ && rm -rf /var/lib/apt/lists/*
27
+
28
+ RUN git clone --depth 1 https://github.com/rbenv/ruby-build.git && \
29
+ cd ruby-build/bin && ./ruby-build ${RUBY_VERSION} /usr && \
30
+ git config --global --add safe.directory /workspace
31
+
32
+ WORKDIR /workspace
33
+
34
+ CMD ["/bin/bash"]
data/Gemfile CHANGED
@@ -1,8 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "https://rubygems.org"
2
4
 
3
5
  gemspec
4
6
 
7
+ gem 'extconf_compile_commands_json'
8
+ gem 'rake-compiler'
5
9
  gem 'test-unit'
6
10
  gem 'yard'
7
11
  gem 'pry-byebug'
8
- gem 'power_assert', git: 'https://github.com/k-tsj/power_assert'
12
+ gem 'power_assert'
13
+ gem 'rubocop'
data/README.md CHANGED
@@ -6,9 +6,9 @@ Cumo (pronounced "koomo") is a CUDA-aware, GPU-optimized numerical library that
6
6
 
7
7
  ## Requirements
8
8
 
9
- * Ruby 2.5 or later
9
+ * Ruby 3.0 or later
10
10
  * NVIDIA GPU Compute Capability 3.5 (Kepler) or later
11
- * CUDA 9.0 or later
11
+ * CUDA 11.0 or later
12
12
 
13
13
  ## Preparation
14
14
 
@@ -195,14 +195,6 @@ ln -sf "$HOME/opt/ccache/bin/ccache" "$HOME/opt/ccache/bin/g++"
195
195
  ln -sf "$HOME/opt/ccache/bin/ccache" "$HOME/opt/ccache/bin/nvcc"
196
196
  ```
197
197
 
198
- ### Build in parallel
199
-
200
- Set `MAKEFLAGS` to specify `make` command options. You can build in parallel as:
201
-
202
- ```
203
- bundle exec env MAKEFLAG=-j8 rake compile
204
- ```
205
-
206
198
  ### Specify nvcc --generate-code options
207
199
 
208
200
  ```
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
  require "rake/testtask"
3
5
 
@@ -7,22 +9,17 @@ Rake::TestTask.new(:test) do |t|
7
9
  t.test_files = FileList["test/**/*_test.rb"]
8
10
  end
9
11
 
10
- task :compile do
11
- sh 'cd ext/cumo && ruby extconf.rb && make && make build-ctest'
12
- end
12
+ require "rake/extensiontask"
13
+ Rake::ExtensionTask.new("cumo")
13
14
 
14
15
  task :ctest do
15
- sh 'cd ext/cumo && ruby extconf.rb && make run-ctest'
16
- end
17
-
18
- task :clean do
19
- sh 'cd ext/cumo && make clean'
16
+ sh 'cd ext/cumo && ruby extconf.rb && make && make build-ctest && make run-ctest'
20
17
  end
21
18
 
22
19
  task :docs do
23
20
  dir = "ext/cumo"
24
- srcs = %w[array.c data.c index.c math.c narray.c rand.c struct.c].map{|s| File.join(dir, "narray", s)}
25
- srcs += %w[cublas.c driver.c nvrtc.c runtime.c memory_pool.cpp].map{|s| File.join(dir, "cuda", s) }
21
+ srcs = %w[array.c data.c index.c math.c narray.c rand.c struct.c].map { |s| File.join(dir, "narray", s) }
22
+ srcs += %w[cublas.c driver.c nvrtc.c runtime.c memory_pool.cpp].map { |s| File.join(dir, "cuda", s) }
26
23
  srcs << File.join(dir, "narray", "types/*.c")
27
24
  srcs << "lib/cumo/narray/extra.rb"
28
25
  sh "cd ext/cumo; ruby extconf.rb; make src"
@@ -34,7 +31,7 @@ task :gdb do
34
31
  sh "gdb -x run.gdb --args ruby -I. ./test.rb"
35
32
  end
36
33
 
37
- task :default => [:clobber, :compile, :test]
34
+ task :default => [:compile, :test]
38
35
 
39
36
  desc 'Open an irb session preloaded with the gem library'
40
37
  task :console do