cumo 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +15 -0
  4. data/.rubocop_todo.yml +1272 -0
  5. data/3rd_party/mkmf-cu/Gemfile +2 -0
  6. data/3rd_party/mkmf-cu/Rakefile +2 -1
  7. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
  8. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
  9. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
  11. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
  12. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
  13. data/CHANGELOG.md +69 -0
  14. data/Gemfile +6 -1
  15. data/README.md +2 -10
  16. data/Rakefile +8 -11
  17. data/bench/broadcast_fp32.rb +28 -26
  18. data/bench/cumo_bench.rb +18 -16
  19. data/bench/numo_bench.rb +18 -16
  20. data/bench/reduction_fp32.rb +14 -12
  21. data/bin/console +1 -0
  22. data/cumo.gemspec +5 -8
  23. data/ext/cumo/cuda/cudnn.c +2 -2
  24. data/ext/cumo/cumo.c +7 -3
  25. data/ext/cumo/depend.erb +15 -13
  26. data/ext/cumo/extconf.rb +32 -46
  27. data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
  28. data/ext/cumo/include/cumo/intern.h +1 -0
  29. data/ext/cumo/include/cumo/narray.h +13 -1
  30. data/ext/cumo/include/cumo/template.h +2 -4
  31. data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
  32. data/ext/cumo/include/cumo/types/float_macro.h +2 -2
  33. data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
  34. data/ext/cumo/include/cumo.h +2 -2
  35. data/ext/cumo/narray/array.c +3 -3
  36. data/ext/cumo/narray/data.c +23 -2
  37. data/ext/cumo/narray/gen/cogen.rb +8 -7
  38. data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
  39. data/ext/cumo/narray/gen/def/bit.rb +3 -1
  40. data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
  41. data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
  42. data/ext/cumo/narray/gen/def/int16.rb +2 -0
  43. data/ext/cumo/narray/gen/def/int32.rb +2 -0
  44. data/ext/cumo/narray/gen/def/int64.rb +2 -0
  45. data/ext/cumo/narray/gen/def/int8.rb +2 -0
  46. data/ext/cumo/narray/gen/def/robject.rb +2 -0
  47. data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
  48. data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
  49. data/ext/cumo/narray/gen/def/uint16.rb +2 -0
  50. data/ext/cumo/narray/gen/def/uint32.rb +2 -0
  51. data/ext/cumo/narray/gen/def/uint64.rb +2 -0
  52. data/ext/cumo/narray/gen/def/uint8.rb +2 -0
  53. data/ext/cumo/narray/gen/erbln.rb +9 -7
  54. data/ext/cumo/narray/gen/erbpp2.rb +26 -24
  55. data/ext/cumo/narray/gen/narray_def.rb +13 -11
  56. data/ext/cumo/narray/gen/spec.rb +58 -55
  57. data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
  58. data/ext/cumo/narray/gen/tmpl/at.c +34 -0
  59. data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
  60. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
  61. data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
  62. data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
  63. data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
  64. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
  65. data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
  66. data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
  67. data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
  68. data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
  69. data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
  70. data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
  71. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
  72. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
  73. data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
  74. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
  75. data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
  76. data/ext/cumo/narray/index.c +243 -39
  77. data/ext/cumo/narray/index_kernel.cu +84 -0
  78. data/ext/cumo/narray/narray.c +38 -1
  79. data/ext/cumo/narray/ndloop.c +1 -1
  80. data/ext/cumo/narray/struct.c +1 -1
  81. data/lib/cumo/cuda/compile_error.rb +1 -1
  82. data/lib/cumo/cuda/compiler.rb +23 -22
  83. data/lib/cumo/cuda/cudnn.rb +1 -1
  84. data/lib/cumo/cuda/device.rb +1 -1
  85. data/lib/cumo/cuda/link_state.rb +2 -2
  86. data/lib/cumo/cuda/module.rb +1 -2
  87. data/lib/cumo/cuda/nvrtc_program.rb +3 -2
  88. data/lib/cumo/cuda.rb +2 -0
  89. data/lib/cumo/linalg.rb +2 -0
  90. data/lib/cumo/narray/extra.rb +137 -185
  91. data/lib/cumo/narray.rb +2 -0
  92. data/lib/cumo.rb +3 -1
  93. data/test/bit_test.rb +157 -0
  94. data/test/cuda/compiler_test.rb +69 -0
  95. data/test/cuda/device_test.rb +30 -0
  96. data/test/cuda/memory_pool_test.rb +45 -0
  97. data/test/cuda/nvrtc_test.rb +51 -0
  98. data/test/cuda/runtime_test.rb +28 -0
  99. data/test/cudnn_test.rb +498 -0
  100. data/test/cumo_test.rb +27 -0
  101. data/test/narray_test.rb +745 -0
  102. data/test/ractor_test.rb +52 -0
  103. data/test/test_helper.rb +31 -0
  104. metadata +31 -54
  105. data/.travis.yml +0 -5
  106. data/numo-narray-version +0 -1
@@ -812,7 +812,7 @@ nst_s_add_type(int argc, VALUE *argv, VALUE mod)
812
812
 
813
813
  #define NST_TYPEDEF(tpname,tpclass) \
814
814
  static VALUE \
815
- nst_s_##tpname(VALUE argc, VALUE *argv, VALUE mod) \
815
+ nst_s_##tpname(int argc, VALUE *argv, VALUE mod) \
816
816
  { nstruct_add_type(tpclass,argc,argv,mod); \
817
817
  return Qnil; \
818
818
  }
@@ -1,4 +1,4 @@
1
- require_relative '../cuda'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Cumo::CUDA
4
4
  class CompileError < StandardError
@@ -1,32 +1,33 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'tmpdir'
2
4
  require 'tempfile'
3
5
  require 'fileutils'
4
6
  require 'digest/md5'
5
- require_relative '../cuda'
6
7
 
7
8
  module Cumo::CUDA
8
9
  class Compiler
9
10
  VALID_KERNEL_NAME = /\A[a-zA-Z_][a-zA-Z_0-9]*\z/
10
11
  DEFAULT_CACHE_DIR = File.expand_path('~/.cumo/kernel_cache')
11
-
12
+
12
13
  @@empty_file_preprocess_cache ||= {}
13
-
14
+
14
15
  def self.valid_kernel_name?(name)
15
16
  VALID_KERNEL_NAME.match?(name)
16
17
  end
17
-
18
+
18
19
  def compile_using_nvrtc(source, options: [], arch: nil)
19
20
  arch ||= get_arch
20
21
  options += ["-arch=#{arch}"]
21
-
22
+
22
23
  Dir.mktmpdir do |root_dir|
23
24
  path = File.join(root_dir, 'kern')
24
25
  cu_path = "#{path}.cu"
25
-
26
+
26
27
  File.open(cu_path, 'w') do |cu_file|
27
28
  cu_file.write(source)
28
29
  end
29
-
30
+
30
31
  prog = NVRTCProgram.new(source, name: cu_path)
31
32
  begin
32
33
  ptx = prog.compile(options: options)
@@ -41,12 +42,12 @@ module Cumo::CUDA
41
42
  return ptx
42
43
  end
43
44
  end
44
-
45
+
45
46
  def compile_with_cache(source, options: [], arch: nil, cache_dir: nil, extra_source: nil)
46
47
  # NVRTC does not use extra_source. extra_source is used for cache key.
47
48
  cache_dir ||= get_cache_dir
48
49
  arch ||= get_arch
49
-
50
+
50
51
  options += ['-ftz=true']
51
52
 
52
53
  env = [arch, options, get_nvrtc_version]
@@ -57,15 +58,15 @@ module Cumo::CUDA
57
58
  @@empty_file_preprocess_cache[env] = base
58
59
  end
59
60
  key_src = "#{env} #{base} #{source} #{extra_source}"
60
-
61
+
61
62
  key_src.encode!('utf-8')
62
63
  digest = Digest::MD5.hexdigest(key_src)
63
64
  name = "#{digest}_2.cubin"
64
-
65
+
65
66
  unless Dir.exist?(cache_dir)
66
67
  FileUtils.mkdir_p(cache_dir)
67
68
  end
68
-
69
+
69
70
  # TODO(sonots): thread-safe?
70
71
  path = File.join(cache_dir, name)
71
72
  cubin = load_cache(path)
@@ -74,7 +75,7 @@ module Cumo::CUDA
74
75
  mod.load(cubin)
75
76
  return mod
76
77
  end
77
-
78
+
78
79
  ptx = compile_using_nvrtc(source, options: options, arch: arch)
79
80
  cubin = nil
80
81
  cubin_hash = nil
@@ -85,7 +86,7 @@ module Cumo::CUDA
85
86
  end
86
87
 
87
88
  save_cache(path, cubin_hash, cubin)
88
-
89
+
89
90
  # Save .cu source file along with .cubin
90
91
  if get_bool_env_variable('CUMO_CACHE_SAVE_CUDA_SOURCE', false)
91
92
  File.open("#{path}.cu", 'w') do |f|
@@ -97,7 +98,7 @@ module Cumo::CUDA
97
98
  mod.load(cubin)
98
99
  return mod
99
100
  end
100
-
101
+
101
102
  private
102
103
 
103
104
  def save_cache(path, cubin_hash, cubin)
@@ -105,7 +106,7 @@ module Cumo::CUDA
105
106
  tf.write(cubin_hash)
106
107
  tf.write(cubin)
107
108
  temp_path = tf.path
108
- File.rename(temp_path, path)
109
+ FileUtils.mv(temp_path, path)
109
110
  end
110
111
 
111
112
  def load_cache(path)
@@ -121,29 +122,29 @@ module Cumo::CUDA
121
122
  end
122
123
  nil
123
124
  end
124
-
125
+
125
126
  def get_cache_dir
126
127
  ENV.fetch('CUMO_CACHE_DIR', DEFAULT_CACHE_DIR)
127
128
  end
128
-
129
+
129
130
  def get_nvrtc_version
130
131
  @@nvrtc_version ||= NVRTC.nvrtcVersion
131
132
  end
132
-
133
+
133
134
  def get_arch
134
135
  cc = Device.new.compute_capability
135
136
  "compute_#{cc}"
136
137
  end
137
-
138
+
138
139
  def get_bool_env_variable(name, default)
139
140
  val = ENV[name]
140
141
  return default if val.nil? or val.size == 0
141
142
  Integer(val) == 1 rescue false
142
143
  end
143
-
144
+
144
145
  def preprocess(source, options, arch)
145
146
  options += ["-arch=#{arch}"]
146
-
147
+
147
148
  prog = NVRTCProgram.new(source, name: '')
148
149
  begin
149
150
  result = prog.compile(options: options)
@@ -1,4 +1,4 @@
1
- require 'cumo'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Cumo
4
4
  [SFloat, DFloat].each do |klass|
@@ -1,4 +1,4 @@
1
- require_relative '../cuda'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Cumo::CUDA
4
4
  class Device
@@ -1,4 +1,4 @@
1
- require_relative '../cuda'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Cumo::CUDA
4
4
  # CUDA link state.
@@ -25,7 +25,7 @@ module Cumo::CUDA
25
25
  end
26
26
 
27
27
  def complete
28
- cubin = Driver.cuLinkComplete(@ptr)
28
+ Driver.cuLinkComplete(@ptr)
29
29
  end
30
30
  end
31
31
  end
@@ -1,4 +1,4 @@
1
- require_relative '../cuda'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Cumo::CUDA
4
4
  # CUDA kernel module.
@@ -37,4 +37,3 @@ module Cumo::CUDA
37
37
  end
38
38
  end
39
39
  end
40
-
@@ -1,4 +1,5 @@
1
- require_relative '../cuda'
1
+ # frozen_string_literal: true
2
+
2
3
  require_relative 'compile_error'
3
4
 
4
5
  module Cumo::CUDA
@@ -18,7 +19,7 @@ module Cumo::CUDA
18
19
  begin
19
20
  NVRTC.nvrtcCompileProgram(@ptr, options)
20
21
  return NVRTC.nvrtcGetPTX(@ptr)
21
- rescue NVRTCError => e
22
+ rescue NVRTCError
22
23
  log = NVRTC.nvrtcGetProgramLog(@ptr)
23
24
  raise CompileError.new(log, @src, @name, options)
24
25
  end
data/lib/cumo/cuda.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Cumo
2
4
  module CUDA
3
5
  end
data/lib/cumo/linalg.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'cumo'
2
4
 
3
5
  # Provide compatibility layers with numo/linalg