cumo 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +15 -0
  4. data/.rubocop_todo.yml +1272 -0
  5. data/3rd_party/mkmf-cu/Gemfile +2 -0
  6. data/3rd_party/mkmf-cu/Rakefile +2 -1
  7. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
  8. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
  9. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
  11. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
  12. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
  13. data/CHANGELOG.md +69 -0
  14. data/Gemfile +6 -1
  15. data/README.md +2 -10
  16. data/Rakefile +8 -11
  17. data/bench/broadcast_fp32.rb +28 -26
  18. data/bench/cumo_bench.rb +18 -16
  19. data/bench/numo_bench.rb +18 -16
  20. data/bench/reduction_fp32.rb +14 -12
  21. data/bin/console +1 -0
  22. data/cumo.gemspec +5 -8
  23. data/ext/cumo/cuda/cudnn.c +2 -2
  24. data/ext/cumo/cumo.c +7 -3
  25. data/ext/cumo/depend.erb +15 -13
  26. data/ext/cumo/extconf.rb +32 -46
  27. data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
  28. data/ext/cumo/include/cumo/intern.h +1 -0
  29. data/ext/cumo/include/cumo/narray.h +13 -1
  30. data/ext/cumo/include/cumo/template.h +2 -4
  31. data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
  32. data/ext/cumo/include/cumo/types/float_macro.h +2 -2
  33. data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
  34. data/ext/cumo/include/cumo.h +2 -2
  35. data/ext/cumo/narray/array.c +3 -3
  36. data/ext/cumo/narray/data.c +23 -2
  37. data/ext/cumo/narray/gen/cogen.rb +8 -7
  38. data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
  39. data/ext/cumo/narray/gen/def/bit.rb +3 -1
  40. data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
  41. data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
  42. data/ext/cumo/narray/gen/def/int16.rb +2 -0
  43. data/ext/cumo/narray/gen/def/int32.rb +2 -0
  44. data/ext/cumo/narray/gen/def/int64.rb +2 -0
  45. data/ext/cumo/narray/gen/def/int8.rb +2 -0
  46. data/ext/cumo/narray/gen/def/robject.rb +2 -0
  47. data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
  48. data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
  49. data/ext/cumo/narray/gen/def/uint16.rb +2 -0
  50. data/ext/cumo/narray/gen/def/uint32.rb +2 -0
  51. data/ext/cumo/narray/gen/def/uint64.rb +2 -0
  52. data/ext/cumo/narray/gen/def/uint8.rb +2 -0
  53. data/ext/cumo/narray/gen/erbln.rb +9 -7
  54. data/ext/cumo/narray/gen/erbpp2.rb +26 -24
  55. data/ext/cumo/narray/gen/narray_def.rb +13 -11
  56. data/ext/cumo/narray/gen/spec.rb +58 -55
  57. data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
  58. data/ext/cumo/narray/gen/tmpl/at.c +34 -0
  59. data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
  60. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
  61. data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
  62. data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
  63. data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
  64. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
  65. data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
  66. data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
  67. data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
  68. data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
  69. data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
  70. data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
  71. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
  72. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
  73. data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
  74. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
  75. data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
  76. data/ext/cumo/narray/index.c +243 -39
  77. data/ext/cumo/narray/index_kernel.cu +84 -0
  78. data/ext/cumo/narray/narray.c +38 -1
  79. data/ext/cumo/narray/ndloop.c +1 -1
  80. data/ext/cumo/narray/struct.c +1 -1
  81. data/lib/cumo/cuda/compile_error.rb +1 -1
  82. data/lib/cumo/cuda/compiler.rb +23 -22
  83. data/lib/cumo/cuda/cudnn.rb +1 -1
  84. data/lib/cumo/cuda/device.rb +1 -1
  85. data/lib/cumo/cuda/link_state.rb +2 -2
  86. data/lib/cumo/cuda/module.rb +1 -2
  87. data/lib/cumo/cuda/nvrtc_program.rb +3 -2
  88. data/lib/cumo/cuda.rb +2 -0
  89. data/lib/cumo/linalg.rb +2 -0
  90. data/lib/cumo/narray/extra.rb +137 -185
  91. data/lib/cumo/narray.rb +2 -0
  92. data/lib/cumo.rb +3 -1
  93. data/test/bit_test.rb +157 -0
  94. data/test/cuda/compiler_test.rb +69 -0
  95. data/test/cuda/device_test.rb +30 -0
  96. data/test/cuda/memory_pool_test.rb +45 -0
  97. data/test/cuda/nvrtc_test.rb +51 -0
  98. data/test/cuda/runtime_test.rb +28 -0
  99. data/test/cudnn_test.rb +498 -0
  100. data/test/cumo_test.rb +27 -0
  101. data/test/narray_test.rb +745 -0
  102. data/test/ractor_test.rb +52 -0
  103. data/test/test_helper.rb +31 -0
  104. metadata +31 -54
  105. data/.travis.yml +0 -5
  106. data/numo-narray-version +0 -1
data/lib/cumo/narray.rb CHANGED
@@ -1,2 +1,4 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # This file is for compatibility with require 'numo/narray'
2
4
  require_relative '../cumo'
data/lib/cumo.rb CHANGED
@@ -1,3 +1,5 @@
1
- require_relative File.join(__dir__, '../ext/cumo/cumo')
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'cumo.so'
2
4
  require_relative 'cumo/cuda'
3
5
  require_relative 'cumo/narray/extra'
data/test/bit_test.rb ADDED
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "test_helper"
4
+
5
+ class BitTest < Test::Unit::TestCase
6
+ dtype = Cumo::Bit
7
+
8
+ test dtype do
9
+ assert { dtype < Cumo::NArray }
10
+ end
11
+
12
+ procs = [
13
+ [proc { |tp, a| tp[*a] }, ""],
14
+ [proc { |tp, a| tp[*a][true] }, "[true]"],
15
+ [proc { |tp, a| tp[*a][0..-1] }, "[0..-1]"]
16
+ ]
17
+ procs.each do |init, ref|
18
+
19
+ test "#{dtype},[0,1,1,0,1,0,0,1]#{ref}" do
20
+ src = [0, 1, 1, 0, 1, 0, 0, 1]
21
+ n = src.size
22
+ a = init.call(dtype, src)
23
+
24
+ assert { a == src }
25
+ assert { (a & 0) == [0] * n }
26
+ assert { (a & 1) == src }
27
+ assert { (a | 0) == src }
28
+ assert { (a | 1) == [1] * n }
29
+ assert { (a ^ 0) == src.map { |x| x ^ 0 } }
30
+ assert { (a ^ 1) == src.map { |x| x ^ 1 } }
31
+ assert { ~a == src.map { |x| 1 - x } }
32
+
33
+ assert { a.count_true == 4 }
34
+ assert { a.count_false == 4 }
35
+ assert { a.where == [1, 2, 4, 7] }
36
+ assert { a.where2 == [[1, 2, 4, 7], [0, 3, 5, 6]] }
37
+ # TODO(sonots): FIX ME
38
+ # assert { a.mask(Cumo::DFloat[1,2,3,4,5,6,7,8]) == [2,3,5,8] }
39
+ assert { !a.all? }
40
+ assert { a.any? }
41
+ assert { !a.none? }
42
+ end
43
+ end
44
+
45
+ procs = [
46
+ [proc { |tp, a| tp[*a] }, ""],
47
+ [proc { |tp, a| tp[*a][true, 0..-1] }, "[true,true]"],
48
+ ]
49
+ procs.each do |init, ref|
50
+
51
+ test "#{dtype},[[0,1,1,0],[1,0,0,1]]#{ref}" do
52
+ src = [[0, 1, 1, 0], [1, 0, 0, 1]]
53
+ a = init.call(dtype, src)
54
+
55
+ assert { a[5] == 0 }
56
+ assert { a[-1] == 1 }
57
+ assert { a[1, 0] == src[1][0] }
58
+ assert { a[1, 1] == src[1][1] }
59
+ assert { a[1, 2] == src[1][2] }
60
+ assert { a[3..4] == [0, 1] }
61
+ assert { a[0, 1..2] == [1, 1] }
62
+ assert { a[0, :*] == src[0] }
63
+ assert { a[1, :*] == src[1] }
64
+ assert { a[:*, 1] == [src[0][1], src[1][1]] }
65
+
66
+ assert { a.count_true == 4 }
67
+ assert { a.count_false == 4 }
68
+ assert { a.where == [1, 2, 4, 7] }
69
+ assert { a.where2 == [[1, 2, 4, 7], [0, 3, 5, 6]] }
70
+ # TODO(sonots): FIX ME
71
+ # assert { a.mask(Cumo::DFloat[[1,2,3,4],[5,6,7,8]]) == [2,3,5,8] }
72
+ assert { !a.all? }
73
+ assert { a.any? }
74
+ assert { !a.none? }
75
+ end
76
+
77
+ test "#{dtype},[[0,1,1,0],[1,0,0,1]]#{ref},aset[]=" do
78
+ src = [[0, 1, 1, 0], [1, 0, 0, 1]]
79
+
80
+ a = init.call(dtype, src)
81
+ a[5] = 1
82
+ assert { a[5] == 1 }
83
+
84
+ a = init.call(dtype, src)
85
+ a[-1] = 0
86
+ assert { a[-1] == 0 }
87
+
88
+ a = init.call(dtype, src)
89
+ a[1, 0] = 0
90
+ assert { a[1, 0] == 0 }
91
+
92
+ a = init.call(dtype, src)
93
+ a[1, 1] = 1
94
+ assert { a[1, 1] == 1 }
95
+
96
+ a = init.call(dtype, src)
97
+ a[1, 2] = 1
98
+ assert { a[1, 2] == 1 }
99
+
100
+ a = init.call(dtype, src)
101
+ a[3..4] = [1, 0]
102
+ assert { a[3..4] == [1, 0] }
103
+
104
+ a = init.call(dtype, src)
105
+ a[0, 1..2] = [0, 0]
106
+ assert { a[0, 1..2] == [0, 0] }
107
+
108
+ a = init.call(dtype, src)
109
+ a[0, :*] = [1, 0, 0, 1]
110
+ assert { a[0, :*] == [1, 0, 0, 1] }
111
+
112
+ a = init.call(dtype, src)
113
+ a[1, :*] = [0, 1, 1, 0]
114
+ assert { a[1, :*] == [0, 1, 1, 0] }
115
+
116
+ a = init.call(dtype, src)
117
+ a[:*, 1] = [0, 1]
118
+ assert { a[:*, 1] == [0, 1] }
119
+
120
+ a = init.call(dtype, src)
121
+ a[5] = dtype.cast(1)
122
+ assert { a[5] == 1 }
123
+ assert { a[5] == dtype.cast(1) }
124
+
125
+ a = init.call(dtype, src)
126
+ a[1, 0] = dtype.cast(0)
127
+ assert { a[1, 0] == 0 }
128
+ assert { a[1, 0] == dtype.cast(0) }
129
+
130
+ a = init.call(dtype, src)
131
+ a[3..4] = dtype.cast([1, 0])
132
+ assert { a[3..4] == [1, 0] }
133
+ assert { a[3..4] == dtype.cast([1, 0]) }
134
+
135
+ a = init.call(dtype, src)
136
+ a[:*, 1] = dtype.cast([0, 1])
137
+ assert { a[:*, 1] == [0, 1] }
138
+ assert { a[:*, 1] == dtype.cast([0, 1]) }
139
+ end
140
+ end
141
+
142
+ test "store to view" do
143
+ n = 14
144
+ x = Cumo::Bit.zeros(n + 2, n + 2, 3)
145
+ ~(x[1..-2, 1..-2, 0].inplace)
146
+ assert { x.where.size == n * n }
147
+
148
+ x1 = Cumo::Bit.ones(n, n)
149
+ x0 = Cumo::Bit.zeros(n, n)
150
+ y0 = Cumo::Bit.zeros(n + 2, n + 2)
151
+ x = Cumo::NArray.dstack([x1, x0, x0])
152
+ y = Cumo::NArray.dstack([y0, y0, y0])
153
+ y[1..-2, 1..-2, true] = x
154
+ assert { (~y[1..-2, 1..-2, 0]).where.size == 0 }
155
+ assert { y[true, true, 1].where.size == 0 }
156
+ end
157
+ end
data/test/cuda/compiler_test.rb ADDED
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../test_helper"
4
+
5
+ module Cumo::CUDA
6
+ class CompilerTest < Test::Unit::TestCase
7
+ sub_test_case "valid_kernel_name?" do
8
+ def test_valid
9
+ assert_true(Compiler.valid_kernel_name?('valid_name_1'))
10
+ end
11
+
12
+ def test_empty
13
+ assert_false(Compiler.valid_kernel_name?(''))
14
+ end
15
+
16
+ def test_start_with_digit
17
+ assert_false(Compiler.valid_kernel_name?('0_invalid'))
18
+ end
19
+
20
+ def test_new_line
21
+ assert_false(Compiler.valid_kernel_name?("invalid\nname"))
22
+ end
23
+
24
+ def test_symbol
25
+ assert_false(Compiler.valid_kernel_name?("invalid$name"))
26
+ end
27
+
28
+ def test_space
29
+ assert_false(Compiler.valid_kernel_name?("invalid name"))
30
+ end
31
+ end
32
+
33
+ sub_test_case "compile_using_nvrtc" do
34
+ def test_valid
35
+ compiler = Compiler.new
36
+ source = "__global__ void k() {}\n"
37
+ ptx = compiler.compile_using_nvrtc(source)
38
+ assert { ptx =~ /Generated by NVIDIA NVVM Compiler/ }
39
+ end
40
+
41
+ def test_invalid
42
+ compiler = Compiler.new
43
+ source = "__global__ void k() {something_wrong}\n"
44
+ assert_raise(CompileError) { compiler.compile_using_nvrtc(source) }
45
+ end
46
+ end
47
+
48
+ sub_test_case "compile_with_cache" do
49
+ CACHE_DIR = File.join(__dir__, '.kernel_cache')
50
+
51
+ class << self
52
+ def startup
53
+ FileUtils.rm_rf(CACHE_DIR)
54
+ end
55
+ end
56
+
57
+ def test_valid
58
+ compiler = Compiler.new
59
+ source = "__global__ void k() {}\n"
60
+ assert_nothing_raised { compiler.compile_with_cache(source, cache_dir: CACHE_DIR) }
61
+ end
62
+
63
+ def test_valid_from_cache
64
+ test_valid
65
+ test_valid
66
+ end
67
+ end
68
+ end
69
+ end
data/test/cuda/device_test.rb ADDED
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../test_helper"
4
+
5
+ module Cumo::CUDA
6
+ class DeviceTest < Test::Unit::TestCase
7
+ def test_initialize
8
+ assert { Device.new(0).id == 0 }
9
+ assert { Device.new.id.is_a?(Integer) }
10
+ end
11
+
12
+ def test_use
13
+ assert_nothing_raised { Device.new(0).use }
14
+ end
15
+
16
+ def test_with
17
+ Device.new(0).with do
18
+ assert { Device.new.id == 0 }
19
+ end
20
+ end
21
+
22
+ def test_synchronize
23
+ assert_nothing_raised { Device.new.synchronize }
24
+ end
25
+
26
+ def test_compute_capability
27
+ assert { Device.new.compute_capability.size == 2 }
28
+ end
29
+ end
30
+ end
data/test/cuda/memory_pool_test.rb ADDED
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../test_helper"
4
+
5
+ module Cumo::CUDA
6
+ class MemoryPoolTest < Test::Unit::TestCase
7
+ def setup
8
+ @orig_state = MemoryPool.enabled?
9
+ end
10
+
11
+ def teardown
12
+ @orig_state ? MemoryPool.enable : MemoryPool.disable
13
+ end
14
+
15
+ def test_enable
16
+ MemoryPool.enable
17
+ assert { MemoryPool.enabled? }
18
+ end
19
+
20
+ def test_disable
21
+ MemoryPool.disable
22
+ assert { !MemoryPool.enabled? }
23
+ end
24
+
25
+ def test_free_all_blocks
26
+ assert_nothing_raised { MemoryPool.free_all_blocks }
27
+ end
28
+
29
+ def test_n_free_blocks
30
+ assert_nothing_raised { MemoryPool.n_free_blocks }
31
+ end
32
+
33
+ def test_used_bytes
34
+ assert_nothing_raised { MemoryPool.used_bytes }
35
+ end
36
+
37
+ def test_free_bytes
38
+ assert_nothing_raised { MemoryPool.free_bytes }
39
+ end
40
+
41
+ def test_total_bytes
42
+ assert_nothing_raised { MemoryPool.total_bytes }
43
+ end
44
+ end
45
+ end
data/test/cuda/nvrtc_test.rb ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../test_helper"
4
+
5
+ module Cumo::CUDA
6
+ class NVRTCTest < Test::Unit::TestCase
7
+ def test_nvrtcVersion
8
+ major, minor = NVRTC.nvrtcVersion
9
+ assert { major.is_a?(Integer) }
10
+ assert { minor.is_a?(Integer) }
11
+ end
12
+
13
+ def test_nvrtcCreateProgram
14
+ src = "__global__ void k() {}\n"
15
+ name = "simple.cu"
16
+ headers = []
17
+ include_names = []
18
+ assert_nothing_raised do
19
+ NVRTC.nvrtcCreateProgram(src, name, headers, include_names)
20
+ end
21
+ end
22
+
23
+ def test_nvrtcDestroyProgram
24
+ prog = test_nvrtcCreateProgram
25
+ assert_nothing_raised do
26
+ NVRTC.nvrtcDestroyProgram(prog)
27
+ end
28
+ end
29
+
30
+ def test_nvrtcCompileProgram
31
+ prog = test_nvrtcCreateProgram
32
+ options = []
33
+ assert_nothing_raised do
34
+ NVRTC.nvrtcCompileProgram(prog, options)
35
+ end
36
+ prog
37
+ end
38
+
39
+ def test_nvrtcGetPTX
40
+ prog = test_nvrtcCompileProgram
41
+ ptx = NVRTC.nvrtcGetPTX(prog)
42
+ assert { ptx =~ /Generated by NVIDIA NVVM Compiler/ }
43
+ end
44
+
45
+ def test_nvrtcGetProgramLog
46
+ prog = test_nvrtcCompileProgram
47
+ log = NVRTC.nvrtcGetProgramLog(prog)
48
+ assert { log.is_a?(String) }
49
+ end
50
+ end
51
+ end
data/test/cuda/runtime_test.rb ADDED
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../test_helper"
4
+
5
+ module Cumo::CUDA
6
+ class RuntimeTest < Test::Unit::TestCase
7
+ def test_cudaDriverGetVersion
8
+ assert { Runtime.cudaDriverGetVersion.is_a?(Integer) }
9
+ end
10
+
11
+ def test_cudaRuntimeGetVersion
12
+ assert { Runtime.cudaRuntimeGetVersion.is_a?(Integer) }
13
+ end
14
+
15
+ def test_cudaSetDevice_cudaGetDevice
16
+ assert_nothing_raised { Runtime.cudaSetDevice(0) }
17
+ assert { Runtime.cudaGetDevice == 0 }
18
+ end
19
+
20
+ def test_cudaGetDeviceCount
21
+ assert { Runtime.cudaGetDeviceCount.is_a?(Integer) }
22
+ end
23
+
24
+ def test_cudaDeviceSynchronize
25
+ assert_nothing_raised { Runtime.cudaDeviceSynchronize }
26
+ end
27
+ end
28
+ end