cumo 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +15 -0
  4. data/.rubocop_todo.yml +1272 -0
  5. data/3rd_party/mkmf-cu/Gemfile +2 -0
  6. data/3rd_party/mkmf-cu/Rakefile +2 -1
  7. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
  8. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
  9. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
  11. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
  12. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
  13. data/CHANGELOG.md +69 -0
  14. data/Gemfile +6 -1
  15. data/README.md +2 -10
  16. data/Rakefile +8 -11
  17. data/bench/broadcast_fp32.rb +28 -26
  18. data/bench/cumo_bench.rb +18 -16
  19. data/bench/numo_bench.rb +18 -16
  20. data/bench/reduction_fp32.rb +14 -12
  21. data/bin/console +1 -0
  22. data/cumo.gemspec +5 -8
  23. data/ext/cumo/cuda/cudnn.c +2 -2
  24. data/ext/cumo/cumo.c +7 -3
  25. data/ext/cumo/depend.erb +15 -13
  26. data/ext/cumo/extconf.rb +32 -46
  27. data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
  28. data/ext/cumo/include/cumo/intern.h +1 -0
  29. data/ext/cumo/include/cumo/narray.h +13 -1
  30. data/ext/cumo/include/cumo/template.h +2 -4
  31. data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
  32. data/ext/cumo/include/cumo/types/float_macro.h +2 -2
  33. data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
  34. data/ext/cumo/include/cumo.h +2 -2
  35. data/ext/cumo/narray/array.c +3 -3
  36. data/ext/cumo/narray/data.c +23 -2
  37. data/ext/cumo/narray/gen/cogen.rb +8 -7
  38. data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
  39. data/ext/cumo/narray/gen/def/bit.rb +3 -1
  40. data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
  41. data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
  42. data/ext/cumo/narray/gen/def/int16.rb +2 -0
  43. data/ext/cumo/narray/gen/def/int32.rb +2 -0
  44. data/ext/cumo/narray/gen/def/int64.rb +2 -0
  45. data/ext/cumo/narray/gen/def/int8.rb +2 -0
  46. data/ext/cumo/narray/gen/def/robject.rb +2 -0
  47. data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
  48. data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
  49. data/ext/cumo/narray/gen/def/uint16.rb +2 -0
  50. data/ext/cumo/narray/gen/def/uint32.rb +2 -0
  51. data/ext/cumo/narray/gen/def/uint64.rb +2 -0
  52. data/ext/cumo/narray/gen/def/uint8.rb +2 -0
  53. data/ext/cumo/narray/gen/erbln.rb +9 -7
  54. data/ext/cumo/narray/gen/erbpp2.rb +26 -24
  55. data/ext/cumo/narray/gen/narray_def.rb +13 -11
  56. data/ext/cumo/narray/gen/spec.rb +58 -55
  57. data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
  58. data/ext/cumo/narray/gen/tmpl/at.c +34 -0
  59. data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
  60. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
  61. data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
  62. data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
  63. data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
  64. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
  65. data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
  66. data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
  67. data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
  68. data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
  69. data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
  70. data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
  71. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
  72. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
  73. data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
  74. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
  75. data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
  76. data/ext/cumo/narray/index.c +243 -39
  77. data/ext/cumo/narray/index_kernel.cu +84 -0
  78. data/ext/cumo/narray/narray.c +38 -1
  79. data/ext/cumo/narray/ndloop.c +1 -1
  80. data/ext/cumo/narray/struct.c +1 -1
  81. data/lib/cumo/cuda/compile_error.rb +1 -1
  82. data/lib/cumo/cuda/compiler.rb +23 -22
  83. data/lib/cumo/cuda/cudnn.rb +1 -1
  84. data/lib/cumo/cuda/device.rb +1 -1
  85. data/lib/cumo/cuda/link_state.rb +2 -2
  86. data/lib/cumo/cuda/module.rb +1 -2
  87. data/lib/cumo/cuda/nvrtc_program.rb +3 -2
  88. data/lib/cumo/cuda.rb +2 -0
  89. data/lib/cumo/linalg.rb +2 -0
  90. data/lib/cumo/narray/extra.rb +137 -185
  91. data/lib/cumo/narray.rb +2 -0
  92. data/lib/cumo.rb +3 -1
  93. data/test/bit_test.rb +157 -0
  94. data/test/cuda/compiler_test.rb +69 -0
  95. data/test/cuda/device_test.rb +30 -0
  96. data/test/cuda/memory_pool_test.rb +45 -0
  97. data/test/cuda/nvrtc_test.rb +51 -0
  98. data/test/cuda/runtime_test.rb +28 -0
  99. data/test/cudnn_test.rb +498 -0
  100. data/test/cumo_test.rb +27 -0
  101. data/test/narray_test.rb +745 -0
  102. data/test/ractor_test.rb +52 -0
  103. data/test/test_helper.rb +31 -0
  104. metadata +31 -54
  105. data/.travis.yml +0 -5
  106. data/numo-narray-version +0 -1
data/bench/cumo_bench.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'cumo/narray'
2
4
  require 'benchmark'
3
5
 
@@ -5,7 +7,7 @@ NUM = (ARGV.first || 100).to_i
5
7
 
6
8
  # warm up
7
9
  a = Cumo::Float32.new(10).seq(1)
8
- b = Cumo::Float32.new(10).seq(10,10)
10
+ b = Cumo::Float32.new(10).seq(10, 10)
9
11
  c = a + b
10
12
  c.free
11
13
 
@@ -14,7 +16,7 @@ def elementwise(num = nil)
14
16
  puts "elementwise(#{num})"
15
17
  Benchmark.bm do |r|
16
18
  a = Cumo::Float32.new(10000).seq(1)
17
- b = Cumo::Float32.new(10000).seq(10,10)
19
+ b = Cumo::Float32.new(10000).seq(10, 10)
18
20
  (a + b).free # warm up
19
21
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
20
22
  r.report('10**4') do
@@ -25,7 +27,7 @@ def elementwise(num = nil)
25
27
  end
26
28
 
27
29
  a = Cumo::Float32.new(100000).seq(1)
28
- b = Cumo::Float32.new(100000).seq(10,10)
30
+ b = Cumo::Float32.new(100000).seq(10, 10)
29
31
  (a + b).free # warm up
30
32
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
31
33
  r.report('10**5') do
@@ -36,7 +38,7 @@ def elementwise(num = nil)
36
38
  end
37
39
 
38
40
  a = Cumo::Float32.new(1000000).seq(1)
39
- b = Cumo::Float32.new(1000000).seq(10,10)
41
+ b = Cumo::Float32.new(1000000).seq(10, 10)
40
42
  (a + b).free # warm up
41
43
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
42
44
  r.report('10**6') do
@@ -47,7 +49,7 @@ def elementwise(num = nil)
47
49
  end
48
50
 
49
51
  a = Cumo::Float32.new(10000000).seq(1)
50
- b = Cumo::Float32.new(10000000).seq(10,10)
52
+ b = Cumo::Float32.new(10000000).seq(10, 10)
51
53
  (a + b).free # warm up
52
54
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
53
55
  r.report('10**7') do
@@ -58,7 +60,7 @@ def elementwise(num = nil)
58
60
  end
59
61
 
60
62
  a = Cumo::Float32.new(100000000).seq(1)
61
- b = Cumo::Float32.new(100000000).seq(10,10)
63
+ b = Cumo::Float32.new(100000000).seq(10, 10)
62
64
  (a + b).free # warm up
63
65
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
64
66
  r.report('10**8') do
@@ -130,8 +132,8 @@ def dot(num = nil)
130
132
  num ||= 1
131
133
  puts "dot(#{num})"
132
134
  Benchmark.bm do |r|
133
- a = Cumo::Float32.new(100,100).seq(1)
134
- b = Cumo::Float32.new(100,100).seq(10,10)
135
+ a = Cumo::Float32.new(100, 100).seq(1)
136
+ b = Cumo::Float32.new(100, 100).seq(10, 10)
135
137
  a.dot(b).free # warm up
136
138
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
137
139
  r.report('10**4') do
@@ -141,8 +143,8 @@ def dot(num = nil)
141
143
  end
142
144
  end
143
145
 
144
- a = Cumo::Float32.new(100,1000).seq(1)
145
- b = Cumo::Float32.new(1000,100).seq(10,10)
146
+ a = Cumo::Float32.new(100, 1000).seq(1)
147
+ b = Cumo::Float32.new(1000, 100).seq(10, 10)
146
148
  a.dot(b).free # warm up
147
149
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
148
150
  r.report('10**5') do
@@ -152,8 +154,8 @@ def dot(num = nil)
152
154
  end
153
155
  end
154
156
 
155
- a = Cumo::Float32.new(100,10000).seq(1)
156
- b = Cumo::Float32.new(10000,100).seq(10,10)
157
+ a = Cumo::Float32.new(100, 10000).seq(1)
158
+ b = Cumo::Float32.new(10000, 100).seq(10, 10)
157
159
  a.dot(b).free # warm up
158
160
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
159
161
  r.report('10**6') do
@@ -163,8 +165,8 @@ def dot(num = nil)
163
165
  end
164
166
  end
165
167
 
166
- a = Cumo::Float32.new(100,100000).seq(1)
167
- b = Cumo::Float32.new(100000,100).seq(10,10)
168
+ a = Cumo::Float32.new(100, 100000).seq(1)
169
+ b = Cumo::Float32.new(100000, 100).seq(10, 10)
168
170
  a.dot(b).free # warm up
169
171
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
170
172
  r.report('10**7') do
@@ -174,8 +176,8 @@ def dot(num = nil)
174
176
  end
175
177
  end
176
178
 
177
- a = Cumo::Float32.new(100,1000000).seq(1)
178
- b = Cumo::Float32.new(1000000,100).seq(10,10)
179
+ a = Cumo::Float32.new(100, 1000000).seq(1)
180
+ b = Cumo::Float32.new(1000000, 100).seq(10, 10)
179
181
  a.dot(b).free # warm up
180
182
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
181
183
  r.report('10**8') do
data/bench/numo_bench.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'numo/narray'
2
4
  require 'benchmark'
3
5
 
@@ -5,7 +7,7 @@ NUM = (ARGV.first || 100).to_i
5
7
 
6
8
  # warm up
7
9
  a = Numo::Float32.new(10).seq(1)
8
- b = Numo::Float32.new(10).seq(10,10)
10
+ b = Numo::Float32.new(10).seq(10, 10)
9
11
  c = a + b
10
12
 
11
13
  def elementwise(num = nil)
@@ -13,31 +15,31 @@ def elementwise(num = nil)
13
15
  puts "elementwise(#{num})"
14
16
  Benchmark.bm do |r|
15
17
  a = Numo::Float32.new(10000).seq(1)
16
- b = Numo::Float32.new(10000).seq(10,10)
18
+ b = Numo::Float32.new(10000).seq(10, 10)
17
19
  r.report('10**4') do
18
20
  NUM.times { (a + b) }
19
21
  end
20
22
 
21
23
  a = Numo::Float32.new(100000).seq(1)
22
- b = Numo::Float32.new(100000).seq(10,10)
24
+ b = Numo::Float32.new(100000).seq(10, 10)
23
25
  r.report('10**5') do
24
26
  NUM.times { (a + b) }
25
27
  end
26
28
 
27
29
  a = Numo::Float32.new(1000000).seq(1)
28
- b = Numo::Float32.new(1000000).seq(10,10)
30
+ b = Numo::Float32.new(1000000).seq(10, 10)
29
31
  r.report('10**6') do
30
32
  NUM.times { (a + b) }
31
33
  end
32
34
 
33
35
  a = Numo::Float32.new(10000000).seq(1)
34
- b = Numo::Float32.new(10000000).seq(10,10)
36
+ b = Numo::Float32.new(10000000).seq(10, 10)
35
37
  r.report('10**7') do
36
38
  NUM.times { (a + b) }
37
39
  end
38
40
 
39
41
  a = Numo::Float32.new(100000000).seq(1)
40
- b = Numo::Float32.new(100000000).seq(10,10)
42
+ b = Numo::Float32.new(100000000).seq(10, 10)
41
43
  r.report('10**8') do
42
44
  NUM.times { (a + b) }
43
45
  end
@@ -79,32 +81,32 @@ def dot(num = nil)
79
81
  num ||= 1
80
82
  puts "dot(#{num})"
81
83
  Benchmark.bm do |r|
82
- a = Numo::Float32.new(100,100).seq(1)
83
- b = Numo::Float32.new(100,100).seq(10,10)
84
+ a = Numo::Float32.new(100, 100).seq(1)
85
+ b = Numo::Float32.new(100, 100).seq(10, 10)
84
86
  r.report('10**4') do
85
87
  num.times { a.dot(b) }
86
88
  end
87
89
 
88
- a = Numo::Float32.new(100,1000).seq(1)
89
- b = Numo::Float32.new(1000,100).seq(10,10)
90
+ a = Numo::Float32.new(100, 1000).seq(1)
91
+ b = Numo::Float32.new(1000, 100).seq(10, 10)
90
92
  r.report('10**5') do
91
93
  num.times { a.dot(b) }
92
94
  end
93
95
 
94
- a = Numo::Float32.new(100,10000).seq(1)
95
- b = Numo::Float32.new(10000,100).seq(10,10)
96
+ a = Numo::Float32.new(100, 10000).seq(1)
97
+ b = Numo::Float32.new(10000, 100).seq(10, 10)
96
98
  r.report('10**6') do
97
99
  num.times { a.dot(b) }
98
100
  end
99
101
 
100
- a = Numo::Float32.new(100,100000).seq(1)
101
- b = Numo::Float32.new(100000,100).seq(10,10)
102
+ a = Numo::Float32.new(100, 100000).seq(1)
103
+ b = Numo::Float32.new(100000, 100).seq(10, 10)
102
104
  r.report('10**7') do
103
105
  num.times { a.dot(b) }
104
106
  end
105
107
 
106
- a = Numo::Float32.new(100,1000000).seq(1)
107
- b = Numo::Float32.new(1000000,100).seq(10,10)
108
+ a = Numo::Float32.new(100, 1000000).seq(1)
109
+ b = Numo::Float32.new(1000000, 100).seq(10, 10)
108
110
  r.report('10**8') do
109
111
  num.times { a.dot(b) }
110
112
  end
data/bench/reduction_fp32.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'benchmark'
2
4
  require 'cumo/narray'
3
5
 
@@ -5,7 +7,7 @@ num_iteration = 100
5
7
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
6
8
 
7
9
  Benchmark.bm 30 do |r|
8
- x = Cumo::SFloat.ones([500,500])
10
+ x = Cumo::SFloat.ones([500, 500])
9
11
  r.report "x.sum" do
10
12
  num_iteration.times do
11
13
  x.sum
@@ -13,7 +15,7 @@ Benchmark.bm 30 do |r|
13
15
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
14
16
  end
15
17
 
16
- x = Cumo::SFloat.ones([500,500])
18
+ x = Cumo::SFloat.ones([500, 500])
17
19
  r.report "x.sum(axis: 0)" do
18
20
  num_iteration.times do
19
21
  x.sum(axis: 0)
@@ -21,7 +23,7 @@ Benchmark.bm 30 do |r|
21
23
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
22
24
  end
23
25
 
24
- x = Cumo::SFloat.ones([500,500])
26
+ x = Cumo::SFloat.ones([500, 500])
25
27
  r.report "x.sum(axis: 1)" do
26
28
  num_iteration.times do
27
29
  x.sum(axis: 1)
@@ -29,7 +31,7 @@ Benchmark.bm 30 do |r|
29
31
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
30
32
  end
31
33
 
32
- x = Cumo::SFloat.ones([500,500])
34
+ x = Cumo::SFloat.ones([500, 500])
33
35
  r.report "x.sum(keepdims: true)" do
34
36
  num_iteration.times do
35
37
  x.sum(keepdims: true)
@@ -37,7 +39,7 @@ Benchmark.bm 30 do |r|
37
39
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
38
40
  end
39
41
 
40
- x = Cumo::SFloat.ones([500,500])
42
+ x = Cumo::SFloat.ones([500, 500])
41
43
  r.report "x.sum(axis: 0, keepdims: true)" do
42
44
  num_iteration.times do
43
45
  x.sum(axis: 0, keepdims: true)
@@ -45,7 +47,7 @@ Benchmark.bm 30 do |r|
45
47
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
46
48
  end
47
49
 
48
- x = Cumo::SFloat.ones([500,500])
50
+ x = Cumo::SFloat.ones([500, 500])
49
51
  r.report "x.sum(axis: 1, keepdims: true)" do
50
52
  num_iteration.times do
51
53
  x.sum(axis: 1, keepdims: true)
@@ -53,7 +55,7 @@ Benchmark.bm 30 do |r|
53
55
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
54
56
  end
55
57
 
56
- x = Cumo::SFloat.ones([500,500])
58
+ x = Cumo::SFloat.ones([500, 500])
57
59
  r.report "x.max" do
58
60
  num_iteration.times do
59
61
  x.max
@@ -61,7 +63,7 @@ Benchmark.bm 30 do |r|
61
63
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
62
64
  end
63
65
 
64
- x = Cumo::SFloat.ones([500,500])
66
+ x = Cumo::SFloat.ones([500, 500])
65
67
  r.report "x.max(axis: 0)" do
66
68
  num_iteration.times do
67
69
  x.max(axis: 0)
@@ -69,7 +71,7 @@ Benchmark.bm 30 do |r|
69
71
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
70
72
  end
71
73
 
72
- x = Cumo::SFloat.ones([500,500])
74
+ x = Cumo::SFloat.ones([500, 500])
73
75
  r.report "x.max(axis: 1)" do
74
76
  num_iteration.times do
75
77
  x.max(axis: 1)
@@ -77,7 +79,7 @@ Benchmark.bm 30 do |r|
77
79
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
78
80
  end
79
81
 
80
- x = Cumo::SFloat.ones([500,500])
82
+ x = Cumo::SFloat.ones([500, 500])
81
83
  r.report "x.max(keepdims: true)" do
82
84
  num_iteration.times do
83
85
  x.max(keepdims: true)
@@ -85,7 +87,7 @@ Benchmark.bm 30 do |r|
85
87
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
86
88
  end
87
89
 
88
- x = Cumo::SFloat.ones([500,500])
90
+ x = Cumo::SFloat.ones([500, 500])
89
91
  r.report "x.max(axis: 0, keepdims: true)" do
90
92
  num_iteration.times do
91
93
  x.max(axis: 0, keepdims: true)
@@ -93,7 +95,7 @@ Benchmark.bm 30 do |r|
93
95
  Cumo::CUDA::Runtime.cudaDeviceSynchronize
94
96
  end
95
97
 
96
- x = Cumo::SFloat.ones([500,500])
98
+ x = Cumo::SFloat.ones([500, 500])
97
99
  r.report "x.max(axis: 1, keepdims: true)" do
98
100
  num_iteration.times do
99
101
  x.max(axis: 1, keepdims: true)
data/bin/console CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require "bundler/setup"
4
5
  require "cumo"
data/cumo.gemspec CHANGED
@@ -1,9 +1,9 @@
1
- # coding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  lib = File.expand_path("../lib", __FILE__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
 
5
6
  cumo_version = File.read(File.join(__dir__, "ext/cumo/include/cumo.h")).match(/CUMO_VERSION "([^"]+)"/)[1]
6
- numo_narray_version = File.read(File.join(__dir__, "numo-narray-version")).strip
7
7
 
8
8
  Gem::Specification.new do |spec|
9
9
  spec.name = "cumo"
@@ -16,7 +16,9 @@ Gem::Specification.new do |spec|
16
16
  spec.homepage = "https://github.com/sonots/cumo"
17
17
  spec.license = "BSD-3-Clause"
18
18
 
19
- spec.files = `git ls-files -z`.split("\x0").reject do |f|
19
+ spec.required_ruby_version = ">= 3.0.0"
20
+
21
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
20
22
  f.match(%r{^(test|spec|features)/})
21
23
  end
22
24
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -24,9 +26,4 @@ Gem::Specification.new do |spec|
24
26
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
27
  spec.require_paths = ["lib"]
26
28
  spec.extensions = ["ext/cumo/extconf.rb"]
27
-
28
- spec.add_runtime_dependency "numo-narray", numo_narray_version
29
-
30
- spec.add_development_dependency "bundler", "~> 1.15"
31
- spec.add_development_dependency "rake", "~> 10.0"
32
29
  end
data/ext/cumo/cuda/cudnn.c CHANGED
@@ -50,7 +50,7 @@ cumo_cuda_cudnn_handle()
50
50
  @return [Boolean] Returns true if cuDNN is available
51
51
  */
52
52
  static VALUE
53
- rb_cudnn_available_p()
53
+ rb_cudnn_available_p(VALUE self)
54
54
  {
55
55
  #if CUDNN_FOUND
56
56
  return Qtrue;
@@ -72,7 +72,7 @@ Init_cumo_cuda_cudnn(void)
72
72
  rb_define_const(mCUDA, "Cudnn", mCUDNN); // alias
73
73
  eCUDNNError = rb_define_class_under(mCUDA, "CUDNNError", rb_eStandardError);
74
74
 
75
- rb_define_singleton_method(mCUDNN, "available?", RUBY_METHOD_FUNC(rb_cudnn_available_p), 0);
75
+ rb_define_singleton_method(mCUDNN, "available?", rb_cudnn_available_p, 0);
76
76
  #ifdef CUDNN_FOUND
77
77
  rb_define_const(mCUDNN, "CUDNN_POOLING_MAX", INT2NUM(CUDNN_POOLING_MAX));
78
78
  rb_define_const(mCUDNN, "CUDNN_POOLING_MAX_DETERMINISTIC", INT2NUM(CUDNN_POOLING_MAX_DETERMINISTIC));
data/ext/cumo/cumo.c CHANGED
@@ -114,13 +114,17 @@ Init_cumo()
114
114
  const char* env;
115
115
  VALUE mCumo;
116
116
 
117
+ #ifdef HAVE_RB_EXT_RACTOR_SAFE
118
+ rb_ext_ractor_safe(true);
119
+ #endif
120
+
117
121
  mCumo = rb_define_module("Cumo");
118
122
 
119
123
  rb_define_const(mCumo, "VERSION", rb_str_new2(CUMO_VERSION));
120
124
 
121
- rb_define_singleton_method(mCumo, "enable_compatible_mode", RUBY_METHOD_FUNC(rb_enable_compatible_mode), 0);
122
- rb_define_singleton_method(mCumo, "disable_compatible_mode", RUBY_METHOD_FUNC(rb_disable_compatible_mode), 0);
123
- rb_define_singleton_method(mCumo, "compatible_mode_enabled?", RUBY_METHOD_FUNC(rb_compatible_mode_enabled_p), 0);
125
+ rb_define_singleton_method(mCumo, "enable_compatible_mode", rb_enable_compatible_mode, 0);
126
+ rb_define_singleton_method(mCumo, "disable_compatible_mode", rb_disable_compatible_mode, 0);
127
+ rb_define_singleton_method(mCumo, "compatible_mode_enabled?", rb_compatible_mode_enabled_p, 0);
124
128
 
125
129
  // default is false
126
130
  env = getenv("CUMO_COMPATIBLE_MODE");
data/ext/cumo/depend.erb CHANGED
@@ -1,3 +1,5 @@
1
+ MAKEFLAGS = <%= ENV.fetch('MAKEFLAGS', "-j#{Etc.nprocessors}") %>
2
+
1
3
  TAGSRC = \
2
4
  ../../ruby/include/ruby/*.h \
3
5
  ../../ruby/*.c \
@@ -11,17 +13,17 @@ tags : TAGS
11
13
  TAGS : $(TAGSRC)
12
14
  etags $(TAGSRC)
13
15
 
14
- C_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.c").join(" ")%>
15
- CU_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.cu").join(" ")%>
16
+ C_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.c").join(" ")%>
17
+ CU_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.cu").join(" ")%>
16
18
 
17
- C_COGEN = narray/gen/cogen.rb
18
- CU_COGEN = narray/gen/cogen_kernel.rb
19
- C_DEPENDS = $(C_TMPL) narray/gen/*.rb
20
- CU_DEPENDS = $(CU_TMPL) narray/gen/*.rb
19
+ C_COGEN = <%= __dir__ %>/narray/gen/cogen.rb
20
+ CU_COGEN = <%= __dir__ %>/narray/gen/cogen_kernel.rb
21
+ C_DEPENDS = $(C_TMPL) <%= __dir__ %>/narray/gen/*.rb
22
+ CU_DEPENDS = $(CU_TMPL) <%= __dir__ %>/narray/gen/*.rb
21
23
 
22
24
  <%
23
25
  list_type_c = []
24
- list_type_rb = Dir.glob("narray/gen/def/*.rb")
26
+ list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
25
27
  list_type_rb.each do |type_rb|
26
28
  type_name = File.basename(type_rb, ".rb")
27
29
  next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
@@ -34,7 +36,7 @@ list_type_rb.each do |type_rb|
34
36
 
35
37
  <%
36
38
  list_type_cu = []
37
- list_type_rb = Dir.glob("narray/gen/def/*.rb")
39
+ list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
38
40
  list_type_rb.each do |type_rb|
39
41
  type_name = File.basename(type_rb, ".rb")
40
42
  next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
@@ -47,12 +49,12 @@ list_type_rb.each do |type_rb|
47
49
 
48
50
  src : <%= list_type_cu.join(" ") %> <%= list_type_c.join(" ") %>
49
51
 
50
- build-ctest : cuda/memory_pool_impl_test.exe
52
+ build-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
51
53
 
52
- run-ctest : cuda/memory_pool_impl_test.exe
54
+ run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
53
55
  ./$<
54
56
 
55
- cuda/memory_pool_impl_test.exe: cuda/memory_pool_impl_test.cpp cuda/memory_pool_impl.cpp cuda/memory_pool_impl.hpp
56
- nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< cuda/memory_pool_impl.cpp
57
+ <%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
58
+ nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
57
59
 
58
- CLEANOBJS = *.o */*.o */*/*.o *.bak narray/types/*.c narray/types/*_kernel.cu *.exe */*.exe
60
+ CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe
data/ext/cumo/extconf.rb CHANGED
@@ -1,39 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rbconfig.rb'
4
+ require 'fileutils'
2
5
  require "erb"
6
+ require 'etc'
3
7
  require_relative '../../3rd_party/mkmf-cu/lib/mkmf-cu'
4
8
 
5
- if RUBY_VERSION < "2.0.0"
6
- puts "Cumo::NArray requires Ruby version 2.0 or later."
7
- exit(1)
8
- end
9
-
10
- def have_numo_narray!
11
- version_path = File.join(__dir__, "..", "..", "numo-narray-version")
12
- version = File.read(version_path).strip
13
- gem_spec = Gem::Specification.find_by_name("numo-narray", version)
14
-
15
- $INCFLAGS += " -I#{gem_spec.gem_dir}/ext/numo/narray"
16
- if !have_header("numo/narray.h")
17
- puts "
18
- Header numo/narray.h was not found. Give pathname as follows:
19
- % ruby extconf.rb --with-narray-include=narray_h_dir"
20
- exit(1)
21
- end
22
-
23
- if RUBY_PLATFORM =~ /cygwin|mingw/
24
- $LDFLAGS += " -L#{gem_spec.gem_dir}/ext/numo"
25
- unless have_library("narray","nary_new")
26
- puts "libnarray.a not found"
27
- exit(1)
28
- end
29
- end
9
+ def d(file)
10
+ File.join(__dir__, file)
30
11
  end
31
12
 
32
13
  def create_depend
33
14
  message "creating depend\n"
34
- depend_path = File.join(__dir__, "depend")
35
- File.open(depend_path, "w") do |depend|
36
- depend_erb_path = File.join(__dir__, "depend.erb")
15
+ File.open(d("depend"), "w") do |depend|
16
+ depend_erb_path = d("depend.erb")
37
17
  File.open(depend_erb_path, "r") do |depend_erb|
38
18
  erb = ERB.new(depend_erb.read)
39
19
  erb.filename = depend_erb_path
@@ -42,7 +22,7 @@ def create_depend
42
22
  end
43
23
  end
44
24
 
45
- rm_f 'include/cumo/extconf.h'
25
+ rm_f d('include/cumo/extconf.h')
46
26
 
47
27
  MakeMakefileCuda.install!(cxx: true)
48
28
 
@@ -52,10 +32,10 @@ end
52
32
  $CXXFLAGS << " -std=c++14"
53
33
  #$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
54
34
  #$CFLAGS=" $(cflags) -O3"
55
- $INCFLAGS = "-Iinclude -Inarray -Icuda #{$INCFLAGS}"
35
+ $INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
56
36
 
57
- $INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map{|x| [x,'$(archdir)'] }
58
- $INSTALLFILES << ['include/cumo/extconf.h','$(archdir)']
37
+ $INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map { |x| [x, '$(archdir)'] }
38
+ $INSTALLFILES << ['include/cumo/extconf.h', '$(archdir)']
59
39
  if /cygwin|mingw/ =~ RUBY_PLATFORM
60
40
  $INSTALLFILES << ['libcumo.a', '$(archdir)']
61
41
  end
@@ -113,17 +93,10 @@ cuda/cudnn
113
93
  cuda/cudnn_impl
114
94
  )
115
95
 
116
- if RUBY_VERSION[0..3] == "2.1."
117
- puts "add kwargs"
118
- srcs << "kwargs"
119
- end
120
-
121
- $objs = srcs.map {|src| "#{src}.o" }
96
+ $objs = srcs.map { |src| "#{src}.o" }
122
97
 
123
98
  dir_config("narray")
124
99
 
125
- have_numo_narray!
126
-
127
100
  if have_header("dlfcn.h")
128
101
  exit(1) unless have_library("dl")
129
102
  exit(1) unless have_func("dlopen")
@@ -147,14 +120,14 @@ end
147
120
 
148
121
  have_type("bool", stdbool)
149
122
  unless have_type("u_int8_t", stdint)
150
- have_type("uint8_t",stdint)
123
+ have_type("uint8_t", stdint)
151
124
  end
152
125
  unless have_type("u_int16_t", stdint)
153
- have_type("uint16_t",stdint)
126
+ have_type("uint16_t", stdint)
154
127
  end
155
128
  have_type("int32_t", stdint)
156
129
  unless have_type("u_int32_t", stdint)
157
- have_type("uint32_t",stdint)
130
+ have_type("uint32_t", stdint)
158
131
  end
159
132
  have_type("int64_t", stdint)
160
133
  unless have_type("u_int64_t", stdint)
@@ -162,17 +135,22 @@ unless have_type("u_int64_t", stdint)
162
135
  end
163
136
  have_func("exp10")
164
137
  have_func("rb_arithmetic_sequence_extract")
138
+ have_func("RTYPEDDATA_GET_DATA")
165
139
 
166
140
  have_var("rb_cComplex")
167
141
  have_func("rb_thread_call_without_gvl")
168
142
 
169
- create_header('include/cumo/extconf.h')
143
+ create_header d('include/cumo/extconf.h')
170
144
  $extconf_h = nil # nvcc does not support #include RUBY_EXTCONF_H
171
145
 
146
+ # Create *.o directories
147
+ FileUtils.mkdir_p('narray')
148
+ FileUtils.mkdir_p('cuda')
149
+
172
150
  create_depend
173
151
 
174
- HEADER_DIRS = (ENV['CPATH'] || '').split(':')
175
- LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(':')
152
+ HEADER_DIRS = (ENV['CPATH'] || '').split(File::PATH_SEPARATOR)
153
+ LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(File::PATH_SEPARATOR)
176
154
  dir_config('cumo', HEADER_DIRS, LIB_DIRS)
177
155
 
178
156
  have_library('cuda')
@@ -186,4 +164,12 @@ if have_library('cudnn') # TODO(sonots): cuDNN version check
186
164
  $CXXFLAGS << " -DCUDNN_FOUND"
187
165
  end
188
166
 
167
+ have_library('stdc++')
168
+
189
169
  create_makefile('cumo')
170
+
171
+ begin
172
+ require 'extconf_compile_commands_json'
173
+ ExtconfCompileCommandsJson.generate!
174
+ rescue LoadError
175
+ end
data/ext/cumo/include/cumo/cuda/cudnn.h CHANGED
@@ -14,9 +14,11 @@ extern "C" {
14
14
  #endif
15
15
  #endif
16
16
 
17
+ extern VALUE cumo_cuda_eCUDNNError;
18
+
17
19
  #ifdef CUDNN_FOUND
18
20
 
19
- VALUE cumo_na_eShapeError;
21
+ extern VALUE cumo_na_eShapeError;
20
22
 
21
23
  #define CUMO_CUDA_CUDNN_DEFAULT_MAX_WORKSPACE_SIZE 8 * 1024 * 1024
22
24
 
data/ext/cumo/include/cumo/intern.h CHANGED
@@ -79,6 +79,7 @@ void cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep);
79
79
  // used in aref, aset
80
80
  int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
81
81
  VALUE cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
82
+ VALUE cumo_na_at_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
82
83
 
83
84
  // defined in array, used in math
84
85
  VALUE cumo_na_ary_composition_dtype(VALUE ary);
data/ext/cumo/include/cumo/narray.h CHANGED
@@ -141,7 +141,7 @@ extern "C" {
141
141
  # endif
142
142
  #endif
143
143
 
144
- #if SIZEOF_VALUE > 4
144
+ #if SIZEOF_LONG > 4
145
145
  # undef INT322NUM
146
146
  # undef UINT322NUM
147
147
  # define INT322NUM(x) INT2FIX(x)
@@ -329,6 +329,12 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
329
329
  #define CUMO_RNARRAY_VIEW(val) ((cumo_narray_view_t*)DATA_PTR(val))
330
330
  #define CUMO_RNARRAY_FILEMAP(val) ((cumo_narray_filemap_t*)DATA_PTR(val))
331
331
 
332
+ #ifdef HAVE_RTYPEDDATA_GET_DATA
333
+ #define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)RTYPEDDATA_GET_DATA(ptr))
334
+ #else
335
+ #define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)DATA_PTR(ptr))
336
+ #endif
337
+
332
338
  #define CUMO_RNARRAY_NDIM(val) (CUMO_RNARRAY(val)->ndim)
333
339
  #define CUMO_RNARRAY_TYPE(val) (CUMO_RNARRAY(val)->type)
334
340
  #define CUMO_RNARRAY_FLAG(val) (CUMO_RNARRAY(val)->flag)
@@ -483,6 +489,12 @@ typedef unsigned int CUMO_BIT_DIGIT;
483
489
  #include "cumo/ndloop.h"
484
490
  #include "cumo/intern.h"
485
491
 
492
+ // for Ractor support code
493
+ #ifndef HAVE_RB_EXT_RACTOR_SAFE
494
+ # undef RUBY_TYPED_FROZEN_SHAREABLE
495
+ # define RUBY_TYPED_FROZEN_SHAREABLE 0
496
+ #endif
497
+
486
498
  #if defined(__cplusplus)
487
499
  #if 0
488
500
  { /* satisfy cc-mode */
data/ext/cumo/include/cumo/template.h CHANGED
@@ -112,9 +112,8 @@
112
112
  size_t dig = (pos) / CUMO_NB; \
113
113
  int bit = (pos) % CUMO_NB; \
114
114
  ((CUMO_BIT_DIGIT*)(adr))[dig] = \
115
- (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
115
+ (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
116
116
  }
117
- // val -> val&1 ??
118
117
 
119
118
  #define CUMO_STORE_BIT_STEP( adr, pos, step, idx, val )\
120
119
  { \
@@ -129,9 +128,8 @@
129
128
  pos += step; \
130
129
  } \
131
130
  ((CUMO_BIT_DIGIT*)(adr))[dig] = \
132
- (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
131
+ (((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
133
132
  }
134
- // val -> val&1 ??
135
133
 
136
134
  static inline int
137
135
  cumo_is_aligned(const void *ptr, const size_t alignment)