cumo 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +15 -0
- data/.rubocop_todo.yml +1272 -0
- data/3rd_party/mkmf-cu/Gemfile +2 -0
- data/3rd_party/mkmf-cu/Rakefile +2 -1
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
- data/CHANGELOG.md +69 -0
- data/Gemfile +6 -1
- data/README.md +2 -10
- data/Rakefile +8 -11
- data/bench/broadcast_fp32.rb +28 -26
- data/bench/cumo_bench.rb +18 -16
- data/bench/numo_bench.rb +18 -16
- data/bench/reduction_fp32.rb +14 -12
- data/bin/console +1 -0
- data/cumo.gemspec +5 -8
- data/ext/cumo/cuda/cudnn.c +2 -2
- data/ext/cumo/cumo.c +7 -3
- data/ext/cumo/depend.erb +15 -13
- data/ext/cumo/extconf.rb +32 -46
- data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +13 -1
- data/ext/cumo/include/cumo/template.h +2 -4
- data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
- data/ext/cumo/include/cumo/types/float_macro.h +2 -2
- data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +3 -3
- data/ext/cumo/narray/data.c +23 -2
- data/ext/cumo/narray/gen/cogen.rb +8 -7
- data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
- data/ext/cumo/narray/gen/def/bit.rb +3 -1
- data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/int16.rb +2 -0
- data/ext/cumo/narray/gen/def/int32.rb +2 -0
- data/ext/cumo/narray/gen/def/int64.rb +2 -0
- data/ext/cumo/narray/gen/def/int8.rb +2 -0
- data/ext/cumo/narray/gen/def/robject.rb +2 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
- data/ext/cumo/narray/gen/def/uint16.rb +2 -0
- data/ext/cumo/narray/gen/def/uint32.rb +2 -0
- data/ext/cumo/narray/gen/def/uint64.rb +2 -0
- data/ext/cumo/narray/gen/def/uint8.rb +2 -0
- data/ext/cumo/narray/gen/erbln.rb +9 -7
- data/ext/cumo/narray/gen/erbpp2.rb +26 -24
- data/ext/cumo/narray/gen/narray_def.rb +13 -11
- data/ext/cumo/narray/gen/spec.rb +58 -55
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
- data/ext/cumo/narray/gen/tmpl/at.c +34 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
- data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
- data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
- data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
- data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
- data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
- data/ext/cumo/narray/index.c +243 -39
- data/ext/cumo/narray/index_kernel.cu +84 -0
- data/ext/cumo/narray/narray.c +38 -1
- data/ext/cumo/narray/ndloop.c +1 -1
- data/ext/cumo/narray/struct.c +1 -1
- data/lib/cumo/cuda/compile_error.rb +1 -1
- data/lib/cumo/cuda/compiler.rb +23 -22
- data/lib/cumo/cuda/cudnn.rb +1 -1
- data/lib/cumo/cuda/device.rb +1 -1
- data/lib/cumo/cuda/link_state.rb +2 -2
- data/lib/cumo/cuda/module.rb +1 -2
- data/lib/cumo/cuda/nvrtc_program.rb +3 -2
- data/lib/cumo/cuda.rb +2 -0
- data/lib/cumo/linalg.rb +2 -0
- data/lib/cumo/narray/extra.rb +137 -185
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo.rb +3 -1
- data/test/bit_test.rb +157 -0
- data/test/cuda/compiler_test.rb +69 -0
- data/test/cuda/device_test.rb +30 -0
- data/test/cuda/memory_pool_test.rb +45 -0
- data/test/cuda/nvrtc_test.rb +51 -0
- data/test/cuda/runtime_test.rb +28 -0
- data/test/cudnn_test.rb +498 -0
- data/test/cumo_test.rb +27 -0
- data/test/narray_test.rb +745 -0
- data/test/ractor_test.rb +52 -0
- data/test/test_helper.rb +31 -0
- metadata +31 -54
- data/.travis.yml +0 -5
- data/numo-narray-version +0 -1
data/bench/cumo_bench.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'cumo/narray'
|
|
2
4
|
require 'benchmark'
|
|
3
5
|
|
|
@@ -5,7 +7,7 @@ NUM = (ARGV.first || 100).to_i
|
|
|
5
7
|
|
|
6
8
|
# warm up
|
|
7
9
|
a = Cumo::Float32.new(10).seq(1)
|
|
8
|
-
b = Cumo::Float32.new(10).seq(10,10)
|
|
10
|
+
b = Cumo::Float32.new(10).seq(10, 10)
|
|
9
11
|
c = a + b
|
|
10
12
|
c.free
|
|
11
13
|
|
|
@@ -14,7 +16,7 @@ def elementwise(num = nil)
|
|
|
14
16
|
puts "elementwise(#{num})"
|
|
15
17
|
Benchmark.bm do |r|
|
|
16
18
|
a = Cumo::Float32.new(10000).seq(1)
|
|
17
|
-
b = Cumo::Float32.new(10000).seq(10,10)
|
|
19
|
+
b = Cumo::Float32.new(10000).seq(10, 10)
|
|
18
20
|
(a + b).free # warm up
|
|
19
21
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
20
22
|
r.report('10**4') do
|
|
@@ -25,7 +27,7 @@ def elementwise(num = nil)
|
|
|
25
27
|
end
|
|
26
28
|
|
|
27
29
|
a = Cumo::Float32.new(100000).seq(1)
|
|
28
|
-
b = Cumo::Float32.new(100000).seq(10,10)
|
|
30
|
+
b = Cumo::Float32.new(100000).seq(10, 10)
|
|
29
31
|
(a + b).free # warm up
|
|
30
32
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
31
33
|
r.report('10**5') do
|
|
@@ -36,7 +38,7 @@ def elementwise(num = nil)
|
|
|
36
38
|
end
|
|
37
39
|
|
|
38
40
|
a = Cumo::Float32.new(1000000).seq(1)
|
|
39
|
-
b = Cumo::Float32.new(1000000).seq(10,10)
|
|
41
|
+
b = Cumo::Float32.new(1000000).seq(10, 10)
|
|
40
42
|
(a + b).free # warm up
|
|
41
43
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
42
44
|
r.report('10**6') do
|
|
@@ -47,7 +49,7 @@ def elementwise(num = nil)
|
|
|
47
49
|
end
|
|
48
50
|
|
|
49
51
|
a = Cumo::Float32.new(10000000).seq(1)
|
|
50
|
-
b = Cumo::Float32.new(10000000).seq(10,10)
|
|
52
|
+
b = Cumo::Float32.new(10000000).seq(10, 10)
|
|
51
53
|
(a + b).free # warm up
|
|
52
54
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
53
55
|
r.report('10**7') do
|
|
@@ -58,7 +60,7 @@ def elementwise(num = nil)
|
|
|
58
60
|
end
|
|
59
61
|
|
|
60
62
|
a = Cumo::Float32.new(100000000).seq(1)
|
|
61
|
-
b = Cumo::Float32.new(100000000).seq(10,10)
|
|
63
|
+
b = Cumo::Float32.new(100000000).seq(10, 10)
|
|
62
64
|
(a + b).free # warm up
|
|
63
65
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
64
66
|
r.report('10**8') do
|
|
@@ -130,8 +132,8 @@ def dot(num = nil)
|
|
|
130
132
|
num ||= 1
|
|
131
133
|
puts "dot(#{num})"
|
|
132
134
|
Benchmark.bm do |r|
|
|
133
|
-
a = Cumo::Float32.new(100,100).seq(1)
|
|
134
|
-
b = Cumo::Float32.new(100,100).seq(10,10)
|
|
135
|
+
a = Cumo::Float32.new(100, 100).seq(1)
|
|
136
|
+
b = Cumo::Float32.new(100, 100).seq(10, 10)
|
|
135
137
|
a.dot(b).free # warm up
|
|
136
138
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
137
139
|
r.report('10**4') do
|
|
@@ -141,8 +143,8 @@ def dot(num = nil)
|
|
|
141
143
|
end
|
|
142
144
|
end
|
|
143
145
|
|
|
144
|
-
a = Cumo::Float32.new(100,1000).seq(1)
|
|
145
|
-
b = Cumo::Float32.new(1000,100).seq(10,10)
|
|
146
|
+
a = Cumo::Float32.new(100, 1000).seq(1)
|
|
147
|
+
b = Cumo::Float32.new(1000, 100).seq(10, 10)
|
|
146
148
|
a.dot(b).free # warm up
|
|
147
149
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
148
150
|
r.report('10**5') do
|
|
@@ -152,8 +154,8 @@ def dot(num = nil)
|
|
|
152
154
|
end
|
|
153
155
|
end
|
|
154
156
|
|
|
155
|
-
a = Cumo::Float32.new(100,10000).seq(1)
|
|
156
|
-
b = Cumo::Float32.new(10000,100).seq(10,10)
|
|
157
|
+
a = Cumo::Float32.new(100, 10000).seq(1)
|
|
158
|
+
b = Cumo::Float32.new(10000, 100).seq(10, 10)
|
|
157
159
|
a.dot(b).free # warm up
|
|
158
160
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
159
161
|
r.report('10**6') do
|
|
@@ -163,8 +165,8 @@ def dot(num = nil)
|
|
|
163
165
|
end
|
|
164
166
|
end
|
|
165
167
|
|
|
166
|
-
a = Cumo::Float32.new(100,100000).seq(1)
|
|
167
|
-
b = Cumo::Float32.new(100000,100).seq(10,10)
|
|
168
|
+
a = Cumo::Float32.new(100, 100000).seq(1)
|
|
169
|
+
b = Cumo::Float32.new(100000, 100).seq(10, 10)
|
|
168
170
|
a.dot(b).free # warm up
|
|
169
171
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
170
172
|
r.report('10**7') do
|
|
@@ -174,8 +176,8 @@ def dot(num = nil)
|
|
|
174
176
|
end
|
|
175
177
|
end
|
|
176
178
|
|
|
177
|
-
a = Cumo::Float32.new(100,1000000).seq(1)
|
|
178
|
-
b = Cumo::Float32.new(1000000,100).seq(10,10)
|
|
179
|
+
a = Cumo::Float32.new(100, 1000000).seq(1)
|
|
180
|
+
b = Cumo::Float32.new(1000000, 100).seq(10, 10)
|
|
179
181
|
a.dot(b).free # warm up
|
|
180
182
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
181
183
|
r.report('10**8') do
|
data/bench/numo_bench.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'numo/narray'
|
|
2
4
|
require 'benchmark'
|
|
3
5
|
|
|
@@ -5,7 +7,7 @@ NUM = (ARGV.first || 100).to_i
|
|
|
5
7
|
|
|
6
8
|
# warm up
|
|
7
9
|
a = Numo::Float32.new(10).seq(1)
|
|
8
|
-
b = Numo::Float32.new(10).seq(10,10)
|
|
10
|
+
b = Numo::Float32.new(10).seq(10, 10)
|
|
9
11
|
c = a + b
|
|
10
12
|
|
|
11
13
|
def elementwise(num = nil)
|
|
@@ -13,31 +15,31 @@ def elementwise(num = nil)
|
|
|
13
15
|
puts "elementwise(#{num})"
|
|
14
16
|
Benchmark.bm do |r|
|
|
15
17
|
a = Numo::Float32.new(10000).seq(1)
|
|
16
|
-
b = Numo::Float32.new(10000).seq(10,10)
|
|
18
|
+
b = Numo::Float32.new(10000).seq(10, 10)
|
|
17
19
|
r.report('10**4') do
|
|
18
20
|
NUM.times { (a + b) }
|
|
19
21
|
end
|
|
20
22
|
|
|
21
23
|
a = Numo::Float32.new(100000).seq(1)
|
|
22
|
-
b = Numo::Float32.new(100000).seq(10,10)
|
|
24
|
+
b = Numo::Float32.new(100000).seq(10, 10)
|
|
23
25
|
r.report('10**5') do
|
|
24
26
|
NUM.times { (a + b) }
|
|
25
27
|
end
|
|
26
28
|
|
|
27
29
|
a = Numo::Float32.new(1000000).seq(1)
|
|
28
|
-
b = Numo::Float32.new(1000000).seq(10,10)
|
|
30
|
+
b = Numo::Float32.new(1000000).seq(10, 10)
|
|
29
31
|
r.report('10**6') do
|
|
30
32
|
NUM.times { (a + b) }
|
|
31
33
|
end
|
|
32
34
|
|
|
33
35
|
a = Numo::Float32.new(10000000).seq(1)
|
|
34
|
-
b = Numo::Float32.new(10000000).seq(10,10)
|
|
36
|
+
b = Numo::Float32.new(10000000).seq(10, 10)
|
|
35
37
|
r.report('10**7') do
|
|
36
38
|
NUM.times { (a + b) }
|
|
37
39
|
end
|
|
38
40
|
|
|
39
41
|
a = Numo::Float32.new(100000000).seq(1)
|
|
40
|
-
b = Numo::Float32.new(100000000).seq(10,10)
|
|
42
|
+
b = Numo::Float32.new(100000000).seq(10, 10)
|
|
41
43
|
r.report('10**8') do
|
|
42
44
|
NUM.times { (a + b) }
|
|
43
45
|
end
|
|
@@ -79,32 +81,32 @@ def dot(num = nil)
|
|
|
79
81
|
num ||= 1
|
|
80
82
|
puts "dot(#{num})"
|
|
81
83
|
Benchmark.bm do |r|
|
|
82
|
-
a = Numo::Float32.new(100,100).seq(1)
|
|
83
|
-
b = Numo::Float32.new(100,100).seq(10,10)
|
|
84
|
+
a = Numo::Float32.new(100, 100).seq(1)
|
|
85
|
+
b = Numo::Float32.new(100, 100).seq(10, 10)
|
|
84
86
|
r.report('10**4') do
|
|
85
87
|
num.times { a.dot(b) }
|
|
86
88
|
end
|
|
87
89
|
|
|
88
|
-
a = Numo::Float32.new(100,1000).seq(1)
|
|
89
|
-
b = Numo::Float32.new(1000,100).seq(10,10)
|
|
90
|
+
a = Numo::Float32.new(100, 1000).seq(1)
|
|
91
|
+
b = Numo::Float32.new(1000, 100).seq(10, 10)
|
|
90
92
|
r.report('10**5') do
|
|
91
93
|
num.times { a.dot(b) }
|
|
92
94
|
end
|
|
93
95
|
|
|
94
|
-
a = Numo::Float32.new(100,10000).seq(1)
|
|
95
|
-
b = Numo::Float32.new(10000,100).seq(10,10)
|
|
96
|
+
a = Numo::Float32.new(100, 10000).seq(1)
|
|
97
|
+
b = Numo::Float32.new(10000, 100).seq(10, 10)
|
|
96
98
|
r.report('10**6') do
|
|
97
99
|
num.times { a.dot(b) }
|
|
98
100
|
end
|
|
99
101
|
|
|
100
|
-
a = Numo::Float32.new(100,100000).seq(1)
|
|
101
|
-
b = Numo::Float32.new(100000,100).seq(10,10)
|
|
102
|
+
a = Numo::Float32.new(100, 100000).seq(1)
|
|
103
|
+
b = Numo::Float32.new(100000, 100).seq(10, 10)
|
|
102
104
|
r.report('10**7') do
|
|
103
105
|
num.times { a.dot(b) }
|
|
104
106
|
end
|
|
105
107
|
|
|
106
|
-
a = Numo::Float32.new(100,1000000).seq(1)
|
|
107
|
-
b = Numo::Float32.new(1000000,100).seq(10,10)
|
|
108
|
+
a = Numo::Float32.new(100, 1000000).seq(1)
|
|
109
|
+
b = Numo::Float32.new(1000000, 100).seq(10, 10)
|
|
108
110
|
r.report('10**8') do
|
|
109
111
|
num.times { a.dot(b) }
|
|
110
112
|
end
|
data/bench/reduction_fp32.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'benchmark'
|
|
2
4
|
require 'cumo/narray'
|
|
3
5
|
|
|
@@ -5,7 +7,7 @@ num_iteration = 100
|
|
|
5
7
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
6
8
|
|
|
7
9
|
Benchmark.bm 30 do |r|
|
|
8
|
-
x = Cumo::SFloat.ones([500,500])
|
|
10
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
9
11
|
r.report "x.sum" do
|
|
10
12
|
num_iteration.times do
|
|
11
13
|
x.sum
|
|
@@ -13,7 +15,7 @@ Benchmark.bm 30 do |r|
|
|
|
13
15
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
14
16
|
end
|
|
15
17
|
|
|
16
|
-
x = Cumo::SFloat.ones([500,500])
|
|
18
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
17
19
|
r.report "x.sum(axis: 0)" do
|
|
18
20
|
num_iteration.times do
|
|
19
21
|
x.sum(axis: 0)
|
|
@@ -21,7 +23,7 @@ Benchmark.bm 30 do |r|
|
|
|
21
23
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
22
24
|
end
|
|
23
25
|
|
|
24
|
-
x = Cumo::SFloat.ones([500,500])
|
|
26
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
25
27
|
r.report "x.sum(axis: 1)" do
|
|
26
28
|
num_iteration.times do
|
|
27
29
|
x.sum(axis: 1)
|
|
@@ -29,7 +31,7 @@ Benchmark.bm 30 do |r|
|
|
|
29
31
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
30
32
|
end
|
|
31
33
|
|
|
32
|
-
x = Cumo::SFloat.ones([500,500])
|
|
34
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
33
35
|
r.report "x.sum(keepdims: true)" do
|
|
34
36
|
num_iteration.times do
|
|
35
37
|
x.sum(keepdims: true)
|
|
@@ -37,7 +39,7 @@ Benchmark.bm 30 do |r|
|
|
|
37
39
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
38
40
|
end
|
|
39
41
|
|
|
40
|
-
x = Cumo::SFloat.ones([500,500])
|
|
42
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
41
43
|
r.report "x.sum(axis: 0, keepdims: true)" do
|
|
42
44
|
num_iteration.times do
|
|
43
45
|
x.sum(axis: 0, keepdims: true)
|
|
@@ -45,7 +47,7 @@ Benchmark.bm 30 do |r|
|
|
|
45
47
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
46
48
|
end
|
|
47
49
|
|
|
48
|
-
x = Cumo::SFloat.ones([500,500])
|
|
50
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
49
51
|
r.report "x.sum(axis: 1, keepdims: true)" do
|
|
50
52
|
num_iteration.times do
|
|
51
53
|
x.sum(axis: 1, keepdims: true)
|
|
@@ -53,7 +55,7 @@ Benchmark.bm 30 do |r|
|
|
|
53
55
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
54
56
|
end
|
|
55
57
|
|
|
56
|
-
x = Cumo::SFloat.ones([500,500])
|
|
58
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
57
59
|
r.report "x.max" do
|
|
58
60
|
num_iteration.times do
|
|
59
61
|
x.max
|
|
@@ -61,7 +63,7 @@ Benchmark.bm 30 do |r|
|
|
|
61
63
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
62
64
|
end
|
|
63
65
|
|
|
64
|
-
x = Cumo::SFloat.ones([500,500])
|
|
66
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
65
67
|
r.report "x.max(axis: 0)" do
|
|
66
68
|
num_iteration.times do
|
|
67
69
|
x.max(axis: 0)
|
|
@@ -69,7 +71,7 @@ Benchmark.bm 30 do |r|
|
|
|
69
71
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
70
72
|
end
|
|
71
73
|
|
|
72
|
-
x = Cumo::SFloat.ones([500,500])
|
|
74
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
73
75
|
r.report "x.max(axis: 1)" do
|
|
74
76
|
num_iteration.times do
|
|
75
77
|
x.max(axis: 1)
|
|
@@ -77,7 +79,7 @@ Benchmark.bm 30 do |r|
|
|
|
77
79
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
78
80
|
end
|
|
79
81
|
|
|
80
|
-
x = Cumo::SFloat.ones([500,500])
|
|
82
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
81
83
|
r.report "x.max(keepdims: true)" do
|
|
82
84
|
num_iteration.times do
|
|
83
85
|
x.max(keepdims: true)
|
|
@@ -85,7 +87,7 @@ Benchmark.bm 30 do |r|
|
|
|
85
87
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
86
88
|
end
|
|
87
89
|
|
|
88
|
-
x = Cumo::SFloat.ones([500,500])
|
|
90
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
89
91
|
r.report "x.max(axis: 0, keepdims: true)" do
|
|
90
92
|
num_iteration.times do
|
|
91
93
|
x.max(axis: 0, keepdims: true)
|
|
@@ -93,7 +95,7 @@ Benchmark.bm 30 do |r|
|
|
|
93
95
|
Cumo::CUDA::Runtime.cudaDeviceSynchronize
|
|
94
96
|
end
|
|
95
97
|
|
|
96
|
-
x = Cumo::SFloat.ones([500,500])
|
|
98
|
+
x = Cumo::SFloat.ones([500, 500])
|
|
97
99
|
r.report "x.max(axis: 1, keepdims: true)" do
|
|
98
100
|
num_iteration.times do
|
|
99
101
|
x.max(axis: 1, keepdims: true)
|
data/bin/console
CHANGED
data/cumo.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
lib = File.expand_path("../lib", __FILE__)
|
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
5
|
|
|
5
6
|
cumo_version = File.read(File.join(__dir__, "ext/cumo/include/cumo.h")).match(/CUMO_VERSION "([^"]+)"/)[1]
|
|
6
|
-
numo_narray_version = File.read(File.join(__dir__, "numo-narray-version")).strip
|
|
7
7
|
|
|
8
8
|
Gem::Specification.new do |spec|
|
|
9
9
|
spec.name = "cumo"
|
|
@@ -16,7 +16,9 @@ Gem::Specification.new do |spec|
|
|
|
16
16
|
spec.homepage = "https://github.com/sonots/cumo"
|
|
17
17
|
spec.license = "BSD-3-Clause"
|
|
18
18
|
|
|
19
|
-
spec.
|
|
19
|
+
spec.required_ruby_version = ">= 3.0.0"
|
|
20
|
+
|
|
21
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
|
20
22
|
f.match(%r{^(test|spec|features)/})
|
|
21
23
|
end
|
|
22
24
|
spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
|
@@ -24,9 +26,4 @@ Gem::Specification.new do |spec|
|
|
|
24
26
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
25
27
|
spec.require_paths = ["lib"]
|
|
26
28
|
spec.extensions = ["ext/cumo/extconf.rb"]
|
|
27
|
-
|
|
28
|
-
spec.add_runtime_dependency "numo-narray", numo_narray_version
|
|
29
|
-
|
|
30
|
-
spec.add_development_dependency "bundler", "~> 1.15"
|
|
31
|
-
spec.add_development_dependency "rake", "~> 10.0"
|
|
32
29
|
end
|
data/ext/cumo/cuda/cudnn.c
CHANGED
|
@@ -50,7 +50,7 @@ cumo_cuda_cudnn_handle()
|
|
|
50
50
|
@return [Boolean] Returns true if cuDNN is available
|
|
51
51
|
*/
|
|
52
52
|
static VALUE
|
|
53
|
-
rb_cudnn_available_p()
|
|
53
|
+
rb_cudnn_available_p(VALUE self)
|
|
54
54
|
{
|
|
55
55
|
#if CUDNN_FOUND
|
|
56
56
|
return Qtrue;
|
|
@@ -72,7 +72,7 @@ Init_cumo_cuda_cudnn(void)
|
|
|
72
72
|
rb_define_const(mCUDA, "Cudnn", mCUDNN); // alias
|
|
73
73
|
eCUDNNError = rb_define_class_under(mCUDA, "CUDNNError", rb_eStandardError);
|
|
74
74
|
|
|
75
|
-
rb_define_singleton_method(mCUDNN, "available?",
|
|
75
|
+
rb_define_singleton_method(mCUDNN, "available?", rb_cudnn_available_p, 0);
|
|
76
76
|
#ifdef CUDNN_FOUND
|
|
77
77
|
rb_define_const(mCUDNN, "CUDNN_POOLING_MAX", INT2NUM(CUDNN_POOLING_MAX));
|
|
78
78
|
rb_define_const(mCUDNN, "CUDNN_POOLING_MAX_DETERMINISTIC", INT2NUM(CUDNN_POOLING_MAX_DETERMINISTIC));
|
data/ext/cumo/cumo.c
CHANGED
|
@@ -114,13 +114,17 @@ Init_cumo()
|
|
|
114
114
|
const char* env;
|
|
115
115
|
VALUE mCumo;
|
|
116
116
|
|
|
117
|
+
#ifdef HAVE_RB_EXT_RACTOR_SAFE
|
|
118
|
+
rb_ext_ractor_safe(true);
|
|
119
|
+
#endif
|
|
120
|
+
|
|
117
121
|
mCumo = rb_define_module("Cumo");
|
|
118
122
|
|
|
119
123
|
rb_define_const(mCumo, "VERSION", rb_str_new2(CUMO_VERSION));
|
|
120
124
|
|
|
121
|
-
rb_define_singleton_method(mCumo, "enable_compatible_mode",
|
|
122
|
-
rb_define_singleton_method(mCumo, "disable_compatible_mode",
|
|
123
|
-
rb_define_singleton_method(mCumo, "compatible_mode_enabled?",
|
|
125
|
+
rb_define_singleton_method(mCumo, "enable_compatible_mode", rb_enable_compatible_mode, 0);
|
|
126
|
+
rb_define_singleton_method(mCumo, "disable_compatible_mode", rb_disable_compatible_mode, 0);
|
|
127
|
+
rb_define_singleton_method(mCumo, "compatible_mode_enabled?", rb_compatible_mode_enabled_p, 0);
|
|
124
128
|
|
|
125
129
|
// default is false
|
|
126
130
|
env = getenv("CUMO_COMPATIBLE_MODE");
|
data/ext/cumo/depend.erb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
MAKEFLAGS = <%= ENV.fetch('MAKEFLAGS', "-j#{Etc.nprocessors}") %>
|
|
2
|
+
|
|
1
3
|
TAGSRC = \
|
|
2
4
|
../../ruby/include/ruby/*.h \
|
|
3
5
|
../../ruby/*.c \
|
|
@@ -11,17 +13,17 @@ tags : TAGS
|
|
|
11
13
|
TAGS : $(TAGSRC)
|
|
12
14
|
etags $(TAGSRC)
|
|
13
15
|
|
|
14
|
-
C_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.c").join(" ")%>
|
|
15
|
-
CU_TMPL = <%=Dir.glob("narray/gen/tmpl*/*.cu").join(" ")%>
|
|
16
|
+
C_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.c").join(" ")%>
|
|
17
|
+
CU_TMPL = <%=Dir.glob("#{__dir__}/narray/gen/tmpl*/*.cu").join(" ")%>
|
|
16
18
|
|
|
17
|
-
C_COGEN = narray/gen/cogen.rb
|
|
18
|
-
CU_COGEN = narray/gen/cogen_kernel.rb
|
|
19
|
-
C_DEPENDS = $(C_TMPL) narray/gen/*.rb
|
|
20
|
-
CU_DEPENDS = $(CU_TMPL) narray/gen/*.rb
|
|
19
|
+
C_COGEN = <%= __dir__ %>/narray/gen/cogen.rb
|
|
20
|
+
CU_COGEN = <%= __dir__ %>/narray/gen/cogen_kernel.rb
|
|
21
|
+
C_DEPENDS = $(C_TMPL) <%= __dir__ %>/narray/gen/*.rb
|
|
22
|
+
CU_DEPENDS = $(CU_TMPL) <%= __dir__ %>/narray/gen/*.rb
|
|
21
23
|
|
|
22
24
|
<%
|
|
23
25
|
list_type_c = []
|
|
24
|
-
list_type_rb = Dir.glob("narray/gen/def/*.rb")
|
|
26
|
+
list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
|
|
25
27
|
list_type_rb.each do |type_rb|
|
|
26
28
|
type_name = File.basename(type_rb, ".rb")
|
|
27
29
|
next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
|
|
@@ -34,7 +36,7 @@ list_type_rb.each do |type_rb|
|
|
|
34
36
|
|
|
35
37
|
<%
|
|
36
38
|
list_type_cu = []
|
|
37
|
-
list_type_rb = Dir.glob("narray/gen/def/*.rb")
|
|
39
|
+
list_type_rb = Dir.glob("#{__dir__}/narray/gen/def/*.rb")
|
|
38
40
|
list_type_rb.each do |type_rb|
|
|
39
41
|
type_name = File.basename(type_rb, ".rb")
|
|
40
42
|
next if ENV['DTYPE'] and !type_name.downcase.include?(ENV['DTYPE'].downcase)
|
|
@@ -47,12 +49,12 @@ list_type_rb.each do |type_rb|
|
|
|
47
49
|
|
|
48
50
|
src : <%= list_type_cu.join(" ") %> <%= list_type_c.join(" ") %>
|
|
49
51
|
|
|
50
|
-
build-ctest : cuda/memory_pool_impl_test.exe
|
|
52
|
+
build-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
|
|
51
53
|
|
|
52
|
-
run-ctest : cuda/memory_pool_impl_test.exe
|
|
54
|
+
run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
|
|
53
55
|
./$<
|
|
54
56
|
|
|
55
|
-
cuda/memory_pool_impl_test.exe: cuda/memory_pool_impl_test.cpp cuda/memory_pool_impl.cpp cuda/memory_pool_impl.hpp
|
|
56
|
-
nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< cuda/memory_pool_impl.cpp
|
|
57
|
+
<%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
|
|
58
|
+
nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
|
|
57
59
|
|
|
58
|
-
CLEANOBJS =
|
|
60
|
+
CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe
|
data/ext/cumo/extconf.rb
CHANGED
|
@@ -1,39 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'rbconfig.rb'
|
|
4
|
+
require 'fileutils'
|
|
2
5
|
require "erb"
|
|
6
|
+
require 'etc'
|
|
3
7
|
require_relative '../../3rd_party/mkmf-cu/lib/mkmf-cu'
|
|
4
8
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
exit(1)
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
def have_numo_narray!
|
|
11
|
-
version_path = File.join(__dir__, "..", "..", "numo-narray-version")
|
|
12
|
-
version = File.read(version_path).strip
|
|
13
|
-
gem_spec = Gem::Specification.find_by_name("numo-narray", version)
|
|
14
|
-
|
|
15
|
-
$INCFLAGS += " -I#{gem_spec.gem_dir}/ext/numo/narray"
|
|
16
|
-
if !have_header("numo/narray.h")
|
|
17
|
-
puts "
|
|
18
|
-
Header numo/narray.h was not found. Give pathname as follows:
|
|
19
|
-
% ruby extconf.rb --with-narray-include=narray_h_dir"
|
|
20
|
-
exit(1)
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
if RUBY_PLATFORM =~ /cygwin|mingw/
|
|
24
|
-
$LDFLAGS += " -L#{gem_spec.gem_dir}/ext/numo"
|
|
25
|
-
unless have_library("narray","nary_new")
|
|
26
|
-
puts "libnarray.a not found"
|
|
27
|
-
exit(1)
|
|
28
|
-
end
|
|
29
|
-
end
|
|
9
|
+
def d(file)
|
|
10
|
+
File.join(__dir__, file)
|
|
30
11
|
end
|
|
31
12
|
|
|
32
13
|
def create_depend
|
|
33
14
|
message "creating depend\n"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
depend_erb_path = File.join(__dir__, "depend.erb")
|
|
15
|
+
File.open(d("depend"), "w") do |depend|
|
|
16
|
+
depend_erb_path = d("depend.erb")
|
|
37
17
|
File.open(depend_erb_path, "r") do |depend_erb|
|
|
38
18
|
erb = ERB.new(depend_erb.read)
|
|
39
19
|
erb.filename = depend_erb_path
|
|
@@ -42,7 +22,7 @@ def create_depend
|
|
|
42
22
|
end
|
|
43
23
|
end
|
|
44
24
|
|
|
45
|
-
rm_f 'include/cumo/extconf.h'
|
|
25
|
+
rm_f d('include/cumo/extconf.h')
|
|
46
26
|
|
|
47
27
|
MakeMakefileCuda.install!(cxx: true)
|
|
48
28
|
|
|
@@ -52,10 +32,10 @@ end
|
|
|
52
32
|
$CXXFLAGS << " -std=c++14"
|
|
53
33
|
#$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
|
|
54
34
|
#$CFLAGS=" $(cflags) -O3"
|
|
55
|
-
$INCFLAGS = "-
|
|
35
|
+
$INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
|
|
56
36
|
|
|
57
|
-
$INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map{|x| [x,'$(archdir)'] }
|
|
58
|
-
$INSTALLFILES << ['include/cumo/extconf.h','$(archdir)']
|
|
37
|
+
$INSTALLFILES = Dir.glob(%w[include/cumo/*.h include/cumo/types/*.h include/cumo/cuda/*.h]).map { |x| [x, '$(archdir)'] }
|
|
38
|
+
$INSTALLFILES << ['include/cumo/extconf.h', '$(archdir)']
|
|
59
39
|
if /cygwin|mingw/ =~ RUBY_PLATFORM
|
|
60
40
|
$INSTALLFILES << ['libcumo.a', '$(archdir)']
|
|
61
41
|
end
|
|
@@ -113,17 +93,10 @@ cuda/cudnn
|
|
|
113
93
|
cuda/cudnn_impl
|
|
114
94
|
)
|
|
115
95
|
|
|
116
|
-
|
|
117
|
-
puts "add kwargs"
|
|
118
|
-
srcs << "kwargs"
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
$objs = srcs.map {|src| "#{src}.o" }
|
|
96
|
+
$objs = srcs.map { |src| "#{src}.o" }
|
|
122
97
|
|
|
123
98
|
dir_config("narray")
|
|
124
99
|
|
|
125
|
-
have_numo_narray!
|
|
126
|
-
|
|
127
100
|
if have_header("dlfcn.h")
|
|
128
101
|
exit(1) unless have_library("dl")
|
|
129
102
|
exit(1) unless have_func("dlopen")
|
|
@@ -147,14 +120,14 @@ end
|
|
|
147
120
|
|
|
148
121
|
have_type("bool", stdbool)
|
|
149
122
|
unless have_type("u_int8_t", stdint)
|
|
150
|
-
have_type("uint8_t",stdint)
|
|
123
|
+
have_type("uint8_t", stdint)
|
|
151
124
|
end
|
|
152
125
|
unless have_type("u_int16_t", stdint)
|
|
153
|
-
have_type("uint16_t",stdint)
|
|
126
|
+
have_type("uint16_t", stdint)
|
|
154
127
|
end
|
|
155
128
|
have_type("int32_t", stdint)
|
|
156
129
|
unless have_type("u_int32_t", stdint)
|
|
157
|
-
have_type("uint32_t",stdint)
|
|
130
|
+
have_type("uint32_t", stdint)
|
|
158
131
|
end
|
|
159
132
|
have_type("int64_t", stdint)
|
|
160
133
|
unless have_type("u_int64_t", stdint)
|
|
@@ -162,17 +135,22 @@ unless have_type("u_int64_t", stdint)
|
|
|
162
135
|
end
|
|
163
136
|
have_func("exp10")
|
|
164
137
|
have_func("rb_arithmetic_sequence_extract")
|
|
138
|
+
have_func("RTYPEDDATA_GET_DATA")
|
|
165
139
|
|
|
166
140
|
have_var("rb_cComplex")
|
|
167
141
|
have_func("rb_thread_call_without_gvl")
|
|
168
142
|
|
|
169
|
-
create_header('include/cumo/extconf.h')
|
|
143
|
+
create_header d('include/cumo/extconf.h')
|
|
170
144
|
$extconf_h = nil # nvcc does not support #include RUBY_EXTCONF_H
|
|
171
145
|
|
|
146
|
+
# Create *.o directories
|
|
147
|
+
FileUtils.mkdir_p('narray')
|
|
148
|
+
FileUtils.mkdir_p('cuda')
|
|
149
|
+
|
|
172
150
|
create_depend
|
|
173
151
|
|
|
174
|
-
HEADER_DIRS = (ENV['CPATH'] || '').split(
|
|
175
|
-
LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(
|
|
152
|
+
HEADER_DIRS = (ENV['CPATH'] || '').split(File::PATH_SEPARATOR)
|
|
153
|
+
LIB_DIRS = (ENV['LIBRARY_PATH'] || '').split(File::PATH_SEPARATOR)
|
|
176
154
|
dir_config('cumo', HEADER_DIRS, LIB_DIRS)
|
|
177
155
|
|
|
178
156
|
have_library('cuda')
|
|
@@ -186,4 +164,12 @@ if have_library('cudnn') # TODO(sonots): cuDNN version check
|
|
|
186
164
|
$CXXFLAGS << " -DCUDNN_FOUND"
|
|
187
165
|
end
|
|
188
166
|
|
|
167
|
+
have_library('stdc++')
|
|
168
|
+
|
|
189
169
|
create_makefile('cumo')
|
|
170
|
+
|
|
171
|
+
begin
|
|
172
|
+
require 'extconf_compile_commands_json'
|
|
173
|
+
ExtconfCompileCommandsJson.generate!
|
|
174
|
+
rescue LoadError
|
|
175
|
+
end
|
|
@@ -79,6 +79,7 @@ void cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep);
|
|
|
79
79
|
// used in aref, aset
|
|
80
80
|
int cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
|
|
81
81
|
VALUE cumo_na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
|
82
|
+
VALUE cumo_na_at_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
|
82
83
|
|
|
83
84
|
// defined in array, used in math
|
|
84
85
|
VALUE cumo_na_ary_composition_dtype(VALUE ary);
|
|
@@ -141,7 +141,7 @@ extern "C" {
|
|
|
141
141
|
# endif
|
|
142
142
|
#endif
|
|
143
143
|
|
|
144
|
-
#if
|
|
144
|
+
#if SIZEOF_LONG > 4
|
|
145
145
|
# undef INT322NUM
|
|
146
146
|
# undef UINT322NUM
|
|
147
147
|
# define INT322NUM(x) INT2FIX(x)
|
|
@@ -329,6 +329,12 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
|
|
|
329
329
|
#define CUMO_RNARRAY_VIEW(val) ((cumo_narray_view_t*)DATA_PTR(val))
|
|
330
330
|
#define CUMO_RNARRAY_FILEMAP(val) ((cumo_narray_filemap_t*)DATA_PTR(val))
|
|
331
331
|
|
|
332
|
+
#ifdef HAVE_RTYPEDDATA_GET_DATA
|
|
333
|
+
#define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)RTYPEDDATA_GET_DATA(ptr))
|
|
334
|
+
#else
|
|
335
|
+
#define CUMO_RENUMERATOR_PTR(ptr) ((cumo_enumerator_t *)DATA_PTR(ptr))
|
|
336
|
+
#endif
|
|
337
|
+
|
|
332
338
|
#define CUMO_RNARRAY_NDIM(val) (CUMO_RNARRAY(val)->ndim)
|
|
333
339
|
#define CUMO_RNARRAY_TYPE(val) (CUMO_RNARRAY(val)->type)
|
|
334
340
|
#define CUMO_RNARRAY_FLAG(val) (CUMO_RNARRAY(val)->flag)
|
|
@@ -483,6 +489,12 @@ typedef unsigned int CUMO_BIT_DIGIT;
|
|
|
483
489
|
#include "cumo/ndloop.h"
|
|
484
490
|
#include "cumo/intern.h"
|
|
485
491
|
|
|
492
|
+
// for Ractor support code
|
|
493
|
+
#ifndef HAVE_RB_EXT_RACTOR_SAFE
|
|
494
|
+
# undef RUBY_TYPED_FROZEN_SHAREABLE
|
|
495
|
+
# define RUBY_TYPED_FROZEN_SHAREABLE 0
|
|
496
|
+
#endif
|
|
497
|
+
|
|
486
498
|
#if defined(__cplusplus)
|
|
487
499
|
#if 0
|
|
488
500
|
{ /* satisfy cc-mode */
|
|
@@ -112,9 +112,8 @@
|
|
|
112
112
|
size_t dig = (pos) / CUMO_NB; \
|
|
113
113
|
int bit = (pos) % CUMO_NB; \
|
|
114
114
|
((CUMO_BIT_DIGIT*)(adr))[dig] = \
|
|
115
|
-
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
|
|
115
|
+
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
|
|
116
116
|
}
|
|
117
|
-
// val -> val&1 ??
|
|
118
117
|
|
|
119
118
|
#define CUMO_STORE_BIT_STEP( adr, pos, step, idx, val )\
|
|
120
119
|
{ \
|
|
@@ -129,9 +128,8 @@
|
|
|
129
128
|
pos += step; \
|
|
130
129
|
} \
|
|
131
130
|
((CUMO_BIT_DIGIT*)(adr))[dig] = \
|
|
132
|
-
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | ((val)<<(bit)); \
|
|
131
|
+
(((CUMO_BIT_DIGIT*)(adr))[dig] & ~(1u<<(bit))) | (((val)&1u)<<(bit)); \
|
|
133
132
|
}
|
|
134
|
-
// val -> val&1 ??
|
|
135
133
|
|
|
136
134
|
static inline int
|
|
137
135
|
cumo_is_aligned(const void *ptr, const size_t alignment)
|