CooCoo 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/CooCoo.gemspec +47 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +88 -0
- data/README.md +123 -0
- data/Rakefile +81 -0
- data/bin/cuda-dev-info +25 -0
- data/bin/cuda-free +28 -0
- data/bin/cuda-free-trend +7 -0
- data/bin/ffi-gen +267 -0
- data/bin/spec_runner_html.sh +42 -0
- data/bin/trainer +198 -0
- data/bin/trend-cost +13 -0
- data/examples/char-rnn.rb +405 -0
- data/examples/cifar/cifar.rb +94 -0
- data/examples/img-similarity.rb +201 -0
- data/examples/math_ops.rb +57 -0
- data/examples/mnist.rb +365 -0
- data/examples/mnist_classifier.rb +293 -0
- data/examples/mnist_dream.rb +214 -0
- data/examples/seeds.rb +268 -0
- data/examples/seeds_dataset.txt +210 -0
- data/examples/t10k-images-idx3-ubyte +0 -0
- data/examples/t10k-labels-idx1-ubyte +0 -0
- data/examples/train-images-idx3-ubyte +0 -0
- data/examples/train-labels-idx1-ubyte +0 -0
- data/ext/buffer/Rakefile +50 -0
- data/ext/buffer/buffer.pre.cu +727 -0
- data/ext/buffer/matrix.pre.cu +49 -0
- data/lib/CooCoo.rb +1 -0
- data/lib/coo-coo.rb +18 -0
- data/lib/coo-coo/activation_functions.rb +344 -0
- data/lib/coo-coo/consts.rb +5 -0
- data/lib/coo-coo/convolution.rb +298 -0
- data/lib/coo-coo/core_ext.rb +75 -0
- data/lib/coo-coo/cost_functions.rb +91 -0
- data/lib/coo-coo/cuda.rb +116 -0
- data/lib/coo-coo/cuda/device_buffer.rb +240 -0
- data/lib/coo-coo/cuda/device_buffer/ffi.rb +109 -0
- data/lib/coo-coo/cuda/error.rb +51 -0
- data/lib/coo-coo/cuda/host_buffer.rb +117 -0
- data/lib/coo-coo/cuda/runtime.rb +157 -0
- data/lib/coo-coo/cuda/vector.rb +315 -0
- data/lib/coo-coo/data_sources.rb +2 -0
- data/lib/coo-coo/data_sources/xournal.rb +25 -0
- data/lib/coo-coo/data_sources/xournal/bitmap_stream.rb +197 -0
- data/lib/coo-coo/data_sources/xournal/document.rb +377 -0
- data/lib/coo-coo/data_sources/xournal/loader.rb +144 -0
- data/lib/coo-coo/data_sources/xournal/renderer.rb +101 -0
- data/lib/coo-coo/data_sources/xournal/saver.rb +99 -0
- data/lib/coo-coo/data_sources/xournal/training_document.rb +78 -0
- data/lib/coo-coo/data_sources/xournal/training_document/constants.rb +15 -0
- data/lib/coo-coo/data_sources/xournal/training_document/document_maker.rb +89 -0
- data/lib/coo-coo/data_sources/xournal/training_document/document_reader.rb +105 -0
- data/lib/coo-coo/data_sources/xournal/training_document/example.rb +37 -0
- data/lib/coo-coo/data_sources/xournal/training_document/sets.rb +76 -0
- data/lib/coo-coo/debug.rb +8 -0
- data/lib/coo-coo/dot.rb +129 -0
- data/lib/coo-coo/drawing.rb +4 -0
- data/lib/coo-coo/drawing/cairo_canvas.rb +100 -0
- data/lib/coo-coo/drawing/canvas.rb +68 -0
- data/lib/coo-coo/drawing/chunky_canvas.rb +101 -0
- data/lib/coo-coo/drawing/sixel.rb +214 -0
- data/lib/coo-coo/enum.rb +17 -0
- data/lib/coo-coo/from_name.rb +58 -0
- data/lib/coo-coo/fully_connected_layer.rb +205 -0
- data/lib/coo-coo/generation_script.rb +38 -0
- data/lib/coo-coo/grapher.rb +140 -0
- data/lib/coo-coo/image.rb +286 -0
- data/lib/coo-coo/layer.rb +67 -0
- data/lib/coo-coo/layer_factory.rb +26 -0
- data/lib/coo-coo/linear_layer.rb +59 -0
- data/lib/coo-coo/math.rb +607 -0
- data/lib/coo-coo/math/abstract_vector.rb +121 -0
- data/lib/coo-coo/math/functions.rb +39 -0
- data/lib/coo-coo/math/interpolation.rb +7 -0
- data/lib/coo-coo/network.rb +264 -0
- data/lib/coo-coo/neuron.rb +112 -0
- data/lib/coo-coo/neuron_layer.rb +168 -0
- data/lib/coo-coo/option_parser.rb +18 -0
- data/lib/coo-coo/platform.rb +17 -0
- data/lib/coo-coo/progress_bar.rb +11 -0
- data/lib/coo-coo/recurrence/backend.rb +99 -0
- data/lib/coo-coo/recurrence/frontend.rb +101 -0
- data/lib/coo-coo/sequence.rb +187 -0
- data/lib/coo-coo/shell.rb +2 -0
- data/lib/coo-coo/temporal_network.rb +291 -0
- data/lib/coo-coo/trainer.rb +21 -0
- data/lib/coo-coo/trainer/base.rb +67 -0
- data/lib/coo-coo/trainer/batch.rb +82 -0
- data/lib/coo-coo/trainer/batch_stats.rb +27 -0
- data/lib/coo-coo/trainer/momentum_stochastic.rb +59 -0
- data/lib/coo-coo/trainer/stochastic.rb +47 -0
- data/lib/coo-coo/transformer.rb +272 -0
- data/lib/coo-coo/vector_layer.rb +194 -0
- data/lib/coo-coo/version.rb +3 -0
- data/lib/coo-coo/weight_deltas.rb +23 -0
- data/prototypes/convolution.rb +116 -0
- data/prototypes/linear_drop.rb +51 -0
- data/prototypes/recurrent_layers.rb +79 -0
- data/www/images/screamer.png +0 -0
- data/www/images/screamer.xcf +0 -0
- data/www/index.html +82 -0
- metadata +373 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
require 'coo-coo/cuda/device_buffer'
|
3
|
+
|
4
|
+
module CooCoo
|
5
|
+
module CUDA
|
6
|
+
class DeviceBuffer < ::FFI::Struct
|
7
|
+
module FFI
|
8
|
+
extend ::FFI::Library
|
9
|
+
ffi_lib Pathname.new(__FILE__).join('..', '..', '..', '..', '..', 'ext', 'buffer', "buffer.#{RbConfig::CONFIG['DLEXT']}").to_s
|
10
|
+
|
11
|
+
# Attaches the native function +buffer_<func>+ and defines a module-level
# wrapper for it that dispatches through call_func or call_buffer.
#
# Takes either three arguments (func, argument types, return type), in which
# case the wrapper is named after +func+, or four arguments
# (meth, func, argument types, return type), where +meth+ names the wrapper.
#
# @raise [ArgumentError] unless exactly 3 or 4 arguments are given
def self.buffer_function(*args)
  case args.size
  when 3
    func, arg_types, return_type = args
    meth = func
  when 4
    meth, func, arg_types, return_type = args
  else
    raise ArgumentError, "Wrong number of arguments: (given #{args.size}, expected 3 or 4)"
  end

  attach_function("buffer_#{func}", arg_types, return_type)

  # A Symbol return type goes through call_func; any other return type
  # (e.g. a struct pointer type) goes through call_buffer.
  dispatcher = return_type.kind_of?(Symbol) ? :call_func : :call_buffer

  class_eval <<-EOT
    def self.#{meth}(*call_args)
      #{dispatcher}(:#{func}, *call_args)
    end
  EOT
end
|
35
|
+
|
36
|
+
# Native function declarations. Each line attaches +buffer_<name>+ and
# defines a wrapper of the same name on this module.
buffer_function :block_size, [], :int
buffer_function :set_block_size, [ :int ], :void
buffer_function :max_grid_size, [], :int
buffer_function :set_max_grid_size, [ :int ], :void

buffer_function :init, [ :int ], :int
buffer_function :total_bytes_allocated, [], :size_t
buffer_function :num_allocated, [], :long_long

buffer_function :new, [ :size_t, :double ], DeviceBuffer.auto_ptr
buffer_function :free, [ DeviceBuffer ], :size_t
buffer_function :length, [ DeviceBuffer ], :size_t
buffer_function :set, [ DeviceBuffer, DeviceBuffer ], :int
buffer_function :setv, [ DeviceBuffer, :pointer, :size_t ], :int
buffer_function :setvn, [ DeviceBuffer, :size_t, :pointer, :size_t ], :int
buffer_function :setd, [ DeviceBuffer, :double, :size_t, :size_t ], :int
buffer_function :set_element, [ DeviceBuffer, :size_t, :double ], :int
buffer_function :get, [ DeviceBuffer, :pointer, :size_t ], :int
buffer_function :slice, [ DeviceBuffer, :size_t, :size_t ], DeviceBuffer.auto_ptr
buffer_function :slice_2d, [ DeviceBuffer, :size_t, :size_t, :size_t, :size_t, :size_t, :size_t, :double ], DeviceBuffer.auto_ptr
buffer_function :set2d, [ DeviceBuffer, :size_t, DeviceBuffer, :size_t, :size_t, :size_t ], :int
buffer_function :set2dv, [ DeviceBuffer, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t ], :int
buffer_function :host_slice, [ DeviceBuffer, :pointer, :size_t, :size_t ], :int

# Every binary operation gets a buffer/buffer form and a buffer/scalar form
# whose name carries a "d" suffix (addd, subd, muld, powd, divd, ...), so
# the scalar forms need no separate declarations.
[ :add, :sub, :mul, :pow, :div,
  :collect_eq, :collect_neq, :collect_lt, :collect_lte, :collect_gt, :collect_gte
].each do |binary_op|
  buffer_function binary_op, [ DeviceBuffer, DeviceBuffer ], DeviceBuffer.auto_ptr
  buffer_function "#{binary_op}d", [ DeviceBuffer, :double ], DeviceBuffer.auto_ptr
end
buffer_function :eq, [ DeviceBuffer, DeviceBuffer ], :int

buffer_function :sum, [ DeviceBuffer ], :double
buffer_function :min, [ DeviceBuffer ], :double
buffer_function :max, [ DeviceBuffer ], :double

buffer_function :dot, [ DeviceBuffer, :size_t, :size_t, DeviceBuffer, :size_t, :size_t ], DeviceBuffer.auto_ptr
buffer_function :identity, [ :size_t, :size_t ], DeviceBuffer.auto_ptr
buffer_function :diagflat, [ DeviceBuffer ], DeviceBuffer.auto_ptr

# Unary element-wise functions: one buffer in, a new buffer out.
[ :abs, :exp, :log, :log10, :log2, :sqrt,
  :sin, :asin, :cos, :acos, :tan, :atan,
  :sinh, :asinh, :cosh, :acosh, :tanh, :atanh,
  :ceil, :floor, :round,
  :collect_nan, :collect_inf
].each do |f|
  buffer_function f, [ DeviceBuffer ], DeviceBuffer.auto_ptr
end
|
88
|
+
|
89
|
+
# Calls the attached native function +buffer_<func>+ with +args+ and treats
# its result as a status code.
#
# NOTE(review): buffer_function routes every Symbol return type through this
# method, including value-returning ones such as :size_t and :double, for
# which a non-zero result would raise here — confirm how callers use those.
#
# @return the result of the native call (always equal to 0 when returned)
# @raise [APIError] if the result is not 0
def self.call_func(func, *args)
  status = send("buffer_#{func}", *args)
  raise APIError.new(status) unless status == 0
  status
end
|
94
|
+
|
95
|
+
# Calls the attached native function +buffer_<func>+ with +args+ and returns
# its non-NULL result.
#
# When the call yields a NULL result, CUDA.collect_garbage is run and the
# call is attempted again, at most two more times; after that the
# NullResultError propagates to the caller.
#
# @raise [NullResultError] if every attempt returns a NULL result
def self.call_buffer(func, *args)
  # The counter must live outside the begin block: +retry+ re-runs that
  # block from its first statement, so a counter initialised inside it
  # would reset on every attempt and the loop would never end.
  retries = 0
  begin
    result = send("buffer_#{func}", *args)
    raise NullResultError.new if result.null?
    result
  rescue NullResultError
    raise if retries > 1
    retries += 1
    CUDA.collect_garbage
    retry
  end
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'coo-coo/cuda/runtime'
|
2
|
+
|
3
|
+
module CooCoo
  module CUDA
    # Base class of the errors defined for the CUDA bindings.
    class Error < RuntimeError
    end

    # An error identified by a CUDA runtime error code.
    class APIError < Error
      # @return the error code this exception was built from
      attr_reader :error

      # @param err the error code; when omitted, the code reported by
      #   Runtime.cudaGetLastError is used
      def initialize(err = nil)
        @error = err || Runtime.cudaGetLastError
        super(message)
      end

      # @return [String] the error's name and description as reported by
      #   the runtime
      def message
        "CUDA API Error: #{name} #{string}"
      end

      # @return the result of Runtime.cudaGetErrorName for the code
      def name
        Runtime.cudaGetErrorName(@error)
      end

      # @return the result of Runtime.cudaGetErrorString for the code
      def string
        Runtime.cudaGetErrorString(@error)
      end
    end

    # Reports a failed device allocation.
    class NoMemoryError < Error
      # @param amount the number of bytes requested, if known
      def initialize(amount = nil)
        what = amount ? "#{amount} bytes" : "memory"
        super("CUDA failed to allocate #{what} on the device.")
      end
    end

    # Raised when a call returned a NULL result. The last CUDA error is
    # captured at construction time and appended to the message.
    class NullResultError < Error
      # @return [APIError] the last CUDA error at the time of construction
      attr_reader :cuda_error

      def initialize(msg = "NULL CUDA result")
        @cuda_error = APIError.new
        super(msg + ": Last #{@cuda_error.message}")
      end
    end
  end
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
|
3
|
+
module CooCoo
  module CUDA
    # Pointer method used to read one element of each supported type.
    TYPE_GETTER = {
      char: :read_char,
      long: :read_long,
      float: :read_float,
      double: :read_double
    }
    # Pointer method used to write one element of each supported type.
    TYPE_WRITER = {
      char: :write_char,
      long: :write_long,
      float: :write_float,
      double: :write_double
    }
    # Conversion applied to a Ruby value before it is written.
    TYPE_CONVERTOR = {
      double: ->(x) { x.to_f },
      float: ->(x) { x.to_f },
      long: ->(x) { x.to_i },
      char: ->(x) { x.to_i }
    }

    # A fixed-size, typed array held in an FFI::MemoryPointer.
    class HostBuffer
      attr_reader :size, :type

      # Builds a buffer from +other+:
      # * a HostBuffer is resized to +length+ (or returned as is);
      # * anything responding to +each_with_index+ is copied element-wise;
      # * a Numeric fills a buffer of +length+ (default 1) elements;
      # * anything else is converted with +to_enum+ and tried again.
      def self.[](other, length = nil)
        if other.kind_of?(self)
          other.resize(length || other.size)
        elsif other.respond_to?(:each_with_index)
          new(length || other.size).set(other)
        elsif other.kind_of?(Numeric)
          new(length || 1).set(other)
        else
          self[other.to_enum, length]
        end
      end

      # @param size number of elements
      # @param type element type, a key of TYPE_GETTER / TYPE_WRITER
      def initialize(size, type = :double)
        @size = size
        @type = type
        @buffer = ::FFI::MemoryPointer.new(type, size)
      end

      # Returns +self+ when the size already matches, otherwise a new buffer
      # of +new_size+ elements holding a copy of as many elements as fit.
      def resize(new_size)
        return self if @size == new_size

        # Carry the element type over: building the copy with the default
        # type would silently turn e.g. a :long buffer into a :double one.
        self.class.new(new_size, @type).set(each)
      end

      # @return the buffer's size in bytes
      def byte_size
        @size * type_size
      end

      # @return the size in bytes of one element
      def type_size
        ::FFI.type_size(@type)
      end

      # Copies +values+ element-wise, stopping at the end of the buffer, or
      # fills the whole buffer when +values+ is a single value.
      #
      # @return [HostBuffer] self
      def set(values)
        if values.respond_to?(:each_with_index)
          values.each_with_index do |v, i|
            break if i >= size
            self[i] = v
          end
        else
          size.times { |i| self[i] = values }
        end

        self
      end

      # @return the underlying memory pointer
      def get
        @buffer
      end

      # @return the underlying memory pointer
      def to_ptr
        @buffer
      end

      # Converts +value+ for the buffer's type and writes it at +index+.
      def []=(index, value)
        @buffer[index].send(type_writer, type_convertor.call(value))
      end

      # Reads the element at +index+.
      def [](index)
        @buffer[index].send(type_reader)
      end

      # Yields every element in order; without a block returns an Enumerator.
      def each(&block)
        return to_enum(__method__) unless block_given?

        size.times { |i| block.call(self[i]) }
      end

      def type_writer
        @type_writer ||= TYPE_WRITER[@type]
      end

      def type_convertor
        @type_convertor ||= TYPE_CONVERTOR[@type]
      end

      def type_reader
        @type_reader ||= TYPE_GETTER[@type]
      end

      # @return [Array] the elements as a Ruby array
      def to_a
        Array.new(@size) { |i| self[i] }
      end
    end
  end
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
require 'coo-coo/platform'
|
3
|
+
|
4
|
+
module CooCoo
|
5
|
+
module CUDA
|
6
|
+
module Runtime
|
7
|
+
extend ::FFI::Library
|
8
|
+
if CooCoo::Platform.windows?
|
9
|
+
ffi_lib 'cudart64_90.dll'
|
10
|
+
else
|
11
|
+
ffi_lib 'libcudart.so'
|
12
|
+
end
|
13
|
+
|
14
|
+
# Struct handed to cudaGetDeviceProperties.
#
# NOTE(review): the field order and sizes presumably have to match the
# cudaDeviceProp struct of the CUDA runtime that is actually loaded —
# verify against the installed headers.
class DeviceProperties < ::FFI::Struct
  layout :name, [ :char, 256 ],
         :totalGlobalMem, :size_t,
         :sharedMemPerBlock, :size_t,
         :regsPerBlock, :int,
         :warpSize, :int,
         :memPitch, :size_t,
         :maxThreadsPerBlock, :int,
         :maxThreadsDim, [ :int, 3 ],
         :maxGridSize, [ :int, 3 ],
         :clockRate, :int,
         :totalConstMem, :size_t,
         :major, :int,
         :minor, :int,
         :textureAlignment, :size_t,
         :texturePitchAlignment, :size_t,
         :deviceOverlap, :int,
         :multiProcessorCount, :int,
         :kernelExecTimeoutEnabled, :int,
         :integrated, :int,
         :canMapHostMemory, :int,
         :computeMode, :int,
         :maxTexture1D, :int,
         :maxTexture1DMipmap, :int,
         :maxTexture1DLinear, :int,
         :maxTexture2D, [ :int, 2 ],
         :maxTexture2DMipmap, [ :int, 2 ],
         :maxTexture2DLinear, [ :int, 3 ],
         :maxTexture2DGather, [ :int, 2 ],
         :maxTexture3D, [ :int, 3 ],
         :maxTexture3DAlt, [ :int, 3 ],
         :maxTextureCubemap, :int,
         :maxTexture1DLayered, [ :int, 2 ],
         :maxTexture2DLayered, [ :int, 3 ],
         :maxTextureCubemapLayered, [ :int, 2 ],
         :maxSurface1D, :int,
         :maxSurface2D, [ :int, 2 ],
         :maxSurface3D, [ :int, 3 ],
         :maxSurface1DLayered, [ :int, 2 ],
         :maxSurface2DLayered, [ :int, 3 ],
         :maxSurfaceCubemap, :int,
         :maxSurfaceCubemapLayered, [ :int, 2 ],
         :surfaceAlignment, :size_t,
         :concurrentKernels, :int,
         :ecc_enabled, :int,
         :pciBusID, :int,
         :pciDeviceID, :int,
         :pciDomainID, :int,
         :tccDriver, :int,
         :asyncEngineCount, :int,
         :unifiedAddressing, :int,
         :memoryClockRate, :int,
         :memoryBusWidth, :int,
         :l2CacheSize, :int,
         :maxThreadsPerMultiProcessor, :int,
         :streamPrioritiesSupported, :int,
         :globalL1CacheSupported, :int,
         :localL1CacheSupported, :int,
         :sharedMemPerMultiprocessor, :size_t,
         :regsPerMultiprocessor, :int,
         :managedMemSupported, :int,
         :isMultiGpuBoard, :int,
         :multiGpuBoardGroupID, :int,
         :singleToDoublePrecisionPerfRatio, :int,
         :pageableMemoryAccess, :int,
         :concurrentManagedAccess, :int,
         :computePreemptionSupported, :int,
         :canUseHostPointerForRegisteredMem, :int,
         :cooperativeLaunch, :int,
         :cooperativeMultiDeviceLaunch, :int
end
|
86
|
+
# Error reporting.
attach_function :cudaGetErrorName, [ :int ], :string
attach_function :cudaGetErrorString, [ :int ], :string
attach_function :cudaGetLastError, [], :int

# Device management.
attach_function :cudaDeviceReset, [], :int
attach_function :cudaSetDevice, [ :int ], :int
attach_function :cudaGetDevice, [ :pointer ], :int
attach_function :cudaGetDeviceCount, [ :pointer ], :int
attach_function :cudaGetDeviceFlags, [ :pointer ], :int
attach_function :cudaGetDeviceProperties, [ DeviceProperties, :int ], :int

attach_function :cudaMemGetInfo, [ :pointer, :pointer ], :int

# Memory management. Byte counts are size_t in the CUDA runtime API and
# every one of these calls returns a cudaError_t status, so sizes are
# declared as :size_t and the status is returned instead of discarded.
attach_function :cudaMalloc, [ :pointer, :size_t ], :int
attach_function :cudaFree, [ :pointer ], :int

# Values of cudaMemcpyKind.
enum :memcpy_modes, [ :host_to_host, 0,
                      :host_to_device, 1,
                      :device_to_host, 2,
                      :device_to_device, 3,
                      :default, 4 ]
attach_function :cudaMemcpy, [ :pointer, :pointer, :size_t, :memcpy_modes ], :int
|
104
|
+
|
105
|
+
# Asks cudaGetDevice for a device number and returns it.
# The status returned by cudaGetDevice is not checked.
#
# @return [Integer] the value cudaGetDevice wrote
def self.get_device
  slot = ::FFI::MemoryPointer.new(:int, 1)
  cudaGetDevice(slot)
  slot.read_int
end
|
110
|
+
|
111
|
+
# Passes +dev+ to cudaSetDevice.
#
# @return +dev+
# @raise [APIError] if cudaSetDevice reports a non-zero status
def self.set_device(dev)
  status = cudaSetDevice(dev)
  raise APIError.new(status) unless status == 0
  dev
end
|
116
|
+
|
117
|
+
# Asks cudaGetDeviceCount for the device count and returns it.
# The status returned by cudaGetDeviceCount is not checked.
#
# @return [Integer] the value cudaGetDeviceCount wrote
def self.device_count
  counter = ::FFI::MemoryPointer.new(:int, 1)
  cudaGetDeviceCount(counter)
  counter.read_int
end
|
122
|
+
|
123
|
+
# Fills a fresh DeviceProperties via cudaGetDeviceProperties.
# The status returned by the call is not checked.
#
# @param dev device number; defaults to the result of get_device
# @return [DeviceProperties]
def self.get_device_props(dev = nil)
  DeviceProperties.new.tap do |props|
    cudaGetDeviceProperties(props, dev || get_device)
  end
end
|
128
|
+
|
129
|
+
# Reads a size_t from +pointer+. The reader method is picked from the byte
# size FFI reports for :size_t the first time this is called and is then
# remembered.
#
# @raise [ArgumentError] if size_t is not 1, 2, 4 or 8 bytes wide
def self.read_size_t(pointer)
  @size_t_reader ||= begin
    readers = {
      8 => :read_ulong_long,
      4 => :read_ulong,
      2 => :read_ushort,
      1 => :read_ubyte
    }
    width = ::FFI.find_type(:size_t).size
    readers.fetch(width) { raise ArgumentError.new("size_t type not found") }
  end

  pointer.send(@size_t_reader)
end
|
143
|
+
|
144
|
+
# Queries cudaMemGetInfo.
#
# @return [Array] the two size_t values written by cudaMemGetInfo, in
#   argument order (free, total)
# @raise [APIError] if cudaMemGetInfo reports a non-zero status
def self.memory_info
  free, total = Array.new(2) { ::FFI::MemoryPointer.new(:size_t, 1) }
  status = cudaMemGetInfo(free, total)
  raise APIError.new(status) unless status == 0
  [ free, total ].collect { |ptr| read_size_t(ptr) }
end
|
151
|
+
|
152
|
+
# @return the second value reported by memory_info (the total)
def self.total_global_mem
  _free, total = memory_info
  total
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
@@ -0,0 +1,315 @@
|
|
1
|
+
require 'coo-coo/math/abstract_vector'
|
2
|
+
require 'coo-coo/cuda/device_buffer'
|
3
|
+
require 'coo-coo/core_ext'
|
4
|
+
|
5
|
+
module CooCoo
|
6
|
+
module CUDA
|
7
|
+
class Vector < CooCoo::Math::AbstractVector
|
8
|
+
# Creates a device buffer of +length+ elements set to +initial_value+.
# When a block is given, every element is then overwritten with the block's
# result for its index, written in slices of 1024 elements.
# A +nil+ length creates no buffer at all.
#
# @raise [ArgumentError] if +length+ is zero or negative
def initialize(length, initial_value = 0.0, &block)
  return if length == nil
  raise ArgumentError.new("Invalid Vector size") if length <= 0

  @elements = DeviceBuffer.create(length, initial_value)
  return unless block

  chunk = 1024
  @elements.size.times.each_slice(chunk).with_index do |indices, chunk_idx|
    @elements[chunk_idx * chunk, chunk] = indices.collect { |i| block.call(i) }
  end
end
|
22
|
+
|
23
|
+
# Builds a vector from +value+.
#
# A DeviceBuffer is wrapped directly. Anything else is copied with #set into
# a new vector of +max_size+ elements, which defaults to +value.size+ when
# +value+ responds to +each+ and to 1 otherwise.
def self.[](value, max_size = nil, default_value = 0.0)
  if value.kind_of?(DeviceBuffer)
    wrapper = new(nil)
    wrapper.instance_variable_set('@elements', value)
    return wrapper
  end

  max_size ||= value.respond_to?(:each) ? value.size : 1
  new(max_size, default_value).set(value)
end
|
37
|
+
|
38
|
+
# Copies +values+ into the underlying buffer. Another vector of the same
# class contributes its own buffer; anything else is passed through as is.
#
# @return [Vector] self
def set(values)
  source = values.kind_of?(self.class) ? values.elements : values
  @elements.set(source)
  self
end
|
47
|
+
|
48
|
+
# @return a new vector of +length+ elements, all 0.0
def self.zeros(length)
  new(length, 0.0)
end
|
51
|
+
|
52
|
+
# @return a new vector of +length+ elements, all 1.0
def self.ones(length)
  new(length, 1.0)
end
|
55
|
+
|
56
|
+
# @return a zero-filled vector with the same size as +self+
def zeros
  # Must go through the class: +self.zeros(size)+ would call this
  # zero-argument instance method again and raise ArgumentError.
  self.class.zeros(size)
end
|
59
|
+
|
60
|
+
# Wraps the result of DeviceBuffer.identity(w, h) in a vector.
def self.identity(w, h)
  buffer = DeviceBuffer.identity(w, h)
  self[buffer]
end
|
63
|
+
|
64
|
+
# Wraps the result of the buffer's +diagflat+ in a new vector.
def diagflat
  flattened = @elements.diagflat
  self.class[flattened]
end
|
67
|
+
|
68
|
+
# @return a new vector of the same size holding a copy of this one's buffer
def clone
  copy = self.class.new(self.size)
  copy.set(@elements)
end
|
71
|
+
|
72
|
+
# @return a new vector holding the elements of +self+ followed by those of
#   +other+
def append(other)
  own_size = size
  extra = other.size
  self.class.new(own_size + extra).tap do |joined|
    joined[0, own_size] = self
    joined[own_size, extra] = other
  end
end
|
78
|
+
|
79
|
+
# Converts +other+ into a vector: anything responding to +each+ is
# converted element-wise, any other value fills a vector of this vector's
# size.
#
# @return [Array] the converted +other+ followed by +self+
def coerce(other)
  counterpart = if other.respond_to?(:each)
                  self.class[other]
                else
                  self.class.new(self.size, other)
                end
  [ counterpart, self ]
end
|
86
|
+
|
87
|
+
# @return [String] the elements as floats, comma separated, in brackets
def to_s
  "[#{each.collect(&:to_f).join(', ')}]"
end
|
90
|
+
|
91
|
+
def inspect
|
92
|
+
to_a.inspect
|
93
|
+
end
|
94
|
+
|
95
|
+
# @return the result of the underlying buffer's +to_a+
def to_a
  @elements.to_a
end
|
98
|
+
|
99
|
+
# Marshal hook: packs the elements with the 'E*' directive.
#
# @param depth unused
# @return [String] the packed elements
def _dump(depth)
  values = @elements.to_a
  values.pack('E*')
end
|
102
|
+
|
103
|
+
# Marshal hook: rebuilds a vector from a string produced with the 'E*'
# pack directive.
def self._load(args)
  self[args.unpack('E*')]
end
|
107
|
+
|
108
|
+
# @return whatever the underlying buffer's +null?+ reports
def null?
  @elements.null?
end
|
111
|
+
|
112
|
+
# Reads from the underlying buffer.
#
# @param i start index
# @param len when given, the buffer's result is wrapped in a new vector;
#   otherwise it is returned as is
def [](i, len = nil)
  picked = @elements[i, len]
  len ? self.class[picked] : picked
end
|
120
|
+
|
121
|
+
# Writes to the underlying buffer.
#
# Ruby passes the assigned value last, so for +vec[i] = value+ the value
# arrives in +v+, while for +vec[i, len] = value+ the length arrives in +v+
# and the value in +l+.
def []=(i, v, l = nil)
  return @elements[i] = v if l == nil

  @elements[i, v] = l
end
|
128
|
+
|
129
|
+
# Hands +block+ to the underlying buffer's +each+ and returns its result.
def each(&block)
  @elements.each(&block)
end
|
132
|
+
|
133
|
+
# Yields every element together with its index.
def each_with_index(&block)
  enumerator = @elements.each
  enumerator.with_index(&block)
end
|
136
|
+
|
137
|
+
# Yields the buffer's slices of +n+ elements, each wrapped in a vector.
# Without a block an Enumerator is returned.
def each_slice(n, &block)
  return to_enum(__method__, n) unless block

  @elements.each_slice(n) { |slice| block.call(self.class[slice]) }
end
|
144
|
+
|
145
|
+
# @!method min
|
146
|
+
# @return [Float] minimum value of +self+
|
147
|
+
# @!method max
|
148
|
+
# @return [Float] maximum value of +self+
|
149
|
+
# @!method minmax
|
150
|
+
# @return [[Float, Float]] {#min} and {#max} values of +self+
|
151
|
+
# @!method sum
|
152
|
+
# Reduces the vector with {#+}.
|
153
|
+
# @return [Float] the sum of +self+
|
154
|
+
delegate :min, :max, :minmax, :sum, :to => :elements
|
155
|
+
|
156
|
+
# @return the result of the underlying buffer's +sum+
def sum
  @elements.sum
end
|
159
|
+
|
160
|
+
# @return the buffer's sum divided by #size
def average
  total = @elements.sum
  total / size
end
|
163
|
+
|
164
|
+
# @return the sum of +self * self+
def magnitude_squared
  squares = self * self
  squares.sum
end
|
167
|
+
|
168
|
+
# @return [Float] the square root of #magnitude_squared
def magnitude
  squared = magnitude_squared
  ::Math.sqrt(squared)
end
|
171
|
+
|
172
|
+
# @return +self+ divided by its #magnitude
def normalize
  scale = magnitude
  self / scale
end
|
175
|
+
|
176
|
+
# Calls the buffer's +dot+ with (w, h, other's buffer, ow, oh) and wraps
# the result in a new vector. An +other+ that is not a vector but responds
# to +each+ is converted to one first.
#
# @raise [ArgumentError] if +other+ is neither a vector nor responds to
#   +each+
def dot(w, h, other, ow = nil, oh = nil)
  unless other.kind_of?(self.class)
    unless other.respond_to?(:each)
      raise ArgumentError.new("argument is not a #{self.class} or Enumerator")
    end
    return dot(w, h, self.class[other.each], ow, oh)
  end

  self.class[@elements.dot(w, h, other.elements, ow, oh)]
end
|
185
|
+
|
186
|
+
private
|
187
|
+
# Defines the instance method +op+. The generated method sends +op+ to the
# underlying buffer, passing the other vector's buffer when +other+ is a
# vector of the same class and +other+ itself otherwise, and wraps the
# result in a new vector.
def self.bin_op(op)
  class_eval <<-EOT
    def #{op}(other)
      operand = other.kind_of?(self.class) ? other.elements : other
      self.class[@elements.send(:#{op}, operand)]
    end
  EOT
end
|
198
|
+
|
199
|
+
public
|
200
|
+
# @!macro [attach] vector.bin_op
|
201
|
+
# @!method $1(other)
|
202
|
+
# Calls the equivalent of +#$1+ on each element of +self+ against +other+.
|
203
|
+
# @param other [Vector, Array, Enumerable, Numeric]
|
204
|
+
# @return [Vector]
|
205
|
+
bin_op('<')
|
206
|
+
bin_op('<=')
|
207
|
+
bin_op('>=')
|
208
|
+
bin_op('>')
|
209
|
+
bin_op('+')
|
210
|
+
bin_op('-')
|
211
|
+
bin_op('*')
|
212
|
+
bin_op('/')
|
213
|
+
bin_op('**')
|
214
|
+
bin_op('collect_equal?')
|
215
|
+
bin_op('collect_not_equal?')
|
216
|
+
|
217
|
+
# Negates every element in the vector by multiplying it with -1.0.
# @return [Vector]
def -@
  factor = -1.0
  self * factor
end
|
222
|
+
|
223
|
+
# Calls the buffer's +collect_equal?+ with +n+ (or with +n+'s buffer when
# +n+ is a vector of the same class) and wraps the result in a new vector.
def collect_equal?(n)
  operand = n.kind_of?(self.class) ? n.elements : n
  self.class[@elements.collect_equal?(operand)]
end
|
230
|
+
|
231
|
+
# @return the size reported by the underlying buffer
def size
  @elements.size
end
|
234
|
+
|
235
|
+
# @return the size reported by the underlying buffer (same value as #size)
def length
  @elements.size
end
|
238
|
+
|
239
|
+
# Compares with +other+.
#
# Vectors of the same class compare their buffers. Any other non-nil value
# is first converted with #coerce and then compared; +nil+ is never equal.
def ==(other)
  return @elements == other.elements if other.kind_of?(self.class)
  return false if other.nil?

  # coerce returns [converted other, self]; only the first is needed.
  converted, = coerce(other)
  self == converted
end
|
249
|
+
|
250
|
+
private
|
251
|
+
# Defines the zero-argument instance method +name+. The generated method
# sends +real_name+ (or +name+ when no +real_name+ is given) to the
# underlying buffer and wraps the result in a new vector.
def self.f(name, real_name = nil)
  target = real_name || name
  class_eval <<-EOT
    def #{name}
      self.class[@elements.send(:#{target})]
    end
  EOT
end
|
258
|
+
|
259
|
+
public
|
260
|
+
|
261
|
+
# @!macro [attach] vector.f
|
262
|
+
# @!method $1()
|
263
|
+
# Calls the equivalent of +Math.$1+ on each element of +self+.
|
264
|
+
# @return [Vector] the equivalent of +Math.$1+ over +self+.
|
265
|
+
f :abs
|
266
|
+
f :exp
|
267
|
+
f :log
|
268
|
+
f :log10
|
269
|
+
f :log2
|
270
|
+
f :sqrt
|
271
|
+
f :sin
|
272
|
+
f :asin
|
273
|
+
f :cos
|
274
|
+
f :acos
|
275
|
+
f :tan
|
276
|
+
f :atan
|
277
|
+
f :sinh
|
278
|
+
f :asinh
|
279
|
+
f :cosh
|
280
|
+
f :acosh
|
281
|
+
f :tanh
|
282
|
+
f :atanh
|
283
|
+
f :ceil
|
284
|
+
f :floor
|
285
|
+
f :round
|
286
|
+
f :collect_nan?, :collect_nan
|
287
|
+
f :collect_infinite?, :collect_inf
|
288
|
+
|
289
|
+
# @return [Boolean] whether the sum of #collect_nan? is greater than zero
def nan?
  flagged = collect_nan?.sum
  flagged > 0
end
|
292
|
+
|
293
|
+
# @return [Boolean] whether the sum of #collect_infinite? is greater than
#   zero
def infinite?
  flagged = collect_infinite?.sum
  flagged > 0
end
|
296
|
+
|
297
|
+
# Forwards +args+ to the buffer's +slice_2d+ and wraps the result in a new
# vector.
def slice_2d(*args)
  region = @elements.slice_2d(*args)
  self.class[region]
end
|
300
|
+
|
301
|
+
# Forwards to the buffer's +set2d!+, passing +src+'s buffer when +src+ is a
# vector of the same class and +src+ itself otherwise.
#
# @return [Vector] self
# @raise [ArgumentError] if +src+ responds to +each+ and its size is not a
#   multiple of +src_width+
def set2d!(width, src, src_width, x, y)
  if src.respond_to?(:each) && src.size % src_width > 0
    raise ArgumentError.new("src's size #{src.size} must be divisible by src_width #{src_width}")
  end

  payload = src.kind_of?(self.class) ? src.elements : src
  @elements.set2d!(width, payload, src_width, x, y)
  self
end
|
308
|
+
|
309
|
+
protected
|
310
|
+
# @return the underlying buffer
def elements
  @elements
end
|
313
|
+
end
|
314
|
+
end
|
315
|
+
end
|