ignis-numerics 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +15 -0
- data/lib/ignis-numerics.rb +62 -0
- data/lib/nvruby/array.rb +646 -0
- data/lib/nvruby/fft/cufft_bindings.rb +134 -0
- data/lib/nvruby/fft/fft_plan.rb +288 -0
- data/lib/nvruby/fft/operations.rb +364 -0
- data/lib/nvruby/linalg/cutensor_bindings.rb +107 -0
- data/lib/nvruby/mathdx/fft_kernel.rb +258 -0
- data/lib/nvruby/mathdx/gemm_kernel.rb +293 -0
- data/lib/nvruby/mathdx.rb +73 -0
- data/lib/nvruby/random/curand_bindings.rb +115 -0
- data/lib/nvruby/random/generator.rb +305 -0
- data/lib/nvruby/solver/amgx_bindings.rb +172 -0
- data/lib/nvruby/solver/amgx_config.rb +142 -0
- data/lib/nvruby/solver/amgx_solver.rb +251 -0
- data/lib/nvruby/solver/cudss_bindings.rb +115 -0
- data/lib/nvruby/solver/cusolver_bindings.rb +358 -0
- data/lib/nvruby/solver/eigen.rb +226 -0
- data/lib/nvruby/solver/lu.rb +265 -0
- data/lib/nvruby/solver/sparse_solver.rb +429 -0
- data/lib/nvruby/solver/svd.rb +266 -0
- data/lib/nvruby/solver.rb +122 -0
- data/lib/nvruby/sparse/cusparse_bindings.rb +231 -0
- data/lib/nvruby/sparse/sparse_matrix.rb +456 -0
- data/lib/nvruby/tensor/contraction.rb +218 -0
- data/lib/nvruby/tensor.rb +42 -0
- metadata +85 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ffi"
|
|
4
|
+
|
|
5
|
+
module Ignis
|
|
6
|
+
module Random
|
|
7
|
+
# cuRAND library FFI bindings
|
|
8
|
+
module CuRANDBindings
|
|
9
|
+
extend FFI::Library
|
|
10
|
+
|
|
11
|
+
# Generator types (curandRngType_t)
|
|
12
|
+
CURAND_RNG_PSEUDO_DEFAULT = 100
|
|
13
|
+
CURAND_RNG_PSEUDO_XORWOW = 101
|
|
14
|
+
CURAND_RNG_PSEUDO_MRG32K3A = 121
|
|
15
|
+
CURAND_RNG_PSEUDO_MTGP32 = 141
|
|
16
|
+
CURAND_RNG_PSEUDO_MT19937 = 142
|
|
17
|
+
CURAND_RNG_PSEUDO_PHILOX4_32_10 = 161
|
|
18
|
+
CURAND_RNG_QUASI_DEFAULT = 200
|
|
19
|
+
CURAND_RNG_QUASI_SOBOL32 = 201
|
|
20
|
+
CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 202
|
|
21
|
+
CURAND_RNG_QUASI_SOBOL64 = 203
|
|
22
|
+
CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 204
|
|
23
|
+
|
|
24
|
+
# Ordering types
|
|
25
|
+
CURAND_ORDERING_PSEUDO_BEST = 100
|
|
26
|
+
CURAND_ORDERING_PSEUDO_DEFAULT = 101
|
|
27
|
+
CURAND_ORDERING_PSEUDO_SEEDED = 102
|
|
28
|
+
CURAND_ORDERING_PSEUDO_LEGACY = 103
|
|
29
|
+
CURAND_ORDERING_PSEUDO_DYNAMIC = 104
|
|
30
|
+
CURAND_ORDERING_QUASI_DEFAULT = 201
|
|
31
|
+
|
|
32
|
+
# Direction vector types
|
|
33
|
+
CURAND_DIRECTION_VECTORS_32_JOEKUO6 = 101
|
|
34
|
+
CURAND_DIRECTION_VECTORS_64_JOEKUO6 = 102
|
|
35
|
+
CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 103
|
|
36
|
+
CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104
|
|
37
|
+
|
|
38
|
+
@loaded = false
|
|
39
|
+
|
|
40
|
+
class << self
|
|
41
|
+
# Load the cuRAND shared library and attach its C functions.
#
# Runs at most once: later calls return immediately because @loaded is set.
# When Ignis.configuration.cuda_bin_path is set, the "curand64_*.dll" in that
# directory that sorts last (plain string order) is loaded. Otherwise, or when
# the directory contains no such file, the bare name "curand64_10" is handed
# to FFI for resolution.
# @return [void]
def ensure_loaded!
  return if @loaded

  CUDA::LibraryLoader.load_library(:curand)

  cuda_bin = Ignis.configuration.cuda_bin_path
  # Dir.glob yields [] when nothing matches, so #max can be nil; never pass
  # nil to ffi_lib, use the default library name instead.
  versioned_dll = cuda_bin && Dir.glob(File.join(cuda_bin, "curand64_*.dll")).max
  ffi_lib(versioned_dll || "curand64_10")

  attach_curand_functions!
  @loaded = true
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
# rubocop:disable Metrics/MethodLength
|
|
62
|
+
def attach_curand_functions!
  # Every function returns a cuRAND status code, bound here as :int.
  # Each entry is [C symbol, FFI parameter types]; order matches attachment order.
  signatures = [
    # Generator management
    [:curandCreateGenerator, %i[pointer int]],
    [:curandCreateGeneratorHost, %i[pointer int]],
    [:curandDestroyGenerator, %i[pointer]],
    [:curandSetStream, %i[pointer pointer]],
    [:curandSetPseudoRandomGeneratorSeed, %i[pointer uint64]],
    [:curandSetGeneratorOffset, %i[pointer uint64]],
    [:curandSetGeneratorOrdering, %i[pointer int]],
    [:curandSetQuasiRandomGeneratorDimensions, %i[pointer uint]],
    # Raw integer output
    [:curandGenerate, %i[pointer pointer size_t]],
    [:curandGenerateLongLong, %i[pointer pointer size_t]],
    # Uniform distribution
    [:curandGenerateUniform, %i[pointer pointer size_t]],
    [:curandGenerateUniformDouble, %i[pointer pointer size_t]],
    # Normal distribution
    [:curandGenerateNormal, %i[pointer pointer size_t float float]],
    [:curandGenerateNormalDouble, %i[pointer pointer size_t double double]],
    # Log-normal distribution
    [:curandGenerateLogNormal, %i[pointer pointer size_t float float]],
    [:curandGenerateLogNormalDouble, %i[pointer pointer size_t double double]],
    # Poisson distribution
    [:curandGeneratePoisson, %i[pointer pointer size_t double]],
    # Library version
    [:curandGetVersion, %i[pointer]],
    # Direction vectors and scramble constants for quasi-random generators
    [:curandGetDirectionVectors32, %i[pointer int]],
    [:curandGetDirectionVectors64, %i[pointer int]],
    [:curandGetScrambleConstants32, %i[pointer]],
    [:curandGetScrambleConstants64, %i[pointer]]
  ]

  signatures.map { |symbol, params| attach_function(symbol, params, :int) }.last
end
|
|
101
|
+
# rubocop:enable Metrics/MethodLength
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Raise when a cuRAND call reports a failure.
#
# A zero status counts as success and the method returns nil. Any other
# status is raised as a CuRANDError constructed from the status code.
# The context argument is accepted at every call site but is not currently
# forwarded to the raised error.
# @param status [Integer] cuRAND status code
# @param context [String] Description of the operation being checked
# @return [void]
# @raise [CuRANDError] if status is non-zero
def self.check_status!(status, context = "cuRAND operation")
  raise CuRANDError, status unless status.zero?
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ignis" # jit/kernels provided by the core gem
|
|
4
|
+
|
|
5
|
+
module Ignis
|
|
6
|
+
module Random
|
|
7
|
+
# GPU random number generator using cuRAND
|
|
8
|
+
class Generator
|
|
9
|
+
# Available generator types
|
|
10
|
+
GENERATOR_TYPES = {
|
|
11
|
+
default: CuRANDBindings::CURAND_RNG_PSEUDO_DEFAULT,
|
|
12
|
+
xorwow: CuRANDBindings::CURAND_RNG_PSEUDO_XORWOW,
|
|
13
|
+
mrg32k3a: CuRANDBindings::CURAND_RNG_PSEUDO_MRG32K3A,
|
|
14
|
+
mtgp32: CuRANDBindings::CURAND_RNG_PSEUDO_MTGP32,
|
|
15
|
+
mt19937: CuRANDBindings::CURAND_RNG_PSEUDO_MT19937,
|
|
16
|
+
philox: CuRANDBindings::CURAND_RNG_PSEUDO_PHILOX4_32_10,
|
|
17
|
+
sobol32: CuRANDBindings::CURAND_RNG_QUASI_SOBOL32,
|
|
18
|
+
scrambled_sobol32: CuRANDBindings::CURAND_RNG_QUASI_SCRAMBLED_SOBOL32,
|
|
19
|
+
sobol64: CuRANDBindings::CURAND_RNG_QUASI_SOBOL64,
|
|
20
|
+
scrambled_sobol64: CuRANDBindings::CURAND_RNG_QUASI_SCRAMBLED_SOBOL64
|
|
21
|
+
}.freeze
|
|
22
|
+
|
|
23
|
+
# @return [Symbol] Generator type
|
|
24
|
+
attr_reader :generator_type
|
|
25
|
+
|
|
26
|
+
# @return [Integer, nil] Seed value
|
|
27
|
+
attr_reader :seed
|
|
28
|
+
|
|
29
|
+
# @return [Integer] Device index
|
|
30
|
+
attr_reader :device_index
|
|
31
|
+
|
|
32
|
+
# Create a cuRAND generator and, when a seed is given, seed it.
#
# @param generator_type [Symbol] Key of GENERATOR_TYPES
# @param seed [Integer, nil] Seed applied through #set_seed when present
# @param device [Integer, nil] Device index; defaults to
#   Ignis.configuration.default_device
# @raise [ArgumentError] if the generator type is unknown, or a seed is
#   given for a quasi-random generator
def initialize(generator_type: :xorwow, seed: nil, device: nil)
  CuRANDBindings.ensure_loaded!

  @generator_type = generator_type
  @seed = seed
  @device_index = device || Ignis.configuration.default_device
  @destroyed = false

  @handle = create_generator
  # Register cleanup before seeding: set_seed can raise (for example on a
  # quasi-random type), and the native generator created above must still
  # be released when this object is collected.
  ObjectSpace.define_finalizer(self, self.class.release_finalizer(@handle))

  set_seed(seed) if seed
end
|
|
48
|
+
|
|
49
|
+
# Seed the pseudo-random generator.
#
# The seed is stored on the object only after cuRAND has accepted it, so
# #seed and #to_s never report a value that is not in effect.
# @param seed [Integer] Seed value
# @return [self]
# @raise [InvalidOperationError] if the generator has been destroyed
# @raise [ArgumentError] if this is a quasi-random generator
def set_seed(seed)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed
  raise ArgumentError, "Cannot set seed for quasi-random generator" if quasi_random?

  status = CuRANDBindings.curandSetPseudoRandomGeneratorSeed(@handle, seed)
  CuRANDBindings.check_status!(status, "Set generator seed")
  @seed = seed

  self
end
|
|
62
|
+
|
|
63
|
+
# Move the generator to the given offset.
# @param offset [Integer] Offset value handed to curandSetGeneratorOffset
# @return [self]
# @raise [InvalidOperationError] if the generator has been destroyed
def set_offset(offset)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed

  CuRANDBindings.check_status!(
    CuRANDBindings.curandSetGeneratorOffset(@handle, offset),
    "Set generator offset"
  )
  self
end
|
|
74
|
+
|
|
75
|
+
# Bind the generator to a CUDA stream.
# @param stream [CUDA::Stream] Stream whose #handle is passed to curandSetStream
# @return [self]
# @raise [InvalidOperationError] if the generator has been destroyed
def set_stream(stream)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed

  CuRANDBindings.check_status!(
    CuRANDBindings.curandSetStream(@handle, stream.handle),
    "Set generator stream"
  )
  self
end
|
|
86
|
+
|
|
87
|
+
# Generate uniformly distributed random numbers.
#
# cuRAND's uniform generators document their output interval as (0, 1]:
# 0.0 is excluded and 1.0 is included. With a custom range the values are
# mapped through #scale_uniform, which is implemented for :float32 only.
# An unsupported dtype/range combination is rejected before any memory is
# allocated or the generator state is advanced.
# @param shape [Array<Integer>] Shape of output array
# @param low [Float] Lower bound of the range
# @param high [Float] Upper bound of the range
# @param dtype [Symbol] Data type (:float32 or :float64)
# @return [NvArray] Array filled with uniform random values
# @raise [InvalidOperationError] if the generator has been destroyed
# @raise [UnsupportedDTypeError] if dtype is not :float32 or :float64
# @raise [NotImplementedError] if a non-default range is requested for :float64
def uniform(shape, low: 0.0, high: 1.0, dtype: :float32)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed
  raise UnsupportedDTypeError.new(dtype, operation: "uniform") unless %i[float32 float64].include?(dtype)

  rescale = !(low.zero? && high == 1.0)
  if rescale && dtype != :float32
    # Same condition scale_uniform rejects; checked up front so the failure
    # does not consume random numbers or leave a discarded device array.
    raise NotImplementedError,
          "uniform(low:, high:) range scaling is implemented for :float32 only " \
          "(got #{dtype}); default [0,1) range works for all dtypes"
  end

  output = NvArray.new(shape: shape, dtype: dtype, device: @device_index)
  output.to_device

  count = output.size

  status = if dtype == :float32
             CuRANDBindings.curandGenerateUniform(@handle, output.device_ffi_ptr, count)
           else
             CuRANDBindings.curandGenerateUniformDouble(@handle, output.device_ffi_ptr, count)
           end

  CuRANDBindings.check_status!(status, "Generate uniform")

  scale_uniform(output, low, high) if rescale

  output
end
|
|
115
|
+
|
|
116
|
+
# Generate normal (Gaussian) random numbers.
#
# Values are generated into a temporary device buffer whose element count is
# rounded up to an even number, then the first output.nbytes bytes are copied
# into the result array. The temporary buffer is released on every exit
# path, including when generation or the copy raises.
# @param shape [Array<Integer>] Shape of output array
# @param mean [Float] Mean value
# @param std [Float] Standard deviation
# @param dtype [Symbol] Data type (:float32 or :float64)
# @return [NvArray] Array filled with normal random values
# @raise [InvalidOperationError] if the generator has been destroyed
# @raise [UnsupportedDTypeError] if dtype is not :float32 or :float64
def normal(shape, mean: 0.0, std: 1.0, dtype: :float32)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed
  raise UnsupportedDTypeError.new(dtype, operation: "normal") unless %i[float32 float64].include?(dtype)

  # cuRAND requires an even number of elements for the normal distribution.
  size = Array(shape).reduce(1, :*)
  padded_size = size.even? ? size : size + 1

  temp_output = CUDA::Memory.new(padded_size * DType.byte_size(dtype), device: @device_index)

  begin
    status = if dtype == :float32
               CuRANDBindings.curandGenerateNormal(@handle, temp_output.ffi_ptr, padded_size, mean, std)
             else
               CuRANDBindings.curandGenerateNormalDouble(@handle, temp_output.ffi_ptr, padded_size, mean, std)
             end

    CuRANDBindings.check_status!(status, "Generate normal")

    # Copy only the requested elements; the padding element (if any) is dropped.
    output = NvArray.new(shape: shape, dtype: dtype, device: @device_index)
    output.to_device
    output.device_memory.copy_from_device(temp_output, count: output.nbytes)

    output
  ensure
    temp_output.free!
  end
end
|
|
149
|
+
|
|
150
|
+
# Generate log-normal random numbers.
#
# Values are generated into a temporary device buffer whose element count is
# rounded up to an even number, then the first output.nbytes bytes are copied
# into the result array. The temporary buffer is released on every exit
# path, including when generation or the copy raises.
# @param shape [Array<Integer>] Shape of output array
# @param mean [Float] Mean of the underlying normal distribution
# @param std [Float] Standard deviation of the underlying normal distribution
# @param dtype [Symbol] Data type (:float32 or :float64)
# @return [NvArray] Array filled with log-normal random values
# @raise [InvalidOperationError] if the generator has been destroyed
# @raise [UnsupportedDTypeError] if dtype is not :float32 or :float64
def log_normal(shape, mean: 0.0, std: 1.0, dtype: :float32)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed
  raise UnsupportedDTypeError.new(dtype, operation: "log_normal") unless %i[float32 float64].include?(dtype)

  # Same even-count padding as #normal.
  size = Array(shape).reduce(1, :*)
  padded_size = size.even? ? size : size + 1

  temp_output = CUDA::Memory.new(padded_size * DType.byte_size(dtype), device: @device_index)

  begin
    status = if dtype == :float32
               CuRANDBindings.curandGenerateLogNormal(@handle, temp_output.ffi_ptr, padded_size, mean, std)
             else
               CuRANDBindings.curandGenerateLogNormalDouble(@handle, temp_output.ffi_ptr, padded_size, mean, std)
             end

    CuRANDBindings.check_status!(status, "Generate log-normal")

    # Copy only the requested elements; the padding element (if any) is dropped.
    output = NvArray.new(shape: shape, dtype: dtype, device: @device_index)
    output.to_device
    output.device_memory.copy_from_device(temp_output, count: output.nbytes)

    output
  ensure
    temp_output.free!
  end
end
|
|
181
|
+
|
|
182
|
+
# Fill a new uint32 device array with Poisson-distributed values.
# @param shape [Array<Integer>] Shape of output array
# @param lambda_param [Float] Lambda parameter; must be positive
# @return [NvArray] Array filled with Poisson random values (uint32)
# @raise [InvalidOperationError] if the generator has been destroyed
# @raise [ArgumentError] if lambda_param is not positive
def poisson(shape, lambda_param:)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed
  raise ArgumentError, "Lambda must be positive" unless lambda_param.positive?

  NvArray.new(shape: shape, dtype: :uint32, device: @device_index).tap do |result|
    result.to_device
    rc = CuRANDBindings.curandGeneratePoisson(@handle, result.device_ffi_ptr, result.size, lambda_param)
    CuRANDBindings.check_status!(rc, "Generate Poisson")
  end
end
|
|
198
|
+
|
|
199
|
+
# Fill a new uint32 device array with raw generator output.
# @param shape [Array<Integer>] Shape of output array
# @return [NvArray] Array filled with random uint32 values
# @raise [InvalidOperationError] if the generator has been destroyed
def integers(shape)
  raise InvalidOperationError, "Generator has been destroyed" if @destroyed

  NvArray.new(shape: shape, dtype: :uint32, device: @device_index).tap do |result|
    result.to_device
    rc = CuRANDBindings.curandGenerate(@handle, result.device_ffi_ptr, result.size)
    CuRANDBindings.check_status!(rc, "Generate integers")
  end
end
|
|
213
|
+
|
|
214
|
+
# Whether this generator was created with one of the Sobol (quasi-random) types.
# @return [Boolean]
def quasi_random?
  case @generator_type
  when :sobol32, :scrambled_sobol32, :sobol64, :scrambled_sobol64 then true
  else false
  end
end
|
|
219
|
+
|
|
220
|
+
# Report the destroyed flag, which #destroy! sets to true.
# @return [Boolean]
def destroyed?
  @destroyed
end
|
|
225
|
+
|
|
226
|
+
# Release the native generator. Calling it again after the first successful
# call does nothing.
#
# The status returned by curandDestroyGenerator is not inspected. Once the
# handle is released, the GC finalizer is removed so it cannot destroy the
# same handle a second time.
# @return [void]
def destroy!
  return if @destroyed

  stale_handle = @handle
  CuRANDBindings.curandDestroyGenerator(stale_handle)
  @destroyed = true
  @handle = nil
  ObjectSpace.undefine_finalizer(self)
end
|
|
236
|
+
|
|
237
|
+
# Short description: generator type, seed ("auto" when none was set) and
# whether the generator is active or destroyed.
# @return [String]
def to_s
  seed_label = @seed || 'auto'
  lifecycle = if @destroyed
                "destroyed"
              else
                "active"
              end
  "Generator(type=#{@generator_type}, seed=#{seed_label}, #{lifecycle})"
end
|
|
242
|
+
|
|
243
|
+
class << self
|
|
244
|
+
# Build the GC finalizer for a generator handle.
#
# The returned proc closes over the raw handle only, not the Generator
# instance, and destroys that handle when invoked.
# @param handle [FFI::Pointer] Generator handle
# @return [Proc]
def release_finalizer(handle)
  Proc.new do
    CuRANDBindings.ensure_loaded!
    CuRANDBindings.curandDestroyGenerator(handle)
  end
end
|
|
253
|
+
|
|
254
|
+
# Query the loaded cuRAND library for its version.
# @return [Integer] Version number as written by curandGetVersion
def version
  CuRANDBindings.ensure_loaded!

  out = FFI::MemoryPointer.new(:int)
  CuRANDBindings.check_status!(CuRANDBindings.curandGetVersion(out), "Get cuRAND version")
  out.read_int
end
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
private
|
|
266
|
+
|
|
267
|
+
# Create the native generator for @generator_type.
# @return [FFI::Pointer] Generator handle read back from cuRAND
# @raise [ArgumentError] if @generator_type is not a key of GENERATOR_TYPES
def create_generator
  rng_type = GENERATOR_TYPES.fetch(@generator_type) do
    raise ArgumentError, "Unknown generator type: #{@generator_type}"
  end

  # cuRAND writes the new handle into this pointer-sized slot.
  handle_slot = FFI::MemoryPointer.new(:pointer)
  CuRANDBindings.check_status!(
    CuRANDBindings.curandCreateGenerator(handle_slot, rng_type),
    "Create generator"
  )

  Ignis.logger.debug { "Created cuRAND generator: #{@generator_type}" }

  handle_slot.read_pointer
end
|
|
281
|
+
|
|
282
|
+
# Rescale uniform output in place on the device to the [low, high) range
# requested by the caller.
#
# Launches the element-wise affine kernel with scale = high - low and
# shift = low, using the same device pointer as input and output.
# Assumes the kernel computes out = in * scale + shift — TODO confirm
# against Ignis::JIT::Kernels::Elementwise.
# @param output [NvArray] Device array to rescale in place
# @param low [Numeric] Lower bound of the target range
# @param high [Numeric] Upper bound of the target range
# @return [void]
# @raise [NotImplementedError] if output is not :float32
def scale_uniform(output, low, high)
  scale = (high - low).to_f
  shift = low.to_f
  n = output.size

  unless output.dtype == :float32
    # The affine kernel is fp32; double-precision scalar marshalling for a
    # double kernel isn't wired yet. Fail loudly rather than return [0,1).
    raise NotImplementedError,
          "uniform(low:, high:) range scaling is implemented for :float32 only " \
          "(got #{output.dtype}); default [0,1) range works for all dtypes"
  end

  # An empty array would produce a grid of 0 blocks, which is not a valid
  # launch configuration; there is nothing to scale anyway.
  return if n.zero?

  kernel = Ignis::JIT::Kernels::Elementwise.affine_forward
  ptr = output.device_ffi_ptr
  # 256 threads per block; round the block count up to cover all n elements.
  kernel.launch(grid: [(n + 255) / 256], block: [256], args: [ptr, ptr, scale, shift, n])
  Ignis.synchronize
end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
end
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ignis
|
|
4
|
+
module Solver
|
|
5
|
+
# FFI bindings for NVIDIA AMGX algebraic multigrid solver library
|
|
6
|
+
# AMGX provides GPU-accelerated solvers for sparse linear systems
|
|
7
|
+
module AMGXBindings
|
|
8
|
+
extend FFI::Library
|
|
9
|
+
|
|
10
|
+
# Integer return codes produced by AMGX C API calls; OK (0) is the only
# value that check_rc! treats as success.
module RC
  OK                      = 0
  BAD_PARAMETERS          = 1
  UNKNOWN                 = 2
  NOT_SUPPORTED_TARGET    = 3
  NOT_SUPPORTED_BLOCKSIZE = 4
  CUDA_FAILURE            = 5
  THRUST_FAILURE          = 6
  NO_MEMORY               = 7
  IO_ERROR                = 8
  BAD_MODE                = 9
  CORE                    = 10
  PLUGIN                  = 11
  BAD_CONFIGURATION       = 12
  NOT_IMPLEMENTED         = 13
  LICENSE_NOT_FOUND       = 14
  INTERNAL                = 15
end
|
|
29
|
+
|
|
30
|
+
# Status values for comparison with the result of AMGX_solver_get_status.
# NOTE(review): the upstream amgx_c.h header may define
# AMGX_SOLVE_NOT_CONVERGED as 2 (same as DIVERGED) rather than 3 — confirm
# against the AMGX version this gem targets.
module SolveStatus
  SUCCESS       = 0
  FAILED        = 1
  DIVERGED      = 2
  NOT_CONVERGED = 3
end
|
|
37
|
+
|
|
38
|
+
# AMGX mode identifiers: one integer per combination of memory space and
# precisions. Constant names follow <MEMORY>_<vec><mat><idx>:
#   HOST / DEVICE = where the data lives
#   D = double, F = float, I = int
module Mode
  # Host memory
  HOST_DDI = 8192   # double vectors, double matrix, int indices
  HOST_DFI = 8448   # double vectors, float matrix, int indices
  HOST_FFI = 8464   # float vectors, float matrix, int indices

  # Device memory
  DEVICE_DDI = 8193 # double vectors, double matrix, int indices
  DEVICE_DFI = 8449 # double vectors, float matrix, int indices
  DEVICE_FFI = 8465 # float vectors, float matrix, int indices
end
|
|
49
|
+
|
|
50
|
+
class << self
|
|
51
|
+
# Whether ensure_loaded! has completed; an unset flag is initialised to false.
# @return [Boolean]
def loaded?
  @loaded = false unless @loaded
  @loaded
end
|
|
55
|
+
|
|
56
|
+
# Load the AMGX shared library and attach its C functions (once).
#
# Candidates are tried in order. A candidate is skipped when the file does
# not exist or FFI fails to load it, and the first one that loads wins.
# @param dll_path [String, nil] Path to amgxsh.dll (search_paths is used if nil)
# @return [void]
# @raise [LoadError] if no candidate could be loaded
def ensure_loaded!(dll_path = nil)
  return if @loaded

  paths = dll_path ? [dll_path] : search_paths

  loaded_path = paths.find do |candidate|
    next false unless File.exist?(candidate)

    begin
      ffi_lib candidate
      true
    rescue FFI::LoadError
      false
    end
  end

  raise LoadError, "Could not load amgxsh.dll. Searched: #{paths.join(', ')}" unless loaded_path

  attach_functions!
  @loaded = true
  Ignis.logger.info { "Loaded AMGX: #{loaded_path}" }
end
|
|
83
|
+
|
|
84
|
+
private
|
|
85
|
+
|
|
86
|
+
# Candidate locations for amgxsh.dll, in the order they are tried.
#
# The three fixed install locations always come first. The configured CUDA
# bin directory is appended only when one is set; with no directory
# configured, no path is derived from it.
# @return [Array<String>]
def search_paths
  candidates = [
    "C:/AMGX/build/Release/amgxsh.dll",
    "C:/AMGX/build/Debug/amgxsh.dll",
    "C:/Program Files/AMGX/bin/amgxsh.dll"
  ]

  cuda_bin = Ignis.configuration.cuda_bin_path
  # File.join("", "amgxsh.dll") would yield "/amgxsh.dll", so skip nil/empty.
  candidates << File.join(cuda_bin, "amgxsh.dll") if cuda_bin && !cuda_bin.empty?

  candidates
end
|
|
94
|
+
|
|
95
|
+
def attach_functions!
  # Every function returns an AMGX return code, bound here as :int.
  # Each entry is [C symbol, FFI parameter types]; order matches attachment order.
  signatures = [
    # Initialize / finalize
    [:AMGX_initialize, []],
    [:AMGX_finalize, []],
    [:AMGX_get_api_version, %i[pointer pointer]],
    [:AMGX_get_error_string, %i[int pointer int]],
    # Config
    [:AMGX_config_create, %i[pointer string]],
    [:AMGX_config_create_from_file, %i[pointer string]],
    [:AMGX_config_destroy, %i[pointer]],
    # Resources
    [:AMGX_resources_create_simple, %i[pointer pointer]],
    [:AMGX_resources_destroy, %i[pointer]],
    # Matrix
    [:AMGX_matrix_create, %i[pointer pointer int]],
    [:AMGX_matrix_destroy, %i[pointer]],
    # upload_all: mtx handle, n, nnz, block_dimx, block_dimy,
    #             row_ptrs, col_indices, data, diag_data (can be NULL)
    [:AMGX_matrix_upload_all, %i[pointer int int int int pointer pointer pointer pointer]],
    [:AMGX_matrix_get_size, %i[pointer pointer pointer pointer]],
    # Vector
    [:AMGX_vector_create, %i[pointer pointer int]],
    [:AMGX_vector_destroy, %i[pointer]],
    [:AMGX_vector_upload, %i[pointer int int pointer]],
    [:AMGX_vector_download, %i[pointer pointer]],
    [:AMGX_vector_set_zero, %i[pointer int int]],
    # Solver
    [:AMGX_solver_create, %i[pointer pointer int pointer]],
    [:AMGX_solver_destroy, %i[pointer]],
    [:AMGX_solver_setup, %i[pointer pointer]],
    [:AMGX_solver_solve, %i[pointer pointer pointer]],
    [:AMGX_solver_solve_with_0_initial_guess, %i[pointer pointer pointer]],
    [:AMGX_solver_get_status, %i[pointer pointer]],
    [:AMGX_solver_get_iterations_number, %i[pointer pointer]],
    [:AMGX_solver_get_iteration_residual, %i[pointer int int pointer]]
  ]

  signatures.map { |symbol, params| attach_function(symbol, params, :int) }.last
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
# Raise an AMGXError when an AMGX call did not return RC::OK.
#
# The error text is read from AMGX_get_error_string into a 4096-byte buffer
# and prefixed with the caller-supplied context.
# @param rc [Integer] Return code
# @param context [String] Error context
# @return [void]
# @raise [AMGXError] if rc is not RC::OK; the code is exposed as #amgx_code
def self.check_rc!(rc, context = "AMGX operation")
  return if rc == RC::OK

  capacity = 4096
  buffer = FFI::MemoryPointer.new(:char, capacity)
  AMGX_get_error_string(rc, buffer, capacity)

  raise AMGXError.new("#{context}: #{buffer.read_string}", amgx_code: rc)
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
# Error raised for failed AMGX calls; carries the numeric AMGX return code
# alongside the message.
class AMGXError < Ignis::Error
  # @return [Integer, nil] AMGX return code, or nil when none was supplied
  attr_reader :amgx_code

  # @param message [String] Error message
  # @param amgx_code [Integer, nil] AMGX return code
  def initialize(message, amgx_code: nil)
    super(message)
    @amgx_code = amgx_code
  end
end
|
|
171
|
+
end
|
|
172
|
+
end
|