ignis 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +15 -0
- data/lib/ignis.rb +94 -0
- data/lib/nnw/platform.rb +304 -0
- data/lib/nnw/shared/event_bus.rb +240 -0
- data/lib/nnw/shared/ffi_loader.rb +63 -0
- data/lib/nnw/shared/memory_contract.rb +204 -0
- data/lib/nnw/shared/nv_array.rb +710 -0
- data/lib/nnw/shared/recovery_protocol.rb +307 -0
- data/lib/nvruby/configuration.rb +217 -0
- data/lib/nvruby/cuda/device.rb +275 -0
- data/lib/nvruby/cuda/device_props.rb +202 -0
- data/lib/nvruby/cuda/graph.rb +265 -0
- data/lib/nvruby/cuda/graph_bindings.rb +119 -0
- data/lib/nvruby/cuda/library_loader.rb +285 -0
- data/lib/nvruby/cuda/memory.rb +410 -0
- data/lib/nvruby/cuda/runtime_api.rb +804 -0
- data/lib/nvruby/cuda/stream.rb +234 -0
- data/lib/nvruby/dtype.rb +139 -0
- data/lib/nvruby/epilogues.rb +438 -0
- data/lib/nvruby/errors.rb +303 -0
- data/lib/nvruby/half.rb +97 -0
- data/lib/nvruby/jit/compiled_kernel.rb +80 -0
- data/lib/nvruby/jit/compiler.rb +231 -0
- data/lib/nvruby/jit/driver_api_bindings.rb +363 -0
- data/lib/nvruby/jit/kernel.rb +240 -0
- data/lib/nvruby/jit/kernel_module.rb +133 -0
- data/lib/nvruby/jit/kernels/activations.rb +179 -0
- data/lib/nvruby/jit/kernels/attention.rb +504 -0
- data/lib/nvruby/jit/kernels/elementwise.rb +488 -0
- data/lib/nvruby/jit/kernels/loss.rb +213 -0
- data/lib/nvruby/jit/kernels/normalization.rb +200 -0
- data/lib/nvruby/jit/kernels/optimizer.rb +193 -0
- data/lib/nvruby/jit/nvrtc_bindings.rb +282 -0
- data/lib/nvruby/linalg/cublas_bindings.rb +295 -0
- data/lib/nvruby/linalg/cublaslt_bindings.rb +342 -0
- data/lib/nvruby/linalg/epilog.rb +67 -0
- data/lib/nvruby/linalg/matmul.rb +247 -0
- data/lib/nvruby/linalg/matmul_plan.rb +229 -0
- data/lib/nvruby/linalg/optimized_matmul.rb +412 -0
- data/lib/nvruby/memory/cuda_async_memory_resource.rb +123 -0
- data/lib/nvruby/memory/cuda_memory_resource.rb +68 -0
- data/lib/nvruby/memory/device_memory_resource.rb +106 -0
- data/lib/nvruby/memory/pinned_host_memory_resource.rb +112 -0
- data/lib/nvruby/memory/pool_memory_resource.rb +242 -0
- data/lib/nvruby/memory/stats.rb +107 -0
- data/lib/nvruby/memory.rb +124 -0
- data/lib/nvruby/version.rb +5 -0
- metadata +108 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 87196fca0ddbb82a81e1cda44c85ac4ff527da783743a0cce600e92f2ea34929
|
|
4
|
+
data.tar.gz: a8ffe2c3d578b76bd0e663582b3153ac24e8d09aa54706ad718cea3d70b0f58b
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: bc828b7eb0284e8377699e7d147dad2da4cb63e129b88d6be05d5bf87cfd19508f054b3c8bb4e306fb58241a110298e4525475de148c37b3a7aabc301ba34a1a
|
|
7
|
+
data.tar.gz: 02cc7a1dd7d5b51f9718de9ba91513ba50835dc3b5bca0a2cc6bb1651a9738ab1cf12e3495867608b7bc87243536e08fb40af822fb0440fa389ddcd1f892246f
|
data/README.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# ignis
|
|
2
|
+
|
|
3
|
+
GPU compute foundation for Ruby on native Windows — the base of the [Ignis](https://github.com/tigel-agm/Ignis) ecosystem.
|
|
4
|
+
|
|
5
|
+
Provides a GPU n-dimensional array (`Ignis::NDArray`), CUDA device/memory management, a runtime kernel compiler (NVRTC) with a batteries-included kernel library, fp16/bf16 conversion (`Ignis::Half`), and cuBLAS GEMM. **No C extensions** — libraries bind via FFI and kernels compile at runtime, so `gem install` needs no compiler or devkit.
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
require "ignis"
|
|
9
|
+
a = Ignis::NDArray.new(shape: [2, 3], dtype: :float32, device_id: 0)
|
|
10
|
+
a.from_host([1, 2, 3, 4, 5, 6].map(&:to_f))
|
|
11
|
+
# custom CUDA kernels via NVRTC:
|
|
12
|
+
k = Ignis.compile_kernel(src, "my_kernel"); k.launch(grid: [..], block: [..], args: [a, ...])
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Requires an NVIDIA GPU + CUDA runtime. Ruby ≥ 3.1. MIT.
|
data/lib/ignis.rb
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Ignis — GPU compute foundation for Ruby on native Windows.
|
|
4
|
+
#
|
|
5
|
+
# Provides a GPU n-dimensional array (Ignis::NDArray), CUDA device/memory
|
|
6
|
+
# management, a runtime kernel compiler (NVRTC) with a batteries-included kernel
|
|
7
|
+
# library, fp16/bf16 conversion, and cuBLAS GEMM. Libraries bind via FFI and
|
|
8
|
+
# kernels compile at runtime — there are no C extensions, so installation needs
|
|
9
|
+
# no compiler/devkit. Requires an NVIDIA GPU + CUDA toolkit/runtime.
|
|
10
|
+
|
|
11
|
+
# --- Platform + shared foundation (Windows CUDA discovery, the GPU array) ---
|
|
12
|
+
require_relative "nnw/platform"
|
|
13
|
+
require_relative "nnw/shared/ffi_loader"
|
|
14
|
+
require_relative "nnw/shared/nv_array"
|
|
15
|
+
require_relative "nnw/shared/event_bus"
|
|
16
|
+
require_relative "nnw/shared/memory_contract"
|
|
17
|
+
require_relative "nnw/shared/recovery_protocol"
|
|
18
|
+
|
|
19
|
+
# --- Core types ---
|
|
20
|
+
require_relative "nvruby/version"
|
|
21
|
+
require_relative "nvruby/errors"
|
|
22
|
+
require_relative "nvruby/configuration"
|
|
23
|
+
require_relative "nvruby/dtype"
|
|
24
|
+
require_relative "nvruby/half"
|
|
25
|
+
|
|
26
|
+
# --- CUDA runtime/driver ---
|
|
27
|
+
require_relative "nvruby/cuda/library_loader"
|
|
28
|
+
require_relative "nvruby/cuda/runtime_api"
|
|
29
|
+
require_relative "nvruby/cuda/device_props"
|
|
30
|
+
require_relative "nvruby/cuda/device"
|
|
31
|
+
require_relative "nvruby/cuda/memory"
|
|
32
|
+
require_relative "nvruby/cuda/stream"
|
|
33
|
+
require_relative "nvruby/cuda/graph"
|
|
34
|
+
require_relative "nvruby/memory"
|
|
35
|
+
|
|
36
|
+
# --- cuBLAS GEMM ---
|
|
37
|
+
require_relative "nvruby/linalg/cublas_bindings"
|
|
38
|
+
require_relative "nvruby/linalg/epilog"
|
|
39
|
+
require_relative "nvruby/linalg/matmul"
|
|
40
|
+
require_relative "nvruby/linalg/matmul_plan"
|
|
41
|
+
|
|
42
|
+
# --- JIT (NVRTC) machinery + kernel library ---
|
|
43
|
+
require_relative "nvruby/jit/nvrtc_bindings"
|
|
44
|
+
require_relative "nvruby/jit/driver_api_bindings"
|
|
45
|
+
require_relative "nvruby/jit/compiled_kernel"
|
|
46
|
+
require_relative "nvruby/jit/kernel_module"
|
|
47
|
+
require_relative "nvruby/jit/kernel"
|
|
48
|
+
require_relative "nvruby/jit/compiler"
|
|
49
|
+
require_relative "nvruby/jit/kernels/activations"
|
|
50
|
+
require_relative "nvruby/jit/kernels/elementwise"
|
|
51
|
+
require_relative "nvruby/jit/kernels/attention"
|
|
52
|
+
require_relative "nvruby/jit/kernels/normalization"
|
|
53
|
+
require_relative "nvruby/jit/kernels/loss"
|
|
54
|
+
require_relative "nvruby/jit/kernels/optimizer"
|
|
55
|
+
|
|
56
|
+
module Ignis
  # Top-level convenience facade. Every method here is a thin delegator to a
  # constant resolved inside the Ignis namespace (LinAlg, CUDA, JIT, Shared).
  class << self
    # cuBLAS matrix multiply: C = A @ B, with optional transposition of
    # either operand. Delegates to LinAlg::Matmul.call.
    def matmul(a, b, transpose_a: false, transpose_b: false)
      flags = { transpose_a: transpose_a, transpose_b: transpose_b }
      LinAlg::Matmul.call(a, b, **flags)
    end

    # General GEMM: D = alpha*op(A)@op(B) + beta*C.
    # All keyword arguments are forwarded unchanged to LinAlg::Matmul.call.
    def gemm(a, b, c: nil, alpha: 1.0, beta: 0.0, transpose_a: false, transpose_b: false, stream: nil)
      gemm_opts = {
        c: c,
        alpha: alpha,
        beta: beta,
        transpose_a: transpose_a,
        transpose_b: transpose_b,
        stream: stream
      }
      LinAlg::Matmul.call(a, b, **gemm_opts)
    end

    # --- Device management ---

    # Delegates to CUDA::Device.list.
    def devices
      CUDA::Device.list
    end

    # Delegates to CUDA::Device.current.
    def current_device
      CUDA::Device.current
    end

    # Builds CUDA::Device.new(i) and makes it current via #set_current!.
    def set_device(i)
      device = CUDA::Device.new(i)
      device.set_current!
    end

    # Synchronizes whatever CUDA::Device.current returns.
    def synchronize
      CUDA::Device.current.synchronize
    end

    # Result of CUDA::Device.available?, or false if that call raises a
    # StandardError.
    # NOTE(review): LoadError is not a StandardError, so a missing CUDA
    # library surfacing here as LoadError would propagate — confirm intent.
    def cuda_available?
      CUDA::Device.available?
    rescue StandardError
      false
    end

    # Result of CUDA::LibraryLoader.cuda_version, or nil if that call raises
    # a StandardError.
    def cuda_version
      CUDA::LibraryLoader.cuda_version
    rescue StandardError
      nil
    end

    # --- JIT kernel compilation (NVRTC) ---

    # Delegates to JIT::Compiler.compile with the same arguments.
    def compile_kernel(source, name, device_id: 0, options: [])
      JIT::Compiler.compile(source, name, device_id: device_id, options: options)
    end

    # Delegates to JIT::Compiler.compile_only; compute_capability is required.
    def compile_kernel_only(source, name, compute_capability:, options: [])
      JIT::Compiler.compile_only(
        source,
        name,
        compute_capability: compute_capability,
        options: options
      )
    end

    # Delegates to JIT::Compiler.cache_stats.
    def jit_cache_stats
      JIT::Compiler.cache_stats
    end

    # Delegates to JIT::Compiler.clear_cache!.
    def clear_jit_cache!
      JIT::Compiler.clear_cache!
    end

    # --- Shared foundation accessors ---

    # @return [Class] the Shared::EventBus class itself (not an instance)
    def event_bus
      Shared::EventBus
    end

    # @return the Shared::MemoryContract constant
    def memory_contract
      Shared::MemoryContract
    end

    # @return the Shared::RecoveryProtocol constant
    def recovery_protocol
      Shared::RecoveryProtocol
    end
  end
end
|
data/lib/nnw/platform.rb
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ignis
|
|
4
|
+
# Cross-platform detection and path resolution for Ignis.
|
|
5
|
+
#
|
|
6
|
+
# Windows is the primary target (the whole point — Ruby HPC without
|
|
7
|
+
# Python's 20-30% overhead). Linux support exists for multi-GPU bare-metal
|
|
8
|
+
# testing on Lambda / RunPod / Azure instances.
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# Ignis::Platform.windows? # => true on dev box
|
|
12
|
+
  #   Ignis::Platform.find_cuda_lib(:cuda_runtime) # => full path to a matching cudart64_*.dll / libcudart.so*, or nil
|
|
13
|
+
# Ignis::Platform.cuda_bin_path # => 'C:/Program Files/.../bin' or '/usr/local/cuda/lib64'
|
|
14
|
+
module Platform
|
|
15
|
+
# ----------------------------------------------------------------
|
|
16
|
+
# OS Detection
|
|
17
|
+
# ----------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
# @return [Boolean] true if running on Windows
|
|
20
|
+
def self.windows?
|
|
21
|
+
RUBY_PLATFORM.match?(/mswin|mingw|cygwin/i) || (defined?(FFI) && FFI::Platform::IS_WINDOWS)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# @return [Boolean] true if running on Linux
|
|
25
|
+
def self.linux?
|
|
26
|
+
RUBY_PLATFORM.match?(/linux/i)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# @return [Boolean] true if running on macOS (unlikely for GPU work)
|
|
30
|
+
def self.macos?
|
|
31
|
+
RUBY_PLATFORM.match?(/darwin/i)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# @return [Symbol] :windows, :linux, or :macos
|
|
35
|
+
def self.os
|
|
36
|
+
if windows?
|
|
37
|
+
:windows
|
|
38
|
+
elsif linux?
|
|
39
|
+
:linux
|
|
40
|
+
elsif macos?
|
|
41
|
+
:macos
|
|
42
|
+
else
|
|
43
|
+
:unknown
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# ----------------------------------------------------------------
|
|
48
|
+
# CUDA Paths
|
|
49
|
+
# ----------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
CUDA_VERSION_MAJOR = 13
|
|
52
|
+
CUDA_VERSION_MINOR = 0
|
|
53
|
+
|
|
54
|
+
# Windows CUDA root
|
|
55
|
+
WIN_CUDA_ROOT = File.join('C:', 'Program Files', 'NVIDIA GPU Computing Toolkit', 'CUDA', "v#{CUDA_VERSION_MAJOR}.#{CUDA_VERSION_MINOR}").freeze
|
|
56
|
+
WIN_CUDA_BIN = File.join(WIN_CUDA_ROOT, 'bin').freeze
|
|
57
|
+
|
|
58
|
+
# Linux CUDA root (standard install or Lambda Stack)
|
|
59
|
+
LINUX_CUDA_ROOT = "/usr/local/cuda-#{CUDA_VERSION_MAJOR}.#{CUDA_VERSION_MINOR}".freeze
|
|
60
|
+
LINUX_CUDA_LIB = File.join(LINUX_CUDA_ROOT, 'lib64').freeze
|
|
61
|
+
LINUX_CUDA_ALT = '/usr/local/cuda'.freeze
|
|
62
|
+
LINUX_CUDA_ALT_LIB = File.join(LINUX_CUDA_ALT, 'lib64').freeze
|
|
63
|
+
|
|
64
|
+
# Candidate sub-directories that hold the CUDA redistributable DLLs under a
|
|
65
|
+
# toolkit root. CUDA 13 on Windows moved them from bin\ to bin\x64\, so both
|
|
66
|
+
# must be probed.
|
|
67
|
+
WIN_DLL_SUBDIRS = [File.join('bin', 'x64'), 'bin'].freeze
|
|
68
|
+
|
|
69
|
+
# Discover the installed Windows CUDA toolkit root.
|
|
70
|
+
#
|
|
71
|
+
# The compiled-in v#{MAJOR}.#{MINOR} default is only a last resort: real
|
|
72
|
+
# installs vary (v13.0 / v13.1 / v13.2 / ...). We prefer the CUDA_PATH env
|
|
73
|
+
# var set by the NVIDIA installer, then the highest-versioned vX.Y directory
|
|
74
|
+
# that actually contains a CUDA runtime DLL. Memoized.
|
|
75
|
+
# @return [String]
|
|
76
|
+
def self.win_cuda_root
|
|
77
|
+
@win_cuda_root ||= begin
|
|
78
|
+
candidates = []
|
|
79
|
+
env_root = ENV['CUDA_PATH']
|
|
80
|
+
candidates << env_root if env_root && !env_root.empty?
|
|
81
|
+
|
|
82
|
+
base = File.join('C:', 'Program Files', 'NVIDIA GPU Computing Toolkit', 'CUDA')
|
|
83
|
+
versioned = Dir.glob(File.join(base, 'v*')).select { |d| File.directory?(d) }
|
|
84
|
+
versioned = versioned.sort_by do |d|
|
|
85
|
+
(m = d.match(/v(\d+)\.(\d+)/)) ? [m[1].to_i, m[2].to_i] : [0, 0]
|
|
86
|
+
end.reverse # highest version first (v13.2 before v13.0)
|
|
87
|
+
candidates.concat(versioned)
|
|
88
|
+
|
|
89
|
+
chosen = candidates.find do |root|
|
|
90
|
+
WIN_DLL_SUBDIRS.any? { |sub| Dir.glob(File.join(root, sub, 'cudart64_*.dll')).any? }
|
|
91
|
+
end
|
|
92
|
+
chosen || WIN_CUDA_ROOT
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Resolve the directory that actually contains the CUDA redistributable DLLs
|
|
97
|
+
# (bin\x64 on CUDA 13, bin on older layouts). Memoized.
|
|
98
|
+
# @return [String]
|
|
99
|
+
def self.win_cuda_bin
|
|
100
|
+
@win_cuda_bin ||= begin
|
|
101
|
+
root = win_cuda_root
|
|
102
|
+
sub = WIN_DLL_SUBDIRS.find { |s| Dir.glob(File.join(root, s, 'cudart64_*.dll')).any? }
|
|
103
|
+
File.join(root, sub || 'bin')
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
# @return [String] CUDA binary/library directory for the current OS
|
|
108
|
+
def self.cuda_bin_path
|
|
109
|
+
if windows?
|
|
110
|
+
win_cuda_bin
|
|
111
|
+
else
|
|
112
|
+
# Prefer versioned path, fall back to symlinked /usr/local/cuda
|
|
113
|
+
if Dir.exist?(LINUX_CUDA_LIB)
|
|
114
|
+
LINUX_CUDA_LIB
|
|
115
|
+
elsif Dir.exist?(LINUX_CUDA_ALT_LIB)
|
|
116
|
+
LINUX_CUDA_ALT_LIB
|
|
117
|
+
else
|
|
118
|
+
# Lambda Stack / system CUDA — libraries on LD_LIBRARY_PATH
|
|
119
|
+
'/usr/lib/x86_64-linux-gnu'
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# @return [String] CUDA root directory
|
|
125
|
+
def self.cuda_root
|
|
126
|
+
if windows?
|
|
127
|
+
win_cuda_root
|
|
128
|
+
else
|
|
129
|
+
Dir.exist?(LINUX_CUDA_ROOT) ? LINUX_CUDA_ROOT : LINUX_CUDA_ALT
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# ----------------------------------------------------------------
|
|
134
|
+
# Library Name Resolution
|
|
135
|
+
# ----------------------------------------------------------------
|
|
136
|
+
|
|
137
|
+
# Maps library keys to their platform-specific filenames.
|
|
138
|
+
# Windows uses DLLs, Linux uses shared objects.
|
|
139
|
+
#
|
|
140
|
+
# @param key [Symbol] library key
|
|
141
|
+
# @return [String] platform-specific glob pattern
|
|
142
|
+
def self.cuda_lib_pattern(key)
|
|
143
|
+
if windows?
|
|
144
|
+
WIN_LIB_PATTERNS[key] || raise(ArgumentError, "Unknown CUDA library: #{key}")
|
|
145
|
+
else
|
|
146
|
+
LINUX_LIB_PATTERNS[key] || raise(ArgumentError, "Unknown CUDA library: #{key}")
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Resolve the full path to a specific CUDA library.
|
|
151
|
+
# @param key [Symbol] library key (e.g. :cuda_runtime, :cublas)
|
|
152
|
+
# @return [String, nil] full path or nil
|
|
153
|
+
def self.find_cuda_lib(key)
|
|
154
|
+
pattern = cuda_lib_pattern(key)
|
|
155
|
+
search_paths = cuda_search_paths(key)
|
|
156
|
+
|
|
157
|
+
search_paths.each do |dir|
|
|
158
|
+
matches = Dir.glob(File.join(dir, pattern))
|
|
159
|
+
return matches.max if matches.any?
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
nil
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# Resolve the CUDA runtime library path directly.
|
|
166
|
+
# @return [String] path to cudart
|
|
167
|
+
def self.cudart_path
|
|
168
|
+
if windows?
|
|
169
|
+
found = find_cuda_lib(:cuda_runtime)
|
|
170
|
+
found || File.join(WIN_CUDA_BIN, "cudart64_#{CUDA_VERSION_MAJOR}0.dll")
|
|
171
|
+
else
|
|
172
|
+
found = find_cuda_lib(:cuda_runtime)
|
|
173
|
+
found || "libcudart.so.#{CUDA_VERSION_MAJOR}"
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# @return [String] path separator for the OS
|
|
178
|
+
def self.path_separator
|
|
179
|
+
windows? ? ';' : ':'
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# @return [String] shared library extension
|
|
183
|
+
def self.shared_lib_ext
|
|
184
|
+
windows? ? '.dll' : '.so'
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# ----------------------------------------------------------------
|
|
188
|
+
# Custom Library Paths (cuTENSOR, cuDSS, etc.)
|
|
189
|
+
# ----------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
# @return [Hash{Symbol => String}] custom search paths per library
|
|
192
|
+
def self.custom_lib_paths
|
|
193
|
+
if windows?
|
|
194
|
+
{
|
|
195
|
+
cutensor: 'C:/Program Files/NVIDIA cuTENSOR/v2.4/bin/13',
|
|
196
|
+
cudss: 'C:/Program Files/NVIDIA cuDSS/v0.7/bin/13'
|
|
197
|
+
}
|
|
198
|
+
else
|
|
199
|
+
{
|
|
200
|
+
cutensor: '/usr/local/cutensor/lib',
|
|
201
|
+
cudss: '/usr/local/cudss/lib'
|
|
202
|
+
}
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
# ----------------------------------------------------------------
|
|
207
|
+
# Kernel32 / System Library Support
|
|
208
|
+
# ----------------------------------------------------------------
|
|
209
|
+
|
|
210
|
+
# @return [Boolean] true if Kernel32 (Windows DLL path management) is available
|
|
211
|
+
def self.kernel32_available?
|
|
212
|
+
windows?
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
# ----------------------------------------------------------------
|
|
216
|
+
# .NET AOT Path (WNAIS)
|
|
217
|
+
# ----------------------------------------------------------------
|
|
218
|
+
|
|
219
|
+
    # @return [String] file name (not a full path) of the WNAIS native AOT library for the current OS
|
|
220
|
+
def self.wnais_native_lib
|
|
221
|
+
if windows?
|
|
222
|
+
'Wnais.Core.dll'
|
|
223
|
+
else
|
|
224
|
+
'Wnais.Core.so'
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
# ----------------------------------------------------------------
|
|
229
|
+
# Diagnostics
|
|
230
|
+
# ----------------------------------------------------------------
|
|
231
|
+
|
|
232
|
+
# @return [Hash] platform summary for debugging
|
|
233
|
+
def self.info
|
|
234
|
+
{
|
|
235
|
+
os: os,
|
|
236
|
+
ruby_platform: RUBY_PLATFORM,
|
|
237
|
+
cuda_version: "#{CUDA_VERSION_MAJOR}.#{CUDA_VERSION_MINOR}",
|
|
238
|
+
cuda_root: cuda_root,
|
|
239
|
+
cuda_bin: cuda_bin_path,
|
|
240
|
+
cudart: cudart_path,
|
|
241
|
+
kernel32: kernel32_available?,
|
|
242
|
+
shared_lib_ext: shared_lib_ext
|
|
243
|
+
}
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
private_class_method def self.cuda_search_paths(key)
|
|
247
|
+
paths = []
|
|
248
|
+
|
|
249
|
+
# Custom paths first
|
|
250
|
+
custom = custom_lib_paths[key]
|
|
251
|
+
paths << custom if custom && Dir.exist?(custom.to_s)
|
|
252
|
+
|
|
253
|
+
# Primary CUDA lib path
|
|
254
|
+
paths << cuda_bin_path
|
|
255
|
+
|
|
256
|
+
unless windows?
|
|
257
|
+
# Linux: also check standard system paths
|
|
258
|
+
paths << LINUX_CUDA_ALT_LIB if Dir.exist?(LINUX_CUDA_ALT_LIB)
|
|
259
|
+
paths << '/usr/lib/x86_64-linux-gnu'
|
|
260
|
+
paths << '/usr/lib64'
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
# System PATH
|
|
264
|
+
env_paths = ENV['PATH']&.split(path_separator) || []
|
|
265
|
+
paths.concat(env_paths)
|
|
266
|
+
|
|
267
|
+
paths.uniq
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# Windows DLL patterns
|
|
271
|
+
WIN_LIB_PATTERNS = {
|
|
272
|
+
cuda_runtime: 'cudart64_*.dll',
|
|
273
|
+
cublas: 'cublas64_*.dll',
|
|
274
|
+
cublaslt: 'cublasLt64_*.dll',
|
|
275
|
+
cufft: 'cufft64_*.dll',
|
|
276
|
+
curand: 'curand64_*.dll',
|
|
277
|
+
cusparse: 'cusparse64_*.dll',
|
|
278
|
+
cusolver: 'cusolver64_*.dll',
|
|
279
|
+
cudnn: 'cudnn64_*.dll',
|
|
280
|
+
nvrtc: 'nvrtc64_*.dll',
|
|
281
|
+
cutensor: 'cutensor.dll',
|
|
282
|
+
cudss: 'cudss64_*.dll',
|
|
283
|
+
mathdx: 'mathdx64_0.dll',
|
|
284
|
+
cuda_driver: 'nvcuda.dll'
|
|
285
|
+
}.freeze
|
|
286
|
+
|
|
287
|
+
# Linux .so patterns
|
|
288
|
+
LINUX_LIB_PATTERNS = {
|
|
289
|
+
cuda_runtime: 'libcudart.so*',
|
|
290
|
+
cublas: 'libcublas.so*',
|
|
291
|
+
cublaslt: 'libcublasLt.so*',
|
|
292
|
+
cufft: 'libcufft.so*',
|
|
293
|
+
curand: 'libcurand.so*',
|
|
294
|
+
cusparse: 'libcusparse.so*',
|
|
295
|
+
cusolver: 'libcusolver.so*',
|
|
296
|
+
cudnn: 'libcudnn.so*',
|
|
297
|
+
nvrtc: 'libnvrtc.so*',
|
|
298
|
+
cutensor: 'libcutensor.so*',
|
|
299
|
+
cudss: 'libcudss.so*',
|
|
300
|
+
mathdx: 'libmathdx.so*',
|
|
301
|
+
cuda_driver: 'libcuda.so*'
|
|
302
|
+
}.freeze
|
|
303
|
+
end
|
|
304
|
+
end
|
|
data/lib/nnw/shared/event_bus.rb
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ignis
|
|
4
|
+
module Shared
|
|
5
|
+
# EventBus — Typed, bounded, synchronous-first pub/sub bus.
|
|
6
|
+
#
|
|
7
|
+
# All cross-layer communication goes through EventBus.
|
|
8
|
+
# Zero global state except the singleton instance.
|
|
9
|
+
#
|
|
10
|
+
# Event types are frozen constants — do not add more at runtime.
|
|
11
|
+
# All operations are thread-safe via Monitor (reentrant mutex).
|
|
12
|
+
class EventBus
|
|
13
|
+
# Maximum history entries per event type (ring buffer).
|
|
14
|
+
MAX_HISTORY = 100
|
|
15
|
+
|
|
16
|
+
# Frozen set of valid event types.
|
|
17
|
+
VALID_EVENT_TYPES = %i[
|
|
18
|
+
data_ready
|
|
19
|
+
compute_done
|
|
20
|
+
all_reduce_done
|
|
21
|
+
checkpoint_ready
|
|
22
|
+
gpu_failed
|
|
23
|
+
gpu_recovered
|
|
24
|
+
backpressure_on
|
|
25
|
+
backpressure_off
|
|
26
|
+
topology_changed
|
|
27
|
+
nova_flush_done
|
|
28
|
+
health_alert
|
|
29
|
+
].freeze
|
|
30
|
+
|
|
31
|
+
# @return [EventBus] singleton instance
|
|
32
|
+
def self.instance
|
|
33
|
+
@instance ||= new
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Reset the singleton instance (for testing only).
|
|
37
|
+
# @return [void]
|
|
38
|
+
def self.reset!
|
|
39
|
+
@instance = new
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Subscribe a handler to an event type.
|
|
43
|
+
#
|
|
44
|
+
# @param event_type [Symbol] one of VALID_EVENT_TYPES
|
|
45
|
+
# @param handler_id [String, Symbol] unique identifier for this handler
|
|
46
|
+
# @yield [Hash] block called with event payload when event is published
|
|
47
|
+
# @return [void]
|
|
48
|
+
# @raise [ArgumentError] if event_type is invalid or no block given
|
|
49
|
+
def self.subscribe(event_type, handler_id:, &block)
|
|
50
|
+
instance.subscribe(event_type, handler_id: handler_id, &block)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Unsubscribe a handler from an event type.
|
|
54
|
+
#
|
|
55
|
+
# @param event_type [Symbol] one of VALID_EVENT_TYPES
|
|
56
|
+
# @param handler_id [String, Symbol] the handler identifier to remove
|
|
57
|
+
# @return [Boolean] true if handler was found and removed
|
|
58
|
+
def self.unsubscribe(event_type, handler_id:)
|
|
59
|
+
instance.unsubscribe(event_type, handler_id: handler_id)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Publish an event to all subscribers.
|
|
63
|
+
#
|
|
64
|
+
# Calls all subscribers synchronously in subscription order.
|
|
65
|
+
# Catches and logs subscriber exceptions (never raises to caller).
|
|
66
|
+
# Records publish timestamp in metrics.
|
|
67
|
+
#
|
|
68
|
+
# @param event_type [Symbol] one of VALID_EVENT_TYPES
|
|
69
|
+
# @param payload [Hash] event payload
|
|
70
|
+
# @return [Integer] number of subscribers notified
|
|
71
|
+
def self.publish(event_type, payload: {})
|
|
72
|
+
instance.publish(event_type, payload: payload)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Get recent event history for an event type.
|
|
76
|
+
#
|
|
77
|
+
# @param event_type [Symbol] one of VALID_EVENT_TYPES
|
|
78
|
+
# @param last_n [Integer] maximum number of entries to return
|
|
79
|
+
# @return [Array<Hash>] array of {event_type:, payload:, timestamp:, subscriber_count:}
|
|
80
|
+
def self.history(event_type, last_n: 10)
|
|
81
|
+
instance.history(event_type, last_n: last_n)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Get all registered handler IDs for an event type.
|
|
85
|
+
#
|
|
86
|
+
# @param event_type [Symbol] one of VALID_EVENT_TYPES
|
|
87
|
+
# @return [Array<String, Symbol>]
|
|
88
|
+
def self.handlers_for(event_type)
|
|
89
|
+
instance.handlers_for(event_type)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Get metrics summary.
|
|
93
|
+
#
|
|
94
|
+
# @return [Hash] metrics per event type
|
|
95
|
+
def self.metrics
|
|
96
|
+
instance.metrics
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Instance methods
|
|
100
|
+
|
|
101
|
+
def initialize
|
|
102
|
+
@monitor = Monitor.new
|
|
103
|
+
@subscribers = {}
|
|
104
|
+
@history = {}
|
|
105
|
+
@metrics = {}
|
|
106
|
+
@errors = []
|
|
107
|
+
|
|
108
|
+
VALID_EVENT_TYPES.each do |et|
|
|
109
|
+
@subscribers[et] = []
|
|
110
|
+
@history[et] = []
|
|
111
|
+
@metrics[et] = { publish_count: 0, last_publish_at: nil, error_count: 0 }
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# @see EventBus.subscribe
|
|
116
|
+
def subscribe(event_type, handler_id:, &block)
|
|
117
|
+
validate_event_type!(event_type)
|
|
118
|
+
raise ArgumentError, 'Block required for subscribe' unless block_given?
|
|
119
|
+
|
|
120
|
+
@monitor.synchronize do
|
|
121
|
+
# Remove existing handler with same ID to prevent duplicates
|
|
122
|
+
@subscribers[event_type].reject! { |h| h[:handler_id] == handler_id }
|
|
123
|
+
@subscribers[event_type] << { handler_id: handler_id, block: block }
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
# @see EventBus.unsubscribe
|
|
128
|
+
def unsubscribe(event_type, handler_id:)
|
|
129
|
+
validate_event_type!(event_type)
|
|
130
|
+
|
|
131
|
+
@monitor.synchronize do
|
|
132
|
+
initial_size = @subscribers[event_type].size
|
|
133
|
+
@subscribers[event_type].reject! { |h| h[:handler_id] == handler_id }
|
|
134
|
+
@subscribers[event_type].size < initial_size
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# @see EventBus.publish
|
|
139
|
+
def publish(event_type, payload: {})
|
|
140
|
+
validate_event_type!(event_type)
|
|
141
|
+
|
|
142
|
+
timestamp = Time.now
|
|
143
|
+
handlers_snapshot = nil
|
|
144
|
+
|
|
145
|
+
@monitor.synchronize do
|
|
146
|
+
handlers_snapshot = @subscribers[event_type].dup
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
notified = 0
|
|
150
|
+
errors_in_publish = []
|
|
151
|
+
|
|
152
|
+
handlers_snapshot.each do |handler|
|
|
153
|
+
begin
|
|
154
|
+
handler[:block].call(payload)
|
|
155
|
+
notified += 1
|
|
156
|
+
rescue => e
|
|
157
|
+
notified += 1
|
|
158
|
+
errors_in_publish << {
|
|
159
|
+
handler_id: handler[:handler_id],
|
|
160
|
+
error: e,
|
|
161
|
+
timestamp: timestamp
|
|
162
|
+
}
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Record history and metrics under lock
|
|
167
|
+
@monitor.synchronize do
|
|
168
|
+
entry = {
|
|
169
|
+
event_type: event_type,
|
|
170
|
+
payload: payload,
|
|
171
|
+
timestamp: timestamp,
|
|
172
|
+
subscriber_count: notified,
|
|
173
|
+
errors: errors_in_publish.map { |err| { handler_id: err[:handler_id], message: err[:error].message } }
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
ring = @history[event_type]
|
|
177
|
+
ring << entry
|
|
178
|
+
ring.shift while ring.size > MAX_HISTORY
|
|
179
|
+
|
|
180
|
+
@metrics[event_type][:publish_count] += 1
|
|
181
|
+
@metrics[event_type][:last_publish_at] = timestamp
|
|
182
|
+
@metrics[event_type][:error_count] += errors_in_publish.size
|
|
183
|
+
|
|
184
|
+
@errors.concat(errors_in_publish)
|
|
185
|
+
@errors.shift while @errors.size > MAX_HISTORY
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
notified
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# @see EventBus.history
|
|
192
|
+
def history(event_type, last_n: 10)
|
|
193
|
+
validate_event_type!(event_type)
|
|
194
|
+
|
|
195
|
+
@monitor.synchronize do
|
|
196
|
+
ring = @history[event_type]
|
|
197
|
+
n = [last_n, ring.size].min
|
|
198
|
+
ring.last(n)
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
# @see EventBus.handlers_for
|
|
203
|
+
def handlers_for(event_type)
|
|
204
|
+
validate_event_type!(event_type)
|
|
205
|
+
|
|
206
|
+
@monitor.synchronize do
|
|
207
|
+
@subscribers[event_type].map { |h| h[:handler_id] }
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# @see EventBus.metrics
|
|
212
|
+
def metrics
|
|
213
|
+
@monitor.synchronize do
|
|
214
|
+
@metrics.transform_values(&:dup)
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
# Get recent errors across all event types.
|
|
219
|
+
# @param last_n [Integer] max entries
|
|
220
|
+
# @return [Array<Hash>]
|
|
221
|
+
def recent_errors(last_n: 20)
|
|
222
|
+
@monitor.synchronize do
|
|
223
|
+
@errors.last([last_n, @errors.size].min)
|
|
224
|
+
end
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
private
|
|
228
|
+
|
|
229
|
+
# Validate that event_type is one of the allowed types.
|
|
230
|
+
# @param event_type [Symbol]
|
|
231
|
+
# @raise [ArgumentError]
|
|
232
|
+
def validate_event_type!(event_type)
|
|
233
|
+
unless VALID_EVENT_TYPES.include?(event_type)
|
|
234
|
+
raise ArgumentError,
|
|
235
|
+
"Invalid event type #{event_type.inspect}. Valid: #{VALID_EVENT_TYPES}"
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
end
|