ignis 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +15 -0
- data/lib/ignis.rb +94 -0
- data/lib/nnw/platform.rb +304 -0
- data/lib/nnw/shared/event_bus.rb +240 -0
- data/lib/nnw/shared/ffi_loader.rb +63 -0
- data/lib/nnw/shared/memory_contract.rb +204 -0
- data/lib/nnw/shared/nv_array.rb +710 -0
- data/lib/nnw/shared/recovery_protocol.rb +307 -0
- data/lib/nvruby/configuration.rb +217 -0
- data/lib/nvruby/cuda/device.rb +275 -0
- data/lib/nvruby/cuda/device_props.rb +202 -0
- data/lib/nvruby/cuda/graph.rb +265 -0
- data/lib/nvruby/cuda/graph_bindings.rb +119 -0
- data/lib/nvruby/cuda/library_loader.rb +285 -0
- data/lib/nvruby/cuda/memory.rb +410 -0
- data/lib/nvruby/cuda/runtime_api.rb +804 -0
- data/lib/nvruby/cuda/stream.rb +234 -0
- data/lib/nvruby/dtype.rb +139 -0
- data/lib/nvruby/epilogues.rb +438 -0
- data/lib/nvruby/errors.rb +303 -0
- data/lib/nvruby/half.rb +97 -0
- data/lib/nvruby/jit/compiled_kernel.rb +80 -0
- data/lib/nvruby/jit/compiler.rb +231 -0
- data/lib/nvruby/jit/driver_api_bindings.rb +363 -0
- data/lib/nvruby/jit/kernel.rb +240 -0
- data/lib/nvruby/jit/kernel_module.rb +133 -0
- data/lib/nvruby/jit/kernels/activations.rb +179 -0
- data/lib/nvruby/jit/kernels/attention.rb +504 -0
- data/lib/nvruby/jit/kernels/elementwise.rb +488 -0
- data/lib/nvruby/jit/kernels/loss.rb +213 -0
- data/lib/nvruby/jit/kernels/normalization.rb +200 -0
- data/lib/nvruby/jit/kernels/optimizer.rb +193 -0
- data/lib/nvruby/jit/nvrtc_bindings.rb +282 -0
- data/lib/nvruby/linalg/cublas_bindings.rb +295 -0
- data/lib/nvruby/linalg/cublaslt_bindings.rb +342 -0
- data/lib/nvruby/linalg/epilog.rb +67 -0
- data/lib/nvruby/linalg/matmul.rb +247 -0
- data/lib/nvruby/linalg/matmul_plan.rb +229 -0
- data/lib/nvruby/linalg/optimized_matmul.rb +412 -0
- data/lib/nvruby/memory/cuda_async_memory_resource.rb +123 -0
- data/lib/nvruby/memory/cuda_memory_resource.rb +68 -0
- data/lib/nvruby/memory/device_memory_resource.rb +106 -0
- data/lib/nvruby/memory/pinned_host_memory_resource.rb +112 -0
- data/lib/nvruby/memory/pool_memory_resource.rb +242 -0
- data/lib/nvruby/memory/stats.rb +107 -0
- data/lib/nvruby/memory.rb +124 -0
- data/lib/nvruby/version.rb +5 -0
- metadata +108 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Ignis Shared FFI Loader
|
|
4
|
+
#
|
|
5
|
+
# Centralizes the loading of the ruby-ffi gem across the entire Ignis platform.
|
|
6
|
+
# FFI is used ONLY for struct-heavy one-shot calls (device properties, VMM structs, etc.).
|
|
7
|
+
# All hot-path calls use Fiddle directly.
|
|
8
|
+
#
|
|
9
|
+
# The FFI gem is installed system-wide in the MSVC Ruby build at:
|
|
10
|
+
# C:\RubyMSVC34\lib\ruby\gems\3.4.0\gems\ffi-1.17.2\lib\ffi.rb
|
|
11
|
+
#
|
|
12
|
+
# If a vendored copy exists at vendor/ffi/gems/ffi-<version>/lib inside the project root, it takes priority.
|
|
13
|
+
# This allows deploying to machines without system-installed FFI.
|
|
14
|
+
|
|
15
|
+
module Ignis
  module Shared
    # Loads the ruby-ffi gem once per process, putting a vendored copy on the
    # load path first when one is present under VENDOR_FFI_PATH.
    module FFILoader
      # @return [String] the project root directory (three levels above this file)
      PROJECT_ROOT = File.expand_path(File.join(__dir__, '..', '..', '..')).freeze

      # @return [String] directory expected to contain vendored `ffi-<version>` gem dirs
      VENDOR_FFI_PATH = File.join(PROJECT_ROOT, 'vendor', 'ffi', 'gems').freeze

      @loaded = false

      class << self
        # Load the FFI gem, preferring a vendored copy if available.
        #
        # Idempotent: once a load has succeeded, later calls return immediately.
        #
        # @return [Boolean, nil] true after a successful load, nil when already loaded
        # @raise [LoadError] if FFI cannot be loaded from any source
        def load!
          return if @loaded

          vendored_path = detect_vendored_ffi
          if vendored_path
            # Prepend so the vendored copy wins over any system-installed gem.
            $LOAD_PATH.unshift(vendored_path) unless $LOAD_PATH.include?(vendored_path)
          end

          require 'ffi'
          @loaded = true
        end

        # @return [Boolean] whether FFI has been loaded through this loader
        def loaded?
          @loaded
        end

        private

        # Detect the vendored FFI gem directory.
        #
        # When several versions are vendored, the numerically highest one wins
        # (1.17.2 beats 1.9.0); comparing the version strings directly would
        # order them lexicographically and pick the wrong one.
        #
        # @return [String, nil] path to the vendored FFI lib directory, or nil if not found
        def detect_vendored_ffi
          return nil unless Dir.exist?(VENDOR_FFI_PATH)

          ffi_dirs = Dir.glob(File.join(VENDOR_FFI_PATH, 'ffi-*', 'lib'))
          return nil if ffi_dirs.empty?

          ffi_dirs.max_by { |lib_dir| vendored_version(lib_dir) }
        end

        # Parse the version out of a `.../ffi-<major>.<minor>.<patch>[...]/lib` path.
        #
        # Only the gem directory name is inspected, so an `ffi-x.y.z` fragment
        # elsewhere in the path cannot influence the result.
        #
        # @param lib_dir [String] path to a vendored gem's lib directory
        # @return [Array<Integer>] [major, minor, patch]; [0, 0, 0] when unparsable
        def vendored_version(lib_dir)
          gem_dir = File.basename(File.dirname(lib_dir))
          match = /\Affi-(\d+)\.(\d+)\.(\d+)/.match(gem_dir)
          match ? match.captures.map(&:to_i) : [0, 0, 0]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Monitor lives in the stdlib `monitor` library; require it explicitly rather
# than relying on another library having loaded it first.
require 'monitor'

module Ignis
  module Shared
    # MemoryContract — Pinned memory ownership enforcement.
    #
    # Prevents double-free and use-after-free — the actual bugs that killed
    # the previous Ignis build.
    #
    # Rules:
    # - An NvArray may only be freed by its current owner.
    # - Ignis is the default owner after allocation.
    # - NvCCL must call acquire before operating, release when done.
    # - WNAIS must call acquire before NOVA I/O, release when done.
    # - Acquiring while ref_count > 1 raises MemoryContractViolation.
    # - Pinned host memory is always owned by Ignis::Memory. NvCCL may
    #   read it but may not free it.
    #
    # Thread-safe: instance operations are protected by a Monitor, and
    # creation/replacement of the singleton is protected by a Mutex.
    class MemoryContract
      # Guards lazy creation and replacement of the singleton so concurrent
      # first callers cannot end up with different contract instances.
      INSTANCE_LOCK = Mutex.new
      private_constant :INSTANCE_LOCK

      # @return [MemoryContract] singleton instance
      def self.instance
        # Cheap unsynchronized read first; only take the lock on first use.
        @instance || INSTANCE_LOCK.synchronize { @instance ||= new }
      end

      # Reset the singleton instance (for testing only).
      # @return [MemoryContract] the fresh instance
      def self.reset!
        INSTANCE_LOCK.synchronize { @instance = new }
      end

      # Acquire ownership of an NvArray.
      #
      # Transfers ownership from the current owner to the requesting layer.
      #
      # NOTE(review): the class-level rules say acquisition fails when
      # ref_count > 1, but no ref_count check is performed here — presumably
      # NvArray#transfer_ownership enforces it; confirm against NvArray.
      #
      # @param array [NvArray] the array to acquire
      # @param by [Symbol] the requesting owner (must be in NvArray::VALID_OWNERS)
      # @return [Boolean] true if acquisition succeeded
      # @raise [ArgumentError] if `by` is not a valid owner
      # @raise [MemoryContractViolation] if array is already owned by `by`
      def self.acquire(array, by:)
        instance.acquire(array, by: by)
      end

      # Release ownership of an NvArray back to the default owner (:nvruby).
      #
      # @param array [NvArray] the array to release
      # @param by [Symbol] the current owner releasing the array
      # @return [Boolean] true if release succeeded
      # @raise [ArgumentError] if `by` is not a valid owner
      # @raise [MemoryContractViolation] if `by` is not the current owner
      def self.release(array, by:)
        instance.release(array, by: by)
      end

      # Assert that an NvArray is owned by the expected owner.
      #
      # @param array [NvArray] the array to check
      # @param expected_owner [Symbol] the expected owner
      # @return [void]
      # @raise [ArgumentError] if `expected_owner` is not a valid owner
      # @raise [MemoryContractViolation] if the array's owner doesn't match
      def self.assert_owner!(array, expected_owner)
        instance.assert_owner!(array, expected_owner)
      end

      # Audit all tracked arrays for potential leaks.
      #
      # Returns information about all arrays that are still tracked and were
      # first tracked more than 5 seconds ago.
      #
      # @return [Array<Hash>] array of
      #   {array_id:, owner:, age_ms:, shape:, dtype:, freed:}
      def self.audit
        instance.audit
      end

      # Track an NvArray in the contract system.
      #
      # @param array [NvArray]
      # @return [void]
      def self.track(array)
        instance.track(array)
      end

      # Remove an NvArray from tracking (called after free).
      #
      # @param array [NvArray]
      # @return [void]
      def self.untrack(array)
        instance.untrack(array)
      end

      # Get the number of tracked arrays.
      # @return [Integer]
      def self.tracked_count
        instance.tracked_count
      end

      # Instance methods

      def initialize
        @monitor = Monitor.new
        @tracked = {} # id -> {array:, tracked_at:}
      end

      # @see MemoryContract.acquire
      def acquire(array, by:)
        validate_owner!(by)

        @monitor.synchronize do
          if array.owner == by
            raise MemoryContractViolation,
                  "NvArray##{array.id} is already owned by #{by.inspect} — double-acquire"
          end

          array.transfer_ownership(by)
          # Arrays acquired without an explicit track call still show up in audits.
          track_if_needed(array)
          true
        end
      end

      # @see MemoryContract.release
      def release(array, by:)
        validate_owner!(by)

        @monitor.synchronize do
          unless array.owner == by
            raise MemoryContractViolation,
                  "NvArray##{array.id} is owned by #{array.owner.inspect}, " \
                  "not #{by.inspect} — cannot release"
          end

          # Ownership always returns to the default owner, never to the previous one.
          array.transfer_ownership(:nvruby)
          true
        end
      end

      # @see MemoryContract.assert_owner!
      def assert_owner!(array, expected_owner)
        validate_owner!(expected_owner)

        actual_owner = @monitor.synchronize { array.owner }
        return if actual_owner == expected_owner

        raise MemoryContractViolation,
              "NvArray##{array.id} expected owner #{expected_owner.inspect}, " \
              "actual #{actual_owner.inspect}"
      end

      # @see MemoryContract.audit
      def audit
        now = Time.now
        threshold_seconds = 5.0

        @monitor.synchronize do
          @tracked.values
                  .select { |entry| (now - entry[:tracked_at]) > threshold_seconds }
                  .map do |entry|
                    arr = entry[:array]
                    {
                      array_id: arr.id,
                      owner: arr.owner,
                      age_ms: ((now - entry[:tracked_at]) * 1000).round,
                      shape: arr.shape,
                      dtype: arr.dtype,
                      freed: arr.freed?
                    }
                  end
        end
      end

      # Start (or restart) tracking `array`; an existing entry's timestamp is reset.
      #
      # @param array [NvArray]
      # @return [void]
      def track(array)
        @monitor.synchronize do
          @tracked[array.id] = { array: array, tracked_at: Time.now }
        end
      end

      # Stop tracking `array`; a no-op when it was not tracked.
      #
      # @param array [NvArray]
      # @return [void]
      def untrack(array)
        @monitor.synchronize do
          @tracked.delete(array.id)
        end
      end

      # @return [Integer] number of arrays currently tracked
      def tracked_count
        @monitor.synchronize { @tracked.size }
      end

      private

      # Automatically track array if not already tracked, keeping the original
      # timestamp when it is. Callers must already hold @monitor.
      #
      # @param array [NvArray]
      def track_if_needed(array)
        @tracked[array.id] ||= { array: array, tracked_at: Time.now }
      end

      # Validate owner parameter.
      #
      # @param owner [Symbol]
      # @raise [ArgumentError] if owner is not listed in NvArray::VALID_OWNERS
      def validate_owner!(owner)
        return if NvArray::VALID_OWNERS.include?(owner)

        raise ArgumentError,
              "Invalid owner #{owner.inspect}. Valid: #{NvArray::VALID_OWNERS}"
      end
    end
  end
end
|