ignis 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +15 -0
  3. data/lib/ignis.rb +94 -0
  4. data/lib/nnw/platform.rb +304 -0
  5. data/lib/nnw/shared/event_bus.rb +240 -0
  6. data/lib/nnw/shared/ffi_loader.rb +63 -0
  7. data/lib/nnw/shared/memory_contract.rb +204 -0
  8. data/lib/nnw/shared/nv_array.rb +710 -0
  9. data/lib/nnw/shared/recovery_protocol.rb +307 -0
  10. data/lib/nvruby/configuration.rb +217 -0
  11. data/lib/nvruby/cuda/device.rb +275 -0
  12. data/lib/nvruby/cuda/device_props.rb +202 -0
  13. data/lib/nvruby/cuda/graph.rb +265 -0
  14. data/lib/nvruby/cuda/graph_bindings.rb +119 -0
  15. data/lib/nvruby/cuda/library_loader.rb +285 -0
  16. data/lib/nvruby/cuda/memory.rb +410 -0
  17. data/lib/nvruby/cuda/runtime_api.rb +804 -0
  18. data/lib/nvruby/cuda/stream.rb +234 -0
  19. data/lib/nvruby/dtype.rb +139 -0
  20. data/lib/nvruby/epilogues.rb +438 -0
  21. data/lib/nvruby/errors.rb +303 -0
  22. data/lib/nvruby/half.rb +97 -0
  23. data/lib/nvruby/jit/compiled_kernel.rb +80 -0
  24. data/lib/nvruby/jit/compiler.rb +231 -0
  25. data/lib/nvruby/jit/driver_api_bindings.rb +363 -0
  26. data/lib/nvruby/jit/kernel.rb +240 -0
  27. data/lib/nvruby/jit/kernel_module.rb +133 -0
  28. data/lib/nvruby/jit/kernels/activations.rb +179 -0
  29. data/lib/nvruby/jit/kernels/attention.rb +504 -0
  30. data/lib/nvruby/jit/kernels/elementwise.rb +488 -0
  31. data/lib/nvruby/jit/kernels/loss.rb +213 -0
  32. data/lib/nvruby/jit/kernels/normalization.rb +200 -0
  33. data/lib/nvruby/jit/kernels/optimizer.rb +193 -0
  34. data/lib/nvruby/jit/nvrtc_bindings.rb +282 -0
  35. data/lib/nvruby/linalg/cublas_bindings.rb +295 -0
  36. data/lib/nvruby/linalg/cublaslt_bindings.rb +342 -0
  37. data/lib/nvruby/linalg/epilog.rb +67 -0
  38. data/lib/nvruby/linalg/matmul.rb +247 -0
  39. data/lib/nvruby/linalg/matmul_plan.rb +229 -0
  40. data/lib/nvruby/linalg/optimized_matmul.rb +412 -0
  41. data/lib/nvruby/memory/cuda_async_memory_resource.rb +123 -0
  42. data/lib/nvruby/memory/cuda_memory_resource.rb +68 -0
  43. data/lib/nvruby/memory/device_memory_resource.rb +106 -0
  44. data/lib/nvruby/memory/pinned_host_memory_resource.rb +112 -0
  45. data/lib/nvruby/memory/pool_memory_resource.rb +242 -0
  46. data/lib/nvruby/memory/stats.rb +107 -0
  47. data/lib/nvruby/memory.rb +124 -0
  48. data/lib/nvruby/version.rb +5 -0
  49. metadata +108 -0
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Ignis Shared FFI Loader
4
+ #
5
+ # Centralizes the loading of the ruby-ffi gem across the entire Ignis platform.
6
+ # FFI is used ONLY for struct-heavy one-shot calls (device properties, VMM structs, etc.).
7
+ # All hot-path calls use Fiddle directly.
8
+ #
9
+ # The FFI gem is installed system-wide in the MSVC Ruby build at:
10
+ # C:\RubyMSVC34\lib\ruby\gems\3.4.0\gems\ffi-1.17.2\lib\ffi.rb
11
+ #
12
+ # If a vendored copy exists at vendor/ffi/ inside the project root, it takes priority.
13
+ # This allows deploying to machines without system-installed FFI.
14
+
15
module Ignis
  module Shared
    # Loads the ruby-ffi gem at most once per process.
    #
    # When a vendored copy exists under VENDOR_FFI_PATH (one directory per
    # version, named "ffi-<version>", each with a "lib" subdirectory), its lib
    # directory is prepended to $LOAD_PATH so that `require 'ffi'` resolves to
    # it before any other copy on the load path.
    module FFILoader
      # @return [String] the project root directory (three levels above this file)
      PROJECT_ROOT = File.expand_path(File.join(__dir__, '..', '..', '..')).freeze

      # @return [String] directory holding the vendored FFI gem versions
      VENDOR_FFI_PATH = File.join(PROJECT_ROOT, 'vendor', 'ffi', 'gems').freeze

      @loaded = false

      class << self
        # Load the FFI gem, preferring the vendored copy if one is available.
        #
        # Subsequent calls are no-ops once the gem has been required.
        #
        # @return [true, nil] true on the call that performed the load, nil afterwards
        # @raise [LoadError] if FFI cannot be loaded from any source
        def load!
          return if @loaded

          vendored_path = detect_vendored_ffi
          if vendored_path && !$LOAD_PATH.include?(vendored_path)
            # Prepend so the vendored copy shadows any other ffi on the path.
            $LOAD_PATH.unshift(vendored_path)
          end

          require 'ffi'
          @loaded = true
        end

        # @return [Boolean] whether FFI has been loaded through this loader
        def loaded?
          @loaded
        end

        private

        # Detect the vendored FFI gem directory.
        #
        # @return [String, nil] path to the newest vendored FFI lib directory,
        #   or nil if the vendor directory is missing or holds no ffi-*/lib entry
        def detect_vendored_ffi
          return nil unless Dir.exist?(VENDOR_FFI_PATH)

          lib_dirs = Dir.glob(File.join(VENDOR_FFI_PATH, 'ffi-*', 'lib'))

          # max_by yields nil for an empty list, which covers "nothing vendored".
          lib_dirs.max_by { |lib_dir| vendored_version(lib_dir) }
        end

        # Extract a numerically comparable version from a vendored lib path.
        #
        # The version is read from the gem directory name only (the parent of
        # "lib"), so other path segments cannot be mistaken for it. Versions are
        # compared as integer triples because string comparison would rank
        # "1.9.0" above "1.17.2".
        #
        # @param lib_dir [String] path of the form ".../ffi-<version>/lib"
        # @return [Array<Integer>] [major, minor, patch]; [0, 0, 0] when the
        #   directory name carries no "ffi-X.Y.Z" prefix
        def vendored_version(lib_dir)
          gem_dir_name = File.basename(File.dirname(lib_dir))
          version = gem_dir_name[/\Affi-(\d+\.\d+\.\d+)/, 1] || '0.0.0'
          version.split('.').map(&:to_i)
        end
      end
    end
  end
end
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
module Ignis
  module Shared
    # MemoryContract — Pinned memory ownership enforcement.
    #
    # Prevents double-free and use-after-free — the actual bugs that killed
    # the previous Ignis build.
    #
    # Rules:
    #   - An NvArray may only be freed by its current owner.
    #   - Ignis is the default owner after allocation.
    #   - NvCCL must call acquire before operating, release when done.
    #   - WNAIS must call acquire before NOVA I/O, release when done.
    #   - Acquiring while ref_count > 1 raises MemoryContractViolation.
    #   - Pinned host memory is always owned by Ignis::Memory. NvCCL may
    #     read it but may not free it.
    #
    # NOTE(review): this class never reads a ref_count itself; the ref_count
    # rule is presumably enforced by NvArray#transfer_ownership — confirm.
    #
    # Thread-safe: instance operations are protected by a Monitor, and the
    # singleton itself is created under a Mutex.
    class MemoryContract
      # Owner an array returns to when it is released.
      DEFAULT_OWNER = :nvruby

      # Default minimum age, in seconds, for an array to show up in #audit.
      DEFAULT_AUDIT_AGE_SECONDS = 5.0

      # Guards creation/replacement of the singleton so that two threads racing
      # on the first call cannot end up with two separate registries.
      INSTANCE_LOCK = Mutex.new
      private_constant :INSTANCE_LOCK

      # @return [MemoryContract] singleton instance
      def self.instance
        @instance || INSTANCE_LOCK.synchronize { @instance ||= new }
      end

      # Reset the singleton instance (for testing only).
      # @return [MemoryContract] the fresh instance
      def self.reset!
        INSTANCE_LOCK.synchronize { @instance = new }
      end

      # Acquire ownership of an NvArray.
      #
      # Transfers ownership from the current owner to the requesting layer.
      #
      # @param array [NvArray] the array to acquire
      # @param by [Symbol] the requesting owner (must be in NvArray::VALID_OWNERS)
      # @return [Boolean] true if acquisition succeeded
      # @raise [ArgumentError] if `by` is not a valid owner
      # @raise [MemoryContractViolation] if array is already owned by `by`
      def self.acquire(array, by:)
        instance.acquire(array, by: by)
      end

      # Release ownership of an NvArray back to the default owner (:nvruby).
      #
      # @param array [NvArray] the array to release
      # @param by [Symbol] the current owner releasing the array
      # @return [Boolean] true if release succeeded
      # @raise [ArgumentError] if `by` is not a valid owner
      # @raise [MemoryContractViolation] if `by` is not the current owner
      def self.release(array, by:)
        instance.release(array, by: by)
      end

      # Assert that an NvArray is owned by the expected owner.
      #
      # @param array [NvArray] the array to check
      # @param expected_owner [Symbol] the expected owner
      # @return [void]
      # @raise [ArgumentError] if `expected_owner` is not a valid owner
      # @raise [MemoryContractViolation] if the array's owner doesn't match
      def self.assert_owner!(array, expected_owner)
        instance.assert_owner!(array, expected_owner)
      end

      # Audit all tracked arrays for potential leaks.
      #
      # Reports every array that is still tracked and has been tracked for
      # longer than `older_than` seconds. Arrays that were freed but never
      # untracked are included and flagged with `freed: true`.
      #
      # @param older_than [Numeric] minimum tracked age in seconds
      # @return [Array<Hash>] array of
      #   {array_id:, owner:, age_ms:, shape:, dtype:, freed:}
      def self.audit(older_than: DEFAULT_AUDIT_AGE_SECONDS)
        instance.audit(older_than: older_than)
      end

      # Track an NvArray in the contract system.
      #
      # @param array [NvArray]
      # @return [void]
      def self.track(array)
        instance.track(array)
      end

      # Remove an NvArray from tracking (called after free).
      #
      # @param array [NvArray]
      # @return [void]
      def self.untrack(array)
        instance.untrack(array)
      end

      # Get the number of tracked arrays.
      # @return [Integer]
      def self.tracked_count
        instance.tracked_count
      end

      # Instance methods

      def initialize
        @monitor = Monitor.new
        @tracked = {} # id -> {array:, tracked_at: monotonic seconds}
      end

      # Instance-level implementation of {.acquire}.
      #
      # @param array [NvArray]
      # @param by [Symbol]
      # @return [Boolean] true
      # @raise [ArgumentError, MemoryContractViolation]
      def acquire(array, by:)
        validate_owner!(by)

        @monitor.synchronize do
          if array.owner == by
            raise MemoryContractViolation,
                  "NvArray##{array.id} is already owned by #{by.inspect} — double-acquire"
          end

          array.transfer_ownership(by)
          # Arrays that reach the contract only through acquire still need to
          # be visible to #audit.
          track_if_needed(array)
          true
        end
      end

      # Instance-level implementation of {.release}.
      #
      # @param array [NvArray]
      # @param by [Symbol]
      # @return [Boolean] true
      # @raise [ArgumentError, MemoryContractViolation]
      def release(array, by:)
        validate_owner!(by)

        @monitor.synchronize do
          unless array.owner == by
            raise MemoryContractViolation,
                  "NvArray##{array.id} is owned by #{array.owner.inspect}, " \
                  "not #{by.inspect} — cannot release"
          end

          array.transfer_ownership(DEFAULT_OWNER)
          true
        end
      end

      # Instance-level implementation of {.assert_owner!}.
      #
      # @param array [NvArray]
      # @param expected_owner [Symbol]
      # @return [void]
      # @raise [ArgumentError, MemoryContractViolation]
      def assert_owner!(array, expected_owner)
        validate_owner!(expected_owner)

        actual_owner = @monitor.synchronize { array.owner }
        return if actual_owner == expected_owner

        raise MemoryContractViolation,
              "NvArray##{array.id} expected owner #{expected_owner.inspect}, " \
              "actual #{actual_owner.inspect}"
      end

      # Instance-level implementation of {.audit}.
      #
      # @param older_than [Numeric] minimum tracked age in seconds
      # @return [Array<Hash>] one hash per tracked array older than the threshold
      def audit(older_than: DEFAULT_AUDIT_AGE_SECONDS)
        @monitor.synchronize do
          now = monotonic_now

          @tracked.each_value.filter_map do |entry|
            age_seconds = now - entry[:tracked_at]
            next unless age_seconds > older_than

            arr = entry[:array]
            {
              array_id: arr.id,
              owner: arr.owner,
              age_ms: (age_seconds * 1000).round,
              shape: arr.shape,
              dtype: arr.dtype,
              freed: arr.freed?
            }
          end
        end
      end

      # Start (or restart) tracking an array; resets its tracked age.
      #
      # @param array [NvArray]
      # @return [void]
      def track(array)
        @monitor.synchronize do
          @tracked[array.id] = new_entry(array)
        end
      end

      # Stop tracking an array. Unknown arrays are ignored.
      #
      # @param array [NvArray]
      # @return [void]
      def untrack(array)
        @monitor.synchronize do
          @tracked.delete(array.id)
        end
      end

      # @return [Integer] number of arrays currently tracked
      def tracked_count
        @monitor.synchronize { @tracked.size }
      end

      private

      # Automatically track array if not already tracked.
      # Callers must already hold the monitor.
      # @param array [NvArray]
      def track_if_needed(array)
        @tracked[array.id] ||= new_entry(array)
      end

      # Build a registry entry stamped with the current monotonic time.
      # @param array [NvArray]
      # @return [Hash] {array:, tracked_at:}
      def new_entry(array)
        { array: array, tracked_at: monotonic_now }
      end

      # Monotonic clock reading, so tracked ages are immune to wall-clock
      # adjustments.
      # @return [Float] seconds
      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end

      # Validate owner parameter.
      # @param owner [Symbol]
      # @raise [ArgumentError]
      def validate_owner!(owner)
        return if NvArray::VALID_OWNERS.include?(owner)

        raise ArgumentError,
              "Invalid owner #{owner.inspect}. Valid: #{NvArray::VALID_OWNERS}"
      end
    end
  end
end