yakmesh 2.8.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/CHANGELOG.md +637 -0
  2. package/CONTRIBUTING.md +42 -0
  3. package/Caddyfile +77 -0
  4. package/README.md +119 -29
  5. package/adapters/adapter-mlv-bible/README.md +124 -0
  6. package/adapters/adapter-mlv-bible/index.js +400 -0
  7. package/adapters/chat-mod-adapter.js +532 -0
  8. package/adapters/content-adapter.js +273 -0
  9. package/content/api.js +50 -41
  10. package/content/index.js +2 -2
  11. package/content/store.js +355 -173
  12. package/dashboard/index.html +19 -3
  13. package/database/replication.js +117 -37
  14. package/docs/CRYPTO-AGILITY.md +204 -0
  15. package/docs/MTLS-RESEARCH.md +367 -0
  16. package/docs/NAMCHE-SPEC.md +681 -0
  17. package/docs/PEERQUANTA-YAKMESH-INTEGRATION.md +407 -0
  18. package/docs/PRECISION-DISCLOSURE.md +96 -0
  19. package/docs/README.md +76 -0
  20. package/docs/ROADMAP-2.4.0.md +447 -0
  21. package/docs/ROADMAP-2.5.0.md +244 -0
  22. package/docs/SECURITY-AUDIT-REPORT.md +306 -0
  23. package/docs/SST-INTEGRATION.md +712 -0
  24. package/docs/STEADYWATCH-IMPLEMENTATION.md +303 -0
  25. package/docs/TERNARY-AUDIT-REPORT.md +247 -0
  26. package/docs/TME-FAQ.md +221 -0
  27. package/docs/WHITEPAPER.md +623 -0
  28. package/docs/adapters.html +1001 -0
  29. package/docs/advanced-systems.html +1045 -0
  30. package/docs/annex.html +1046 -0
  31. package/docs/api.html +970 -0
  32. package/docs/business/response-templates.md +160 -0
  33. package/docs/c2c.html +1225 -0
  34. package/docs/cli.html +1332 -0
  35. package/docs/configuration.html +1248 -0
  36. package/docs/darshan.html +1085 -0
  37. package/docs/dharma.html +966 -0
  38. package/docs/docs-bundle.html +1075 -0
  39. package/docs/docs.css +3120 -0
  40. package/docs/docs.js +556 -0
  41. package/docs/doko.html +969 -0
  42. package/docs/geo-proof.html +858 -0
  43. package/docs/getting-started.html +840 -0
  44. package/docs/gumba-tutorial.html +1144 -0
  45. package/docs/gumba.html +1098 -0
  46. package/docs/index.html +914 -0
  47. package/docs/jhilke.html +1312 -0
  48. package/docs/karma.html +1100 -0
  49. package/docs/katha.html +1037 -0
  50. package/docs/lama.html +978 -0
  51. package/docs/mandala.html +1067 -0
  52. package/docs/mani.html +964 -0
  53. package/docs/mantra.html +967 -0
  54. package/docs/mesh.html +1409 -0
  55. package/docs/nakpak.html +869 -0
  56. package/docs/namche.html +928 -0
  57. package/docs/nav-order.json +53 -0
  58. package/docs/prahari.html +1043 -0
  59. package/docs/prism-bash.min.js +1 -0
  60. package/docs/prism-javascript.min.js +1 -0
  61. package/docs/prism-json.min.js +1 -0
  62. package/docs/prism-tomorrow.min.css +1 -0
  63. package/docs/prism.min.js +1 -0
  64. package/docs/privacy.html +699 -0
  65. package/docs/quick-reference.html +1181 -0
  66. package/docs/sakshi.html +1402 -0
  67. package/docs/sandboxing.md +386 -0
  68. package/docs/seva.html +911 -0
  69. package/docs/sherpa.html +871 -0
  70. package/docs/studio.html +860 -0
  71. package/docs/stupa.html +995 -0
  72. package/docs/tailwind.min.css +2 -0
  73. package/docs/tattva.html +1332 -0
  74. package/docs/terms.html +686 -0
  75. package/docs/time-server-deployment.md +166 -0
  76. package/docs/time-sources.html +1392 -0
  77. package/docs/tivra.html +1127 -0
  78. package/docs/trademark-policy.html +686 -0
  79. package/docs/tribhuj.html +1183 -0
  80. package/docs/trust-security.html +1029 -0
  81. package/docs/tutorials/backup-recovery.html +654 -0
  82. package/docs/tutorials/dashboard.html +604 -0
  83. package/docs/tutorials/domain-setup.html +605 -0
  84. package/docs/tutorials/host-website.html +456 -0
  85. package/docs/tutorials/mesh-network.html +505 -0
  86. package/docs/tutorials/mobile-access.html +445 -0
  87. package/docs/tutorials/privacy.html +467 -0
  88. package/docs/tutorials/raspberry-pi.html +600 -0
  89. package/docs/tutorials/security-basics.html +539 -0
  90. package/docs/tutorials/share-files.html +431 -0
  91. package/docs/tutorials/troubleshooting.html +637 -0
  92. package/docs/tutorials/trust-karma.html +419 -0
  93. package/docs/tutorials/yak-protocol.html +456 -0
  94. package/docs/tutorials.html +1034 -0
  95. package/docs/vani.html +1270 -0
  96. package/docs/webserver.html +809 -0
  97. package/docs/yak-protocol.html +940 -0
  98. package/docs/yak-timeserver-design.md +475 -0
  99. package/docs/yakapp.html +1015 -0
  100. package/docs/ypc27.html +1069 -0
  101. package/docs/yurt.html +1344 -0
  102. package/embedded-docs/bundle.js +334 -74
  103. package/gossip/protocol.js +247 -27
  104. package/identity/key-resolver.js +262 -0
  105. package/identity/machine-seed.js +632 -0
  106. package/identity/node-key.js +669 -368
  107. package/identity/tribhuj-ratchet.js +506 -0
  108. package/knowledge-base.js +37 -8
  109. package/launcher/yakmesh.bat +62 -0
  110. package/launcher/yakmesh.sh +70 -0
  111. package/mesh/annex.js +462 -108
  112. package/mesh/beacon-broadcast.js +113 -1
  113. package/mesh/darshan.js +1718 -0
  114. package/mesh/gumba.js +1567 -0
  115. package/mesh/jhilke.js +651 -0
  116. package/mesh/katha.js +1012 -0
  117. package/mesh/nakpak-routing.js +8 -5
  118. package/mesh/network.js +724 -34
  119. package/mesh/pulse-sync.js +4 -1
  120. package/mesh/rate-limiter.js +127 -15
  121. package/mesh/seva.js +526 -0
  122. package/mesh/sherpa-discovery.js +89 -8
  123. package/mesh/sybil-defense.js +19 -5
  124. package/mesh/temporal-encoder.js +4 -3
  125. package/mesh/vani.js +1364 -0
  126. package/mesh/yurt.js +1340 -0
  127. package/models/entropy-sentinel.onnx +0 -0
  128. package/models/karma-trust.onnx +0 -0
  129. package/models/manifest.json +43 -0
  130. package/models/sakshi-anomaly.onnx +0 -0
  131. package/oracle/code-proof-protocol.js +7 -6
  132. package/oracle/codebase-lock.js +257 -28
  133. package/oracle/index.js +74 -15
  134. package/oracle/ma902-snmp.js +678 -0
  135. package/oracle/module-sealer.js +5 -3
  136. package/oracle/network-identity.js +16 -0
  137. package/oracle/packet-checksum.js +201 -0
  138. package/oracle/sst.js +579 -0
  139. package/oracle/ternary-144t.js +714 -0
  140. package/oracle/ternary-ml.js +481 -0
  141. package/oracle/time-api.js +239 -0
  142. package/oracle/time-source.js +137 -47
  143. package/oracle/validation-oracle-hardened.js +1111 -1071
  144. package/oracle/validation-oracle.js +4 -2
  145. package/oracle/ypc27.js +211 -0
  146. package/package.json +20 -3
  147. package/protocol/yak-handler.js +35 -9
  148. package/protocol/yak-protocol.js +28 -13
  149. package/reference/cpp/yakmesh_mceliece_shard.cpp +168 -0
  150. package/reference/cpp/yakmesh_ypc27.cpp +179 -0
  151. package/sbom.json +87 -0
  152. package/scripts/security-audit.mjs +264 -0
  153. package/scripts/update-docs-nav.js +194 -0
  154. package/scripts/update-docs-sidebar.cjs +164 -0
  155. package/security/crypto-config.js +4 -3
  156. package/security/dharma-moderation.js +517 -0
  157. package/security/doko-identity.js +193 -143
  158. package/security/domain-consensus.js +86 -85
  159. package/security/fs-hardening.js +620 -0
  160. package/security/hardware-attestation.js +5 -3
  161. package/security/hybrid-trust.js +227 -87
  162. package/security/karma-rate-limiter.js +692 -0
  163. package/security/khata-protocol.js +22 -21
  164. package/security/khata-trust-integration.js +277 -150
  165. package/security/memory-safety.js +635 -0
  166. package/security/mesh-auth.js +11 -10
  167. package/security/mesh-revocation.js +373 -5
  168. package/security/namche-gateway.js +298 -69
  169. package/security/sakshi.js +460 -3
  170. package/security/sangha.js +770 -0
  171. package/security/secure-config.js +473 -0
  172. package/security/silicon-parity.js +13 -10
  173. package/security/steadywatch.js +1142 -0
  174. package/security/strike-system.js +32 -3
  175. package/security/temporal-signing.js +488 -0
  176. package/security/trit-commitment.js +464 -0
  177. package/server/crypto/annex.js +247 -0
  178. package/server/darshan-api.js +343 -0
  179. package/server/index.js +3259 -362
  180. package/server/komm-api.js +668 -0
  181. package/utils/accel.js +2273 -0
  182. package/utils/ternary-id.js +79 -0
  183. package/utils/verify-worker.js +57 -0
  184. package/webserver/index.js +95 -5
  185. package/assets/yakmesh-logo.png +0 -0
  186. package/assets/yakmesh-logo.svg +0 -80
  187. package/assets/yakmesh-logo2.png +0 -0
  188. package/assets/yakmesh-logo2sm.png +0 -0
  189. package/assets/ymsm.png +0 -0
  190. package/website/assets/silhouettes/adapters.svg +0 -107
  191. package/website/assets/silhouettes/api-endpoints.svg +0 -115
  192. package/website/assets/silhouettes/atomic-clock.svg +0 -83
  193. package/website/assets/silhouettes/base-camp.svg +0 -81
  194. package/website/assets/silhouettes/bridge.svg +0 -69
  195. package/website/assets/silhouettes/docs-bundle.svg +0 -113
  196. package/website/assets/silhouettes/doko-basket.svg +0 -70
  197. package/website/assets/silhouettes/fortress.svg +0 -93
  198. package/website/assets/silhouettes/gateway.svg +0 -54
  199. package/website/assets/silhouettes/gears.svg +0 -93
  200. package/website/assets/silhouettes/globe-satellite.svg +0 -67
  201. package/website/assets/silhouettes/karma-wheel.svg +0 -137
  202. package/website/assets/silhouettes/lama-council.svg +0 -141
  203. package/website/assets/silhouettes/mandala-network.svg +0 -169
  204. package/website/assets/silhouettes/mani-stones.svg +0 -149
  205. package/website/assets/silhouettes/mantra-wheel.svg +0 -116
  206. package/website/assets/silhouettes/mesh-nodes.svg +0 -113
  207. package/website/assets/silhouettes/nakpak.svg +0 -56
  208. package/website/assets/silhouettes/peak-lightning.svg +0 -73
  209. package/website/assets/silhouettes/sherpa.svg +0 -69
  210. package/website/assets/silhouettes/stupa-tower.svg +0 -119
  211. package/website/assets/silhouettes/tattva-eye.svg +0 -78
  212. package/website/assets/silhouettes/terminal.svg +0 -74
  213. package/website/assets/silhouettes/webserver.svg +0 -145
  214. package/website/assets/silhouettes/yak.svg +0 -78
  215. package/website/assets/yakmesh-logo.png +0 -0
  216. package/website/assets/yakmesh-logo.webp +0 -0
  217. package/website/assets/yakmesh-logo128x140.webp +0 -0
  218. package/website/assets/yakmesh-logo2.png +0 -0
  219. package/website/assets/yakmesh-logo2.svg +0 -51
  220. package/website/assets/yakmesh-logo40x44.webp +0 -0
  221. package/website/assets/yakmesh.gif +0 -0
  222. package/website/assets/yakmesh.ico +0 -0
  223. package/website/assets/yakmesh.jpg +0 -0
  224. package/website/assets/yakmesh.pdf +0 -0
  225. package/website/assets/yakmesh.png +0 -0
  226. package/website/assets/yakmesh.svg +0 -70
  227. package/website/assets/yakmesh128.webp +0 -0
  228. package/website/assets/yakmesh32.png +0 -0
  229. package/website/assets/yakmesh32.svg +0 -65
  230. package/website/assets/yakmesh32o.ico +0 -2
  231. package/website/assets/yakmesh32o.svg +0 -65
  232. package/website/assets/yakmesh32o.svgz +0 -0
package/utils/accel.js ADDED
@@ -0,0 +1,2273 @@
1
+ /**
2
+ * ACCEL — Adaptive Compute & Crypto Engine Layer
3
+ *
4
+ * Heterogeneous hardware acceleration for yakmesh data propagation.
5
+ * Routes cryptographic and ML inference operations to the fastest
6
+ * available backend: CPU-SIMD → GPU (CUDA) → NPU (DirectML) → Pure JS.
7
+ *
8
+ * Architecture:
9
+ * ┌─────────────────────────────────────────────────┐
10
+ * │ accel.js (Scheduler) │
11
+ * └───────┬───────────────┬───────────────┬─────────┘
12
+ * │ │ │
13
+ * ┌──────┴──────┐ ┌─────┴──────┐ ┌──────┴──────┐
14
+ * │ CPU-SIMD │ │ NVIDIA │ │ AMD NPU │
15
+ * │ (OpenSSL/ │ │ (CUDA/ │ │ (ONNX + │
16
+ * │ liboqs) │ │ ONNX) │ │ DirectML) │
17
+ * └─────────────┘ └────────────┘ └─────────────┘
18
+ *
19
+ * Fallback chain: Native addon → GPU batch → Node.js crypto → @noble (pure JS)
20
+ *
21
+ * Supported hardware:
22
+ * CPU: AVX-512, VAES, SHA-NI, GFNI via OpenSSL / liboqs native addon
23
+ * GPU: NVIDIA RTX (CUDA) for batch NTT / PQ crypto verification
24
+ * NPU: AMD XDNA (DirectML) for ML inference (SAKSHI anomaly, KARMA trust)
25
+ *
26
+ * @module utils/accel
27
+ * @version 1.0.0
28
+ * @license MIT
29
+ * @copyright 2026 YAKMESH™ Contributors
30
+ */
31
+
32
+ import { createHash, createCipheriv, createDecipheriv, randomBytes } from 'crypto';
33
+ import { sha3_256 as nobleSha3_256 } from '@noble/hashes/sha3.js';
34
+ import { ml_dsa65 } from '@noble/post-quantum/ml-dsa.js';
35
+ import { ml_kem768 } from '@noble/post-quantum/ml-kem.js';
36
+ import { bytesToHex, hexToBytes } from '@noble/hashes/utils.js';
37
+ import { createLogger } from './logger.js';
38
+ import os from 'os';
39
+ import { execSync } from 'child_process';
40
+ import { Worker } from 'worker_threads';
41
+ import { fileURLToPath } from 'url';
42
+
43
+ const log = createLogger('utils:accel');
44
+
45
+ // =============================================================================
46
+ // HARDWARE CAPABILITY FLAGS
47
+ // =============================================================================
48
+
49
/**
 * Hardware capability flags, filled in by probe() and its helpers at startup.
 *
 * The object is sealed: the set of properties is fixed, but the values remain
 * writable so the probe helpers can assign them.
 */
export const HW = Object.seal({
  // ---- CPU identity ----
  cpuModel: '',
  cpuArch: '',
  cores: 0,
  threads: 0,

  // ---- CPU SIMD features (heuristic, derived from the CPU model string) ----
  avx512: false,
  vaes: false,
  shaNI: false,
  gfni: false,

  // ---- SHA3 available through the Node.js crypto module ----
  nativeSha3: false,

  // ---- NVIDIA GPU (from nvidia-smi) ----
  nvGpu: false,
  nvGpuName: '',
  nvGpuVRAM: 0,          // MiB
  nvComputeCap: '',      // e.g. '8.6'
  nvCudaVersion: '',     // e.g. '13.1'
  nvDriverVersion: '',
  nvGpuTops: 0,          // INT8 Tensor Core TOPS

  // ---- AMD NPU (XDNA) ----
  amdNpu: false,
  amdNpuTops: 0,

  // ---- Combined compute budget: GPU + NPU INT8 TOPS ----
  totalTops: 0,

  // ---- ONNX Runtime ----
  onnxRuntime: false,
  onnxProviders: [],     // e.g. ['dml', 'cuda', 'cpu']

  // ---- Native post-quantum addon ----
  nativePQ: false,
  nativePQBackend: '',   // 'liboqs' | 'pqcrypto' | 'aspect' | ''
});
93
+
94
+ // =============================================================================
95
+ // PERFORMANCE TELEMETRY
96
+ // =============================================================================
97
+
98
/**
 * Call / accelerated-path counters for the acceleration layer.
 * `lastReset` holds the epoch-millisecond timestamp taken when the
 * counters were created.
 */
const telemetry = {
  // SHA3-256
  sha3Calls: 0,
  sha3NativeHits: 0,

  // ML-DSA-65 signing
  signCalls: 0,
  signNativeHits: 0,

  // ML-DSA-65 verification
  verifyCalls: 0,
  verifyNativeHits: 0,

  // Batch verification
  batchVerifyCalls: 0,
  batchGpuHits: 0,

  // ML-KEM-768
  kemCalls: 0,
  kemNativeHits: 0,

  // ML inference
  inferCalls: 0,
  inferNpuHits: 0,
  inferGpuHits: 0,

  lastReset: Date.now(),
};
118
+
119
+ // =============================================================================
120
+ // HARDWARE PROBE
121
+ // =============================================================================
122
+
123
/**
 * Detect the available hardware acceleration and record it in HW.
 * Intended to be called once at startup, before any crypto operations.
 *
 * The helper probes run in a fixed order because later ones read fields set
 * by earlier ones (the NPU probe reads HW.cpuModel; the ONNX probe reads
 * HW.nvGpu / HW.amdNpu).
 *
 * @returns {Promise<typeof HW>} Resolves to the populated capability flags.
 */
export async function probe() {
  const t0 = performance.now();
  log.info('ACCEL probing hardware capabilities...');

  // ---- CPU ----
  const cpus = os.cpus();
  HW.cpuModel = cpus[0]?.model || 'unknown';
  HW.cpuArch = os.arch();
  // os.cpus() reports logical processors only, so the logical count is the
  // best core estimate available here. The previous expression multiplied the
  // number of distinct model strings by (length / length) and therefore
  // evaluated to 1 on any machine with a single CPU model.
  HW.cores = cpus.length;
  HW.threads = cpus.length;

  // ---- CPU SIMD features (model-string heuristics) ----
  _detectCpuFeatures();

  // ---- SHA3 native ----
  HW.nativeSha3 = _probeNativeSha3();

  // ---- NVIDIA GPU ----
  _probeNvidiaGpu();

  // ---- AMD NPU ----
  _probeAmdNpu();

  // ---- ONNX Runtime ----
  await _probeOnnxRuntime();

  // ---- Native PQ addon ----
  _probeNativePQ();

  // ---- Combined TOPS budget ----
  HW.totalTops = (HW.nvGpuTops || 0) + (HW.amdNpuTops || 0);

  const elapsed = (performance.now() - t0).toFixed(1);

  // Capability summary for the log line: [enabled, label] pairs, in display order.
  const caps = [
    [HW.nativeSha3, 'SHA3-native'],
    [HW.avx512, 'AVX-512'],
    [HW.vaes, 'VAES'],
    [HW.shaNI, 'SHA-NI'],
    [HW.gfni, 'GFNI'],
    [HW.nvGpu, `GPU:${HW.nvGpuName}(${HW.nvGpuTops}T)`],
    [HW.amdNpu, `NPU:${HW.amdNpuTops}T`],
    [HW.totalTops > 0, `TOTAL:${HW.totalTops}TOPS`],
    [HW.onnxRuntime, `ONNX:[${HW.onnxProviders.join(',')}]`],
    [HW.nativePQ, `PQ:${HW.nativePQBackend}`],
  ]
    .filter(([enabled]) => enabled)
    .map(([, label]) => label);

  if (caps.length === 0) {
    caps.push('pure-JS-only');
  }

  log.info(`ACCEL probe complete in ${elapsed}ms — ${caps.join(' | ')}`);

  return HW;
}
184
+
185
/**
 * Extract the Intel Core generation from a lower-cased model string such as
 * "intel(r) core(tm) i7-8700k cpu @ 3.70ghz".
 *
 * Five-digit model numbers (10400, 13900) and four-digit numbers starting
 * with "1" (1165G7, 1255U) carry a two-digit generation; other four-digit
 * numbers (8700, 9700) carry a one-digit generation.
 *
 * @param {string} model — lower-cased CPU model string
 * @returns {number} generation number, or 0 when no "iN-NNNN" token is found
 */
function _intelCoreGeneration(model) {
  const digits = model.match(/i[3579]-(\d{4,5})/)?.[1];
  if (!digits) return 0;
  const genWidth = digits.length === 5 || digits[0] === '1' ? 2 : 1;
  return Number.parseInt(digits.slice(0, genWidth), 10);
}

/**
 * Set the HW SIMD flags (avx512, vaes, shaNI, gfni) from HW.cpuModel.
 * Heuristic only: it inspects the model string and never queries CPUID.
 * Does nothing unless HW.cpuArch is 'x64'. Flags are only ever set to true.
 */
function _detectCpuFeatures() {
  const model = HW.cpuModel.toLowerCase();
  if (HW.cpuArch !== 'x64') return;

  const enableAll = () => {
    HW.avx512 = true;
    HW.vaes = true;
    HW.shaNI = true;
    HW.gfni = true;
  };

  // ---- AMD ----
  const isEpyc = model.includes('epyc');
  if (model.includes('ryzen') || isEpyc) {
    const series = Number.parseInt(model.match(/(\d{4})/)?.[1] ?? '0', 10);

    // Ryzen 7000+ is treated as Zen 4. EPYC numbering differs: the 7xxx
    // families predate Zen 4, so EPYC must not share the Ryzen 7000 threshold
    // (previously every EPYC 7xxx was reported as having AVX-512).
    // NOTE(review): some Ryzen 7x20/7x30 mobile parts are believed to be
    // older cores rebranded into the 7000 series — confirm whether they need
    // to be excluded as well.
    const treatAsZen4 = isEpyc ? series >= 8000 : series >= 7000;
    if (treatAsZen4) {
      enableAll();
    } else if (series >= 3000) {
      HW.shaNI = true;
    }
  }

  // ---- Intel Core ----
  if (model.includes('core') && model.includes('intel')) {
    // Parse the generation from the "iN-NNNN" token. The previous regex took
    // the first two digits of any 4-5 digit run, so "i7-8700K" was read as
    // generation 87 and reported AVX-512.
    const gen = _intelCoreGeneration(model);
    // NOTE(review): 12th-gen and later client parts are generally reported to
    // ship without AVX-512 — confirm whether avx512 should be limited to gen 11.
    if (gen >= 11) {
      enableAll();
    } else if (gen >= 8) {
      HW.shaNI = true;
    }
  }

  // ---- Xeon ----
  // NOTE(review): this applies to every Xeon model string, including parts
  // older than the Ice Lake generation the original comment refers to.
  if (model.includes('xeon')) {
    HW.avx512 = true;
    HW.shaNI = true;
    // VAES / GFNI deliberately left unset for Xeons.
  }
}
235
+
236
/**
 * Check whether Node's crypto module can compute SHA3-256.
 * Hashes a fixed probe string and confirms the digest is 32 bytes long.
 *
 * @returns {boolean} true when 'sha3-256' is usable, false if createHash
 *   or the digest call throws
 */
function _probeNativeSha3() {
  let digestLength;
  try {
    digestLength = createHash('sha3-256')
      .update(Buffer.from('yakmesh-accel-probe'))
      .digest()
      .length;
  } catch {
    return false;
  }
  return digestLength === 32;
}
249
+
250
// NVIDIA GPU INT8 Tensor Core TOPS lookup (values as recorded by the original
// author, who cites official NVIDIA specs).
// Maps GPU name fragments → INT8 TOPS rating.
// Within each family the longer names come first, so that e.g.
// 'RTX 4070 Ti SUPER' is tried before 'RTX 4070 Ti' and 'RTX 4070'.
const GPU_TOPS_TABLE = [
  // Ada Lovelace (RTX 40-series)
  ['RTX 4090', 1321],
  ['RTX 4080 SUPER', 836],
  ['RTX 4080', 780],
  ['RTX 4070 Ti SUPER', 568],
  ['RTX 4070 Ti', 485],
  ['RTX 4070 SUPER', 418],
  ['RTX 4070', 364],
  ['RTX 4060 Ti', 353],
  ['RTX 4060', 242],
  // Ampere (RTX 30-series)
  ['RTX 3090 Ti', 320],
  ['RTX 3090', 285],
  ['RTX 3080 Ti', 273],
  ['RTX 3080', 238],
  ['RTX 3070 Ti', 174],
  ['RTX 3070', 163],
  ['RTX 3060 Ti', 163],
  ['RTX 3060', 101],
  ['RTX 3050', 73],
  // Turing (RTX 20-series)
  ['RTX 2080 Ti', 215],
  ['RTX 2080 SUPER', 181],
  ['RTX 2080', 161],
  ['RTX 2070 SUPER', 145],
  ['RTX 2070', 130],
  ['RTX 2060 SUPER', 115],
  ['RTX 2060', 104],
  // Workstation
  ['RTX A6000', 310],
  ['RTX A5500', 260],
  ['RTX A5000', 222],
  ['RTX A4500', 180],
  ['RTX A4000', 153],
  // Data center
  ['A100', 624],
  ['H100', 3958],
  ['L40', 362],
];

/**
 * Look up INT8 Tensor Core TOPS for a GPU by name (case-insensitive).
 *
 * A table entry matches only when it is not preceded by a letter or digit and
 * not followed by a digit. Plain substring matching made short entries hit
 * unrelated models — e.g. 'A100' matched inside 'RTX A1000'. Letter suffixes
 * (such as 'L40S') still match their base entry, as before.
 *
 * @param {string} gpuName — full name from nvidia-smi (e.g. 'NVIDIA GeForce RTX 3060')
 * @returns {number} — INT8 TOPS of the first matching table entry, or 0 if none matches
 */
function _lookupGpuTops(gpuName) {
  const upper = String(gpuName ?? '').toUpperCase();
  for (const [pattern, tops] of GPU_TOPS_TABLE) {
    // Escape regex metacharacters so table entries are always matched literally.
    const literal = pattern.toUpperCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const bounded = new RegExp(`(?<![A-Z0-9])${literal}(?!\\d)`);
    if (bounded.test(upper)) return tops;
  }
  return 0;
}
306
+
307
/**
 * Detect an NVIDIA GPU by running nvidia-smi and record the result in HW
 * (nvGpu, nvGpuName, nvComputeCap, nvGpuVRAM, nvDriverVersion, nvGpuTops,
 * nvCudaVersion). Only attempted on win32 and linux.
 *
 * Best-effort: if nvidia-smi is missing, fails, or times out, the fields set
 * so far are kept and the rest stay at their defaults.
 */
function _probeNvidiaGpu() {
  const platform = os.platform();
  if (platform !== 'win32' && platform !== 'linux') return;

  const execOptions = { timeout: 5000, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] };

  try {
    const output = execSync(
      'nvidia-smi --query-gpu=name,compute_cap,memory.total,driver_version --format=csv,noheader,nounits',
      { ...execOptions }
    ).trim();

    if (!output) return;

    // nvidia-smi prints one CSV row per GPU. Parse the first row only;
    // splitting the whole output on commas would fold the line break and the
    // next GPU's name into the driver-version field on multi-GPU machines.
    const [firstRow] = output.split(/\r?\n/);
    const parts = firstRow.split(',').map((field) => field.trim());
    if (parts.length >= 4) {
      const [name, computeCap, memoryTotal, driverVersion] = parts;
      HW.nvGpu = true;
      HW.nvGpuName = name;
      HW.nvComputeCap = computeCap;
      HW.nvGpuVRAM = Number.parseInt(memoryTotal, 10) || 0;
      HW.nvDriverVersion = driverVersion;
      HW.nvGpuTops = _lookupGpuTops(HW.nvGpuName);
      if (HW.nvGpuTops > 0) {
        log.debug(` GPU TOPS: ${HW.nvGpuName} → ${HW.nvGpuTops} INT8 TOPS`);
      }
    }

    // The CUDA version is read from the banner of the plain nvidia-smi output.
    const smiOutput = execSync('nvidia-smi', { ...execOptions });
    const cudaMatch = smiOutput.match(/CUDA Version:\s*([\d.]+)/);
    if (cudaMatch) {
      HW.nvCudaVersion = cudaMatch[1];
    }
  } catch (err) {
    // nvidia-smi not available (or it failed / timed out) — not an error for
    // the caller, but leave a trace for diagnostics.
    log.debug(` nvidia-smi probe skipped: ${err?.message ?? err}`);
  }
}
346
+
347
/**
 * Detect an AMD XDNA NPU and record it in HW.amdNpu / HW.amdNpuTops.
 * Windows only; returns immediately on other platforms.
 *
 * Step 1 asks PowerShell (Get-PnpDevice, all device classes) for an AMD
 * device whose friendly name mentions IPU, NPU, XDNA or the word AI.
 * Step 2 runs when step 1 found nothing or failed, and falls back to
 * matching known CPU model numbers in HW.cpuModel.
 */
function _probeAmdNpu() {
  if (os.platform() !== 'win32') return;

  const model = HW.cpuModel.toLowerCase();

  try {
    // PowerShell's -match is a case-insensitive regex search, so a bare 'AI'
    // alternative also matched names such as "AMD-RAID Controller".
    // \bAI\b restricts it to the standalone word.
    const output = execSync(
      'powershell -NoProfile -Command "Get-PnpDevice -ErrorAction SilentlyContinue | Where-Object { $_.FriendlyName -match \'AMD\' -and $_.FriendlyName -match \'IPU|NPU|XDNA|\\bAI\\b\' } | Select-Object -First 1 -ExpandProperty FriendlyName"',
      { timeout: 8000, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
    ).trim();

    if (output) {
      HW.amdNpu = true;
      log.debug(` NPU detected (PnP): ${output}`);
      // TOPS rating by CPU model; models not listed here keep amdNpuTops as is.
      if (model.includes('8700') || model.includes('8600')) {
        HW.amdNpuTops = 16;
      } else if (model.includes('7840') || model.includes('7940')) {
        HW.amdNpuTops = 10;
      }
    }
  } catch {
    // PnP query failed — the model-string fallback below still runs.
  }

  // Fallback: the PnP query can come back empty even when an NPU is present
  // (per the original author's note about the 8700F), so check the CPU model.
  if (!HW.amdNpu) {
    const knownNpuModels = ['8700f', '8700g', '8600g', '8500g', '7840', '7940', 'ai 9'];
    if (knownNpuModels.some((fragment) => model.includes(fragment))) {
      HW.amdNpu = true;
      HW.amdNpuTops = model.includes('8700') || model.includes('8600') ? 16 : 10;
      log.debug(` NPU detected (model fallback): ${HW.cpuModel} → ${HW.amdNpuTops} TOPS`);
    }
  }
}
392
+
393
/**
 * Probe for onnxruntime-node and record its execution providers in
 * HW.onnxRuntime / HW.onnxProviders.
 *
 * Provider discovery, in order of preference:
 *   1. ort.listSupportedBackends() — the `name` of each entry
 *   2. ort.env.getAvailableProviders() — older API, per the original notes
 *   3. inference from HW.amdNpu / HW.nvGpu, always ending with 'cpu'
 *
 * Any failure (package missing, or a discovery call throwing) leaves
 * HW.onnxRuntime set to false.
 */
async function _probeOnnxRuntime() {
  try {
    // Dynamic import: only resolves when onnxruntime-node is installed.
    const runtime = await import('onnxruntime-node');
    HW.onnxRuntime = true;

    if (typeof runtime.listSupportedBackends === 'function') {
      const backends = runtime.listSupportedBackends();
      HW.onnxProviders = backends.map((backend) => backend.name);
      return;
    }

    if (runtime.env?.getAvailableProviders) {
      HW.onnxProviders = runtime.env.getAvailableProviders();
      return;
    }

    // No discovery API: infer from detected hardware, accelerators first.
    const inferred = [];
    if (HW.amdNpu) inferred.push('dml');
    if (HW.nvGpu) inferred.push('cuda');
    inferred.push('cpu');
    HW.onnxProviders = inferred;
  } catch {
    // onnxruntime-node not installed (or provider discovery failed)
    HW.onnxRuntime = false;
  }
}
421
+
422
/**
 * Probe for a native post-quantum crypto addon.
 *
 * Tries each known package in priority order using import.meta.resolve
 * (availability check only — nothing is loaded here). The first package that
 * resolves sets HW.nativePQ and HW.nativePQBackend; if none resolves, HW is
 * left untouched.
 */
function _probeNativePQ() {
  // package name → backend label, in priority order
  const backendByPackage = new Map([
    ['liboqs-node', 'liboqs'],
    ['pqcrypto-node', 'pqcrypto'],
    ['@aspect/pq-native', 'aspect'],
  ]);

  for (const [packageName, backend] of backendByPackage) {
    let location;
    try {
      location = import.meta.resolve?.(packageName);
    } catch {
      // Not available — try the next candidate.
      continue;
    }
    if (!location) continue;

    HW.nativePQ = true;
    HW.nativePQBackend = backend;
    return;
  }
}
447
+
448
+ // =============================================================================
449
+ // TIER 1: CPU-NATIVE CRYPTO ACCELERATION
450
+ // =============================================================================
451
+
452
/**
 * SHA3-256 of the input.
 *
 * Uses Node's crypto module when HW.nativeSha3 is set, otherwise the
 * @noble/hashes implementation. Strings are hashed as UTF-8 on both paths.
 * Every call increments telemetry.sha3Calls; the native path also
 * increments telemetry.sha3NativeHits.
 *
 * @param {Uint8Array|Buffer|string} input — data to hash
 * @returns {Uint8Array} — 32-byte SHA3-256 digest
 */
export function sha3_256(input) {
  telemetry.sha3Calls++;
  const isText = typeof input === 'string';

  // Pure-JS fallback
  if (!HW.nativeSha3) {
    return nobleSha3_256(isText ? new TextEncoder().encode(input) : input);
  }

  telemetry.sha3NativeHits++;
  const hasher = createHash('sha3-256');
  if (isText) {
    hasher.update(input, 'utf8');
  } else {
    hasher.update(input);
  }

  // Expose the digest as a plain Uint8Array view, matching what the
  // @noble/hashes path returns.
  const digest = hasher.digest();
  return new Uint8Array(digest.buffer, digest.byteOffset, digest.byteLength);
}
485
+
486
/**
 * SHA3-256 as a hex string — hashes with sha3_256() and hex-encodes the result.
 *
 * @param {Uint8Array|Buffer|string} input — data to hash
 * @returns {string} — hex-encoded SHA3-256 digest
 */
export function sha3_256hex(input) {
  const digest = sha3_256(input);
  return bytesToHex(digest);
}
495
+
496
+ // =============================================================================
497
+ // TIER 1: ML-DSA-65 (Dilithium3) — Sign / Verify / Keygen
498
+ // =============================================================================
499
+
500
// Lazily loaded native PQ module.
//   null  → not attempted yet
//   false → unavailable (no addon detected, unknown backend, or import failed)
//   else  → the imported module namespace
let _nativePQ = null;

/**
 * Load the native PQ addon selected by HW.nativePQBackend, at most once.
 * The outcome is cached in _nativePQ. A failed import also clears
 * HW.nativePQ.
 *
 * @returns {Promise<object|false>} the module namespace, or false when unavailable
 */
async function _loadNativePQ() {
  if (_nativePQ !== null) return _nativePQ;

  if (!HW.nativePQ) {
    _nativePQ = false;
    return _nativePQ;
  }

  // backend label → loader for the matching package
  const loaders = new Map([
    ['liboqs', () => import('liboqs-node')],
    ['pqcrypto', () => import('pqcrypto-node')],
    ['aspect', () => import('@aspect/pq-native')],
  ]);
  const load = loaders.get(HW.nativePQBackend);

  try {
    _nativePQ = load ? await load() : false;
  } catch {
    _nativePQ = false;
    HW.nativePQ = false;
  }
  return _nativePQ;
}
520
+
521
/**
 * ML-DSA-65 Keygen — generate a post-quantum signing keypair.
 * Uses the native addon's ml_dsa65.keygen when the addon loads and exposes
 * it, otherwise the @noble/post-quantum implementation.
 *
 * Telemetry: counted under the sign counters. signCalls is incremented on
 * every call so that signNativeHits can never exceed signCalls (this mirrors
 * how mlKem768Keygen counts kemCalls / kemNativeHits).
 *
 * @param {Uint8Array} seed — 32-byte seed
 * @returns {Promise<{ publicKey: Uint8Array, secretKey: Uint8Array }>}
 */
export async function mlDsa65Keygen(seed) {
  telemetry.signCalls++;

  const native = await _loadNativePQ();

  if (native && native.ml_dsa65?.keygen) {
    telemetry.signNativeHits++;
    return native.ml_dsa65.keygen(seed);
  }

  return ml_dsa65.keygen(seed);
}
538
+
539
/**
 * ML-DSA-65 Sign — post-quantum digital signature.
 *
 * Synchronous: uses the native addon only if it has already been loaded
 * into _nativePQ and exposes ml_dsa65.sign; otherwise falls back to
 * @noble/post-quantum.
 *
 * @param {Uint8Array|string} message — strings are UTF-8 encoded
 * @param {Uint8Array|string} secretKey — strings are decoded as hex
 * @returns {Uint8Array} signature
 */
export function mlDsa65Sign(message, secretKey) {
  telemetry.signCalls++;

  // Keys may arrive as hex strings; the signing backends take bytes.
  const keyBytes = typeof secretKey === 'string' ? hexToBytes(secretKey) : secretKey;
  const messageBytes = typeof message === 'string' ? new TextEncoder().encode(message) : message;

  const nativeAvailable = Boolean(_nativePQ && _nativePQ.ml_dsa65?.sign);
  if (!nativeAvailable) {
    return ml_dsa65.sign(messageBytes, keyBytes);
  }

  telemetry.signNativeHits++;
  return _nativePQ.ml_dsa65.sign(messageBytes, keyBytes);
}
563
+
564
/**
 * ML-DSA-65 Verify — post-quantum signature verification.
 *
 * Synchronous: uses the native addon only if it has already been loaded
 * into _nativePQ and exposes ml_dsa65.verify; otherwise falls back to
 * @noble/post-quantum.
 *
 * @param {Uint8Array|string} signature — strings are decoded as hex
 * @param {Uint8Array|string} message — strings are UTF-8 encoded
 * @param {Uint8Array|string} publicKey — strings are decoded as hex
 * @returns {boolean}
 */
export function mlDsa65Verify(signature, message, publicKey) {
  telemetry.verifyCalls++;

  // Accept hex strings or bytes for the signature and key, text or bytes
  // for the message.
  const signatureBytes = typeof signature === 'string' ? hexToBytes(signature) : signature;
  const messageBytes = typeof message === 'string' ? new TextEncoder().encode(message) : message;
  const keyBytes = typeof publicKey === 'string' ? hexToBytes(publicKey) : publicKey;

  const nativeAvailable = Boolean(_nativePQ && _nativePQ.ml_dsa65?.verify);
  if (!nativeAvailable) {
    return ml_dsa65.verify(signatureBytes, messageBytes, keyBytes);
  }

  telemetry.verifyNativeHits++;
  return _nativePQ.ml_dsa65.verify(signatureBytes, messageBytes, keyBytes);
}
588
+
589
+ // =============================================================================
590
+ // TIER 1: ML-KEM-768 (Kyber) — Key Encapsulation
591
+ // =============================================================================
592
+
593
/**
 * ML-KEM-768 Keygen — generate a post-quantum KEM keypair.
 * Uses the native addon's ml_kem768.keygen when the addon loads and exposes
 * it, otherwise the @noble/post-quantum implementation.
 *
 * @param {Uint8Array} seed — 64-byte seed
 * @returns {Promise<{ publicKey: Uint8Array, secretKey: Uint8Array }>}
 */
export async function mlKem768Keygen(seed) {
  telemetry.kemCalls++;

  const nativeModule = await _loadNativePQ();
  const nativeAvailable = Boolean(nativeModule && nativeModule.ml_kem768?.keygen);
  if (!nativeAvailable) {
    return ml_kem768.keygen(seed);
  }

  telemetry.kemNativeHits++;
  return nativeModule.ml_kem768.keygen(seed);
}
610
+
611
/**
 * ML-KEM-768 Encapsulate — create a shared secret and its ciphertext.
 *
 * Synchronous: uses the native addon only if it has already been loaded
 * into _nativePQ and exposes ml_kem768.encapsulate; otherwise falls back to
 * @noble/post-quantum.
 *
 * @param {Uint8Array|string} publicKey — strings are decoded as hex
 * @returns {{ cipherText: Uint8Array, sharedSecret: Uint8Array }}
 */
export function mlKem768Encapsulate(publicKey) {
  telemetry.kemCalls++;

  const keyBytes = typeof publicKey === 'string' ? hexToBytes(publicKey) : publicKey;

  const nativeAvailable = Boolean(_nativePQ && _nativePQ.ml_kem768?.encapsulate);
  if (!nativeAvailable) {
    return ml_kem768.encapsulate(keyBytes);
  }

  telemetry.kemNativeHits++;
  return _nativePQ.ml_kem768.encapsulate(keyBytes);
}
630
+
631
/**
 * ML-KEM-768 Decapsulate — recover shared secret from ciphertext.
 *
 * String arguments are converted with `hexToBytes` (ciphertext first,
 * then secret key). The already-loaded native binding is preferred when
 * it exposes `ml_kem768.decapsulate`.
 *
 * @param {Uint8Array|string} cipherText
 * @param {Uint8Array|string} secretKey
 * @returns {Uint8Array} sharedSecret
 */
export function mlKem768Decapsulate(cipherText, secretKey) {
  telemetry.kemCalls++;

  const decodeHex = (value) => (typeof value === 'string' ? hexToBytes(value) : value);
  const ctBytes = decodeHex(cipherText);
  const skBytes = decodeHex(secretKey);

  const hasNativeDecapsulate = Boolean(_nativePQ && _nativePQ.ml_kem768?.decapsulate);
  if (!hasNativeDecapsulate) {
    return ml_kem768.decapsulate(ctBytes, skBytes);
  }

  telemetry.kemNativeHits++;
  return _nativePQ.ml_kem768.decapsulate(ctBytes, skBytes);
}
652
+
653
+ // =============================================================================
654
+ // TIER 2: GPU BATCH OPERATIONS
655
+ // =============================================================================
656
+
657
/**
 * Batch verification queue.
 * Collects individual verify requests and processes them in batches
 * when queue depth reaches threshold or flush timeout fires.
 *
 * Acceleration tiers for batch verification:
 *
 *   1. Worker Thread Pool (CPU-parallel)
 *      Distributes verification chunks across N worker threads
 *      (N = CPU core count). Each worker runs ML-DSA-65 verify in
 *      its own V8 isolate. Achieves near-linear speedup on multi-core
 *      processors. Active on all platforms.
 *      Batch of 256 on Ryzen 8700F (8 cores): ~55ms vs ~435ms sequential.
 *
 *   2. GPU/CUDA NTT Kernel (future roadmap)
 *      ML-DSA-65 verification's inner loop is NTT (Number Theoretic
 *      Transform) — a prime candidate for GPU SIMD lanes. When CUDA
 *      compute 8.0+ is detected, a precompiled .cubin kernel could
 *      batch all NTT operations into a single GPU dispatch.
 *      Estimated: 256 verifications in <5ms on RTX 4060+.
 *      Blocked on: custom CUDA NTT kernel compilation pipeline.
 *
 *   3. Sequential CPU fallback
 *      Used when worker pool is unavailable or batch is trivially small.
 *      Calls mlDsa65Verify synchronously per item.
 *
 * GPU kernel launch overhead (~5-10µs) means batching must clear
 * a minimum queue depth to justify the transfer cost.
 *
 * Settlement guarantee: every promise returned by enqueue() is settled.
 * If a worker errors or exits, a post to it fails, or the queue is
 * destroyed while jobs are in flight, the affected items are re-verified
 * sequentially on the calling thread instead of being left pending.
 */
class BatchVerifyQueue {
  /**
   * @param {Object} [options]
   * @param {number} [options.minBatchSize] — queue depth that triggers an immediate flush
   * @param {number} [options.maxBatchSize] — upper bound on items per processed batch
   * @param {number} [options.flushInterval] — ms to wait before flushing a partial batch
   */
  constructor(options = {}) {
    // Scale batch sizes with available compute TOPS
    // More TOPS → larger batches are worthwhile (GPU can eat them)
    const topsBudget = HW.totalTops || 0;
    this.minBatchSize = options.minBatchSize || (topsBudget >= 100 ? 16 : 8);
    this.maxBatchSize = options.maxBatchSize || (topsBudget >= 200 ? 512 : topsBudget >= 50 ? 256 : 128);
    this.flushInterval = options.flushInterval || 5; // ms
    this.queue = [];
    this._timer = null;
    this._onnxSession = null;
    this._gpuAvailable = false;

    // Worker thread pool
    this._workers = [];
    this._workerRound = 0;
    this._pendingJobs = new Map(); // jobId → { batch, worker }
    this._jobCounter = 0;
    this._poolReady = false;
  }

  /**
   * Initialize batch verification subsystem.
   * Creates worker thread pool and checks for GPU availability.
   * Calling it again while a pool is ready is a no-op, so a second call
   * cannot spawn a duplicate pool.
   */
  async initialize() {
    if (this._poolReady) return;

    // ---- Worker Thread Pool ----
    // Scale pool size with available compute: more TOPS → more workers
    const basePoolSize = os.cpus().length;
    const topsBoost = HW.totalTops >= 200 ? 2 : (HW.totalTops >= 50 ? 1 : 0);
    const poolSize = Math.max(2, Math.min(basePoolSize + topsBoost, 16));
    const workerPath = new URL('./verify-worker.js', import.meta.url);

    for (let i = 0; i < poolSize; i++) {
      try {
        const w = new Worker(workerPath);

        w.on('message', (msg) => this._onWorkerMessage(msg));

        w.on('error', (err) => {
          log.warn(`Verify worker ${i} error: ${err.message}`);
          // A worker that raised 'error' will not answer its jobs; fail them over.
          this._retireWorker(w);
        });

        // Covers workers that stop without raising 'error'.
        w.on('exit', () => this._retireWorker(w));

        this._workers.push(w);
      } catch (err) {
        log.warn(`Failed to spawn verify worker ${i}: ${err.message}`);
      }
    }

    if (this._workers.length > 0) {
      this._poolReady = true;
      log.info(`Batch verify: worker pool ready — ${this._workers.length} threads`);
    } else {
      log.warn('Batch verify: no workers spawned, using sequential CPU');
    }

    // ---- GPU (CUDA) Check ----
    if (HW.onnxRuntime && HW.nvGpu) {
      try {
        // Imported only to confirm the runtime can be loaded; the module itself is not used yet.
        await import('onnxruntime-node');
        const providers = HW.onnxProviders;
        if (providers.includes('cuda')) {
          this._gpuAvailable = true;
          log.info('Batch verify: CUDA provider detected (NTT kernel reserved for future)');
        }
      } catch {
        log.debug('Batch verify: ONNX Runtime not available for GPU path');
      }
    }
  }

  /**
   * Enqueue a verification request.
   * Returns a promise that resolves with the verification result.
   *
   * @param {Uint8Array} signature
   * @param {Uint8Array} message
   * @param {Uint8Array} publicKey
   * @returns {Promise<boolean>}
   */
  enqueue(signature, message, publicKey) {
    return new Promise((resolve, reject) => {
      this.queue.push({ signature, message, publicKey, resolve, reject });

      if (this.queue.length >= this.minBatchSize) {
        this._flush();
      } else if (!this._timer) {
        this._timer = setTimeout(() => this._flush(), this.flushInterval);
      }
    });
  }

  /**
   * Process all queued verifications.
   * Routes to the fastest available backend:
   *   Worker pool (parallel CPU) → Sequential CPU fallback.
   *
   * The queue is consumed in slices of at most maxBatchSize until it is
   * empty, so no item is left behind without a pending flush timer.
   *
   * GPU/CUDA NTT batching is detected and telemetry-tracked but
   * currently falls through to worker pool (CUDA kernel TBD).
   */
  _flush() {
    if (this._timer) {
      clearTimeout(this._timer);
      this._timer = null;
    }

    while (this.queue.length > 0) {
      // A non-positive or NaN limit would make splice() remove nothing and loop forever.
      const limit = this.maxBatchSize >= 1 ? this.maxBatchSize : this.queue.length;
      this._processBatch(this.queue.splice(0, limit));
    }
  }

  /**
   * Route one batch to the worker pool or the sequential fallback.
   *
   * @param {Array} batch — items with { signature, message, publicKey, resolve, reject }
   */
  _processBatch(batch) {
    telemetry.batchVerifyCalls++;

    // Track GPU availability hits (CUDA NTT kernel reserved for future)
    if (this._gpuAvailable && batch.length >= this.minBatchSize) {
      telemetry.batchGpuHits++;
      log.trace(`GPU batch verify: ${batch.length} items routed to worker pool (CUDA NTT kernel TBD)`);
    }

    // ---- Worker Thread Pool (true CPU parallelism) ----
    if (this._poolReady && batch.length >= this.minBatchSize) {
      this._dispatchToWorkers(batch);
      return;
    }

    // ---- Sequential CPU fallback (small batches or no workers) ----
    this._verifySequential(batch);
  }

  /**
   * Verify items one by one on the calling thread and settle their promises.
   *
   * @param {Array} items — items with { signature, message, publicKey, resolve, reject }
   */
  _verifySequential(items) {
    for (const item of items) {
      try {
        item.resolve(mlDsa65Verify(item.signature, item.message, item.publicKey));
      } catch (err) {
        item.reject(err);
      }
    }
  }

  /**
   * Settle the promises of a job from a worker reply.
   * Items with no matching result entry are rejected rather than left pending.
   *
   * @param {{ id: number, results: Array<{ ok: boolean, err?: string }> }} msg
   */
  _onWorkerMessage({ id, results } = {}) {
    const job = this._pendingJobs.get(id);
    if (!job) return;
    this._pendingJobs.delete(id);

    const replies = Array.isArray(results) ? results : [];
    job.batch.forEach((item, j) => {
      const reply = replies[j];
      if (!reply) {
        item.reject(new Error('Verify worker returned no result for item'));
      } else if (reply.err) {
        item.reject(new Error(reply.err));
      } else {
        item.resolve(reply.ok);
      }
    });
  }

  /**
   * Remove a dead worker from the pool and re-verify its in-flight jobs
   * sequentially so their promises still settle.
   *
   * @param {Worker} worker
   */
  _retireWorker(worker) {
    const idx = this._workers.indexOf(worker);
    if (idx !== -1) this._workers.splice(idx, 1);
    if (this._workers.length === 0) this._poolReady = false;

    for (const [jobId, job] of this._pendingJobs) {
      if (job.worker !== worker) continue;
      this._pendingJobs.delete(jobId);
      this._verifySequential(job.batch);
    }
  }

  /**
   * Distribute a batch across the worker pool for parallel verification.
   * Splits the batch into N chunks (N = worker count) and dispatches
   * each chunk to a worker. Worker results resolve the original promises.
   * A chunk that cannot be posted is verified sequentially instead.
   *
   * @param {Array} batch — items with { signature, message, publicKey, resolve, reject }
   */
  _dispatchToWorkers(batch) {
    // Snapshot: a failed post must not shift indices under the loop.
    const workers = [...this._workers];
    const workerCount = workers.length;
    if (workerCount === 0) {
      this._verifySequential(batch);
      return;
    }

    const chunkSize = Math.ceil(batch.length / workerCount);

    for (let i = 0; i < workerCount && i * chunkSize < batch.length; i++) {
      const start = i * chunkSize;
      const end = Math.min(start + chunkSize, batch.length);
      const chunk = batch.slice(start, end);
      const jobId = ++this._jobCounter;
      const worker = workers[i];

      try {
        // Values exposing .buffer are passed through; anything else is copied into a Uint8Array
        const items = chunk.map(item => ({
          signature: item.signature.buffer ? item.signature : new Uint8Array(item.signature),
          message: item.message.buffer ? item.message : new Uint8Array(item.message),
          publicKey: item.publicKey.buffer ? item.publicKey : new Uint8Array(item.publicKey),
        }));

        this._pendingJobs.set(jobId, { batch: chunk, worker });
        worker.postMessage({ id: jobId, items });
      } catch (err) {
        log.warn(`Batch verify: dispatch to worker failed, verifying sequentially: ${err.message}`);
        this._pendingJobs.delete(jobId);
        this._verifySequential(chunk);
      }
    }
  }

  /**
   * Drain queue and stop timer. Terminate worker pool.
   * Queued items and jobs still in flight on workers are verified
   * sequentially before the workers are terminated, so no promise
   * returned by enqueue() is left pending.
   */
  destroy() {
    const workers = this._workers;
    this._workers = [];
    this._poolReady = false; // final flush must not hand work to workers about to die

    this._flush(); // also clears the flush timer

    for (const [jobId, job] of this._pendingJobs) {
      this._pendingJobs.delete(jobId);
      this._verifySequential(job.batch);
    }

    // Terminate worker threads
    for (const w of workers) {
      w.terminate().catch(() => {});
    }
  }
}
882
+
883
+ // Singleton batch verifier
884
+ export const batchVerify = new BatchVerifyQueue();
885
+
886
+ // =============================================================================
887
+ // TIER 3: NPU INFERENCE ENGINE
888
+ // =============================================================================
889
+
890
/**
 * NPU/GPU inference engine for ML models (SAKSHI anomaly, KARMA trust).
 * Uses ONNX Runtime with DirectML (NPU) or CUDA (GPU) providers.
 *
 * Every method degrades gracefully: when ONNX Runtime is missing or a
 * call fails, the engine logs and returns false / null instead of throwing.
 */
class InferenceEngine {
  constructor() {
    this._sessions = new Map(); // modelName -> InferenceSession
    this._ort = null;
    this._initialized = false;
    this._preferredProvider = null;
  }

  /**
   * Map a typed array to the ONNX tensor element type used for its feed.
   * Int32Array inputs become 'int32'; everything else is fed as 'float32'.
   *
   * @param {Float32Array|Int32Array} data
   * @returns {string}
   */
  static _tensorType(data) {
    return data instanceof Int32Array ? 'int32' : 'float32';
  }

  /**
   * Initialize the inference engine.
   * Detects best available provider: DirectML (NPU) > CUDA (GPU) > CPU.
   * Runs at most once; a failed attempt is also marked initialized.
   */
  async initialize() {
    if (this._initialized) return;

    if (!HW.onnxRuntime) {
      log.debug('Inference engine: ONNX Runtime not available');
      this._initialized = true;
      return;
    }

    try {
      this._ort = await import('onnxruntime-node');

      // Provider priority: NPU (DirectML) > GPU (CUDA) > CPU
      // ONNX Runtime 1.24+ uses short names: 'dml', 'cuda', 'cpu'
      const providers = HW.onnxProviders;
      if (providers.includes('dml') && HW.amdNpu) {
        this._preferredProvider = 'dml';
        log.info(`Inference engine: AMD NPU (${HW.amdNpuTops}T) + GPU (${HW.nvGpuTops}T) = ${HW.totalTops}T via DirectML`);
      } else if (providers.includes('cuda') && HW.nvGpu) {
        this._preferredProvider = 'cuda';
        log.info(`Inference engine: NVIDIA GPU (${HW.nvGpuName}, ${HW.nvGpuTops}T) via CUDA`);
      } else if (providers.includes('dml')) {
        this._preferredProvider = 'dml';
        log.info(`Inference engine: DirectML (${HW.totalTops}T available)`);
      } else {
        this._preferredProvider = 'cpu';
        log.info('Inference engine: CPU fallback');
      }

      this._initialized = true;
    } catch (err) {
      log.warn('Inference engine initialization failed:', err.message);
      this._initialized = true;
    }
  }

  /**
   * Release a session's resources when the runtime exposes release().
   * Failures are logged and swallowed — unloading is best-effort.
   *
   * @param {Object} session
   * @param {string} modelName — used for log context only
   */
  async _releaseSession(session, modelName) {
    if (typeof session?.release !== 'function') return;
    try {
      await session.release();
    } catch (err) {
      log.debug(`Failed to release session for ${modelName}: ${err.message}`);
    }
  }

  /**
   * Load an ONNX model for inference.
   * Loading under a name that is already in use replaces the old session
   * and releases it.
   *
   * @param {string} modelName — unique identifier (e.g., 'sakshi-anomaly')
   * @param {string} modelPath — path to .onnx file
   * @returns {Promise<boolean>} — true if loaded successfully
   */
  async loadModel(modelName, modelPath) {
    if (!this._ort) {
      log.debug(`Cannot load model ${modelName}: no ONNX Runtime`);
      return false;
    }

    try {
      const options = {};
      if (this._preferredProvider) {
        // Set-dedupe so a 'cpu' preference does not yield ['cpu', 'cpu'].
        options.executionProviders = [...new Set([this._preferredProvider, 'cpu'])];
      }

      const session = await this._ort.InferenceSession.create(modelPath, options);
      const previous = this._sessions.get(modelName);
      this._sessions.set(modelName, session);
      if (previous) {
        await this._releaseSession(previous, modelName);
      }

      log.info(`Model loaded: ${modelName} → ${this._preferredProvider || 'CPU'}`);
      return true;
    } catch (err) {
      log.warn(`Failed to load model ${modelName}: ${err.message}`);
      return false;
    }
  }

  /**
   * Run inference on a loaded model.
   * Each input is fed as a [1, length] tensor whose element type follows
   * the typed array (Int32Array → int32, otherwise float32).
   *
   * @param {string} modelName — which model to run
   * @param {Object<string, Float32Array|Int32Array>} inputs — named input tensors
   * @returns {Promise<Object<string, Float32Array>|null>} — output tensors, or null if unavailable
   */
  async infer(modelName, inputs) {
    telemetry.inferCalls++;

    const session = this._sessions.get(modelName);
    if (!session) {
      log.trace(`Model ${modelName} not loaded, skipping inference`);
      return null;
    }

    try {
      // Build ONNX tensor feeds
      const feeds = {};
      for (const [name, data] of Object.entries(inputs)) {
        feeds[name] = new this._ort.Tensor(InferenceEngine._tensorType(data), data, [1, data.length]);
      }

      const results = await session.run(feeds);

      // Track NPU/GPU hits (by preferred provider, not by the provider actually used)
      if (this._preferredProvider === 'dml') {
        telemetry.inferNpuHits++;
      } else if (this._preferredProvider === 'cuda') {
        telemetry.inferGpuHits++;
      }

      // Convert output tensors to plain objects
      const output = {};
      for (const [name, tensor] of Object.entries(results)) {
        output[name] = tensor.data;
      }

      return output;
    } catch (err) {
      log.warn(`Inference failed for ${modelName}: ${err.message}`);
      return null;
    }
  }

  /**
   * Unload a model and free resources.
   * The session is released when the runtime supports it; unknown model
   * names are ignored.
   */
  async unloadModel(modelName) {
    const session = this._sessions.get(modelName);
    if (!session) return;

    this._sessions.delete(modelName);
    // ONNX Runtime sessions don't have an explicit release in all versions
    await this._releaseSession(session, modelName);
    log.debug(`Model unloaded: ${modelName}`);
  }

  /**
   * Check if inference is available for a model.
   */
  hasModel(modelName) {
    return this._sessions.has(modelName);
  }

  /**
   * Check if any hardware acceleration is available.
   */
  get isAccelerated() {
    return this._preferredProvider !== 'cpu' && this._preferredProvider !== null;
  }

  /**
   * Get the active execution provider.
   */
  get provider() {
    return this._preferredProvider || 'none';
  }
}
1050
+
1051
+ // Singleton inference engine
1052
+ export const inference = new InferenceEngine();
1053
+
1054
+ // =============================================================================
1055
+ // TIER 4: HETEROGENEOUS COMPUTE SCHEDULER
1056
+ // =============================================================================
1057
+ //
1058
+ // Routes work to GPU, NPU, or CPU based on task priority, device load,
1059
+ // queue depth, and (optionally) a trained ONNX scheduling model.
1060
+ //
1061
+ // Design principles:
1062
+ // 1. Every task gets exactly ONE outcome: completed | rejected | timed-out
1063
+ // 2. Security workloads (CRITICAL) are NEVER dropped
1064
+ // 3. Bounded queues — no unbounded memory growth under load
1065
+ // 4. Circuit breakers — a failing device is isolated, not retried blindly
1066
+ // 5. Work gifting — idle devices pull from busy neighbours
1067
+ // 6. Self-monitoring — detects own degradation, falls back to rules
1068
+ //
1069
+ // Device topology:
1070
+ // GPU (cuda/dml) — high throughput, higher latency, shared w/ display/LLM
1071
+ // NPU (dml/xdna) — low latency, dedicated silicon, always warm
1072
+ // CPU (fallback) — unlimited "TOPS", never refuses, just slower
1073
+ //
1074
+
1075
/**
 * Scheduling priority classes. A larger number means a higher priority.
 */
export const Priority = Object.freeze({
  // Telemetry, optional analytics — first to shed
  LOW: 0,
  // SEVA mesh work, planet enhance — rejection allowed
  NORMAL: 1,
  // Batch verify, trust evaluation — bounded wait
  HIGH: 2,
  // Entropy sentinel, security checks — NEVER dropped, preempts
  CRITICAL: 3,
});

/**
 * Identifiers for the compute devices work can be routed to.
 */
export const Device = Object.freeze({
  GPU: 'gpu',
  NPU: 'npu',
  CPU: 'cpu',
});

/**
 * Affinity hints a caller can attach to a task to state a device preference.
 */
export const Affinity = Object.freeze({
  GPU_PREFERRED: 'gpu-preferred',
  NPU_PREFERRED: 'npu-preferred',
  EITHER: 'either',
  CPU_ONLY: 'cpu-only',
});

/**
 * Terminal outcome states recorded for a task (module-private).
 */
const Outcome = Object.freeze({
  COMPLETED: 'completed',
  REJECTED: 'rejected',
  TIMED_OUT: 'timed-out',
  ERROR: 'error',
});
1105
+
1106
+ // ---------------------------------------------------------------------------
1107
+ // CIRCUIT BREAKER — per-device fault isolation
1108
+ // ---------------------------------------------------------------------------
1109
+
1110
/**
 * Per-device circuit breaker with three states:
 *   closed    — work flows normally
 *   open      — device is isolated until resetMs has passed since the last failure
 *   half-open — at most one probe job is admitted per probeIntervalMs
 *
 * A successful execution in half-open closes the breaker again.
 */
class CircuitBreaker {
  /**
   * @param {string} deviceName
   * @param {Object} opts
   * @param {number} opts.failThreshold — consecutive failures before opening
   * @param {number} opts.resetMs — how long the breaker stays open
   * @param {number} opts.probeIntervalMs — interval between probe jobs when open
   */
  constructor(deviceName, opts = {}) {
    this.device = deviceName;
    this.failThreshold = opts.failThreshold || 3;
    this.resetMs = opts.resetMs || 30_000;
    this.probeIntervalMs = opts.probeIntervalMs || 5_000;

    this.state = 'closed'; // closed | open | half-open
    this.consecutiveFailures = 0;
    this.lastFailure = 0;
    this.lastProbe = 0;
    this.totalTrips = 0; // lifetime trip count
  }

  /** Record a successful execution — resets failure counter */
  recordSuccess() {
    if (this.state === 'half-open') {
      log.info(`Circuit breaker [${this.device}]: CLOSED — probe succeeded`);
      this.state = 'closed';
    }
    this.consecutiveFailures = 0;
  }

  /** Record a failure — may trip the breaker (only a closed breaker can trip) */
  recordFailure() {
    this.consecutiveFailures++;
    this.lastFailure = Date.now();

    if (this.consecutiveFailures >= this.failThreshold && this.state === 'closed') {
      this.state = 'open';
      this.totalTrips++;
      log.warn(`Circuit breaker [${this.device}]: OPEN — ${this.consecutiveFailures} consecutive failures (trip #${this.totalTrips})`);
    }
  }

  /**
   * Can we send work to this device right now?
   * Note: this call has side effects — it performs the open → half-open
   * transition and stamps the probe time whenever it admits a probe.
   *
   * @returns {boolean}
   */
  isAvailable() {
    const now = Date.now();

    switch (this.state) {
      case 'closed':
        return true;

      case 'open':
        // Check if reset period elapsed → transition to half-open
        if (now - this.lastFailure < this.resetMs) return false;
        this.state = 'half-open';
        // The transition itself admits the probe; stamp it so the next call
        // does not admit a second job straight away.
        this.lastProbe = now;
        log.info(`Circuit breaker [${this.device}]: HALF-OPEN — ready for probe`);
        return true; // allow one probe job

      default:
        // half-open: allow one probe job per interval
        if (now - this.lastProbe < this.probeIntervalMs) return false;
        this.lastProbe = now;
        return true;
    }
  }

  /** Snapshot of breaker state for diagnostics. */
  getStatus() {
    return {
      device: this.device,
      state: this.state,
      consecutiveFailures: this.consecutiveFailures,
      totalTrips: this.totalTrips,
      lastFailure: this.lastFailure ? new Date(this.lastFailure).toISOString() : null,
    };
  }
}
1182
+
1183
+ // ---------------------------------------------------------------------------
1184
+ // BOUNDED PRIORITY QUEUE — per-device work queue
1185
+ // ---------------------------------------------------------------------------
1186
+
1187
/**
 * Fixed-capacity work queue for one device, with a FIFO lane per priority.
 * Non-critical tasks are refused once the queue is full; CRITICAL tasks
 * are always admitted, evicting one LOW task when possible.
 */
class BoundedPriorityQueue {
  /**
   * @param {string} deviceName
   * @param {number} capacity — max items (derived from device TOPS)
   */
  constructor(deviceName, capacity) {
    this.device = deviceName;
    this.capacity = capacity;

    // One FIFO lane per priority class.
    this._queues = {};
    for (const level of [Priority.CRITICAL, Priority.HIGH, Priority.NORMAL, Priority.LOW]) {
      this._queues[level] = [];
    }

    this._size = 0;
    this._totalEnqueued = 0;
    this._totalDropped = 0;
    this._totalCompleted = 0;
  }

  /** Current queue depth */
  get size() {
    return this._size;
  }

  /** Load factor — depth divided by capacity (exceeds 1.0 when CRITICAL overflows) */
  get loadFactor() {
    return this._size / this.capacity;
  }

  /**
   * Remove and return the head of the first non-empty lane, searching
   * the given priorities in order. Returns null when all are empty.
   */
  _takeFirst(levels) {
    const level = levels.find((p) => this._queues[p].length > 0);
    if (level === undefined) return null;
    this._size--;
    return this._queues[level].shift();
  }

  /**
   * Enqueue a task. Returns true if accepted, false if rejected.
   * CRITICAL tasks can preempt LOW tasks when full.
   */
  enqueue(task) {
    const isCritical = task.priority === Priority.CRITICAL;
    const isFull = this._size >= this.capacity;

    // Non-critical work is turned away at capacity.
    if (isFull && !isCritical) {
      this._totalDropped++;
      return false;
    }

    // CRITICAL at capacity: evict one LOW task if there is one. When no
    // LOW task exists the queue simply grows past capacity.
    if (isFull) {
      const victim = this._queues[Priority.LOW].shift();
      if (victim) {
        this._size--;
        this._totalDropped++;
        victim.reject({ outcome: Outcome.REJECTED, reason: 'shed-for-critical', device: this.device });
        log.debug(`Scheduler [${this.device}]: shed LOW task ${victim.id} to admit CRITICAL ${task.id}`);
      }
    }

    this._queues[task.priority].push(task);
    this._size++;
    this._totalEnqueued++;
    return true;
  }

  /**
   * Dequeue the highest-priority task.
   * Returns null if empty.
   */
  dequeue() {
    return this._takeFirst([Priority.CRITICAL, Priority.HIGH, Priority.NORMAL, Priority.LOW]);
  }

  /**
   * Peek at next task without removing.
   */
  peek() {
    const lanes = [Priority.CRITICAL, Priority.HIGH, Priority.NORMAL, Priority.LOW];
    const level = lanes.find((p) => this._queues[p].length > 0);
    return level === undefined ? null : this._queues[level][0];
  }

  /**
   * Gift a LOW or NORMAL task for work-gifting (LOW is offered first).
   * Returns null if nothing giftable.
   */
  gift() {
    return this._takeFirst([Priority.LOW, Priority.NORMAL]);
  }

  /**
   * Drain all pending tasks (returns array, highest priority first).
   * Used during shutdown.
   */
  drain() {
    const lanes = [Priority.CRITICAL, Priority.HIGH, Priority.NORMAL, Priority.LOW];
    const drained = lanes.flatMap((p) => this._queues[p].splice(0));
    this._size = 0;
    return drained;
  }

  /** Snapshot of depth, per-priority counts and lifetime counters. */
  getStatus() {
    const laneDepth = (p) => this._queues[p].length;
    return {
      device: this.device,
      capacity: this.capacity,
      depth: this._size,
      loadFactor: Number(this.loadFactor.toFixed(2)),
      byPriority: {
        critical: laneDepth(Priority.CRITICAL),
        high: laneDepth(Priority.HIGH),
        normal: laneDepth(Priority.NORMAL),
        low: laneDepth(Priority.LOW),
      },
      lifetime: {
        enqueued: this._totalEnqueued,
        dropped: this._totalDropped,
        completed: this._totalCompleted,
      },
    };
  }
}
1319
+
1320
+ // ---------------------------------------------------------------------------
1321
+ // TRAINING DATA LOGGER — records execution history for scheduler model
1322
+ // ---------------------------------------------------------------------------
1323
+
1324
/**
 * In-memory log of task execution records, capped at a maximum number
 * of entries (oldest entries are discarded first). Also builds the
 * feature vector consumed by the scheduler model.
 */
class TrainingDataLogger {
  /**
   * @param {number} [maxEntries=10000] — maximum records retained
   */
  constructor(maxEntries = 10_000) {
    this._entries = [];
    this._maxEntries = maxEntries;
    this._flushCallbacks = [];
  }

  /**
   * Record a completed task's execution data.
   * This is pure gold — every entry trains the future scheduler model.
   * Missing numeric fields are stored as 0; `success` is derived from
   * the outcome.
   *
   * @param {Object} entry — execution record for one task
   */
  record(entry) {
    this._entries.push({
      ts: Date.now(),
      taskType: entry.taskType,
      priority: entry.priority,
      affinity: entry.affinity,
      device: entry.device,
      inputSize: entry.inputSize || 0,
      queueDepthAtSubmit: entry.queueDepthAtSubmit || 0,
      gpuLoadAtSubmit: entry.gpuLoadAtSubmit || 0,
      npuLoadAtSubmit: entry.npuLoadAtSubmit || 0,
      cpuLoadAtSubmit: entry.cpuLoadAtSubmit || 0,
      waitMs: entry.waitMs || 0,
      execMs: entry.execMs || 0,
      outcome: entry.outcome,
      success: entry.outcome === Outcome.COMPLETED,
    });

    // Ring buffer — drop oldest when full
    if (this._entries.length > this._maxEntries) {
      this._entries.shift();
    }
  }

  /**
   * Get recent entries for model training (oldest first).
   * @param {number} n — max entries to return; values that are not
   *   greater than zero yield an empty array
   * @returns {Array<Object>}
   */
  getRecent(n = 1000) {
    // slice(-0) is slice(0) and would return everything; negative or NaN
    // counts misbehave the same way, so handle them explicitly.
    if (!(n > 0)) return [];
    return this._entries.slice(-n);
  }

  /**
   * Build a feature vector from current scheduler state for ML inference.
   * This is what the ONNX scheduler model consumes.
   *
   * @param {Object} task — incoming task descriptor
   * @param {Object} state — current scheduler state snapshot
   * @returns {Float32Array} — 20-element input vector for scheduler model
   */
  buildFeatureVector(task, state) {
    return new Float32Array([
      task.taskTypeId || 0,            // 0: task type enum
      task.inputSize || 0,             // 1: input payload size
      task.priority || 0,              // 2: priority class
      state.gpuQueueDepth || 0,        // 3: GPU queue depth
      state.npuQueueDepth || 0,        // 4: NPU queue depth
      state.cpuQueueDepth || 0,        // 5: CPU queue depth
      state.gpuActiveJobs || 0,        // 6: GPU in-flight
      state.npuActiveJobs || 0,        // 7: NPU in-flight
      state.cpuActiveJobs || 0,        // 8: CPU in-flight
      state.gpuAvgLatency || 0,        // 9: GPU recent avg latency (ms)
      state.npuAvgLatency || 0,        // 10: NPU recent avg latency (ms)
      state.cpuAvgLatency || 0,        // 11: CPU recent avg latency (ms)
      state.gpuLoadFactor || 0,        // 12: GPU queue fill ratio 0-1
      state.npuLoadFactor || 0,        // 13: NPU queue fill ratio 0-1
      state.burstRate10ms || 0,        // 14: tasks in last 10ms
      state.burstRate100ms || 0,       // 15: tasks in last 100ms
      state.gpuCircuitOpen ? 1 : 0,    // 16: GPU circuit breaker state
      state.npuCircuitOpen ? 1 : 0,    // 17: NPU circuit breaker state
      state.gpuTops || 0,              // 18: GPU TOPS rating
      state.npuTops || 0,              // 19: NPU TOPS rating
    ]);
  }

  /** Entry count */
  get size() {
    return this._entries.length;
  }

  /** Snapshot of fill level and the oldest/newest record timestamps. */
  getStatus() {
    const count = this._entries.length;
    return {
      entries: count,
      maxEntries: this._maxEntries,
      oldestTs: count > 0 ? new Date(this._entries[0].ts).toISOString() : null,
      newestTs: count > 0 ? new Date(this._entries[count - 1].ts).toISOString() : null,
    };
  }
}
1412
+
1413
+ // ---------------------------------------------------------------------------
1414
+ // COMPUTE SCHEDULER — the brain
1415
+ // ---------------------------------------------------------------------------
1416
+
1417
+ /**
1418
+ * ComputeScheduler — heterogeneous GPU/NPU/CPU work router.
1419
+ *
1420
+ * Submit a task with a type, priority, affinity hint, and executor function.
1421
+ * The scheduler decides which device runs it, manages queues, circuit breakers,
1422
+ * timeouts, and work gifting. Optionally uses a trained ONNX model for routing.
1423
+ */
1424
+ class ComputeScheduler {
1425
+ constructor() {
1426
+ // Per-device state
1427
+ this._queues = {}; // device → BoundedPriorityQueue
1428
+ this._breakers = {}; // device → CircuitBreaker
1429
+ this._activeJobs = {}; // device → Set<taskId>
1430
+ this._avgLatency = {}; // device → running average (ms)
1431
+
1432
+ // Task tracking
1433
+ this._taskCounter = 0;
1434
+ this._pendingTasks = new Map(); // taskId → { task, resolve, reject, timer }
1435
+
1436
+ // ML routing model (optional — loaded via loadSchedulerModel)
1437
+ this._schedulerSession = null;
1438
+ this._useMLRouting = false;
1439
+ this._mlAccuracy = 1.0; // self-monitored accuracy — degrades → fallback to rules
1440
+
1441
+ // Training data
1442
+ this._trainingLog = new TrainingDataLogger(10_000);
1443
+
1444
+ // Burst rate tracking (sliding window)
1445
+ this._recentSubmits = []; // timestamps of recent submits
1446
+
1447
+ // Work-gifting interval
1448
+ this._giftTimer = null;
1449
+
1450
+ // Lifecycle
1451
+ this._initialized = false;
1452
+ this._shutdownRequested = false;
1453
+
1454
+ // Stats
1455
+ this._stats = {
1456
+ totalSubmitted: 0,
1457
+ totalCompleted: 0,
1458
+ totalRejected: 0,
1459
+ totalTimedOut: 0,
1460
+ totalErrors: 0,
1461
+ totalGifted: 0,
1462
+ mlRoutingDecisions: 0,
1463
+ ruleRoutingDecisions: 0,
1464
+ };
1465
+ }
1466
+
1467
+ /**
1468
+ * Initialize the scheduler. Must be called after probe().
1469
+ * Sets up queues and breakers based on detected hardware.
1470
+ */
1471
+ async initialize() {
1472
+ if (this._initialized) return;
1473
+
1474
+ // GPU queue — capacity scaled from TOPS
1475
+ const gpuTops = HW.nvGpuTops || 0;
1476
+ const npuTops = HW.amdNpuTops || 0;
1477
+
1478
+ const gpuCapacity = gpuTops > 0 ? Math.max(32, Math.ceil(gpuTops * 2)) : 0;
1479
+ const npuCapacity = npuTops > 0 ? Math.max(16, Math.ceil(npuTops * 2)) : 0;
1480
+ const cpuCapacity = Math.max(64, HW.threads * 4);
1481
+
1482
+ // Create queues for available devices
1483
+ if (gpuTops > 0 && HW.nvGpu) {
1484
+ this._queues[Device.GPU] = new BoundedPriorityQueue(Device.GPU, gpuCapacity);
1485
+ this._breakers[Device.GPU] = new CircuitBreaker(Device.GPU);
1486
+ this._activeJobs[Device.GPU] = new Set();
1487
+ this._avgLatency[Device.GPU] = 0;
1488
+ log.info(`Scheduler: GPU queue initialized — capacity ${gpuCapacity} (${gpuTops}T)`);
1489
+ }
1490
+
1491
+ if (npuTops > 0 && HW.amdNpu) {
1492
+ this._queues[Device.NPU] = new BoundedPriorityQueue(Device.NPU, npuCapacity);
1493
+ this._breakers[Device.NPU] = new CircuitBreaker(Device.NPU);
1494
+ this._activeJobs[Device.NPU] = new Set();
1495
+ this._avgLatency[Device.NPU] = 0;
1496
+ log.info(`Scheduler: NPU queue initialized — capacity ${npuCapacity} (${npuTops}T)`);
1497
+ }
1498
+
1499
+ // CPU always available
1500
+ this._queues[Device.CPU] = new BoundedPriorityQueue(Device.CPU, cpuCapacity);
1501
+ this._breakers[Device.CPU] = new CircuitBreaker(Device.CPU, { failThreshold: 10 }); // CPU is resilient
1502
+ this._activeJobs[Device.CPU] = new Set();
1503
+ this._avgLatency[Device.CPU] = 0;
1504
+ log.info(`Scheduler: CPU queue initialized — capacity ${cpuCapacity} (${HW.threads} threads)`);
1505
+
1506
+ // Start work-gifting loop (checks every 50ms)
1507
+ this._giftTimer = setInterval(() => this._workGift(), 50);
1508
+ if (this._giftTimer.unref) this._giftTimer.unref();
1509
+
1510
+ this._initialized = true;
1511
+
1512
+ const devices = Object.keys(this._queues);
1513
+ const totalCapacity = Object.values(this._queues).reduce((s, q) => s + q.capacity, 0);
1514
+ log.info(`Scheduler: ready — ${devices.length} devices, ${totalCapacity} total queue slots, ${HW.totalTops}T combined`);
1515
+ }
1516
+
1517
+ // =========================================================================
1518
+ // ML SCHEDULER MODEL
1519
+ // =========================================================================
1520
+
1521
+ /**
1522
+ * Load a trained ONNX scheduling model.
1523
+ * Input: 20-float feature vector (see TrainingDataLogger.buildFeatureVector)
1524
+ * Output: [device_id, expected_ms, should_split, split_ratio]
1525
+ *
1526
+ * @param {string} modelPath — path to scheduler.onnx
1527
+ */
1528
+ async loadSchedulerModel(modelPath) {
1529
+ if (!HW.onnxRuntime) {
1530
+ log.debug('Scheduler: cannot load ML model — no ONNX Runtime');
1531
+ return false;
1532
+ }
1533
+ try {
1534
+ const ort = await import('onnxruntime-node');
1535
+ const cpuProv = HW.onnxProviders.find(p => p.toLowerCase().includes('cpu')) || 'cpu';
1536
+ // Scheduler model always runs on NPU (tiny, low-latency) or CPU
1537
+ const dmlProv = HW.onnxProviders.find(p => p.toLowerCase().includes('dml'));
1538
+ const providers = dmlProv ? [dmlProv, cpuProv] : [cpuProv];
1539
+
1540
+ this._schedulerSession = await ort.InferenceSession.create(modelPath, {
1541
+ executionProviders: providers,
1542
+ });
1543
+ this._useMLRouting = true;
1544
+ this._ort = ort;
1545
+ log.info(`Scheduler: ML routing model loaded from ${modelPath}`);
1546
+ return true;
1547
+ } catch (err) {
1548
+ log.warn(`Scheduler: failed to load ML model: ${err.message}`);
1549
+ return false;
1550
+ }
1551
+ }
1552
+
1553
+ /**
1554
+ * Query the ML model for a routing decision.
1555
+ * Falls back to rules if model unavailable or degraded.
1556
+ *
1557
+ * @param {Object} task
1558
+ * @returns {{ device: string, expectedMs: number, shouldSplit: boolean, splitRatio: number }}
1559
+ */
1560
+ async _mlRoute(task) {
1561
+ if (!this._useMLRouting || !this._schedulerSession || this._mlAccuracy < 0.5) {
1562
+ return null; // ML unavailable or degraded — use rules
1563
+ }
1564
+
1565
+ try {
1566
+ const state = this._getStateSnapshot();
1567
+ const features = this._trainingLog.buildFeatureVector(task, state);
1568
+ const inputTensor = new this._ort.Tensor('float32', features, [1, features.length]);
1569
+ const results = await this._schedulerSession.run({ input: inputTensor });
1570
+ const output = results.output?.data || results[Object.keys(results)[0]]?.data;
1571
+
1572
+ if (!output || output.length < 4) return null;
1573
+
1574
+ const deviceMap = { 0: Device.GPU, 1: Device.NPU, 2: Device.CPU };
1575
+ const deviceId = Math.round(output[0]);
1576
+
1577
+ this._stats.mlRoutingDecisions++;
1578
+
1579
+ return {
1580
+ device: deviceMap[deviceId] || Device.CPU,
1581
+ expectedMs: output[1],
1582
+ shouldSplit: output[2] > 0.5,
1583
+ splitRatio: Math.max(0, Math.min(1, output[3])),
1584
+ };
1585
+ } catch {
1586
+ // Model inference failed — degrade gracefully
1587
+ this._mlAccuracy *= 0.9;
1588
+ if (this._mlAccuracy < 0.5) {
1589
+ log.warn('Scheduler: ML accuracy degraded below 50% — falling back to rule-based routing');
1590
+ }
1591
+ return null;
1592
+ }
1593
+ }
1594
+
1595
+ // =========================================================================
1596
+ // RULE-BASED ROUTING (fallback & default)
1597
+ // =========================================================================
1598
+
1599
+ /**
1600
+ * Determine the best device for a task using rules.
1601
+ * Considers: affinity hint, circuit breaker state, queue load, priority.
1602
+ */
1603
+ _ruleRoute(task) {
1604
+ const available = {};
1605
+ for (const [dev, breaker] of Object.entries(this._breakers)) {
1606
+ if (breaker.isAvailable()) {
1607
+ available[dev] = {
1608
+ load: this._queues[dev].loadFactor,
1609
+ active: this._activeJobs[dev].size,
1610
+ latency: this._avgLatency[dev],
1611
+ };
1612
+ }
1613
+ }
1614
+
1615
+ this._stats.ruleRoutingDecisions++;
1616
+
1617
+ // CPU-only affinity
1618
+ if (task.affinity === Affinity.CPU_ONLY) {
1619
+ return Device.CPU;
1620
+ }
1621
+
1622
+ // CRITICAL always goes to the least-loaded available accelerator
1623
+ if (task.priority === Priority.CRITICAL) {
1624
+ if (available[Device.NPU] && available[Device.NPU].load < 0.95) return Device.NPU;
1625
+ if (available[Device.GPU] && available[Device.GPU].load < 0.95) return Device.GPU;
1626
+ return Device.CPU; // CPU never refuses CRITICAL
1627
+ }
1628
+
1629
+ // Affinity-preferred routing with load-aware fallback
1630
+ if (task.affinity === Affinity.GPU_PREFERRED && available[Device.GPU]) {
1631
+ if (available[Device.GPU].load < 0.8) return Device.GPU;
1632
+ // GPU busy — can NPU help?
1633
+ if (available[Device.NPU] && available[Device.NPU].load < 0.6) return Device.NPU;
1634
+ // Both busy — still try GPU if not at wall
1635
+ if (available[Device.GPU].load < 0.95) return Device.GPU;
1636
+ return Device.CPU;
1637
+ }
1638
+
1639
+ if (task.affinity === Affinity.NPU_PREFERRED && available[Device.NPU]) {
1640
+ if (available[Device.NPU].load < 0.8) return Device.NPU;
1641
+ if (available[Device.GPU] && available[Device.GPU].load < 0.6) return Device.GPU;
1642
+ if (available[Device.NPU].load < 0.95) return Device.NPU;
1643
+ return Device.CPU;
1644
+ }
1645
+
1646
+ // EITHER affinity — pick the least loaded accelerator
1647
+ if (available[Device.NPU] && available[Device.GPU]) {
1648
+ // NPU is lower-latency for small tasks, GPU for large
1649
+ const npuBetter = available[Device.NPU].load < available[Device.GPU].load;
1650
+ const preferred = npuBetter ? Device.NPU : Device.GPU;
1651
+ const fallback = npuBetter ? Device.GPU : Device.NPU;
1652
+ if (available[preferred].load < 0.8) return preferred;
1653
+ if (available[fallback].load < 0.8) return fallback;
1654
+ return Device.CPU;
1655
+ }
1656
+
1657
+ if (available[Device.NPU]) return available[Device.NPU].load < 0.9 ? Device.NPU : Device.CPU;
1658
+ if (available[Device.GPU]) return available[Device.GPU].load < 0.9 ? Device.GPU : Device.CPU;
1659
+
1660
+ return Device.CPU;
1661
+ }
1662
+
1663
+ // =========================================================================
1664
+ // TASK SUBMISSION
1665
+ // =========================================================================
1666
+
1667
+ /**
1668
+ * Submit a task to the compute scheduler.
1669
+ *
1670
+ * @param {Object} descriptor — task descriptor
1671
+ * @param {string} descriptor.type — task type name (e.g. 'entropy-sentinel', 'batch-verify')
1672
+ * @param {number} descriptor.typeId — numeric type ID for ML model (optional)
1673
+ * @param {number} descriptor.priority — Priority.CRITICAL | HIGH | NORMAL | LOW
1674
+ * @param {string} descriptor.affinity — Affinity.GPU_PREFERRED | NPU_PREFERRED | EITHER | CPU_ONLY
1675
+ * @param {number} descriptor.timeoutMs — max allowed execution time (0 = no timeout)
1676
+ * @param {number} descriptor.inputSize — rough input payload size (for ML features)
1677
+ * @param {Object} descriptor.executors — { gpu: fn, npu: fn, cpu: fn } — at least cpu required
1678
+ * @returns {Promise<{ outcome, device, result, execMs, waitMs }>}
1679
+ */
1680
+ submit(descriptor) {
1681
+ if (this._shutdownRequested) {
1682
+ return Promise.reject({ outcome: Outcome.REJECTED, reason: 'scheduler-shutting-down' });
1683
+ }
1684
+
1685
+ const taskId = ++this._taskCounter;
1686
+ const submitTime = performance.now();
1687
+ this._stats.totalSubmitted++;
1688
+
1689
+ // Track burst rate
1690
+ this._recentSubmits.push(submitTime);
1691
+ // Prune old entries (keep last 200ms)
1692
+ const cutoff = submitTime - 200;
1693
+ while (this._recentSubmits.length > 0 && this._recentSubmits[0] < cutoff) {
1694
+ this._recentSubmits.shift();
1695
+ }
1696
+
1697
+ return new Promise(async (resolve, reject) => {
1698
+ const task = {
1699
+ id: taskId,
1700
+ type: descriptor.type || 'unknown',
1701
+ taskTypeId: descriptor.typeId || 0,
1702
+ priority: descriptor.priority ?? Priority.NORMAL,
1703
+ affinity: descriptor.affinity || Affinity.EITHER,
1704
+ timeoutMs: descriptor.timeoutMs || 5000,
1705
+ inputSize: descriptor.inputSize || 0,
1706
+ executors: descriptor.executors || {},
1707
+ submitTime,
1708
+ resolve,
1709
+ reject,
1710
+ timer: null,
1711
+ };
1712
+
1713
+ // Route decision: ML model first, fall back to rules
1714
+ let targetDevice;
1715
+ const mlDecision = await this._mlRoute(task);
1716
+ if (mlDecision) {
1717
+ targetDevice = mlDecision.device;
1718
+ task._mlExpectedMs = mlDecision.expectedMs;
1719
+ } else {
1720
+ targetDevice = this._ruleRoute(task);
1721
+ }
1722
+
1723
+ // Ensure target device has an executor; fall back through chain
1724
+ if (!task.executors[targetDevice]) {
1725
+ if (targetDevice === Device.GPU && task.executors[Device.NPU]) targetDevice = Device.NPU;
1726
+ else if (targetDevice === Device.NPU && task.executors[Device.GPU]) targetDevice = Device.GPU;
1727
+ else targetDevice = Device.CPU;
1728
+ }
1729
+
1730
+ // Final check: must have an executor for the chosen device
1731
+ if (!task.executors[targetDevice]) {
1732
+ this._stats.totalRejected++;
1733
+ reject({ outcome: Outcome.REJECTED, reason: `no-executor-for-${targetDevice}`, taskId });
1734
+ return;
1735
+ }
1736
+
1737
+ task.targetDevice = targetDevice;
1738
+
1739
+ // Enqueue
1740
+ const queue = this._queues[targetDevice];
1741
+ if (!queue) {
1742
+ // Device not available — retry on CPU
1743
+ task.targetDevice = Device.CPU;
1744
+ const cpuQueue = this._queues[Device.CPU];
1745
+ if (!cpuQueue.enqueue(task)) {
1746
+ this._stats.totalRejected++;
1747
+ reject({
1748
+ outcome: Outcome.REJECTED,
1749
+ reason: 'all-queues-full',
1750
+ taskId,
1751
+ retryAfterMs: 100,
1752
+ });
1753
+ return;
1754
+ }
1755
+ } else {
1756
+ const accepted = queue.enqueue(task);
1757
+ if (!accepted) {
1758
+ // Try CPU spillover
1759
+ if (targetDevice !== Device.CPU && this._queues[Device.CPU]) {
1760
+ task.targetDevice = Device.CPU;
1761
+ if (!task.executors[Device.CPU]) {
1762
+ this._stats.totalRejected++;
1763
+ reject({
1764
+ outcome: Outcome.REJECTED,
1765
+ reason: `${targetDevice}-queue-full-no-cpu-executor`,
1766
+ taskId,
1767
+ retryAfterMs: 200,
1768
+ });
1769
+ return;
1770
+ }
1771
+ const cpuAccepted = this._queues[Device.CPU].enqueue(task);
1772
+ if (!cpuAccepted) {
1773
+ this._stats.totalRejected++;
1774
+ reject({
1775
+ outcome: Outcome.REJECTED,
1776
+ reason: 'all-queues-full',
1777
+ taskId,
1778
+ retryAfterMs: 500,
1779
+ });
1780
+ return;
1781
+ }
1782
+ } else {
1783
+ this._stats.totalRejected++;
1784
+ reject({
1785
+ outcome: Outcome.REJECTED,
1786
+ reason: `${targetDevice}-queue-full`,
1787
+ taskId,
1788
+ retryAfterMs: 200,
1789
+ });
1790
+ return;
1791
+ }
1792
+ }
1793
+ }
1794
+
1795
+ // Set timeout
1796
+ if (task.timeoutMs > 0) {
1797
+ task.timer = setTimeout(() => {
1798
+ if (this._pendingTasks.has(taskId)) {
1799
+ this._pendingTasks.delete(taskId);
1800
+ this._stats.totalTimedOut++;
1801
+ reject({ outcome: Outcome.TIMED_OUT, taskId, device: task.targetDevice, timeoutMs: task.timeoutMs });
1802
+ }
1803
+ }, task.timeoutMs);
1804
+ if (task.timer.unref) task.timer.unref();
1805
+ }
1806
+
1807
+ this._pendingTasks.set(taskId, task);
1808
+
1809
+ // Kick the processor for this device
1810
+ this._processQueue(task.targetDevice);
1811
+ });
1812
+ }
1813
+
1814
+ // =========================================================================
1815
+ // QUEUE PROCESSING — execute tasks from a device queue
1816
+ // =========================================================================
1817
+
1818
+ /**
1819
+ * Process pending tasks on a device.
1820
+ * Runs concurrently up to device capacity.
1821
+ */
1822
+ async _processQueue(device) {
1823
+ const queue = this._queues[device];
1824
+ const breaker = this._breakers[device];
1825
+ const active = this._activeJobs[device];
1826
+ if (!queue || !breaker || !active) return;
1827
+
1828
+ // Max concurrent jobs per device
1829
+ const maxConcurrent = device === Device.GPU
1830
+ ? Math.max(4, Math.ceil((HW.nvGpuTops || 1) / 10))
1831
+ : device === Device.NPU
1832
+ ? Math.max(2, Math.ceil((HW.amdNpuTops || 1) / 4))
1833
+ : HW.threads || 4;
1834
+
1835
+ while (queue.size > 0 && active.size < maxConcurrent) {
1836
+ if (!breaker.isAvailable()) break;
1837
+
1838
+ const task = queue.dequeue();
1839
+ if (!task) break;
1840
+ if (!this._pendingTasks.has(task.id)) continue; // already timed out
1841
+
1842
+ active.add(task.id);
1843
+ const execStart = performance.now();
1844
+
1845
+ // Execute asynchronously
1846
+ this._executeTask(task, device, execStart).catch(() => {});
1847
+ }
1848
+ }
1849
+
1850
+ /**
1851
+ * Execute a single task on a device.
1852
+ */
1853
+ async _executeTask(task, device, execStart) {
1854
+ const executor = task.executors[device];
1855
+ const breaker = this._breakers[device];
1856
+ const active = this._activeJobs[device];
1857
+ const queue = this._queues[device];
1858
+
1859
+ try {
1860
+ const result = await executor();
1861
+ const execMs = performance.now() - execStart;
1862
+ const waitMs = execStart - task.submitTime;
1863
+
1864
+ // Clear timeout
1865
+ if (task.timer) clearTimeout(task.timer);
1866
+
1867
+ // Remove from tracking
1868
+ this._pendingTasks.delete(task.id);
1869
+ active.delete(task.id);
1870
+ if (queue) queue._totalCompleted++;
1871
+
1872
+ // Record success
1873
+ breaker.recordSuccess();
1874
+ this._updateAvgLatency(device, execMs);
1875
+ this._stats.totalCompleted++;
1876
+
1877
+ // Log training data
1878
+ this._trainingLog.record({
1879
+ taskType: task.type,
1880
+ priority: task.priority,
1881
+ affinity: task.affinity,
1882
+ device,
1883
+ inputSize: task.inputSize,
1884
+ queueDepthAtSubmit: queue ? queue.size : 0,
1885
+ gpuLoadAtSubmit: this._queues[Device.GPU]?.loadFactor || 0,
1886
+ npuLoadAtSubmit: this._queues[Device.NPU]?.loadFactor || 0,
1887
+ cpuLoadAtSubmit: this._queues[Device.CPU]?.loadFactor || 0,
1888
+ waitMs,
1889
+ execMs,
1890
+ outcome: Outcome.COMPLETED,
1891
+ });
1892
+
1893
+ // ML accuracy self-check
1894
+ if (task._mlExpectedMs && execMs > 0) {
1895
+ const ratio = execMs / task._mlExpectedMs;
1896
+ if (ratio > 3 || ratio < 0.1) {
1897
+ this._mlAccuracy *= 0.95; // penalize bad predictions
1898
+ } else {
1899
+ this._mlAccuracy = Math.min(1.0, this._mlAccuracy * 1.01); // reward good ones
1900
+ }
1901
+ }
1902
+
1903
+ // Resolve the promise
1904
+ task.resolve({
1905
+ outcome: Outcome.COMPLETED,
1906
+ device,
1907
+ result,
1908
+ execMs: +execMs.toFixed(2),
1909
+ waitMs: +waitMs.toFixed(2),
1910
+ taskId: task.id,
1911
+ });
1912
+ } catch (err) {
1913
+ const execMs = performance.now() - execStart;
1914
+ if (task.timer) clearTimeout(task.timer);
1915
+ this._pendingTasks.delete(task.id);
1916
+ active.delete(task.id);
1917
+
1918
+ breaker.recordFailure();
1919
+ this._stats.totalErrors++;
1920
+
1921
+ // Log failure for training
1922
+ this._trainingLog.record({
1923
+ taskType: task.type,
1924
+ priority: task.priority,
1925
+ affinity: task.affinity,
1926
+ device,
1927
+ inputSize: task.inputSize,
1928
+ queueDepthAtSubmit: 0,
1929
+ gpuLoadAtSubmit: 0,
1930
+ npuLoadAtSubmit: 0,
1931
+ cpuLoadAtSubmit: 0,
1932
+ waitMs: execStart - task.submitTime,
1933
+ execMs,
1934
+ outcome: Outcome.ERROR,
1935
+ });
1936
+
1937
+ // AUTO-RESCUE: If a non-CPU device fails and CPU executor exists, retry on CPU
1938
+ if (device !== Device.CPU && task.executors[Device.CPU]) {
1939
+ log.debug(`Scheduler: ${device} failed for task ${task.id} (${task.type}), retrying on CPU`);
1940
+ try {
1941
+ const cpuStart = performance.now();
1942
+ const result = await task.executors[Device.CPU]();
1943
+ const cpuExecMs = performance.now() - cpuStart;
1944
+
1945
+ this._stats.totalCompleted++;
1946
+ this._breakers[Device.CPU].recordSuccess();
1947
+
1948
+ task.resolve({
1949
+ outcome: Outcome.COMPLETED,
1950
+ device: Device.CPU,
1951
+ result,
1952
+ execMs: +cpuExecMs.toFixed(2),
1953
+ waitMs: +(cpuStart - task.submitTime).toFixed(2),
1954
+ taskId: task.id,
1955
+ rescue: true, // indicates this was a CPU rescue
1956
+ });
1957
+ return;
1958
+ } catch (cpuErr) {
1959
+ // Even CPU failed — truly broken task
1960
+ log.warn(`Scheduler: CPU rescue also failed for task ${task.id}: ${cpuErr.message}`);
1961
+ }
1962
+ }
1963
+
1964
+ task.reject({
1965
+ outcome: Outcome.ERROR,
1966
+ device,
1967
+ error: err.message,
1968
+ taskId: task.id,
1969
+ execMs: +execMs.toFixed(2),
1970
+ });
1971
+ } finally {
1972
+ // Always try to process more from this device's queue
1973
+ setImmediate(() => this._processQueue(device));
1974
+ }
1975
+ }
1976
+
1977
+ // =========================================================================
1978
+ // WORK GIFTING — idle devices pull from busy neighbours
1979
+ // =========================================================================
1980
+
1981
+ _workGift() {
1982
+ if (this._shutdownRequested) return;
1983
+
1984
+ for (const [device, active] of Object.entries(this._activeJobs)) {
1985
+ const queue = this._queues[device];
1986
+ const breaker = this._breakers[device];
1987
+ if (!queue || !breaker || !breaker.isAvailable()) continue;
1988
+
1989
+ // Is this device idle?
1990
+ const maxConcurrent = device === Device.GPU
1991
+ ? Math.max(4, Math.ceil((HW.nvGpuTops || 1) / 10))
1992
+ : device === Device.NPU
1993
+ ? Math.max(2, Math.ceil((HW.amdNpuTops || 1) / 4))
1994
+ : HW.threads || 4;
1995
+
1996
+ if (active.size >= maxConcurrent * 0.5) continue; // not idle enough
1997
+ if (queue.size > 0) continue; // has own work to do
1998
+
1999
+ // Find the busiest other queue and gift from it
2000
+ let busiestDevice = null;
2001
+ let busiestLoad = 0;
2002
+ for (const [otherDev, otherQueue] of Object.entries(this._queues)) {
2003
+ if (otherDev === device) continue;
2004
+ if (otherQueue.loadFactor > busiestLoad && otherQueue.size > 1) {
2005
+ busiestLoad = otherQueue.loadFactor;
2006
+ busiestDevice = otherDev;
2007
+ }
2008
+ }
2009
+
2010
+ if (busiestDevice && busiestLoad > 0.3) {
2011
+ const gifted = this._queues[busiestDevice].gift();
2012
+ if (gifted && gifted.executors[device]) {
2013
+ // Re-target to receiving device
2014
+ gifted.targetDevice = device;
2015
+ this._queues[device].enqueue(gifted);
2016
+ this._stats.totalGifted++;
2017
+ log.trace(`Scheduler: ${busiestDevice} gifted ${gifted.type} task ${gifted.id} to ${device}`);
2018
+ this._processQueue(device);
2019
+ } else if (gifted) {
2020
+ // Can't execute on this device — put it back
2021
+ this._queues[busiestDevice].enqueue(gifted);
2022
+ }
2023
+ }
2024
+ }
2025
+ }
2026
+
2027
+ // =========================================================================
2028
+ // STATE & TELEMETRY
2029
+ // =========================================================================
2030
+
2031
+ /** Update exponential moving average latency for a device */
2032
+ _updateAvgLatency(device, ms) {
2033
+ const alpha = 0.1; // smoothing factor
2034
+ this._avgLatency[device] = this._avgLatency[device] * (1 - alpha) + ms * alpha;
2035
+ }
2036
+
2037
+ /** Get burst rate (tasks submitted in last N ms) */
2038
+ _getBurstRate(windowMs) {
2039
+ const cutoff = performance.now() - windowMs;
2040
+ return this._recentSubmits.filter(t => t >= cutoff).length;
2041
+ }
2042
+
2043
+ /** Snapshot of current scheduler state (for ML model or status) */
2044
+ _getStateSnapshot() {
2045
+ return {
2046
+ gpuQueueDepth: this._queues[Device.GPU]?.size || 0,
2047
+ npuQueueDepth: this._queues[Device.NPU]?.size || 0,
2048
+ cpuQueueDepth: this._queues[Device.CPU]?.size || 0,
2049
+ gpuActiveJobs: this._activeJobs[Device.GPU]?.size || 0,
2050
+ npuActiveJobs: this._activeJobs[Device.NPU]?.size || 0,
2051
+ cpuActiveJobs: this._activeJobs[Device.CPU]?.size || 0,
2052
+ gpuAvgLatency: this._avgLatency[Device.GPU] || 0,
2053
+ npuAvgLatency: this._avgLatency[Device.NPU] || 0,
2054
+ cpuAvgLatency: this._avgLatency[Device.CPU] || 0,
2055
+ gpuLoadFactor: this._queues[Device.GPU]?.loadFactor || 0,
2056
+ npuLoadFactor: this._queues[Device.NPU]?.loadFactor || 0,
2057
+ gpuCircuitOpen: this._breakers[Device.GPU]?.state === 'open',
2058
+ npuCircuitOpen: this._breakers[Device.NPU]?.state === 'open',
2059
+ burstRate10ms: this._getBurstRate(10),
2060
+ burstRate100ms: this._getBurstRate(100),
2061
+ gpuTops: HW.nvGpuTops || 0,
2062
+ npuTops: HW.amdNpuTops || 0,
2063
+ };
2064
+ }
2065
+
2066
+ /**
2067
+ * Full scheduler status for /health and monitoring.
2068
+ */
2069
+ getStatus() {
2070
+ const deviceStatus = {};
2071
+ for (const dev of Object.keys(this._queues)) {
2072
+ deviceStatus[dev] = {
2073
+ queue: this._queues[dev].getStatus(),
2074
+ circuitBreaker: this._breakers[dev].getStatus(),
2075
+ activeJobs: this._activeJobs[dev].size,
2076
+ avgLatencyMs: +(this._avgLatency[dev] || 0).toFixed(2),
2077
+ };
2078
+ }
2079
+
2080
+ return {
2081
+ initialized: this._initialized,
2082
+ devices: deviceStatus,
2083
+ routing: {
2084
+ mode: this._useMLRouting && this._mlAccuracy >= 0.5 ? 'ml' : 'rules',
2085
+ mlAccuracy: +(this._mlAccuracy.toFixed(3)),
2086
+ mlDecisions: this._stats.mlRoutingDecisions,
2087
+ ruleDecisions: this._stats.ruleRoutingDecisions,
2088
+ },
2089
+ stats: { ...this._stats },
2090
+ trainingData: this._trainingLog.getStatus(),
2091
+ burstRate: {
2092
+ last10ms: this._getBurstRate(10),
2093
+ last100ms: this._getBurstRate(100),
2094
+ },
2095
+ };
2096
+ }
2097
+
2098
+ /**
2099
+ * Graceful shutdown. Drains all queues, rejects pending with reason.
2100
+ */
2101
+ async shutdown() {
2102
+ this._shutdownRequested = true;
2103
+ if (this._giftTimer) clearInterval(this._giftTimer);
2104
+
2105
+ // Drain all queues
2106
+ for (const [dev, queue] of Object.entries(this._queues)) {
2107
+ const remaining = queue.drain();
2108
+ for (const task of remaining) {
2109
+ if (task.timer) clearTimeout(task.timer);
2110
+ task.reject({ outcome: Outcome.REJECTED, reason: 'scheduler-shutdown', taskId: task.id });
2111
+ }
2112
+ }
2113
+
2114
+ // Clear pending
2115
+ for (const [id, task] of this._pendingTasks) {
2116
+ if (task.timer) clearTimeout(task.timer);
2117
+ task.reject({ outcome: Outcome.REJECTED, reason: 'scheduler-shutdown', taskId: id });
2118
+ }
2119
+ this._pendingTasks.clear();
2120
+
2121
+ log.info(`Scheduler: shutdown complete — ${this._stats.totalCompleted} tasks completed lifetime`);
2122
+ }
2123
+
2124
+ /**
2125
+ * Get training data for model training.
2126
+ * @param {number} n — max entries
2127
+ */
2128
+ getTrainingData(n = 5000) {
2129
+ return this._trainingLog.getRecent(n);
2130
+ }
2131
+ }
2132
+
/**
 * Singleton scheduler: the module-wide ComputeScheduler instance.
 * It is initialized by the module-level `initialize()`.
 */
export const scheduler = new ComputeScheduler();
2135
+
2136
+
2137
+ // =============================================================================
2138
+ // AGGREGATE INITIALIZER
2139
+ // =============================================================================
2140
+
/**
 * Initialize the full acceleration stack.
 * Call once at yakmesh startup. Runs, strictly in order: the hardware probe,
 * then batch verification, the inference engine and the compute scheduler,
 * and finally the native PQ loader when `HW.nativePQ` is set.
 *
 * @returns {Promise<{ hw: typeof HW, telemetry: Object }>}
 */
export async function initialize() {
  await probe();

  // Subsystems are brought up one at a time, in this order.
  for (const subsystem of [batchVerify, inference, scheduler]) {
    await subsystem.initialize();
  }

  // Pre-load native PQ if available
  if (HW.nativePQ) {
    await _loadNativePQ();
  }

  const snapshot = getTelemetry();
  return { hw: HW, telemetry: snapshot };
}
2161
+
2162
+ // =============================================================================
2163
+ // TELEMETRY & STATUS
2164
+ // =============================================================================
2165
+
/**
 * Get current telemetry snapshot.
 *
 * @returns {Object} a copy of the raw counters plus `elapsedMs` (time since
 *   the last reset) and four hit-rate strings formatted like "12.3%", or
 *   'N/A' when the corresponding call counter is zero.
 */
export function getTelemetry() {
  const elapsedMs = Date.now() - telemetry.lastReset;

  // Percentage with one decimal, or 'N/A' when there were no calls.
  const rate = (hits, calls) => (calls > 0 ? `${(hits / calls * 100).toFixed(1)}%` : 'N/A');

  const acceleratedInferences = telemetry.inferNpuHits + telemetry.inferGpuHits;

  return {
    ...telemetry,
    elapsedMs,
    sha3NativeRate: rate(telemetry.sha3NativeHits, telemetry.sha3Calls),
    signNativeRate: rate(telemetry.signNativeHits, telemetry.signCalls),
    verifyNativeRate: rate(telemetry.verifyNativeHits, telemetry.verifyCalls),
    inferAccelRate: rate(acceleratedInferences, telemetry.inferCalls),
  };
}
2189
+
2190
+ /**
2191
+ * Reset telemetry counters.
2192
+ */
2193
+ export function resetTelemetry() {
2194
+ Object.keys(telemetry).forEach(k => {
2195
+ if (k !== 'lastReset') telemetry[k] = 0;
2196
+ });
2197
+ telemetry.lastReset = Date.now();
2198
+ }
2199
+
2200
+ /**
2201
+ * Get a human-readable status report.
2202
+ */
2203
+ export function getStatus() {
2204
+ const t = getTelemetry();
2205
+
2206
+ return {
2207
+ hardware: {
2208
+ cpu: HW.cpuModel,
2209
+ arch: HW.cpuArch,
2210
+ threads: HW.threads,
2211
+ simd: {
2212
+ avx512: HW.avx512,
2213
+ vaes: HW.vaes,
2214
+ shaNI: HW.shaNI,
2215
+ gfni: HW.gfni,
2216
+ },
2217
+ gpu: HW.nvGpu ? {
2218
+ name: HW.nvGpuName,
2219
+ vram: `${HW.nvGpuVRAM} MiB`,
2220
+ compute: HW.nvComputeCap,
2221
+ cuda: HW.nvCudaVersion,
2222
+ tops: HW.nvGpuTops,
2223
+ } : null,
2224
+ npu: HW.amdNpu ? {
2225
+ tops: HW.amdNpuTops,
2226
+ } : null,
2227
+ totalTops: HW.totalTops,
2228
+ },
2229
+ acceleration: {
2230
+ sha3: HW.nativeSha3 ? 'native (OpenSSL)' : 'pure-JS (@noble)',
2231
+ pqCrypto: HW.nativePQ ? `native (${HW.nativePQBackend})` : 'pure-JS (@noble)',
2232
+ batchVerify: batchVerify._poolReady
2233
+ ? `Worker pool (${batchVerify._workers.length} threads)${batchVerify._gpuAvailable ? ' + CUDA detected' : ''}`
2234
+ : batchVerify._gpuAvailable ? 'GPU (CUDA)' : 'CPU sequential',
2235
+ inference: inference.provider,
2236
+ },
2237
+ scheduler: scheduler.getStatus(),
2238
+ telemetry: t,
2239
+ };
2240
+ }
2241
+
2242
+ // =============================================================================
2243
+ // CONVENIENCE RE-EXPORTS
2244
+ // =============================================================================
2245
+
2246
+ // Re-export @noble utilities so consumers can import from accel
2247
+ export { bytesToHex, hexToBytes } from '@noble/hashes/utils.js';
2248
+
2249
+ // Direct pass-through for operations not yet accelerated
2250
+ export { randomBytes };
2251
+
/**
 * Default export: the whole acceleration API bundled into a single object.
 */
export default {
  // Lifecycle
  probe,
  initialize,

  // Hashing
  sha3_256,
  sha3_256hex,

  // ML-DSA-65 signatures
  mlDsa65Keygen,
  mlDsa65Sign,
  mlDsa65Verify,

  // ML-KEM-768 key encapsulation
  mlKem768Keygen,
  mlKem768Encapsulate,
  mlKem768Decapsulate,

  // Engines
  batchVerify,
  inference,
  scheduler,

  // Scheduler enums
  Priority,
  Device,
  Affinity,

  // Telemetry & status
  getTelemetry,
  resetTelemetry,
  getStatus,

  // Detected hardware capabilities
  HW,
};