@simulatte/doppler 0.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (1182)
  1. package/BRANDING.md +14 -0
  2. package/LICENSE +201 -0
  3. package/NOTICE +5 -0
  4. package/README.md +85 -0
  5. package/SECURITY.md +19 -0
  6. package/package.json +144 -0
  7. package/src/adapters/adapter-manager.d.ts +200 -0
  8. package/src/adapters/adapter-manager.js +509 -0
  9. package/src/adapters/adapter-manifest.d.ts +290 -0
  10. package/src/adapters/adapter-manifest.js +320 -0
  11. package/src/adapters/adapter-registry.d.ts +192 -0
  12. package/src/adapters/adapter-registry.js +466 -0
  13. package/src/adapters/index.d.ts +89 -0
  14. package/src/adapters/index.js +42 -0
  15. package/src/adapters/lora-loader.d.ts +105 -0
  16. package/src/adapters/lora-loader.js +397 -0
  17. package/src/bootstrap.d.ts +1 -0
  18. package/src/bootstrap.js +30 -0
  19. package/src/bridge/extension/background.d.ts +14 -0
  20. package/src/bridge/extension/background.js +168 -0
  21. package/src/bridge/extension/manifest.json +34 -0
  22. package/src/bridge/extension-client.d.ts +109 -0
  23. package/src/bridge/extension-client.js +369 -0
  24. package/src/bridge/index.d.ts +68 -0
  25. package/src/bridge/index.js +51 -0
  26. package/src/bridge/protocol.d.ts +96 -0
  27. package/src/bridge/protocol.js +130 -0
  28. package/src/browser/browser-converter.d.ts +71 -0
  29. package/src/browser/browser-converter.js +947 -0
  30. package/src/browser/file-picker.d.ts +63 -0
  31. package/src/browser/file-picker.js +275 -0
  32. package/src/browser/gguf-importer.d.ts +136 -0
  33. package/src/browser/gguf-importer.js +532 -0
  34. package/src/browser/gguf-parser-browser.d.ts +14 -0
  35. package/src/browser/gguf-parser-browser.js +17 -0
  36. package/src/browser/quantization.d.ts +69 -0
  37. package/src/browser/quantization.js +328 -0
  38. package/src/browser/safetensors-parser-browser.d.ts +193 -0
  39. package/src/browser/safetensors-parser-browser.js +264 -0
  40. package/src/browser/shard-io-browser.d.ts +57 -0
  41. package/src/browser/shard-io-browser.js +89 -0
  42. package/src/browser/tensor-source-download.d.ts +27 -0
  43. package/src/browser/tensor-source-download.js +239 -0
  44. package/src/browser/tensor-source-file.d.ts +26 -0
  45. package/src/browser/tensor-source-file.js +53 -0
  46. package/src/browser/tensor-source-http.d.ts +28 -0
  47. package/src/browser/tensor-source-http.js +126 -0
  48. package/src/client/doppler-provider/generation.d.ts +25 -0
  49. package/src/client/doppler-provider/generation.js +114 -0
  50. package/src/client/doppler-provider/index.d.ts +2 -0
  51. package/src/client/doppler-provider/index.js +3 -0
  52. package/src/client/doppler-provider/model-manager.d.ts +61 -0
  53. package/src/client/doppler-provider/model-manager.js +667 -0
  54. package/src/client/doppler-provider/provider.d.ts +5 -0
  55. package/src/client/doppler-provider/provider.js +102 -0
  56. package/src/client/doppler-provider/source-runtime.d.ts +22 -0
  57. package/src/client/doppler-provider/source-runtime.js +522 -0
  58. package/src/client/doppler-provider/types.d.ts +127 -0
  59. package/src/client/doppler-provider/types.js +17 -0
  60. package/src/client/doppler-provider.d.ts +46 -0
  61. package/src/client/doppler-provider.js +36 -0
  62. package/src/config/README.md +69 -0
  63. package/src/config/backward-registry-loader.d.ts +3 -0
  64. package/src/config/backward-registry-loader.js +8 -0
  65. package/src/config/index.d.ts +63 -0
  66. package/src/config/index.js +31 -0
  67. package/src/config/kernel-path-loader.d.ts +149 -0
  68. package/src/config/kernel-path-loader.js +534 -0
  69. package/src/config/kernels/backward-registry.json +99 -0
  70. package/src/config/kernels/kernel-ref-digests.d.ts +1 -0
  71. package/src/config/kernels/kernel-ref-digests.js +214 -0
  72. package/src/config/kernels/kernel-ref.d.ts +17 -0
  73. package/src/config/kernels/kernel-ref.js +75 -0
  74. package/src/config/kernels/moe/gpt-oss.paths.json +49 -0
  75. package/src/config/kernels/registry.d.ts +86 -0
  76. package/src/config/kernels/registry.js +103 -0
  77. package/src/config/kernels/registry.json +6771 -0
  78. package/src/config/loader.d.ts +57 -0
  79. package/src/config/loader.js +513 -0
  80. package/src/config/merge.d.ts +142 -0
  81. package/src/config/merge.js +389 -0
  82. package/src/config/param-categories.d.ts +17 -0
  83. package/src/config/param-categories.js +72 -0
  84. package/src/config/param-validator.d.ts +26 -0
  85. package/src/config/param-validator.js +235 -0
  86. package/src/config/platforms/amd-rdna3.json +16 -0
  87. package/src/config/platforms/apple-m1.json +16 -0
  88. package/src/config/platforms/apple-m2.json +16 -0
  89. package/src/config/platforms/apple-m3.json +16 -0
  90. package/src/config/platforms/generic.json +14 -0
  91. package/src/config/platforms/loader.d.ts +65 -0
  92. package/src/config/platforms/loader.js +153 -0
  93. package/src/config/platforms/nvidia-rtx30.json +16 -0
  94. package/src/config/platforms/nvidia-rtx40.json +16 -0
  95. package/src/config/presets/kernel-paths/embeddinggemma-f16-f32a.json +60 -0
  96. package/src/config/presets/kernel-paths/embeddinggemma-f32-f32a.json +60 -0
  97. package/src/config/presets/kernel-paths/embeddinggemma-q4k-dequant-f32a.json +60 -0
  98. package/src/config/presets/kernel-paths/gemma2-f16-f16a.json +61 -0
  99. package/src/config/presets/kernel-paths/gemma2-f16-f32a.json +60 -0
  100. package/src/config/presets/kernel-paths/gemma2-q4k-dequant-f16a.json +61 -0
  101. package/src/config/presets/kernel-paths/gemma2-q4k-dequant-f32a.json +60 -0
  102. package/src/config/presets/kernel-paths/gemma2-q4k-fused-f32a.json +57 -0
  103. package/src/config/presets/kernel-paths/gemma3-f16-fused-f16a-online.json +200 -0
  104. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online.json +223 -0
  105. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f16a-online.json +60 -0
  106. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-online.json +61 -0
  107. package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a.json +61 -0
  108. package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-online.json +61 -0
  109. package/src/config/presets/kernel-paths/registry.json +103 -0
  110. package/src/config/presets/models/deepseek.json +20 -0
  111. package/src/config/presets/models/diffusion.json +10 -0
  112. package/src/config/presets/models/embeddinggemma.json +74 -0
  113. package/src/config/presets/models/functiongemma.json +31 -0
  114. package/src/config/presets/models/gemma2.json +59 -0
  115. package/src/config/presets/models/gemma3.json +75 -0
  116. package/src/config/presets/models/gpt-oss.json +68 -0
  117. package/src/config/presets/models/kimi-k2.json +25 -0
  118. package/src/config/presets/models/lfm2.json +83 -0
  119. package/src/config/presets/models/llama3.json +40 -0
  120. package/src/config/presets/models/mamba.json +34 -0
  121. package/src/config/presets/models/mixtral.json +37 -0
  122. package/src/config/presets/models/modernbert.json +32 -0
  123. package/src/config/presets/models/qwen3.json +41 -0
  124. package/src/config/presets/models/transformer.json +73 -0
  125. package/src/config/presets/models/translategemma.json +30 -0
  126. package/src/config/presets/platforms/nvidia-gb200-8gpu.json +45 -0
  127. package/src/config/presets/platforms/nvidia-gb200-nvl72.json +45 -0
  128. package/src/config/presets/platforms/nvidia-gh200-nvl2.json +44 -0
  129. package/src/config/presets/platforms/nvidia-gh200.json +44 -0
  130. package/src/config/presets/runtime/compute/f16-activations.json +30 -0
  131. package/src/config/presets/runtime/compute/f16-batched.json +32 -0
  132. package/src/config/presets/runtime/default.json +101 -0
  133. package/src/config/presets/runtime/diagnostics/debug-logits.json +53 -0
  134. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +53 -0
  135. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +210 -0
  136. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +39 -0
  137. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +20 -0
  138. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +20 -0
  139. package/src/config/presets/runtime/kernels/fused-q4k.json +20 -0
  140. package/src/config/presets/runtime/kernels/safe-q4k.json +20 -0
  141. package/src/config/presets/runtime/model/gemma2-debug.json +77 -0
  142. package/src/config/presets/runtime/model/gemma2-pipeline-debug.json +66 -0
  143. package/src/config/presets/runtime/model/gemma2-pipeline.json +75 -0
  144. package/src/config/presets/runtime/model/gemma3-layer-probe.json +85 -0
  145. package/src/config/presets/runtime/modes/bench.json +37 -0
  146. package/src/config/presets/runtime/modes/debug.json +39 -0
  147. package/src/config/presets/runtime/modes/default.json +10 -0
  148. package/src/config/presets/runtime/modes/embedding-bench.json +28 -0
  149. package/src/config/presets/runtime/modes/embedding.json +54 -0
  150. package/src/config/presets/runtime/modes/low-memory.json +40 -0
  151. package/src/config/presets/runtime/modes/production.json +48 -0
  152. package/src/config/presets/runtime/modes/simulation.json +30 -0
  153. package/src/config/presets/runtime/modes/trace-layers.json +126 -0
  154. package/src/config/presets/runtime/platform/metal-apple-q4k.json +11 -0
  155. package/src/config/runtime-merge.d.ts +5 -0
  156. package/src/config/runtime-merge.js +21 -0
  157. package/src/config/runtime.d.ts +28 -0
  158. package/src/config/runtime.js +56 -0
  159. package/src/config/schema/adapter.schema.d.ts +53 -0
  160. package/src/config/schema/adapter.schema.js +60 -0
  161. package/src/config/schema/backward-registry.schema.d.ts +14 -0
  162. package/src/config/schema/backward-registry.schema.js +46 -0
  163. package/src/config/schema/benchmark.schema.d.ts +54 -0
  164. package/src/config/schema/benchmark.schema.js +74 -0
  165. package/src/config/schema/bridge.schema.d.ts +25 -0
  166. package/src/config/schema/bridge.schema.js +22 -0
  167. package/src/config/schema/buffer-pool.schema.d.ts +92 -0
  168. package/src/config/schema/buffer-pool.schema.js +50 -0
  169. package/src/config/schema/conversion.schema.d.ts +183 -0
  170. package/src/config/schema/conversion.schema.js +13 -0
  171. package/src/config/schema/converter.schema.d.ts +123 -0
  172. package/src/config/schema/converter.schema.js +136 -0
  173. package/src/config/schema/debug.schema.d.ts +245 -0
  174. package/src/config/schema/debug.schema.js +106 -0
  175. package/src/config/schema/diffusion.schema.d.ts +88 -0
  176. package/src/config/schema/diffusion.schema.js +62 -0
  177. package/src/config/schema/distill-training.schema.d.ts +48 -0
  178. package/src/config/schema/distill-training.schema.js +139 -0
  179. package/src/config/schema/distribution.schema.d.ts +155 -0
  180. package/src/config/schema/distribution.schema.js +81 -0
  181. package/src/config/schema/doppler.schema.d.ts +75 -0
  182. package/src/config/schema/doppler.schema.js +352 -0
  183. package/src/config/schema/ecosystem.schema.d.ts +255 -0
  184. package/src/config/schema/ecosystem.schema.js +534 -0
  185. package/src/config/schema/emulation.schema.d.ts +351 -0
  186. package/src/config/schema/emulation.schema.js +299 -0
  187. package/src/config/schema/energy.schema.d.ts +102 -0
  188. package/src/config/schema/energy.schema.js +72 -0
  189. package/src/config/schema/execution-v0.schema.d.ts +187 -0
  190. package/src/config/schema/execution-v0.schema.js +55 -0
  191. package/src/config/schema/gpu-cache.schema.d.ts +26 -0
  192. package/src/config/schema/gpu-cache.schema.js +8 -0
  193. package/src/config/schema/harness.schema.d.ts +32 -0
  194. package/src/config/schema/harness.schema.js +20 -0
  195. package/src/config/schema/hotswap.schema.d.ts +55 -0
  196. package/src/config/schema/hotswap.schema.js +18 -0
  197. package/src/config/schema/index.d.ts +863 -0
  198. package/src/config/schema/index.js +471 -0
  199. package/src/config/schema/inference-defaults.schema.d.ts +276 -0
  200. package/src/config/schema/inference-defaults.schema.js +185 -0
  201. package/src/config/schema/inference.schema.d.ts +289 -0
  202. package/src/config/schema/inference.schema.js +39 -0
  203. package/src/config/schema/intent-bundle.schema.d.ts +28 -0
  204. package/src/config/schema/intent-bundle.schema.js +12 -0
  205. package/src/config/schema/kernel-path.schema.d.ts +173 -0
  206. package/src/config/schema/kernel-path.schema.js +9 -0
  207. package/src/config/schema/kernel-registry.schema.d.ts +199 -0
  208. package/src/config/schema/kernel-registry.schema.js +46 -0
  209. package/src/config/schema/kernel-thresholds.schema.d.ts +302 -0
  210. package/src/config/schema/kernel-thresholds.schema.js +187 -0
  211. package/src/config/schema/kernel-warmup.schema.d.ts +19 -0
  212. package/src/config/schema/kernel-warmup.schema.js +5 -0
  213. package/src/config/schema/kvcache.schema.d.ts +131 -0
  214. package/src/config/schema/kvcache.schema.js +31 -0
  215. package/src/config/schema/loading.schema.d.ts +153 -0
  216. package/src/config/schema/loading.schema.js +84 -0
  217. package/src/config/schema/lora.schema.d.ts +12 -0
  218. package/src/config/schema/lora.schema.js +12 -0
  219. package/src/config/schema/manifest.schema.d.ts +500 -0
  220. package/src/config/schema/manifest.schema.js +130 -0
  221. package/src/config/schema/memory-limits.schema.d.ts +107 -0
  222. package/src/config/schema/memory-limits.schema.js +57 -0
  223. package/src/config/schema/moe.schema.d.ts +78 -0
  224. package/src/config/schema/moe.schema.js +31 -0
  225. package/src/config/schema/platform.schema.d.ts +121 -0
  226. package/src/config/schema/platform.schema.js +1 -0
  227. package/src/config/schema/preset.schema.d.ts +124 -0
  228. package/src/config/schema/preset.schema.js +1 -0
  229. package/src/config/schema/quantization-defaults.schema.d.ts +34 -0
  230. package/src/config/schema/quantization-defaults.schema.js +5 -0
  231. package/src/config/schema/quantization.schema.d.ts +10 -0
  232. package/src/config/schema/quantization.schema.js +33 -0
  233. package/src/config/schema/shared-runtime.schema.d.ts +75 -0
  234. package/src/config/schema/shared-runtime.schema.js +45 -0
  235. package/src/config/schema/speculative.schema.d.ts +21 -0
  236. package/src/config/schema/speculative.schema.js +11 -0
  237. package/src/config/schema/storage.schema.d.ts +123 -0
  238. package/src/config/schema/storage.schema.js +66 -0
  239. package/src/config/schema/tooling.schema.d.ts +29 -0
  240. package/src/config/schema/tooling.schema.js +12 -0
  241. package/src/config/schema/training-metrics.schema.d.ts +89 -0
  242. package/src/config/schema/training-metrics.schema.js +374 -0
  243. package/src/config/schema/training.schema.d.ts +88 -0
  244. package/src/config/schema/training.schema.js +106 -0
  245. package/src/config/schema/tuner.schema.d.ts +39 -0
  246. package/src/config/schema/tuner.schema.js +13 -0
  247. package/src/config/schema/ul-training.schema.d.ts +61 -0
  248. package/src/config/schema/ul-training.schema.js +140 -0
  249. package/src/config/schema/units.schema.d.ts +27 -0
  250. package/src/config/schema/units.schema.js +26 -0
  251. package/src/config/training-defaults.d.ts +24 -0
  252. package/src/config/training-defaults.js +91 -0
  253. package/src/converter/conversion-plan.d.ts +64 -0
  254. package/src/converter/conversion-plan.js +472 -0
  255. package/src/converter/core.d.ts +247 -0
  256. package/src/converter/core.js +1329 -0
  257. package/src/converter/execution-v0-manifest.d.ts +15 -0
  258. package/src/converter/execution-v0-manifest.js +146 -0
  259. package/src/converter/index.d.ts +98 -0
  260. package/src/converter/index.js +59 -0
  261. package/src/converter/manifest-inference.d.ts +20 -0
  262. package/src/converter/manifest-inference.js +492 -0
  263. package/src/converter/parsers/diffusion.d.ts +50 -0
  264. package/src/converter/parsers/diffusion.js +270 -0
  265. package/src/converter/parsers/gguf.d.ts +22 -0
  266. package/src/converter/parsers/gguf.js +46 -0
  267. package/src/converter/parsers/index.d.ts +21 -0
  268. package/src/converter/parsers/index.js +12 -0
  269. package/src/converter/parsers/transformer.d.ts +16 -0
  270. package/src/converter/parsers/transformer.js +25 -0
  271. package/src/converter/quantization-info.d.ts +37 -0
  272. package/src/converter/quantization-info.js +398 -0
  273. package/src/converter/quantizer.d.ts +96 -0
  274. package/src/converter/quantizer.js +422 -0
  275. package/src/converter/rope-config.d.ts +15 -0
  276. package/src/converter/rope-config.js +218 -0
  277. package/src/converter/shard-packer.d.ts +138 -0
  278. package/src/converter/shard-packer.js +422 -0
  279. package/src/converter/tokenizer-utils.d.ts +11 -0
  280. package/src/converter/tokenizer-utils.js +87 -0
  281. package/src/debug/config.d.ts +78 -0
  282. package/src/debug/config.js +235 -0
  283. package/src/debug/history.d.ts +65 -0
  284. package/src/debug/history.js +71 -0
  285. package/src/debug/index.d.ts +268 -0
  286. package/src/debug/index.js +192 -0
  287. package/src/debug/log.d.ts +46 -0
  288. package/src/debug/log.js +132 -0
  289. package/src/debug/perf.d.ts +33 -0
  290. package/src/debug/perf.js +51 -0
  291. package/src/debug/reference/README.md +114 -0
  292. package/src/debug/reference/hf_attn_debug.py +114 -0
  293. package/src/debug/reference/hf_embed_check.py +89 -0
  294. package/src/debug/reference/hf_layer_out.py +100 -0
  295. package/src/debug/reference/hf_rope_check.py +116 -0
  296. package/src/debug/reference/hf_weights.py +75 -0
  297. package/src/debug/signals.d.ts +63 -0
  298. package/src/debug/signals.js +33 -0
  299. package/src/debug/stats.d.ts +47 -0
  300. package/src/debug/stats.js +160 -0
  301. package/src/debug/tensor.d.ts +123 -0
  302. package/src/debug/tensor.js +257 -0
  303. package/src/debug/trace.d.ts +17 -0
  304. package/src/debug/trace.js +167 -0
  305. package/src/diffusion/image-regression.d.ts +31 -0
  306. package/src/diffusion/image-regression.js +107 -0
  307. package/src/diffusion/index.d.ts +8 -0
  308. package/src/diffusion/index.js +8 -0
  309. package/src/distribution/p2p-control-plane.d.ts +52 -0
  310. package/src/distribution/p2p-control-plane.js +232 -0
  311. package/src/distribution/p2p-observability.d.ts +116 -0
  312. package/src/distribution/p2p-observability.js +267 -0
  313. package/src/distribution/p2p-transport-contract.d.ts +57 -0
  314. package/src/distribution/p2p-transport-contract.js +310 -0
  315. package/src/distribution/p2p-webrtc-browser.d.ts +37 -0
  316. package/src/distribution/p2p-webrtc-browser.js +434 -0
  317. package/src/distribution/shard-delivery.d.ts +251 -0
  318. package/src/distribution/shard-delivery.js +2096 -0
  319. package/src/energy/index.d.ts +2 -0
  320. package/src/energy/index.js +2 -0
  321. package/src/errors/doppler-error.d.ts +21 -0
  322. package/src/errors/doppler-error.js +25 -0
  323. package/src/errors/index.d.ts +1 -0
  324. package/src/errors/index.js +1 -0
  325. package/src/formats/gguf/index.d.ts +8 -0
  326. package/src/formats/gguf/index.js +4 -0
  327. package/src/formats/gguf/types.d.ts +137 -0
  328. package/src/formats/gguf/types.js +443 -0
  329. package/src/formats/index.d.ts +51 -0
  330. package/src/formats/index.js +13 -0
  331. package/src/formats/rdrr/classification.d.ts +39 -0
  332. package/src/formats/rdrr/classification.js +275 -0
  333. package/src/formats/rdrr/groups.d.ts +27 -0
  334. package/src/formats/rdrr/groups.js +76 -0
  335. package/src/formats/rdrr/index.d.ts +25 -0
  336. package/src/formats/rdrr/index.js +19 -0
  337. package/src/formats/rdrr/manifest.d.ts +32 -0
  338. package/src/formats/rdrr/manifest.js +108 -0
  339. package/src/formats/rdrr/parsing.d.ts +23 -0
  340. package/src/formats/rdrr/parsing.js +101 -0
  341. package/src/formats/rdrr/tensor-config-validator.d.ts +42 -0
  342. package/src/formats/rdrr/tensor-config-validator.js +156 -0
  343. package/src/formats/rdrr/types.d.ts +200 -0
  344. package/src/formats/rdrr/types.js +16 -0
  345. package/src/formats/rdrr/validation.d.ts +9 -0
  346. package/src/formats/rdrr/validation.js +200 -0
  347. package/src/formats/safetensors/index.d.ts +8 -0
  348. package/src/formats/safetensors/index.js +4 -0
  349. package/src/formats/safetensors/types.d.ts +67 -0
  350. package/src/formats/safetensors/types.js +102 -0
  351. package/src/formats/tokenizer/index.d.ts +5 -0
  352. package/src/formats/tokenizer/index.js +3 -0
  353. package/src/formats/tokenizer/types.d.ts +9 -0
  354. package/src/formats/tokenizer/types.js +22 -0
  355. package/src/generation/index.d.ts +18 -0
  356. package/src/generation/index.js +12 -0
  357. package/src/gpu/command-recorder.d.ts +175 -0
  358. package/src/gpu/command-recorder.js +473 -0
  359. package/src/gpu/device.d.ts +141 -0
  360. package/src/gpu/device.js +350 -0
  361. package/src/gpu/kernel-runtime.d.ts +20 -0
  362. package/src/gpu/kernel-runtime.js +37 -0
  363. package/src/gpu/kernel-selection-cache.d.ts +13 -0
  364. package/src/gpu/kernel-selection-cache.js +13 -0
  365. package/src/gpu/kernel-selection-log.d.ts +12 -0
  366. package/src/gpu/kernel-selection-log.js +28 -0
  367. package/src/gpu/kernel-selector.d.ts +11 -0
  368. package/src/gpu/kernel-selector.js +10 -0
  369. package/src/gpu/kernel-tuner/benchmarks.d.ts +144 -0
  370. package/src/gpu/kernel-tuner/benchmarks.js +892 -0
  371. package/src/gpu/kernel-tuner/cache.d.ts +55 -0
  372. package/src/gpu/kernel-tuner/cache.js +66 -0
  373. package/src/gpu/kernel-tuner/index.d.ts +59 -0
  374. package/src/gpu/kernel-tuner/index.js +38 -0
  375. package/src/gpu/kernel-tuner/tuner.d.ts +82 -0
  376. package/src/gpu/kernel-tuner/tuner.js +229 -0
  377. package/src/gpu/kernel-tuner/types.d.ts +101 -0
  378. package/src/gpu/kernel-tuner/types.js +4 -0
  379. package/src/gpu/kernel-tuner.d.ts +33 -0
  380. package/src/gpu/kernel-tuner.js +12 -0
  381. package/src/gpu/kernels/README.md +127 -0
  382. package/src/gpu/kernels/attention.d.ts +236 -0
  383. package/src/gpu/kernels/attention.js +1359 -0
  384. package/src/gpu/kernels/attention.wgsl +249 -0
  385. package/src/gpu/kernels/attention_bdpa_decode_f16.wgsl +246 -0
  386. package/src/gpu/kernels/attention_decode.wgsl +233 -0
  387. package/src/gpu/kernels/attention_decode_chunked_f16.wgsl +183 -0
  388. package/src/gpu/kernels/attention_decode_chunked_f16kv.wgsl +208 -0
  389. package/src/gpu/kernels/attention_decode_f16.wgsl +202 -0
  390. package/src/gpu/kernels/attention_decode_f16kv.wgsl +224 -0
  391. package/src/gpu/kernels/attention_decode_online_f16.wgsl +223 -0
  392. package/src/gpu/kernels/attention_decode_online_f16kv.wgsl +225 -0
  393. package/src/gpu/kernels/attention_decode_optimized.wgsl +445 -0
  394. package/src/gpu/kernels/attention_decode_paged_f16.wgsl +172 -0
  395. package/src/gpu/kernels/attention_decode_paged_f16kv.wgsl +174 -0
  396. package/src/gpu/kernels/attention_decode_subgroup.wgsl +233 -0
  397. package/src/gpu/kernels/attention_decode_tiered_f16.wgsl +218 -0
  398. package/src/gpu/kernels/attention_decode_tiered_f16kv.wgsl +220 -0
  399. package/src/gpu/kernels/attention_decode_tiered_int4_f16kv.wgsl +242 -0
  400. package/src/gpu/kernels/attention_decode_tiered_int8_f16kv.wgsl +242 -0
  401. package/src/gpu/kernels/attention_f16.wgsl +214 -0
  402. package/src/gpu/kernels/attention_f16kv.wgsl +242 -0
  403. package/src/gpu/kernels/attention_small.wgsl +260 -0
  404. package/src/gpu/kernels/attention_small_f16.wgsl +240 -0
  405. package/src/gpu/kernels/attention_small_f16kv.wgsl +266 -0
  406. package/src/gpu/kernels/attention_streaming.wgsl +149 -0
  407. package/src/gpu/kernels/attention_streaming_f16.wgsl +147 -0
  408. package/src/gpu/kernels/attention_streaming_f16kv.wgsl +151 -0
  409. package/src/gpu/kernels/backward/adam.d.ts +28 -0
  410. package/src/gpu/kernels/backward/adam.js +199 -0
  411. package/src/gpu/kernels/backward/adam.wgsl +50 -0
  412. package/src/gpu/kernels/backward/attention_backward.d.ts +22 -0
  413. package/src/gpu/kernels/backward/attention_backward.js +276 -0
  414. package/src/gpu/kernels/backward/attention_backward.wgsl +49 -0
  415. package/src/gpu/kernels/backward/bias_add_backward.d.ts +17 -0
  416. package/src/gpu/kernels/backward/bias_add_backward.js +24 -0
  417. package/src/gpu/kernels/backward/bias_add_backward.wgsl +33 -0
  418. package/src/gpu/kernels/backward/conv2d_backward.d.ts +31 -0
  419. package/src/gpu/kernels/backward/conv2d_backward.js +135 -0
  420. package/src/gpu/kernels/backward/conv2d_backward_input.wgsl +83 -0
  421. package/src/gpu/kernels/backward/conv2d_backward_weight.wgsl +70 -0
  422. package/src/gpu/kernels/backward/cross_entropy_backward.d.ts +23 -0
  423. package/src/gpu/kernels/backward/cross_entropy_backward.js +29 -0
  424. package/src/gpu/kernels/backward/cross_entropy_backward.wgsl +39 -0
  425. package/src/gpu/kernels/backward/embed_backward.d.ts +29 -0
  426. package/src/gpu/kernels/backward/embed_backward.js +118 -0
  427. package/src/gpu/kernels/backward/embed_backward.wgsl +73 -0
  428. package/src/gpu/kernels/backward/gelu_backward.d.ts +16 -0
  429. package/src/gpu/kernels/backward/gelu_backward.js +39 -0
  430. package/src/gpu/kernels/backward/gelu_backward.wgsl +38 -0
  431. package/src/gpu/kernels/backward/groupnorm_backward.d.ts +24 -0
  432. package/src/gpu/kernels/backward/groupnorm_backward.js +29 -0
  433. package/src/gpu/kernels/backward/groupnorm_backward.wgsl +143 -0
  434. package/src/gpu/kernels/backward/index.d.ts +17 -0
  435. package/src/gpu/kernels/backward/index.js +23 -0
  436. package/src/gpu/kernels/backward/layernorm_backward.d.ts +22 -0
  437. package/src/gpu/kernels/backward/layernorm_backward.js +135 -0
  438. package/src/gpu/kernels/backward/layernorm_backward.wgsl +194 -0
  439. package/src/gpu/kernels/backward/matmul_backward.d.ts +32 -0
  440. package/src/gpu/kernels/backward/matmul_backward.js +124 -0
  441. package/src/gpu/kernels/backward/matmul_backward.wgsl +90 -0
  442. package/src/gpu/kernels/backward/matmul_transpose_a.wgsl +84 -0
  443. package/src/gpu/kernels/backward/pixel_shuffle_backward.d.ts +22 -0
  444. package/src/gpu/kernels/backward/pixel_shuffle_backward.js +30 -0
  445. package/src/gpu/kernels/backward/pixel_shuffle_backward.wgsl +54 -0
  446. package/src/gpu/kernels/backward/rmsnorm_backward.d.ts +24 -0
  447. package/src/gpu/kernels/backward/rmsnorm_backward.js +101 -0
  448. package/src/gpu/kernels/backward/rmsnorm_backward.wgsl +78 -0
  449. package/src/gpu/kernels/backward/rope_backward.d.ts +25 -0
  450. package/src/gpu/kernels/backward/rope_backward.js +109 -0
  451. package/src/gpu/kernels/backward/rope_backward.wgsl +59 -0
  452. package/src/gpu/kernels/backward/scale_backward.d.ts +16 -0
  453. package/src/gpu/kernels/backward/scale_backward.js +84 -0
  454. package/src/gpu/kernels/backward/scale_backward.wgsl +27 -0
  455. package/src/gpu/kernels/backward/silu_backward.d.ts +16 -0
  456. package/src/gpu/kernels/backward/silu_backward.js +39 -0
  457. package/src/gpu/kernels/backward/silu_backward.wgsl +31 -0
  458. package/src/gpu/kernels/backward/softmax_backward.d.ts +16 -0
  459. package/src/gpu/kernels/backward/softmax_backward.js +43 -0
  460. package/src/gpu/kernels/backward/softmax_backward.wgsl +44 -0
  461. package/src/gpu/kernels/backward/upsample2d_backward.d.ts +21 -0
  462. package/src/gpu/kernels/backward/upsample2d_backward.js +30 -0
  463. package/src/gpu/kernels/backward/upsample2d_backward.wgsl +59 -0
  464. package/src/gpu/kernels/backward/utils.d.ts +45 -0
  465. package/src/gpu/kernels/backward/utils.js +371 -0
  466. package/src/gpu/kernels/bf16_to_f16.wgsl +54 -0
  467. package/src/gpu/kernels/bf16_to_f32.wgsl +70 -0
  468. package/src/gpu/kernels/bias_add.wgsl +40 -0
  469. package/src/gpu/kernels/bias_add_f16.wgsl +44 -0
  470. package/src/gpu/kernels/cast.d.ts +67 -0
  471. package/src/gpu/kernels/cast.js +422 -0
  472. package/src/gpu/kernels/cast_f16_to_f32.wgsl +31 -0
  473. package/src/gpu/kernels/cast_f32_to_f16.wgsl +36 -0
  474. package/src/gpu/kernels/check-finiteness.d.ts +15 -0
  475. package/src/gpu/kernels/check-finiteness.js +149 -0
  476. package/src/gpu/kernels/check-stop.d.ts +31 -0
  477. package/src/gpu/kernels/check-stop.js +181 -0
  478. package/src/gpu/kernels/clamp.d.ts +22 -0
  479. package/src/gpu/kernels/clamp.js +42 -0
  480. package/src/gpu/kernels/clamp.wgsl +24 -0
  481. package/src/gpu/kernels/constants.d.ts +168 -0
  482. package/src/gpu/kernels/constants.js +129 -0
  483. package/src/gpu/kernels/conv2d.d.ts +34 -0
  484. package/src/gpu/kernels/conv2d.js +81 -0
  485. package/src/gpu/kernels/conv2d.wgsl +71 -0
  486. package/src/gpu/kernels/conv2d_f16.wgsl +73 -0
  487. package/src/gpu/kernels/cross_entropy_loss.d.ts +21 -0
  488. package/src/gpu/kernels/cross_entropy_loss.js +54 -0
  489. package/src/gpu/kernels/cross_entropy_loss.wgsl +39 -0
  490. package/src/gpu/kernels/dequant.d.ts +108 -0
  491. package/src/gpu/kernels/dequant.js +524 -0
  492. package/src/gpu/kernels/dequant_f16_out.wgsl +151 -0
  493. package/src/gpu/kernels/dequant_f16_out_vec4.wgsl +149 -0
  494. package/src/gpu/kernels/dequant_f16_rowwise.wgsl +139 -0
  495. package/src/gpu/kernels/dequant_f32_rowwise.wgsl +133 -0
  496. package/src/gpu/kernels/dequant_mxfp4.wgsl +120 -0
  497. package/src/gpu/kernels/dequant_mxfp4_expert.wgsl +129 -0
  498. package/src/gpu/kernels/dequant_mxfp4_expert_f16.wgsl +105 -0
  499. package/src/gpu/kernels/dequant_mxfp4_vec4.wgsl +116 -0
  500. package/src/gpu/kernels/dequant_q6k.wgsl +140 -0
  501. package/src/gpu/kernels/dequant_q8_0.wgsl +98 -0
  502. package/src/gpu/kernels/dequant_shared.wgsl +202 -0
  503. package/src/gpu/kernels/dequant_shared_vec4.wgsl +153 -0
  504. package/src/gpu/kernels/dequant_subgroup.wgsl +202 -0
  505. package/src/gpu/kernels/dispatch.d.ts +157 -0
  506. package/src/gpu/kernels/dispatch.js +235 -0
  507. package/src/gpu/kernels/energy.d.ts +131 -0
  508. package/src/gpu/kernels/energy.js +425 -0
  509. package/src/gpu/kernels/energy_eval.wgsl +26 -0
  510. package/src/gpu/kernels/energy_eval_f16.wgsl +30 -0
  511. package/src/gpu/kernels/energy_quintel_grad.wgsl +92 -0
  512. package/src/gpu/kernels/energy_quintel_grad_f16.wgsl +96 -0
  513. package/src/gpu/kernels/energy_quintel_reduce.wgsl +112 -0
  514. package/src/gpu/kernels/energy_quintel_reduce_f16.wgsl +116 -0
  515. package/src/gpu/kernels/energy_quintel_update.wgsl +92 -0
  516. package/src/gpu/kernels/energy_quintel_update_f16.wgsl +96 -0
  517. package/src/gpu/kernels/energy_update.wgsl +25 -0
  518. package/src/gpu/kernels/energy_update_f16.wgsl +30 -0
  519. package/src/gpu/kernels/feature-check.d.ts +42 -0
  520. package/src/gpu/kernels/feature-check.js +70 -0
  521. package/src/gpu/kernels/fused_ffn.d.ts +65 -0
  522. package/src/gpu/kernels/fused_ffn.js +318 -0
  523. package/src/gpu/kernels/fused_ffn.wgsl +420 -0
  524. package/src/gpu/kernels/fused_ffn_f16.wgsl +213 -0
  525. package/src/gpu/kernels/fused_ffn_q4k.wgsl +375 -0
  526. package/src/gpu/kernels/fused_matmul_q4.wgsl +404 -0
  527. package/src/gpu/kernels/fused_matmul_q4_batched.wgsl +194 -0
  528. package/src/gpu/kernels/fused_matmul_q4_batched_f16.wgsl +170 -0
  529. package/src/gpu/kernels/fused_matmul_q4_batched_f16a.wgsl +154 -0
  530. package/src/gpu/kernels/fused_matmul_q4_f16a.wgsl +219 -0
  531. package/src/gpu/kernels/fused_matmul_q4_multicol_f16.wgsl +216 -0
  532. package/src/gpu/kernels/fused_matmul_q4_multicol_f16a.wgsl +204 -0
  533. package/src/gpu/kernels/fused_matmul_residual.d.ts +46 -0
  534. package/src/gpu/kernels/fused_matmul_residual.js +152 -0
  535. package/src/gpu/kernels/fused_matmul_rmsnorm.d.ts +64 -0
  536. package/src/gpu/kernels/fused_matmul_rmsnorm.js +273 -0
  537. package/src/gpu/kernels/fused_matmul_rmsnorm.wgsl +324 -0
  538. package/src/gpu/kernels/fused_matmul_rmsnorm_f16.wgsl +303 -0
  539. package/src/gpu/kernels/fused_swiglu.wgsl +63 -0
  540. package/src/gpu/kernels/fused_swiglu_f16.wgsl +57 -0
  541. package/src/gpu/kernels/gather.d.ts +64 -0
  542. package/src/gpu/kernels/gather.js +119 -0
  543. package/src/gpu/kernels/gather.wgsl +61 -0
  544. package/src/gpu/kernels/gather_f16.wgsl +65 -0
  545. package/src/gpu/kernels/gather_f16_f16_out.wgsl +55 -0
  546. package/src/gpu/kernels/gather_f16_out.wgsl +55 -0
  547. package/src/gpu/kernels/gather_f16_vec4.wgsl +76 -0
  548. package/src/gpu/kernels/gather_f16_vec4_f16_out.wgsl +68 -0
  549. package/src/gpu/kernels/gather_vec4.wgsl +74 -0
  550. package/src/gpu/kernels/gather_vec4_f16_out.wgsl +68 -0
  551. package/src/gpu/kernels/gelu.d.ts +33 -0
  552. package/src/gpu/kernels/gelu.js +47 -0
  553. package/src/gpu/kernels/gelu.wgsl +64 -0
  554. package/src/gpu/kernels/gelu_f16.wgsl +66 -0
  555. package/src/gpu/kernels/gptoss_mxfp4_expert_fused.wgsl +127 -0
  556. package/src/gpu/kernels/gptoss_router_topk.wgsl +119 -0
  557. package/src/gpu/kernels/groupnorm.d.ts +31 -0
  558. package/src/gpu/kernels/groupnorm.js +91 -0
  559. package/src/gpu/kernels/groupnorm_apply.wgsl +41 -0
  560. package/src/gpu/kernels/groupnorm_apply_f16.wgsl +46 -0
  561. package/src/gpu/kernels/groupnorm_stats.wgsl +76 -0
  562. package/src/gpu/kernels/groupnorm_stats_f16.wgsl +79 -0
  563. package/src/gpu/kernels/index.d.ts +336 -0
  564. package/src/gpu/kernels/index.js +284 -0
  565. package/src/gpu/kernels/kernel-base.d.ts +33 -0
  566. package/src/gpu/kernels/kernel-base.js +46 -0
  567. package/src/gpu/kernels/kernel-configs.d.ts +65 -0
  568. package/src/gpu/kernels/kernel-configs.js +50 -0
  569. package/src/gpu/kernels/kernel-tuning.d.ts +42 -0
  570. package/src/gpu/kernels/kernel-tuning.js +149 -0
  571. package/src/gpu/kernels/kv-quantize.d.ts +37 -0
  572. package/src/gpu/kernels/kv-quantize.js +138 -0
  573. package/src/gpu/kernels/kv_quantize_int4.wgsl +119 -0
  574. package/src/gpu/kernels/kv_quantize_int8.wgsl +119 -0
  575. package/src/gpu/kernels/layernorm.d.ts +37 -0
  576. package/src/gpu/kernels/layernorm.js +80 -0
  577. package/src/gpu/kernels/layernorm.wgsl +121 -0
  578. package/src/gpu/kernels/layernorm_f16.wgsl +103 -0
  579. package/src/gpu/kernels/linear-attention-core.d.ts +39 -0
  580. package/src/gpu/kernels/linear-attention-core.js +535 -0
  581. package/src/gpu/kernels/logit-merge.d.ts +110 -0
  582. package/src/gpu/kernels/logit-merge.js +392 -0
  583. package/src/gpu/kernels/matmul-dispatch.d.ts +38 -0
  584. package/src/gpu/kernels/matmul-dispatch.js +155 -0
  585. package/src/gpu/kernels/matmul-selection.d.ts +87 -0
  586. package/src/gpu/kernels/matmul-selection.js +474 -0
  587. package/src/gpu/kernels/matmul.d.ts +109 -0
  588. package/src/gpu/kernels/matmul.js +271 -0
  589. package/src/gpu/kernels/matmul_f16.wgsl +170 -0
  590. package/src/gpu/kernels/matmul_f16_tiled.wgsl +165 -0
  591. package/src/gpu/kernels/matmul_f16w_f32a.wgsl +89 -0
  592. package/src/gpu/kernels/matmul_f16w_f32a_tiled.wgsl +154 -0
  593. package/src/gpu/kernels/matmul_f32.wgsl +100 -0
  594. package/src/gpu/kernels/matmul_gemv.wgsl +80 -0
  595. package/src/gpu/kernels/matmul_gemv_f16a.wgsl +81 -0
  596. package/src/gpu/kernels/matmul_gemv_residual.wgsl +119 -0
  597. package/src/gpu/kernels/matmul_gemv_residual_f16.wgsl +78 -0
  598. package/src/gpu/kernels/matmul_gemv_subgroup.wgsl +345 -0
  599. package/src/gpu/kernels/matmul_gemv_subgroup_f16a.wgsl +514 -0
  600. package/src/gpu/kernels/modulate.d.ts +29 -0
  601. package/src/gpu/kernels/modulate.js +49 -0
  602. package/src/gpu/kernels/modulate.wgsl +40 -0
  603. package/src/gpu/kernels/modulate_f16.wgsl +43 -0
  604. package/src/gpu/kernels/moe.d.ts +164 -0
  605. package/src/gpu/kernels/moe.js +496 -0
  606. package/src/gpu/kernels/moe_gather.wgsl +170 -0
  607. package/src/gpu/kernels/moe_gather_f16.wgsl +82 -0
  608. package/src/gpu/kernels/moe_gather_vec4.wgsl +74 -0
  609. package/src/gpu/kernels/moe_offsets.wgsl +48 -0
  610. package/src/gpu/kernels/pipeline-cache.d.ts +88 -0
  611. package/src/gpu/kernels/pipeline-cache.js +305 -0
  612. package/src/gpu/kernels/pixel_shuffle.d.ts +27 -0
  613. package/src/gpu/kernels/pixel_shuffle.js +49 -0
  614. package/src/gpu/kernels/pixel_shuffle.wgsl +44 -0
  615. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +47 -0
  616. package/src/gpu/kernels/residual.d.ts +74 -0
  617. package/src/gpu/kernels/residual.js +127 -0
  618. package/src/gpu/kernels/residual.wgsl +53 -0
  619. package/src/gpu/kernels/residual_f16.wgsl +35 -0
  620. package/src/gpu/kernels/residual_f16_vec4.wgsl +47 -0
  621. package/src/gpu/kernels/residual_vec4.wgsl +46 -0
  622. package/src/gpu/kernels/rmsnorm.d.ts +53 -0
  623. package/src/gpu/kernels/rmsnorm.js +140 -0
  624. package/src/gpu/kernels/rmsnorm.wgsl +417 -0
  625. package/src/gpu/kernels/rmsnorm_f16.wgsl +164 -0
  626. package/src/gpu/kernels/rope.d.ts +48 -0
  627. package/src/gpu/kernels/rope.js +53 -0
  628. package/src/gpu/kernels/rope.wgsl +328 -0
  629. package/src/gpu/kernels/rope_f16.wgsl +271 -0
  630. package/src/gpu/kernels/rule-matcher.d.ts +30 -0
  631. package/src/gpu/kernels/rule-matcher.js +42 -0
  632. package/src/gpu/kernels/rule-registry.d.ts +7 -0
  633. package/src/gpu/kernels/rule-registry.js +41 -0
  634. package/src/gpu/kernels/sample.d.ts +75 -0
  635. package/src/gpu/kernels/sample.js +578 -0
  636. package/src/gpu/kernels/sample.wgsl +377 -0
  637. package/src/gpu/kernels/sample_f16.wgsl +331 -0
  638. package/src/gpu/kernels/scale.d.ts +35 -0
  639. package/src/gpu/kernels/scale.js +37 -0
  640. package/src/gpu/kernels/scale.wgsl +38 -0
  641. package/src/gpu/kernels/scatter_add.wgsl +88 -0
  642. package/src/gpu/kernels/scatter_add_dynamic.wgsl +59 -0
  643. package/src/gpu/kernels/scatter_add_dynamic_f16.wgsl +52 -0
  644. package/src/gpu/kernels/scatter_add_dynamic_f16_weights.wgsl +50 -0
  645. package/src/gpu/kernels/scatter_add_vec4.wgsl +70 -0
  646. package/src/gpu/kernels/shader-cache.d.ts +56 -0
  647. package/src/gpu/kernels/shader-cache.js +206 -0
  648. package/src/gpu/kernels/silu.d.ts +75 -0
  649. package/src/gpu/kernels/silu.js +340 -0
  650. package/src/gpu/kernels/silu.wgsl +99 -0
  651. package/src/gpu/kernels/silu_f16.wgsl +98 -0
  652. package/src/gpu/kernels/softmax.d.ts +57 -0
  653. package/src/gpu/kernels/softmax.js +106 -0
  654. package/src/gpu/kernels/softmax.wgsl +388 -0
  655. package/src/gpu/kernels/softmax_subgroup.wgsl +175 -0
  656. package/src/gpu/kernels/split_qkv.d.ts +51 -0
  657. package/src/gpu/kernels/split_qkv.js +41 -0
  658. package/src/gpu/kernels/split_qkv.wgsl +71 -0
  659. package/src/gpu/kernels/split_qkv_f16.wgsl +75 -0
  660. package/src/gpu/kernels/topk.wgsl +243 -0
  661. package/src/gpu/kernels/topk_f16.wgsl +108 -0
  662. package/src/gpu/kernels/topk_f16_weights.wgsl +101 -0
  663. package/src/gpu/kernels/transpose.d.ts +21 -0
  664. package/src/gpu/kernels/transpose.js +30 -0
  665. package/src/gpu/kernels/transpose.wgsl +32 -0
  666. package/src/gpu/kernels/types.d.ts +21 -0
  667. package/src/gpu/kernels/types.js +4 -0
  668. package/src/gpu/kernels/uniform-utils.d.ts +48 -0
  669. package/src/gpu/kernels/uniform-utils.js +94 -0
  670. package/src/gpu/kernels/upsample2d.d.ts +25 -0
  671. package/src/gpu/kernels/upsample2d.js +58 -0
  672. package/src/gpu/kernels/upsample2d.wgsl +37 -0
  673. package/src/gpu/kernels/upsample2d_f16.wgsl +41 -0
  674. package/src/gpu/kernels/utils.d.ts +106 -0
  675. package/src/gpu/kernels/utils.js +224 -0
  676. package/src/gpu/multi-model-recorder.d.ts +21 -0
  677. package/src/gpu/multi-model-recorder.js +31 -0
  678. package/src/gpu/partitioned-buffer-pool.d.ts +28 -0
  679. package/src/gpu/partitioned-buffer-pool.js +49 -0
  680. package/src/gpu/perf-guards.d.ts +25 -0
  681. package/src/gpu/perf-guards.js +140 -0
  682. package/src/gpu/profiler.d.ts +114 -0
  683. package/src/gpu/profiler.js +391 -0
  684. package/src/gpu/submit-tracker.d.ts +111 -0
  685. package/src/gpu/submit-tracker.js +229 -0
  686. package/src/gpu/tensor.d.ts +69 -0
  687. package/src/gpu/tensor.js +75 -0
  688. package/src/gpu/uniform-cache.d.ts +108 -0
  689. package/src/gpu/uniform-cache.js +242 -0
  690. package/src/gpu/weight-buffer.d.ts +115 -0
  691. package/src/gpu/weight-buffer.js +118 -0
  692. package/src/hotswap/intent-bundle.d.ts +37 -0
  693. package/src/hotswap/intent-bundle.js +123 -0
  694. package/src/hotswap/manifest.d.ts +33 -0
  695. package/src/hotswap/manifest.js +114 -0
  696. package/src/hotswap/runtime.d.ts +31 -0
  697. package/src/hotswap/runtime.js +128 -0
  698. package/src/index-browser.d.ts +47 -0
  699. package/src/index-browser.js +53 -0
  700. package/src/index-internal.d.ts +2 -0
  701. package/src/index-internal.js +2 -0
  702. package/src/index.d.ts +102 -0
  703. package/src/index.js +75 -0
  704. package/src/inference/README.md +593 -0
  705. package/src/inference/browser-harness.d.ts +234 -0
  706. package/src/inference/browser-harness.js +2665 -0
  707. package/src/inference/decode-buffers.d.ts +108 -0
  708. package/src/inference/decode-buffers.js +181 -0
  709. package/src/inference/decode-ring.d.ts +52 -0
  710. package/src/inference/decode-ring.js +273 -0
  711. package/src/inference/expert-router.d.ts +27 -0
  712. package/src/inference/expert-router.js +55 -0
  713. package/src/inference/functiongemma.d.ts +15 -0
  714. package/src/inference/functiongemma.js +1 -0
  715. package/src/inference/kv-cache/base.d.ts +150 -0
  716. package/src/inference/kv-cache/base.js +1037 -0
  717. package/src/inference/kv-cache/basis-decomposed-paged.d.ts +50 -0
  718. package/src/inference/kv-cache/basis-decomposed-paged.js +276 -0
  719. package/src/inference/kv-cache/index.d.ts +35 -0
  720. package/src/inference/kv-cache/index.js +20 -0
  721. package/src/inference/kv-cache/sliding-window.d.ts +72 -0
  722. package/src/inference/kv-cache/sliding-window.js +243 -0
  723. package/src/inference/kv-cache/tiered.d.ts +89 -0
  724. package/src/inference/kv-cache/tiered.js +574 -0
  725. package/src/inference/kv-cache/types.d.ts +188 -0
  726. package/src/inference/kv-cache/types.js +80 -0
  727. package/src/inference/kv-cache.d.ts +36 -0
  728. package/src/inference/kv-cache.js +18 -0
  729. package/src/inference/moe-router.d.ts +212 -0
  730. package/src/inference/moe-router.js +553 -0
  731. package/src/inference/multi-model-network.d.ts +139 -0
  732. package/src/inference/multi-model-network.js +769 -0
  733. package/src/inference/multi-pipeline-pool.d.ts +62 -0
  734. package/src/inference/multi-pipeline-pool.js +161 -0
  735. package/src/inference/network-evolution.d.ts +46 -0
  736. package/src/inference/network-evolution.js +80 -0
  737. package/src/inference/pipelines/context.d.ts +18 -0
  738. package/src/inference/pipelines/context.js +44 -0
  739. package/src/inference/pipelines/diffusion/helpers.d.ts +29 -0
  740. package/src/inference/pipelines/diffusion/helpers.js +112 -0
  741. package/src/inference/pipelines/diffusion/index.d.ts +3 -0
  742. package/src/inference/pipelines/diffusion/index.js +3 -0
  743. package/src/inference/pipelines/diffusion/init.d.ts +24 -0
  744. package/src/inference/pipelines/diffusion/init.js +124 -0
  745. package/src/inference/pipelines/diffusion/pipeline.d.ts +38 -0
  746. package/src/inference/pipelines/diffusion/pipeline.js +632 -0
  747. package/src/inference/pipelines/diffusion/scheduler.d.ts +19 -0
  748. package/src/inference/pipelines/diffusion/scheduler.js +65 -0
  749. package/src/inference/pipelines/diffusion/sd3-transformer.d.ts +20 -0
  750. package/src/inference/pipelines/diffusion/sd3-transformer.js +1194 -0
  751. package/src/inference/pipelines/diffusion/sd3-weights.d.ts +21 -0
  752. package/src/inference/pipelines/diffusion/sd3-weights.js +287 -0
  753. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +80 -0
  754. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +935 -0
  755. package/src/inference/pipelines/diffusion/text-encoder.d.ts +29 -0
  756. package/src/inference/pipelines/diffusion/text-encoder.js +178 -0
  757. package/src/inference/pipelines/diffusion/types.d.ts +112 -0
  758. package/src/inference/pipelines/diffusion/types.js +1 -0
  759. package/src/inference/pipelines/diffusion/vae.d.ts +20 -0
  760. package/src/inference/pipelines/diffusion/vae.js +675 -0
  761. package/src/inference/pipelines/diffusion/weights.d.ts +40 -0
  762. package/src/inference/pipelines/diffusion/weights.js +150 -0
  763. package/src/inference/pipelines/dream/energy-head-pipeline.d.ts +29 -0
  764. package/src/inference/pipelines/dream/energy-head-pipeline.js +6 -0
  765. package/src/inference/pipelines/dream/pipeline.d.ts +17 -0
  766. package/src/inference/pipelines/dream/pipeline.js +8 -0
  767. package/src/inference/pipelines/energy/index.d.ts +1 -0
  768. package/src/inference/pipelines/energy/index.js +1 -0
  769. package/src/inference/pipelines/energy/pipeline.d.ts +27 -0
  770. package/src/inference/pipelines/energy/pipeline.js +680 -0
  771. package/src/inference/pipelines/energy/quintel.d.ts +87 -0
  772. package/src/inference/pipelines/energy/quintel.js +207 -0
  773. package/src/inference/pipelines/energy/types.d.ts +63 -0
  774. package/src/inference/pipelines/energy/types.js +1 -0
  775. package/src/inference/pipelines/energy-head/index.d.ts +6 -0
  776. package/src/inference/pipelines/energy-head/index.js +6 -0
  777. package/src/inference/pipelines/energy-head/row-head-pipeline.d.ts +103 -0
  778. package/src/inference/pipelines/energy-head/row-head-pipeline.js +487 -0
  779. package/src/inference/pipelines/factory.d.ts +10 -0
  780. package/src/inference/pipelines/factory.js +6 -0
  781. package/src/inference/pipelines/index.d.ts +22 -0
  782. package/src/inference/pipelines/index.js +19 -0
  783. package/src/inference/pipelines/registry.d.ts +15 -0
  784. package/src/inference/pipelines/registry.js +23 -0
  785. package/src/inference/pipelines/rng.d.ts +2 -0
  786. package/src/inference/pipelines/rng.js +17 -0
  787. package/src/inference/pipelines/structured/index.d.ts +8 -0
  788. package/src/inference/pipelines/structured/index.js +8 -0
  789. package/src/inference/pipelines/structured/json-head-pipeline.d.ts +58 -0
  790. package/src/inference/pipelines/structured/json-head-pipeline.js +181 -0
  791. package/src/inference/pipelines/text/attention/index.d.ts +24 -0
  792. package/src/inference/pipelines/text/attention/index.js +17 -0
  793. package/src/inference/pipelines/text/attention/projections.d.ts +101 -0
  794. package/src/inference/pipelines/text/attention/projections.js +435 -0
  795. package/src/inference/pipelines/text/attention/record.d.ts +36 -0
  796. package/src/inference/pipelines/text/attention/record.js +613 -0
  797. package/src/inference/pipelines/text/attention/run.d.ts +38 -0
  798. package/src/inference/pipelines/text/attention/run.js +826 -0
  799. package/src/inference/pipelines/text/attention/types.d.ts +98 -0
  800. package/src/inference/pipelines/text/attention/types.js +67 -0
  801. package/src/inference/pipelines/text/attention.d.ts +23 -0
  802. package/src/inference/pipelines/text/attention.js +12 -0
  803. package/src/inference/pipelines/text/bdpa-steamroller.d.ts +22 -0
  804. package/src/inference/pipelines/text/bdpa-steamroller.js +158 -0
  805. package/src/inference/pipelines/text/buffer-types.d.ts +7 -0
  806. package/src/inference/pipelines/text/buffer-types.js +4 -0
  807. package/src/inference/pipelines/text/chat-format.d.ts +46 -0
  808. package/src/inference/pipelines/text/chat-format.js +366 -0
  809. package/src/inference/pipelines/text/config.d.ts +235 -0
  810. package/src/inference/pipelines/text/config.js +623 -0
  811. package/src/inference/pipelines/text/debug-utils/config.d.ts +144 -0
  812. package/src/inference/pipelines/text/debug-utils/config.js +156 -0
  813. package/src/inference/pipelines/text/debug-utils/index.d.ts +53 -0
  814. package/src/inference/pipelines/text/debug-utils/index.js +44 -0
  815. package/src/inference/pipelines/text/debug-utils/logging.d.ts +106 -0
  816. package/src/inference/pipelines/text/debug-utils/logging.js +152 -0
  817. package/src/inference/pipelines/text/debug-utils/tensor.d.ts +119 -0
  818. package/src/inference/pipelines/text/debug-utils/tensor.js +268 -0
  819. package/src/inference/pipelines/text/debug-utils/utils.d.ts +77 -0
  820. package/src/inference/pipelines/text/debug-utils/utils.js +139 -0
  821. package/src/inference/pipelines/text/debug-utils.d.ts +42 -0
  822. package/src/inference/pipelines/text/debug-utils.js +34 -0
  823. package/src/inference/pipelines/text/embed.d.ts +67 -0
  824. package/src/inference/pipelines/text/embed.js +461 -0
  825. package/src/inference/pipelines/text/execution-plan.d.ts +116 -0
  826. package/src/inference/pipelines/text/execution-plan.js +314 -0
  827. package/src/inference/pipelines/text/execution-v0.d.ts +66 -0
  828. package/src/inference/pipelines/text/execution-v0.js +1139 -0
  829. package/src/inference/pipelines/text/ffn/dense.d.ts +40 -0
  830. package/src/inference/pipelines/text/ffn/dense.js +759 -0
  831. package/src/inference/pipelines/text/ffn/index.d.ts +23 -0
  832. package/src/inference/pipelines/text/ffn/index.js +16 -0
  833. package/src/inference/pipelines/text/ffn/moe.d.ts +21 -0
  834. package/src/inference/pipelines/text/ffn/moe.js +49 -0
  835. package/src/inference/pipelines/text/ffn/sandwich.d.ts +25 -0
  836. package/src/inference/pipelines/text/ffn/sandwich.js +196 -0
  837. package/src/inference/pipelines/text/ffn/standard.d.ts +23 -0
  838. package/src/inference/pipelines/text/ffn/standard.js +84 -0
  839. package/src/inference/pipelines/text/ffn/types.d.ts +30 -0
  840. package/src/inference/pipelines/text/ffn/types.js +25 -0
  841. package/src/inference/pipelines/text/ffn.d.ts +31 -0
  842. package/src/inference/pipelines/text/ffn.js +18 -0
  843. package/src/inference/pipelines/text/finiteness-guard-status.d.ts +11 -0
  844. package/src/inference/pipelines/text/finiteness-guard-status.js +21 -0
  845. package/src/inference/pipelines/text/finiteness-policy.d.ts +35 -0
  846. package/src/inference/pipelines/text/finiteness-policy.js +45 -0
  847. package/src/inference/pipelines/text/generator-helpers.d.ts +34 -0
  848. package/src/inference/pipelines/text/generator-helpers.js +175 -0
  849. package/src/inference/pipelines/text/generator-runtime.d.ts +93 -0
  850. package/src/inference/pipelines/text/generator-runtime.js +373 -0
  851. package/src/inference/pipelines/text/generator-steps.d.ts +75 -0
  852. package/src/inference/pipelines/text/generator-steps.js +1078 -0
  853. package/src/inference/pipelines/text/generator.d.ts +41 -0
  854. package/src/inference/pipelines/text/generator.js +1345 -0
  855. package/src/inference/pipelines/text/index.d.ts +5 -0
  856. package/src/inference/pipelines/text/index.js +6 -0
  857. package/src/inference/pipelines/text/init.d.ts +295 -0
  858. package/src/inference/pipelines/text/init.js +965 -0
  859. package/src/inference/pipelines/text/kernel-path-auto-select.d.ts +12 -0
  860. package/src/inference/pipelines/text/kernel-path-auto-select.js +90 -0
  861. package/src/inference/pipelines/text/kernel-trace.d.ts +150 -0
  862. package/src/inference/pipelines/text/kernel-trace.js +324 -0
  863. package/src/inference/pipelines/text/layer-plan.d.ts +65 -0
  864. package/src/inference/pipelines/text/layer-plan.js +249 -0
  865. package/src/inference/pipelines/text/layer.d.ts +56 -0
  866. package/src/inference/pipelines/text/layer.js +916 -0
  867. package/src/inference/pipelines/text/linear-attention.d.ts +94 -0
  868. package/src/inference/pipelines/text/linear-attention.js +803 -0
  869. package/src/inference/pipelines/text/logits/cpu.d.ts +81 -0
  870. package/src/inference/pipelines/text/logits/cpu.js +91 -0
  871. package/src/inference/pipelines/text/logits/gpu.d.ts +113 -0
  872. package/src/inference/pipelines/text/logits/gpu.js +406 -0
  873. package/src/inference/pipelines/text/logits/index.d.ts +57 -0
  874. package/src/inference/pipelines/text/logits/index.js +305 -0
  875. package/src/inference/pipelines/text/logits/types.d.ts +46 -0
  876. package/src/inference/pipelines/text/logits/types.js +4 -0
  877. package/src/inference/pipelines/text/logits/utils.d.ts +49 -0
  878. package/src/inference/pipelines/text/logits/utils.js +59 -0
  879. package/src/inference/pipelines/text/logits.d.ts +27 -0
  880. package/src/inference/pipelines/text/logits.js +16 -0
  881. package/src/inference/pipelines/text/lora-apply.d.ts +28 -0
  882. package/src/inference/pipelines/text/lora-apply.js +58 -0
  883. package/src/inference/pipelines/text/lora-types.d.ts +39 -0
  884. package/src/inference/pipelines/text/lora-types.js +18 -0
  885. package/src/inference/pipelines/text/lora.d.ts +18 -0
  886. package/src/inference/pipelines/text/lora.js +12 -0
  887. package/src/inference/pipelines/text/model-load.d.ts +58 -0
  888. package/src/inference/pipelines/text/model-load.js +561 -0
  889. package/src/inference/pipelines/text/moe-cache.d.ts +32 -0
  890. package/src/inference/pipelines/text/moe-cache.js +107 -0
  891. package/src/inference/pipelines/text/moe-cpu-gptoss.d.ts +9 -0
  892. package/src/inference/pipelines/text/moe-cpu-gptoss.js +110 -0
  893. package/src/inference/pipelines/text/moe-cpu.d.ts +13 -0
  894. package/src/inference/pipelines/text/moe-cpu.js +116 -0
  895. package/src/inference/pipelines/text/moe-gpu.d.ts +13 -0
  896. package/src/inference/pipelines/text/moe-gpu.js +611 -0
  897. package/src/inference/pipelines/text/moe-helpers.d.ts +12 -0
  898. package/src/inference/pipelines/text/moe-helpers.js +21 -0
  899. package/src/inference/pipelines/text/moe-impl.d.ts +117 -0
  900. package/src/inference/pipelines/text/moe-impl.js +9 -0
  901. package/src/inference/pipelines/text/moe-shape-validator.d.ts +31 -0
  902. package/src/inference/pipelines/text/moe-shape-validator.js +78 -0
  903. package/src/inference/pipelines/text/ops.d.ts +167 -0
  904. package/src/inference/pipelines/text/ops.js +367 -0
  905. package/src/inference/pipelines/text/probes.d.ts +31 -0
  906. package/src/inference/pipelines/text/probes.js +170 -0
  907. package/src/inference/pipelines/text/sampling.d.ts +54 -0
  908. package/src/inference/pipelines/text/sampling.js +203 -0
  909. package/src/inference/pipelines/text/state.d.ts +112 -0
  910. package/src/inference/pipelines/text/state.js +152 -0
  911. package/src/inference/pipelines/text/types.d.ts +627 -0
  912. package/src/inference/pipelines/text/types.js +4 -0
  913. package/src/inference/pipelines/text/weights.d.ts +110 -0
  914. package/src/inference/pipelines/text/weights.js +163 -0
  915. package/src/inference/pipelines/text.d.ts +157 -0
  916. package/src/inference/pipelines/text.js +586 -0
  917. package/src/inference/speculative.d.ts +239 -0
  918. package/src/inference/speculative.js +416 -0
  919. package/src/inference/test-harness.d.ts +178 -0
  920. package/src/inference/test-harness.js +349 -0
  921. package/src/inference/tokenizer.d.ts +77 -0
  922. package/src/inference/tokenizer.js +258 -0
  923. package/src/inference/tokenizers/base.d.ts +39 -0
  924. package/src/inference/tokenizers/base.js +69 -0
  925. package/src/inference/tokenizers/bpe.d.ts +27 -0
  926. package/src/inference/tokenizers/bpe.js +171 -0
  927. package/src/inference/tokenizers/bundled.d.ts +63 -0
  928. package/src/inference/tokenizers/bundled.js +866 -0
  929. package/src/inference/tokenizers/sentencepiece.d.ts +28 -0
  930. package/src/inference/tokenizers/sentencepiece.js +389 -0
  931. package/src/inference/tokenizers/types.d.ts +166 -0
  932. package/src/inference/tokenizers/types.js +7 -0
  933. package/src/loader/doppler-loader.d.ts +134 -0
  934. package/src/loader/doppler-loader.js +1036 -0
  935. package/src/loader/dtype-utils.d.ts +40 -0
  936. package/src/loader/dtype-utils.js +102 -0
  937. package/src/loader/embedding-loader.d.ts +56 -0
  938. package/src/loader/embedding-loader.js +207 -0
  939. package/src/loader/experts/expert-cache.d.ts +156 -0
  940. package/src/loader/experts/expert-cache.js +375 -0
  941. package/src/loader/experts/expert-loader.d.ts +108 -0
  942. package/src/loader/experts/expert-loader.js +384 -0
  943. package/src/loader/final-weights-loader.d.ts +68 -0
  944. package/src/loader/final-weights-loader.js +262 -0
  945. package/src/loader/index.d.ts +150 -0
  946. package/src/loader/index.js +124 -0
  947. package/src/loader/layer-loader.d.ts +63 -0
  948. package/src/loader/layer-loader.js +417 -0
  949. package/src/loader/loader-state.d.ts +51 -0
  950. package/src/loader/loader-state.js +142 -0
  951. package/src/loader/loader-types.d.ts +236 -0
  952. package/src/loader/loader-types.js +4 -0
  953. package/src/loader/manifest-config.d.ts +97 -0
  954. package/src/loader/manifest-config.js +132 -0
  955. package/src/loader/memory-monitor.d.ts +112 -0
  956. package/src/loader/memory-monitor.js +276 -0
  957. package/src/loader/multi-model-loader.d.ts +37 -0
  958. package/src/loader/multi-model-loader.js +87 -0
  959. package/src/loader/quantization-constants.d.ts +23 -0
  960. package/src/loader/quantization-constants.js +14 -0
  961. package/src/loader/shard-cache.d.ts +60 -0
  962. package/src/loader/shard-cache.js +568 -0
  963. package/src/loader/shard-resolver.d.ts +12 -0
  964. package/src/loader/shard-resolver.js +83 -0
  965. package/src/loader/tensors/tensor-loader.d.ts +154 -0
  966. package/src/loader/tensors/tensor-loader.js +427 -0
  967. package/src/loader/tensors/tensor-reader.d.ts +22 -0
  968. package/src/loader/tensors/tensor-reader.js +56 -0
  969. package/src/loader/tensors/tensor-role.d.ts +7 -0
  970. package/src/loader/tensors/tensor-role.js +12 -0
  971. package/src/loader/weight-downcast.d.ts +62 -0
  972. package/src/loader/weight-downcast.js +213 -0
  973. package/src/loader/weights.d.ts +22 -0
  974. package/src/loader/weights.js +4 -0
  975. package/src/memory/address-table.d.ts +104 -0
  976. package/src/memory/address-table.js +114 -0
  977. package/src/memory/buffer-pool.d.ts +196 -0
  978. package/src/memory/buffer-pool.js +756 -0
  979. package/src/memory/capability.d.ts +49 -0
  980. package/src/memory/capability.js +95 -0
  981. package/src/memory/heap-manager.d.ts +104 -0
  982. package/src/memory/heap-manager.js +264 -0
  983. package/src/memory/unified-detect.d.ts +59 -0
  984. package/src/memory/unified-detect.js +192 -0
  985. package/src/rules/converter/execution.rules.json +20 -0
  986. package/src/rules/converter/tensor-roles.rules.json +13 -0
  987. package/src/rules/converter/tokenizer.rules.json +7 -0
  988. package/src/rules/inference/attention.rules.json +54 -0
  989. package/src/rules/inference/config.rules.json +58 -0
  990. package/src/rules/inference/dtype.rules.json +94 -0
  991. package/src/rules/inference/execution.rules.json +45 -0
  992. package/src/rules/inference/ffn.rules.json +35 -0
  993. package/src/rules/inference/kernel-path.rules.json +76 -0
  994. package/src/rules/inference/layer-pattern.rules.json +16 -0
  995. package/src/rules/inference/layer.rules.json +7 -0
  996. package/src/rules/inference/moe.rules.json +48 -0
  997. package/src/rules/kernels/attention.rules.json +61 -0
  998. package/src/rules/kernels/conv2d.rules.json +6 -0
  999. package/src/rules/kernels/dequant.rules.json +58 -0
  1000. package/src/rules/kernels/energy.rules.json +22 -0
  1001. package/src/rules/kernels/fused-ffn.rules.json +13 -0
  1002. package/src/rules/kernels/fused-matmul-residual.rules.json +6 -0
  1003. package/src/rules/kernels/fused-matmul-rmsnorm.rules.json +8 -0
  1004. package/src/rules/kernels/gather.rules.json +12 -0
  1005. package/src/rules/kernels/gelu.rules.json +11 -0
  1006. package/src/rules/kernels/groupnorm.rules.json +10 -0
  1007. package/src/rules/kernels/kernel-validator.d.ts +24 -0
  1008. package/src/rules/kernels/kernel-validator.js +160 -0
  1009. package/src/rules/kernels/kv_quantize.rules.json +7 -0
  1010. package/src/rules/kernels/layernorm.rules.json +6 -0
  1011. package/src/rules/kernels/matmul.rules.json +60 -0
  1012. package/src/rules/kernels/modulate.rules.json +6 -0
  1013. package/src/rules/kernels/moe.rules.gptoss.json +105 -0
  1014. package/src/rules/kernels/moe.rules.json +11 -0
  1015. package/src/rules/kernels/pixel_shuffle.rules.json +6 -0
  1016. package/src/rules/kernels/residual.rules.json +12 -0
  1017. package/src/rules/kernels/rmsnorm.rules.json +11 -0
  1018. package/src/rules/kernels/rope.rules.json +6 -0
  1019. package/src/rules/kernels/sample.rules.json +6 -0
  1020. package/src/rules/kernels/scale.rules.json +6 -0
  1021. package/src/rules/kernels/silu.rules.json +21 -0
  1022. package/src/rules/kernels/softmax.rules.json +23 -0
  1023. package/src/rules/kernels/split-qkv.rules.json +6 -0
  1024. package/src/rules/kernels/upsample2d.rules.json +6 -0
  1025. package/src/rules/loader/tensor-loader.rules.json +15 -0
  1026. package/src/rules/loader/weights.rules.json +41 -0
  1027. package/src/rules/rule-registry.d.ts +48 -0
  1028. package/src/rules/rule-registry.js +177 -0
  1029. package/src/rules/tooling/command-runtime.rules.json +38 -0
  1030. package/src/storage/backends/idb-store.d.ts +52 -0
  1031. package/src/storage/backends/idb-store.js +590 -0
  1032. package/src/storage/backends/memory-store.d.ts +36 -0
  1033. package/src/storage/backends/memory-store.js +242 -0
  1034. package/src/storage/backends/opfs-store.d.ts +41 -0
  1035. package/src/storage/backends/opfs-store.js +429 -0
  1036. package/src/storage/blake3.d.ts +17 -0
  1037. package/src/storage/blake3.js +269 -0
  1038. package/src/storage/download-types.d.ts +157 -0
  1039. package/src/storage/download-types.js +48 -0
  1040. package/src/storage/downloader.d.ts +103 -0
  1041. package/src/storage/downloader.js +839 -0
  1042. package/src/storage/emulated-vram.d.ts +264 -0
  1043. package/src/storage/emulated-vram.js +576 -0
  1044. package/src/storage/export.d.ts +20 -0
  1045. package/src/storage/export.js +159 -0
  1046. package/src/storage/index.d.ts +253 -0
  1047. package/src/storage/index.js +185 -0
  1048. package/src/storage/inventory.d.ts +26 -0
  1049. package/src/storage/inventory.js +218 -0
  1050. package/src/storage/preflight.d.ts +144 -0
  1051. package/src/storage/preflight.js +294 -0
  1052. package/src/storage/quickstart-downloader.d.ts +154 -0
  1053. package/src/storage/quickstart-downloader.js +265 -0
  1054. package/src/storage/quota.d.ts +150 -0
  1055. package/src/storage/quota.js +304 -0
  1056. package/src/storage/registry.d.ts +28 -0
  1057. package/src/storage/registry.js +125 -0
  1058. package/src/storage/reports.d.ts +20 -0
  1059. package/src/storage/reports.js +94 -0
  1060. package/src/storage/shard-manager.d.ts +137 -0
  1061. package/src/storage/shard-manager.js +801 -0
  1062. package/src/sw.d.ts +1 -0
  1063. package/src/sw.js +187 -0
  1064. package/src/tooling/browser-command-runner.d.ts +28 -0
  1065. package/src/tooling/browser-command-runner.js +82 -0
  1066. package/src/tooling/command-api.d.ts +147 -0
  1067. package/src/tooling/command-api.js +523 -0
  1068. package/src/tooling/command-envelope.d.ts +81 -0
  1069. package/src/tooling/command-envelope.js +195 -0
  1070. package/src/tooling/command-runner-shared.d.ts +73 -0
  1071. package/src/tooling/command-runner-shared.js +146 -0
  1072. package/src/tooling/command-runner.html +45 -0
  1073. package/src/tooling/node-browser-command-runner.d.ts +30 -0
  1074. package/src/tooling/node-browser-command-runner.js +868 -0
  1075. package/src/tooling/node-command-runner.d.ts +36 -0
  1076. package/src/tooling/node-command-runner.js +127 -0
  1077. package/src/tooling/node-convert-worker-pool.d.ts +16 -0
  1078. package/src/tooling/node-convert-worker-pool.js +186 -0
  1079. package/src/tooling/node-convert-worker.d.ts +1 -0
  1080. package/src/tooling/node-convert-worker.js +60 -0
  1081. package/src/tooling/node-convert.d.ts +44 -0
  1082. package/src/tooling/node-converter.d.ts +1 -0
  1083. package/src/tooling/node-converter.js +1227 -0
  1084. package/src/tooling/node-file-fetch.d.ts +1 -0
  1085. package/src/tooling/node-file-fetch.js +38 -0
  1086. package/src/tooling/node-source-runtime.d.ts +19 -0
  1087. package/src/tooling/node-source-runtime.js +469 -0
  1088. package/src/tooling/node-webgpu.d.ts +6 -0
  1089. package/src/tooling/node-webgpu.js +321 -0
  1090. package/src/tooling/opfs-cache.d.ts +11 -0
  1091. package/src/tooling/opfs-cache.js +174 -0
  1092. package/src/tooling/source-runtime-bundle.d.ts +102 -0
  1093. package/src/tooling/source-runtime-bundle.js +484 -0
  1094. package/src/tooling-exports.browser.d.ts +7 -0
  1095. package/src/tooling-exports.browser.js +2 -0
  1096. package/src/tooling-exports.d.ts +22 -0
  1097. package/src/tooling-exports.js +7 -0
  1098. package/src/tooling-exports.shared.d.ts +105 -0
  1099. package/src/tooling-exports.shared.js +92 -0
  1100. package/src/training/README.md +153 -0
  1101. package/src/training/artifacts.d.ts +160 -0
  1102. package/src/training/artifacts.js +896 -0
  1103. package/src/training/attention-backward.d.ts +30 -0
  1104. package/src/training/attention-backward.js +217 -0
  1105. package/src/training/attention-forward.d.ts +22 -0
  1106. package/src/training/attention-forward.js +82 -0
  1107. package/src/training/autograd.d.ts +51 -0
  1108. package/src/training/autograd.js +380 -0
  1109. package/src/training/checkpoint.d.ts +31 -0
  1110. package/src/training/checkpoint.js +238 -0
  1111. package/src/training/clip.d.ts +9 -0
  1112. package/src/training/clip.js +54 -0
  1113. package/src/training/dataloader.d.ts +8 -0
  1114. package/src/training/dataloader.js +44 -0
  1115. package/src/training/datasets/index.d.ts +12 -0
  1116. package/src/training/datasets/index.js +6 -0
  1117. package/src/training/datasets/jsonl.d.ts +11 -0
  1118. package/src/training/datasets/jsonl.js +50 -0
  1119. package/src/training/datasets/reploid.d.ts +3 -0
  1120. package/src/training/datasets/reploid.js +36 -0
  1121. package/src/training/datasets/text-pairs.d.ts +21 -0
  1122. package/src/training/datasets/text-pairs.js +42 -0
  1123. package/src/training/datasets/token-batch.d.ts +21 -0
  1124. package/src/training/datasets/token-batch.js +40 -0
  1125. package/src/training/datasets/translation-pairs.d.ts +34 -0
  1126. package/src/training/datasets/translation-pairs.js +49 -0
  1127. package/src/training/export.d.ts +32 -0
  1128. package/src/training/export.js +112 -0
  1129. package/src/training/index.d.ts +52 -0
  1130. package/src/training/index.js +41 -0
  1131. package/src/training/lora.d.ts +19 -0
  1132. package/src/training/lora.js +57 -0
  1133. package/src/training/loss-scaling.d.ts +21 -0
  1134. package/src/training/loss-scaling.js +80 -0
  1135. package/src/training/loss.d.ts +10 -0
  1136. package/src/training/loss.js +41 -0
  1137. package/src/training/objectives/base.d.ts +58 -0
  1138. package/src/training/objectives/base.js +38 -0
  1139. package/src/training/objectives/cross_entropy.d.ts +18 -0
  1140. package/src/training/objectives/cross_entropy.js +37 -0
  1141. package/src/training/objectives/distill_kd.d.ts +16 -0
  1142. package/src/training/objectives/distill_kd.js +369 -0
  1143. package/src/training/objectives/distill_triplet.d.ts +16 -0
  1144. package/src/training/objectives/distill_triplet.js +412 -0
  1145. package/src/training/objectives/index.d.ts +12 -0
  1146. package/src/training/objectives/index.js +6 -0
  1147. package/src/training/objectives/ul_stage1_joint.d.ts +16 -0
  1148. package/src/training/objectives/ul_stage1_joint.js +188 -0
  1149. package/src/training/objectives/ul_stage2_base.d.ts +16 -0
  1150. package/src/training/objectives/ul_stage2_base.js +222 -0
  1151. package/src/training/optimizer.d.ts +22 -0
  1152. package/src/training/optimizer.js +115 -0
  1153. package/src/training/runner.d.ts +196 -0
  1154. package/src/training/runner.js +1194 -0
  1155. package/src/training/suite.d.ts +187 -0
  1156. package/src/training/suite.js +3156 -0
  1157. package/src/training/trainer.d.ts +89 -0
  1158. package/src/training/trainer.js +301 -0
  1159. package/src/training/ul_dataset.d.ts +47 -0
  1160. package/src/training/ul_dataset.js +153 -0
  1161. package/src/training/ul_schedule.d.ts +6 -0
  1162. package/src/training/ul_schedule.js +29 -0
  1163. package/src/types/chrome.d.ts +36 -0
  1164. package/src/types/chrome.js +1 -0
  1165. package/src/types/gpu.d.ts +185 -0
  1166. package/src/types/gpu.js +5 -0
  1167. package/src/types/index.d.ts +3 -0
  1168. package/src/types/index.js +3 -0
  1169. package/src/types/inference.d.ts +197 -0
  1170. package/src/types/inference.js +5 -0
  1171. package/src/types/model.d.ts +125 -0
  1172. package/src/types/model.js +5 -0
  1173. package/src/utils/index.d.ts +7 -0
  1174. package/src/utils/index.js +7 -0
  1175. package/src/utils/load-json.d.ts +5 -0
  1176. package/src/utils/load-json.js +23 -0
  1177. package/src/utils/plain-object.d.ts +1 -0
  1178. package/src/utils/plain-object.js +3 -0
  1179. package/src/utils/sha256.d.ts +4 -0
  1180. package/src/utils/sha256.js +135 -0
  1181. package/tools/convert-safetensors-node.js +180 -0
  1182. package/tools/doppler-cli.js +1170 -0
@@ -0,0 +1,1227 @@
1
+ import fs from 'node:fs/promises';
2
+ import os from 'node:os';
3
+ import path from 'node:path';
4
+ import { installNodeFileFetchShim } from './node-file-fetch.js';
5
+ import { NodeConvertWorkerPool } from './node-convert-worker-pool.js';
6
+ import { bootstrapNodeWebGPU } from './node-webgpu.js';
7
+ import { isPlainObject } from '../utils/plain-object.js';
8
+ import { selectRuleValue } from '../rules/rule-registry.js';
9
+ import { log, trace } from '../debug/index.js';
10
+
11
/**
 * Returns `value` unchanged when it is an integer greater than or equal to 1.
 *
 * @param {unknown} value - Candidate number (no coercion is applied here).
 * @param {string} label - Option name interpolated into the error message.
 * @returns {number} The validated value.
 * @throws {Error} When `value` is not a positive integer.
 */
function asPositiveInteger(value, label) {
  const isPositiveInteger = Number.isInteger(value) && value >= 1;
  if (isPositiveInteger) {
    return value;
  }
  throw new Error(`node convert: ${label} must be a positive integer.`);
}
17
+
18
/**
 * Merges a user-supplied execution config with defaults and validates it.
 *
 * When `value` is null/undefined a shallow copy of `defaults` is returned
 * as-is (no per-field validation is applied on that path).
 *
 * @param {object|null|undefined} value - User-provided execution settings.
 * @param {object} defaults - Default execution settings; must be a plain object.
 * @returns {object} Either a copy of `defaults`, or an object with exactly the
 *   keys workers, workerCountPolicy, rowChunkRows, rowChunkMinTensorBytes,
 *   maxInFlightJobs, useGpuCast and gpuCastMinTensorBytes.
 * @throws {Error} When `defaults`/`value` are not plain objects or a field is invalid.
 */
function normalizeExecutionConfig(value, defaults) {
  if (!isPlainObject(defaults)) {
    throw new Error('node convert: execution defaults must be an object.');
  }
  if (value == null) {
    return { ...defaults };
  }
  if (!isPlainObject(value)) {
    throw new Error('node convert: execution must be an object when provided.');
  }

  // Numeric fields share one rule: absent -> default (unvalidated),
  // present -> coerced with Number() and required to be a positive integer.
  const positiveIntegerOrDefault = (key) => (
    value[key] == null
      ? defaults[key]
      : asPositiveInteger(Number(value[key]), `execution.${key}`)
  );

  const workers = positiveIntegerOrDefault('workers');

  const workerCountPolicy = value.workerCountPolicy == null
    ? defaults.workerCountPolicy
    : String(value.workerCountPolicy).trim().toLowerCase();
  if (!['cap', 'error'].includes(workerCountPolicy)) {
    throw new Error('node convert: execution.workerCountPolicy must be "cap" or "error".');
  }

  const rowChunkRows = positiveIntegerOrDefault('rowChunkRows');
  const rowChunkMinTensorBytes = positiveIntegerOrDefault('rowChunkMinTensorBytes');
  const maxInFlightJobs = positiveIntegerOrDefault('maxInFlightJobs');

  let useGpuCast;
  if (value.useGpuCast == null) {
    useGpuCast = defaults.useGpuCast === true;
  } else if (typeof value.useGpuCast === 'boolean') {
    useGpuCast = value.useGpuCast;
  } else {
    throw new Error('node convert: execution.useGpuCast must be a boolean when provided.');
  }

  // Fallback chain: explicit value, then the default GPU threshold, then the
  // row-chunk threshold, then 32 MiB. Whatever wins is validated.
  const gpuCastThresholdSource = value.gpuCastMinTensorBytes
    ?? defaults.gpuCastMinTensorBytes
    ?? defaults.rowChunkMinTensorBytes
    ?? (32 * 1024 * 1024);
  const gpuCastMinTensorBytes = asPositiveInteger(
    Number(gpuCastThresholdSource),
    'execution.gpuCastMinTensorBytes'
  );

  return {
    workers,
    workerCountPolicy,
    rowChunkRows,
    rowChunkMinTensorBytes,
    maxInFlightJobs,
    useGpuCast,
    gpuCastMinTensorBytes,
  };
}
70
+
71
/**
 * Reports how many workers the host can run in parallel.
 *
 * Prefers `os.availableParallelism()` when the runtime provides it and it
 * returns a positive integer; otherwise falls back to the number of entries
 * returned by `os.cpus()`, and finally to 1.
 *
 * @returns {number} A positive integer.
 */
function resolveHostParallelism() {
  const reported = typeof os.availableParallelism === 'function'
    ? os.availableParallelism()
    : null;
  if (Number.isInteger(reported) && reported > 0) {
    return reported;
  }

  const cpuList = typeof os.cpus === 'function' ? os.cpus() : null;
  if (Array.isArray(cpuList) && cpuList.length > 0) {
    return cpuList.length;
  }
  return 1;
}
79
+
80
/**
 * Turns a normalized execution config into a concrete plan for this host.
 *
 * @param {object} executionConfig - Normalized config; reads `workers` and
 *   `workerCountPolicy`.
 * @returns {object} The config plus `requestedWorkers`, `availableWorkers`
 *   and `effectiveWorkers` (never below 1).
 * @throws {Error} When the policy is "error" and more workers are requested
 *   than the host reports as available.
 */
function resolveExecutionPlan(executionConfig) {
  const { workers: requestedWorkers, workerCountPolicy } = executionConfig;
  const availableWorkers = resolveHostParallelism();

  const exceedsHost = requestedWorkers > availableWorkers;
  if (workerCountPolicy === 'error' && exceedsHost) {
    throw new Error(
      `node convert: requested workers (${requestedWorkers}) exceed available CPU parallelism (${availableWorkers}).`
    );
  }

  // Only the "cap" policy clamps to the host; any other policy keeps the request.
  let effectiveWorkers = requestedWorkers;
  if (workerCountPolicy === 'cap') {
    effectiveWorkers = Math.min(requestedWorkers, availableWorkers);
  }

  return {
    ...executionConfig,
    requestedWorkers,
    availableWorkers,
    effectiveWorkers: Math.max(1, effectiveWorkers),
  };
}
100
+
101
/**
 * Maps a dtype label to its per-element byte width (case-insensitive).
 *
 * @param {unknown} dtype - Dtype label such as "F32", "F16" or "BF16".
 * @returns {number|null} 4 for F32, 2 for F16/BF16, null for anything else.
 */
function getDtypeBytes(dtype) {
  switch (String(dtype || '').toUpperCase()) {
    case 'F32':
      return 4;
    case 'F16':
    case 'BF16':
      return 2;
    default:
      return null;
  }
}
107
+
108
/**
 * Starts a wall-clock timer for a named conversion stage.
 *
 * @param {string} label - Stage name used in the log line and perf trace.
 * @returns {{ stop: (extra?: string, data?: object|null) => number }} A handle
 *   whose `stop` logs the elapsed time, emits a perf trace (merging `data`
 *   when it is an object) and returns the elapsed milliseconds.
 */
function createStageTimer(label) {
  const startedAt = performance.now();

  const stop = (extra = '', data = null) => {
    const elapsed = performance.now() - startedAt;
    const suffix = extra ? ` - ${extra}` : '';
    log.verbose('NodeConvert', `${label}: ${elapsed.toFixed(0)}ms${suffix}`);

    const extraFields = data && typeof data === 'object' ? data : {};
    trace.perf(`NodeConvert ${label}`, { ms: elapsed, ...extraFields });
    return elapsed;
  };

  return { stop };
}
123
+
124
/**
 * Locale-compares two values, treating anything that is not a string as ''.
 *
 * @param {unknown} a - Left operand.
 * @param {unknown} b - Right operand.
 * @returns {number} Result of `String.prototype.localeCompare`.
 */
function compareNullableStrings(a, b) {
  const textOrEmpty = (candidate) => (typeof candidate === 'string' ? candidate : '');
  return textOrEmpty(a).localeCompare(textOrEmpty(b));
}
129
+
130
/**
 * Sorts tensors in place by source path, then offset, then name.
 *
 * Non-array input and arrays with fewer than two entries are returned
 * untouched. Non-finite offsets are treated as 0.
 *
 * @param {Array<object>} tensors - Tensor descriptors (sorted in place).
 * @returns {Array<object>} The same `tensors` reference.
 */
function sortTensorsByDeterministicLocality(tensors) {
  const needsSorting = Array.isArray(tensors) && tensors.length > 1;
  if (!needsSorting) {
    return tensors;
  }

  const offsetOf = (tensor) => (
    Number.isFinite(tensor?.offset) ? Number(tensor.offset) : 0
  );

  const byLocality = (left, right) => {
    const byPath = compareNullableStrings(left?.sourcePath, right?.sourcePath);
    if (byPath !== 0) {
      return byPath;
    }
    const byOffset = offsetOf(left) - offsetOf(right);
    if (byOffset !== 0) {
      return byOffset;
    }
    return compareNullableStrings(left?.name, right?.name);
  };

  tensors.sort(byLocality);
  return tensors;
}
146
+
147
/**
 * Validates a worker transform result and fills in defaults.
 *
 * @param {object} result - Worker output; `tensorData` must be a Uint8Array.
 * @param {object} tensor - Source tensor; `dtype` is the fallback output dtype
 *   and `name` is used in the error message.
 * @returns {{ tensorData: Uint8Array, outDtype: *, outLayout: * }}
 * @throws {Error} When `result` is missing or its bytes are not a Uint8Array.
 */
function normalizeWorkerTransformResult(result, tensor) {
  const hasBytes = Boolean(result) && result.tensorData instanceof Uint8Array;
  if (!hasBytes) {
    throw new Error(`node convert: worker transform returned invalid bytes for ${tensor.name}.`);
  }
  const { tensorData, outDtype, outLayout } = result;
  return {
    tensorData,
    outDtype: outDtype ?? tensor.dtype,
    outLayout: outLayout ?? null,
  };
}
157
+
158
/**
 * Maps `items` through an async `mapper` with a bounded number of lanes,
 * preserving input order in the returned array.
 *
 * @param {Array} items - Values to map. Non-arrays and empty arrays yield [].
 * @param {number} concurrency - Maximum mapper calls in flight. Clamped to
 *   [1, items.length]; a value that is not a number (NaN/undefined) runs a
 *   single lane instead of silently doing no work.
 * @param {(item: *, index: number) => Promise<*>} mapper - Async transform.
 * @returns {Promise<Array>} Results, index-aligned with `items`.
 * @throws Rejects with the first mapper error. Once a mapper has failed no
 *   further items are dispatched (calls already in flight still settle).
 */
async function mapWithConcurrency(items, concurrency, mapper) {
  if (!Array.isArray(items) || items.length === 0) return [];

  // Array.from({ length: NaN }) creates zero lanes, which used to return an
  // array of holes without ever calling the mapper. Fall back to one lane.
  const requested = Number(concurrency);
  const laneLimit = Number.isNaN(requested) ? 1 : requested;
  const laneCount = Math.floor(Math.max(1, Math.min(laneLimit, items.length)));

  const results = new Array(items.length);
  let nextIndex = 0;
  let failed = false;

  const runLane = async () => {
    // Each lane pulls the next unclaimed index until the list is exhausted
    // or some lane has failed.
    while (!failed && nextIndex < items.length) {
      const index = nextIndex++;
      try {
        results[index] = await mapper(items[index], index);
      } catch (error) {
        failed = true;
        throw error;
      }
    }
  };

  await Promise.all(Array.from({ length: laneCount }, () => runLane()));
  return results;
}
173
+
174
// Memoized initialization of the GPU cast runtime. Holds the in-flight or
// settled promise; reset to null after a failure so a later call can retry.
let gpuCastRuntimePromise = null;

/**
 * Lazily bootstraps WebGPU for Node and loads the GPU helpers used for casts.
 *
 * The first call starts initialization; concurrent and later calls share the
 * same promise. On failure the cached promise is cleared, but only if it is
 * still the one this call awaited, so a retry started in the meantime is not
 * discarded.
 *
 * @returns {Promise<object>} `{ getDevice, castF32ToF16, runBF16ToF16,
 *   createTensor, acquireBuffer, releaseBuffer, getBufferPool }`.
 * @throws {Error} When no device can be initialized, or when bootstrapping or
 *   any module import fails.
 */
async function loadNodeGpuCastRuntime() {
  if (!gpuCastRuntimePromise) {
    gpuCastRuntimePromise = (async () => {
      await bootstrapNodeWebGPU();
      const [
        { initDevice, getDevice },
        { castF32ToF16, runBF16ToF16 },
        { createTensor },
        { acquireBuffer, releaseBuffer, getBufferPool },
      ] = await Promise.all([
        import('../gpu/device.js'),
        import('../gpu/kernel-selector.js'),
        import('../gpu/tensor.js'),
        import('../memory/buffer-pool.js'),
      ]);
      const device = await initDevice();
      if (!device || !getDevice()) {
        throw new Error(
          'node convert: execution.useGpuCast requires a WebGPU-capable Node runtime.'
        );
      }
      return {
        getDevice,
        castF32ToF16,
        runBF16ToF16,
        createTensor,
        acquireBuffer,
        releaseBuffer,
        getBufferPool,
      };
    })();
  }

  const pending = gpuCastRuntimePromise;
  try {
    return await pending;
  } catch (error) {
    // Identity check: another caller may already have replaced the cached
    // promise with a fresh attempt; do not wipe that one.
    if (gpuCastRuntimePromise === pending) {
      gpuCastRuntimePromise = null;
    }
    throw error;
  }
}
215
+
216
/**
 * Builds an async transformer that casts F32/BF16 tensor bytes to F16 on the GPU.
 *
 * The returned function resolves to `null` whenever the GPU path does not
 * apply or fails: missing input, a source dtype other than F32/BF16, a
 * resolved target quantization other than 'f16', a tensor smaller than the
 * threshold, a byte length that is not a whole number of elements, no device,
 * an unexpected readback, or any thrown error. The first thrown error is
 * logged as a warning; later ones are silent.
 *
 * @param {object} options
 * @param {object} options.runtime - Helpers as returned by the GPU cast
 *   runtime loader (getDevice, castF32ToF16, runBF16ToF16, createTensor,
 *   acquireBuffer, releaseBuffer, getBufferPool).
 * @param {number} options.gpuCastMinTensorBytes - Minimum input size in bytes
 *   for the GPU path; values that are not positive numbers fall back to 1.
 * @param {Function} options.resolveTensorTargetQuant - Called with
 *   (tensorName, targetQuant, quantizationInfo).
 * @returns {(input: object) => Promise<{tensorData: Uint8Array, outDtype: string, outLayout: null}|null>}
 */
function createNodeGpuTensorTransformer(options) {
  const {
    runtime,
    gpuCastMinTensorBytes,
    resolveTensorTargetQuant,
  } = options;
  const {
    getDevice,
    castF32ToF16,
    runBF16ToF16,
    createTensor,
    acquireBuffer,
    releaseBuffer,
    getBufferPool,
  } = runtime;
  const minTensorBytes = Math.max(1, Number(gpuCastMinTensorBytes) || 1);
  let warnedFallback = false;

  return async function maybeTransformWithGPU(input) {
    const tensor = input?.tensor;
    const tensorData = input?.tensorData;
    const transformContext = input?.transformContext ?? {};
    const reportProgress = typeof input?.reportProgress === 'function'
      ? input.reportProgress
      : null;
    if (!tensor || !(tensorData instanceof Uint8Array)) {
      return null;
    }

    const sourceDtype = String(tensor.dtype || '').toUpperCase();
    if (sourceDtype !== 'F32' && sourceDtype !== 'BF16') {
      return null;
    }

    const targetQuant = resolveTensorTargetQuant(
      tensor.name,
      transformContext.targetQuant,
      transformContext.quantizationInfo ?? null
    );
    if (targetQuant !== 'f16') {
      return null;
    }
    if (tensorData.byteLength < minTensorBytes) {
      return null;
    }

    const elementBytes = sourceDtype === 'F32' ? 4 : 2;
    if (tensorData.byteLength % elementBytes !== 0) {
      return null;
    }
    const numElements = tensorData.byteLength / elementBytes;
    const outputBytes = numElements * 2; // F16 output: two bytes per element.

    let inputBuffer = null;
    let outputBuffer = null;
    try {
      const device = getDevice();
      if (!device) {
        return null;
      }
      inputBuffer = acquireBuffer(tensorData.byteLength, undefined, `convert_gpu_cast_in_${tensor.name}`);
      // For a typed-array source, writeBuffer's dataOffset is measured in
      // elements from the start of the view, not from the start of the
      // backing ArrayBuffer. Passing tensorData.byteOffset would skip bytes
      // (or overrun the view) for any view with a non-zero byteOffset.
      device.queue.writeBuffer(inputBuffer, 0, tensorData, 0, tensorData.byteLength);

      if (sourceDtype === 'F32') {
        const inputTensor = createTensor(inputBuffer, 'f32', [numElements], `${tensor.name}_f32`);
        const converted = await castF32ToF16(inputTensor);
        outputBuffer = converted.buffer;
      } else {
        const converted = await runBF16ToF16(inputBuffer, [numElements], `${tensor.name}_f16`);
        outputBuffer = converted.buffer;
      }

      const readback = await getBufferPool().readBuffer(outputBuffer, outputBytes);
      if (!(readback instanceof ArrayBuffer) || readback.byteLength !== outputBytes) {
        return null;
      }
      reportProgress?.(tensorData.byteLength, tensorData.byteLength);
      return {
        tensorData: new Uint8Array(readback),
        outDtype: 'F16',
        outLayout: null,
      };
    } catch (error) {
      // Best effort: returning null lets the caller use its CPU path.
      if (!warnedFallback) {
        warnedFallback = true;
        const message = error instanceof Error ? error.message : String(error);
        log.warn('NodeConvert', `GPU cast fallback to CPU: ${message}`);
      }
      return null;
    } finally {
      // Release the output first, and never release the same buffer twice.
      if (outputBuffer && outputBuffer !== inputBuffer) {
        releaseBuffer(outputBuffer);
      }
      if (inputBuffer) {
        releaseBuffer(inputBuffer);
      }
    }
  };
}
315
+
316
+
317
/**
 * Builds the on-disk filename for a shard index, zero-padded to five digits.
 *
 * @param {number} index - Shard index.
 * @returns {string} For example `shard_00007.bin`.
 */
function generateShardFilename(index) {
  const paddedIndex = String(index).padStart(5, '0');
  return `shard_${paddedIndex}.bin`;
}
320
+
321
/**
 * Requires a non-blank string and resolves it to an absolute path.
 *
 * @param {unknown} value - Candidate path.
 * @param {string} label - Option name used in the error message.
 * @returns {string} `path.resolve(value)`.
 * @throws {Error} When `value` is not a string or is blank.
 */
function assertPath(value, label) {
  const isUsablePath = typeof value === 'string' && value.trim().length > 0;
  if (isUsablePath) {
    return path.resolve(value);
  }
  throw new Error(`node convert: ${label} is required.`);
}
327
+
328
/**
 * Returns the trimmed string, or null when the input is not a string or is blank.
 *
 * @param {unknown} value - Candidate string.
 * @returns {string|null}
 */
function readOptionalNonEmptyString(value) {
  const trimmed = typeof value === 'string' ? value.trim() : '';
  return trimmed === '' ? null : trimmed;
}
333
+
334
/**
 * Picks the model id: the explicit one when non-blank, otherwise
 * `converterConfig.output.modelBaseId` when non-blank.
 *
 * @param {unknown} explicitModelId - Caller-supplied model id.
 * @param {object|null|undefined} converterConfig - Converter configuration.
 * @returns {string|null} The trimmed id, or null when neither is usable.
 */
function resolveConfiguredModelId(explicitModelId, converterConfig) {
  const explicit = readOptionalNonEmptyString(explicitModelId);
  if (explicit != null) {
    return explicit;
  }
  return readOptionalNonEmptyString(converterConfig?.output?.modelBaseId);
}
340
+
341
/**
 * Resolves the absolute output directory.
 *
 * Precedence: explicit override, then `converterConfig.output.dir`, then
 * `converterConfig.output.baseDir` joined with `modelId`.
 *
 * @param {unknown} outputDirOverride - Explicit output directory, if any.
 * @param {object|null|undefined} converterConfig - Converter configuration.
 * @param {string|null|undefined} modelId - Required only for the baseDir case.
 * @returns {string} Absolute output directory.
 * @throws {Error} When no directory is configured, or baseDir is set without a modelId.
 */
function resolveOutputDir(outputDirOverride, converterConfig, modelId) {
  const explicitDir = readOptionalNonEmptyString(outputDirOverride)
    ?? readOptionalNonEmptyString(converterConfig?.output?.dir);
  if (explicitDir) {
    return path.resolve(explicitDir);
  }

  const baseDir = readOptionalNonEmptyString(converterConfig?.output?.baseDir);
  if (!baseDir) {
    throw new Error(
      'node convert: outputDir is required. ' +
      'Provide --output-dir, converterConfig.output.dir, or converterConfig.output.baseDir.'
    );
  }
  if (!modelId) {
    throw new Error(
      'node convert: converterConfig.output.baseDir requires modelId. ' +
      'Set converterConfig.output.modelBaseId or pass modelId.'
    );
  }
  return path.resolve(baseDir, modelId);
}
368
+
369
/**
 * Validates an optional converter config override.
 *
 * @param {unknown} value - Override object, or null/undefined for "none".
 * @returns {object|null} The same object, or null when nothing was provided.
 * @throws {Error} When a value is provided but is not a plain object.
 */
function normalizeConverterConfigOverride(value) {
  if (value != null && !isPlainObject(value)) {
    throw new Error('node convert: converterConfig must be an object when provided.');
  }
  return value ?? null;
}
376
+
377
/**
 * Tells whether a path ends with the `.gguf` extension (case-insensitive).
 *
 * @param {unknown} filePath - Path or file name; falsy values count as ''.
 * @returns {boolean}
 */
function isGgufPath(filePath) {
  const lowered = String(filePath || '').toLowerCase();
  return lowered.endsWith('.gguf');
}
380
+
381
/**
 * Stats a path and converts failures into descriptive converter errors.
 *
 * @param {string} targetPath - Path to stat.
 * @param {string} label - Option name used in error messages.
 * @returns {Promise<import('node:fs').Stats>} Result of `fs.stat`.
 * @throws {Error} "does not exist" for ENOENT, "failed to stat" otherwise.
 *   The underlying error is attached as `cause` so its `code` and stack
 *   remain available to callers.
 */
async function getPathStats(targetPath, label) {
  try {
    return await fs.stat(targetPath);
  } catch (error) {
    if (error?.code === 'ENOENT') {
      throw new Error(
        `node convert: ${label} does not exist: ${targetPath}`,
        { cause: error }
      );
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(
      `node convert: failed to stat ${label} "${targetPath}": ${message}`,
      { cause: error }
    );
  }
}
392
+
393
/**
 * Best-effort JSON loader: returns the parsed file content, or null when the
 * file cannot be read or does not contain valid JSON.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {Promise<*|null>}
 */
async function readOptionalJson(filePath) {
  let text;
  try {
    text = await fs.readFile(filePath, 'utf8');
  } catch {
    // Unreadable or missing file: treated as "no JSON available".
    return null;
  }
  try {
    return JSON.parse(text);
  } catch {
    // Malformed JSON is deliberately treated the same as a missing file.
    return null;
  }
}
401
+
402
/**
 * Checks whether `fs.access` succeeds for a path.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} true when accessible, false on any access error.
 */
async function fileExists(filePath) {
  return fs.access(filePath).then(
    () => true,
    () => false
  );
}
410
+
411
/**
 * Finds the single `.gguf` file directly inside a directory.
 *
 * @param {string} inputDir - Directory to scan (not recursive).
 * @returns {Promise<string|null>} Joined path of the GGUF file, or null when
 *   the directory contains none.
 * @throws {Error} When more than one GGUF file is present.
 */
async function resolveGgufPathFromDirectory(inputDir) {
  const entries = await fs.readdir(inputDir, { withFileTypes: true });

  const ggufNames = [];
  for (const entry of entries) {
    if (entry.isFile() && isGgufPath(entry.name)) {
      ggufNames.push(entry.name);
    }
  }
  // Sorted so the "multiple files" error lists names in a stable order.
  ggufNames.sort((a, b) => a.localeCompare(b));

  if (ggufNames.length > 1) {
    throw new Error(
      `node convert: multiple GGUF files found in "${inputDir}": ${ggufNames.join(', ')}. ` +
      'Pass a .gguf file path directly.'
    );
  }
  return ggufNames.length === 1 ? path.join(inputDir, ggufNames[0]) : null;
}
429
+
430
/**
 * Creates a byte-range reader that keeps one open file handle per path.
 *
 * @returns {{
 *   readRange: (filePath: string, offset: number, length: number) => Promise<ArrayBuffer>,
 *   closeAll: () => Promise<void>
 * }}
 *   `readRange` clamps the request to [0, file size] and resolves to an empty
 *   ArrayBuffer for non-finite or non-positive requests. `closeAll` closes
 *   every cached handle and forgets them; close failures are ignored.
 */
function createFileRangeReader() {
  // filePath -> Promise<{ fd, size }>. Storing the promise (not the entry)
  // lets concurrent readers of the same file share a single open().
  const handleMap = new Map();

  async function getHandleEntry(filePath) {
    const existingPromise = handleMap.get(filePath);
    if (existingPromise) {
      return existingPromise;
    }
    const openPromise = (async () => {
      const fd = await fs.open(filePath, 'r');
      try {
        const stats = await fd.stat();
        return {
          fd,
          size: Number(stats.size),
        };
      } catch (error) {
        // Do not leak the descriptor when stat fails.
        await fd.close().catch(() => {});
        throw error;
      }
    })();
    handleMap.set(filePath, openPromise);
    try {
      return await openPromise;
    } catch (error) {
      // Drop the failed attempt so a later read can retry, unless someone
      // already replaced it.
      if (handleMap.get(filePath) === openPromise) {
        handleMap.delete(filePath);
      }
      throw error;
    }
  }

  return {
    async readRange(filePath, offset, length) {
      if (!Number.isFinite(offset) || !Number.isFinite(length) || length <= 0) {
        return new ArrayBuffer(0);
      }

      const entry = await getHandleEntry(filePath);
      const start = Math.max(0, Math.floor(offset));
      const end = Math.min(entry.size, start + Math.floor(length));
      if (end <= start) {
        return new ArrayBuffer(0);
      }

      // A single read() may return fewer bytes than requested, so keep
      // reading until the range is filled or the file ends. The destination
      // is zero-initialized and owns its ArrayBuffer, so no uninitialized
      // memory can leak into the result and no extra copy is needed.
      const out = new Uint8Array(end - start);
      let filled = 0;
      while (filled < out.length) {
        const { bytesRead } = await entry.fd.read(
          out,
          filled,
          out.length - filled,
          start + filled
        );
        if (bytesRead === 0) {
          break; // Unexpected EOF (for example the file shrank after open).
        }
        filled += bytesRead;
      }
      return filled === out.length ? out.buffer : out.buffer.slice(0, filled);
    },
    async closeAll() {
      const closes = [];
      for (const entryPromise of handleMap.values()) {
        closes.push(
          Promise.resolve(entryPromise).then((entry) => entry.fd.close())
        );
      }
      handleMap.clear();
      await Promise.allSettled(closes);
    },
  };
}
491
+
492
/**
 * Reads the length-prefixed header of a safetensors file and parses it.
 *
 * The file starts with an 8-byte little-endian unsigned length, followed by
 * that many header bytes. Both parts are handed to the parser as a single
 * ArrayBuffer (prefix + header).
 *
 * @param {string} filePath - Safetensors file to read.
 * @param {(buffer: ArrayBuffer) => *} parseSafetensorsHeader - Parser for the
 *   combined prefix + header bytes.
 * @param {(filePath: string, offset: number, length: number) => Promise<ArrayBuffer>} readRange
 *   Byte-range reader.
 * @returns {Promise<*>} Whatever `parseSafetensorsHeader` returns.
 * @throws {Error} When the prefix is shorter than 8 bytes, the declared header
 *   size is not representable as a safe integer, or fewer header bytes are
 *   available than declared (truncated or corrupt file).
 */
async function readSafetensorsHeader(filePath, parseSafetensorsHeader, readRange) {
  const PREFIX_BYTES = 8;

  const headerPrefixBuffer = await readRange(filePath, 0, PREFIX_BYTES);
  const headerPrefixBytes = new Uint8Array(headerPrefixBuffer);
  if (headerPrefixBytes.byteLength < PREFIX_BYTES) {
    throw new Error(`Invalid safetensors header prefix for "${filePath}"`);
  }

  const declaredSize = new DataView(headerPrefixBuffer).getBigUint64(0, true);
  if (declaredSize > BigInt(Number.MAX_SAFE_INTEGER)) {
    throw new Error(
      `Invalid safetensors header size for "${filePath}": ${declaredSize} bytes.`
    );
  }
  const headerSize = Number(declaredSize);

  const headerBytes = new Uint8Array(await readRange(filePath, PREFIX_BYTES, headerSize));
  // Check before allocating: a short read means the file is truncated or the
  // length prefix is garbage, and zero-padding would only hide that.
  if (headerBytes.byteLength !== headerSize) {
    throw new Error(
      `Truncated safetensors header for "${filePath}": ` +
      `expected ${headerSize} bytes, got ${headerBytes.byteLength}.`
    );
  }

  const fullHeader = new Uint8Array(PREFIX_BYTES + headerSize);
  fullHeader.set(headerPrefixBytes.subarray(0, PREFIX_BYTES), 0);
  fullHeader.set(headerBytes, PREFIX_BYTES);
  return parseSafetensorsHeader(fullHeader.buffer);
}
507
+
508
/**
 * Recursively collects the paths of all non-directory entries under `rootDir`,
 * relative to it and using forward slashes.
 *
 * @param {string} rootDir - Directory to walk.
 * @param {string} [relDir=''] - Sub-directory (relative to `rootDir`) being
 *   visited; used by the recursion.
 * @param {string[]} [out=[]] - Accumulator that receives the paths.
 * @returns {Promise<string[]>} The `out` array.
 */
async function listRelativeFiles(rootDir, relDir = '', out = []) {
  const absoluteDir = relDir ? path.join(rootDir, relDir) : rootDir;
  const children = await fs.readdir(absoluteDir, { withFileTypes: true });

  for (const child of children) {
    const childRelPath = relDir ? `${relDir}/${child.name}` : child.name;
    if (!child.isDirectory()) {
      out.push(childRelPath.replace(/\\/g, '/'));
      continue;
    }
    await listRelativeFiles(rootDir, childRelPath, out);
  }
  return out;
}
521
+
522
/**
 * Deletes previously written shard files (`shard_<digits>.bin`) from a directory.
 *
 * Shard names are zero-padded to five digits but grow beyond that for indices
 * of 100000 and above, so any run of five or more digits is matched.
 *
 * @param {string} outputDir - Directory to clean (not recursive).
 * @returns {Promise<void>} Resolves without doing anything when the directory
 *   cannot be listed (for example because it does not exist yet).
 * @throws Rejects when a matched file cannot be removed.
 */
async function clearExistingShardFiles(outputDir) {
  let entries;
  try {
    entries = await fs.readdir(outputDir, { withFileTypes: true });
  } catch {
    // Best effort: nothing to clean when the directory cannot be listed.
    return;
  }
  const shardFiles = entries
    .filter((entry) => entry.isFile() && /^shard_\d{5,}\.bin$/i.test(entry.name))
    .map((entry) => path.join(outputDir, entry.name));
  if (shardFiles.length === 0) return;
  await Promise.all(shardFiles.map((filePath) => fs.unlink(filePath)));
}
535
+
536
/**
 * Builds the filesystem-backed I/O callbacks used during conversion.
 *
 * @param {string} outputDir - Directory that receives shards and the manifest.
 * @param {object} options
 * @param {string} options.hashAlgorithm - Algorithm name passed to `computeHash`.
 * @param {(data: *, algorithm: string) => *} options.computeHash - Shard hasher.
 * @param {(filePath: string, offset: number, length: number) => *} options.readRange
 *   Byte-range reader for source files.
 * @returns {{ readTensorData: Function, writeShard: Function, writeManifest: Function }}
 *   `readTensorData(tensor)` reads `tensor.size` bytes at `tensor.offset` of
 *   `tensor.sourcePath`; `writeShard(index, data)` writes the shard file and
 *   returns its hash; `writeManifest(manifest)` writes pretty-printed
 *   `manifest.json`.
 * @throws {Error} When any required option is missing or of the wrong type.
 */
function createNodeConvertIO(outputDir, options) {
  const { hashAlgorithm, computeHash, readRange } = options ?? {};

  if (!hashAlgorithm || typeof hashAlgorithm !== 'string') {
    throw new Error('node convert: hashAlgorithm is required.');
  }
  if (typeof computeHash !== 'function') {
    throw new Error('node convert: computeHash(data, algorithm) is required.');
  }
  if (typeof readRange !== 'function') {
    throw new Error('node convert: readRange(filePath, offset, length) is required.');
  }

  const inOutputDir = (fileName) => path.join(outputDir, fileName);

  const readTensorData = async (tensor) => (
    readRange(tensor.sourcePath, tensor.offset, tensor.size)
  );

  const writeShard = async (index, data) => {
    const shardPath = inOutputDir(generateShardFilename(index));
    await fs.writeFile(shardPath, data);
    return computeHash(data, hashAlgorithm);
  };

  const writeManifest = async (manifest) => {
    const serialized = JSON.stringify(manifest, null, 2);
    await fs.writeFile(inOutputDir('manifest.json'), serialized, 'utf8');
  };

  return { readTensorData, writeShard, writeManifest };
}
567
+
568
/**
 * Normalizes a progress update into a fixed-shape object.
 *
 * Numeric fields that are not finite numbers and text fields that are not
 * strings become null; `stage` is passed through (null when absent).
 *
 * @param {object|null|undefined} update - Raw progress update.
 * @returns {object|null} Normalized update, or null for a falsy input.
 */
function toNodeProgress(update) {
  if (!update) return null;

  const finiteOrNull = (candidate) => (Number.isFinite(candidate) ? candidate : null);
  const stringOrNull = (candidate) => (typeof candidate === 'string' ? candidate : null);

  const {
    stage,
    current,
    total,
    message,
    tensorName,
    tensorBytesCurrent,
    tensorBytesTotal,
  } = update;

  return {
    stage: stage ?? null,
    current: finiteOrNull(current),
    total: finiteOrNull(total),
    message: stringOrNull(message),
    tensorName: stringOrNull(tensorName),
    tensorBytesCurrent: finiteOrNull(tensorBytesCurrent),
    tensorBytesTotal: finiteOrNull(tensorBytesTotal),
  };
}
584
+
585
/**
 * Fills in default tokenizer file names on a manifest, in place.
 *
 * - type "bundled" or "huggingface": `file` defaults to "tokenizer.json".
 * - type "sentencepiece": `sentencepieceModel` defaults to "tokenizer.model".
 *
 * @param {object|null|undefined} manifest - Manifest to normalize (its
 *   `tokenizer` object is mutated).
 * @returns {object|null|undefined} The same `manifest` reference.
 */
function normalizeTokenizerManifest(manifest) {
  const tokenizer = manifest?.tokenizer;
  if (!tokenizer) return manifest;

  switch (tokenizer.type) {
    case 'bundled':
    case 'huggingface':
      tokenizer.file = tokenizer.file ?? 'tokenizer.json';
      break;
    case 'sentencepiece':
      tokenizer.sentencepieceModel = tokenizer.sentencepieceModel ?? 'tokenizer.model';
      break;
    default:
      break;
  }
  return manifest;
}
596
+
597
/**
 * Builds the worker-pool-backed tensor transformer.
 *
 * The returned function sends a tensor to the pool either whole or, for
 * eligible 2-D tensors, as independent row chunks whose outputs are
 * concatenated in row order.
 *
 * Row chunking is considered only when the tensor is 2-D with positive
 * dimensions, its source dtype has a known byte width, its source is not
 * already q4k, it is at least `execution.rowChunkMinTensorBytes` large, and
 * the target is not column-layout q4k. The final decision is delegated to the
 * converter rule `execution.jobMode`.
 *
 * @param {object} options
 * @param {object} options.pool - Worker pool exposing `transformTensor`.
 * @param {object} options.execution - Resolved execution plan.
 * @param {Function} options.transformTensorBytes - In-process transform.
 * @param {Function} options.resolveTensorTargetQuant - (name, targetQuant, quantizationInfo).
 * @param {Function} options.normalizeStorageQuant - Maps a dtype to a storage quant label.
 * @param {Function} options.shouldQuantize - (name, shape, { quantizeEmbeddings }).
 * @returns {(input: object) => Promise<{tensorData: Uint8Array, outDtype: *, outLayout: *}>}
 * @throws {Error} When `pool`, `execution` or `transformTensorBytes` is missing.
 */
function createNodeTensorTransformer(options) {
  const {
    pool,
    execution,
    transformTensorBytes,
    resolveTensorTargetQuant,
    normalizeStorageQuant,
    shouldQuantize,
  } = options ?? {};

  if (!pool || !execution || typeof transformTensorBytes !== 'function') {
    throw new Error('node convert: invalid worker tensor transformer setup.');
  }

  return async function tensorTransformer(input) {
    const tensor = input?.tensor;
    const tensorData = input?.tensorData;
    const transformContext = input?.transformContext ?? {};
    const reportProgress = typeof input?.reportProgress === 'function'
      ? input.reportProgress
      : null;

    if (!tensor || !(tensorData instanceof Uint8Array)) {
      throw new Error('node convert: invalid tensor transform input.');
    }

    // Single-job path: the whole tensor goes to one worker.
    const transformWholeTensor = async () => {
      const transformed = await pool.transformTensor(tensor, tensorData, transformContext);
      const normalized = normalizeWorkerTransformResult(transformed, tensor);
      reportProgress?.(tensorData.byteLength, tensorData.byteLength);
      return normalized;
    };

    const sourceDtype = String(tensor.dtype || '').toUpperCase();
    const sourceQuant = normalizeStorageQuant(sourceDtype);
    const tensorTargetQuant = resolveTensorTargetQuant(
      tensor.name,
      transformContext.targetQuant,
      transformContext.quantizationInfo ?? null
    );

    const is2D = Array.isArray(tensor.shape) && tensor.shape.length === 2;
    const [rows, cols] = is2D ? tensor.shape : [0, 0];
    const sourceBytesPerElement = getDtypeBytes(sourceDtype);
    const requestedLayout = String(transformContext.q4kLayout || 'row').trim().toLowerCase();
    const q4kLayout = requestedLayout === 'col' ? 'col' : 'row';
    const isColumnQ4kTarget = tensorTargetQuant === 'q4k' && q4kLayout === 'col';
    const canChunkRows = (
      is2D
      && rows > 0
      && cols > 0
      && sourceBytesPerElement != null
      && sourceQuant !== 'q4k'
      && tensorData.byteLength >= execution.rowChunkMinTensorBytes
      && !isColumnQ4kTarget
    );

    const jobMode = selectRuleValue('converter', 'execution', 'jobMode', {
      workers: execution.effectiveWorkers,
      canChunkRows,
    });
    if (jobMode !== 'row_chunks' || !canChunkRows) {
      return transformWholeTensor();
    }

    const rowChunkRows = execution.rowChunkRows
      ?? selectRuleValue('converter', 'execution', 'rowChunkRows', {
        workers: execution.effectiveWorkers,
        canChunkRows,
      });
    if (!Number.isInteger(rowChunkRows) || rowChunkRows < 1) {
      return transformWholeTensor();
    }

    const rowSourceBytes = cols * sourceBytesPerElement;
    if (!Number.isInteger(rowSourceBytes) || rowSourceBytes < 1) {
      return transformWholeTensor();
    }

    // For q4k targets the quantize decision is made once on the full tensor
    // and forwarded to every chunk via the transform context.
    const forceQuantizeDecision = tensorTargetQuant === 'q4k'
      ? shouldQuantize(tensor.name, tensor.shape, {
        quantizeEmbeddings: Boolean(transformContext.quantizeEmbeddings),
      })
      : null;

    const chunks = [];
    for (let rowStart = 0; rowStart < rows; rowStart += rowChunkRows) {
      const rowCount = Math.min(rowChunkRows, rows - rowStart);
      const start = rowStart * rowSourceBytes;
      chunks.push({
        rowStart,
        rowCount,
        start,
        end: start + (rowCount * rowSourceBytes),
      });
    }

    const maxInFlightJobs = execution.maxInFlightJobs
      ?? selectRuleValue('converter', 'execution', 'maxInFlightJobs', {
        workers: execution.effectiveWorkers,
      });
    const hasValidJobLimit = Number.isInteger(maxInFlightJobs) && maxInFlightJobs > 0;
    const concurrency = hasValidJobLimit ? maxInFlightJobs : execution.effectiveWorkers;

    let processedBytes = 0;
    const transformChunk = async (chunk) => {
      const chunkTensorData = tensorData.subarray(chunk.start, chunk.end);
      const chunkTensor = { ...tensor, shape: [chunk.rowCount, cols] };
      const transformed = await pool.transformTensor(chunkTensor, chunkTensorData, {
        ...transformContext,
        forceQuantizeDecision,
      });
      const normalized = normalizeWorkerTransformResult(transformed, chunkTensor);
      processedBytes += chunkTensorData.byteLength;
      reportProgress?.(
        Math.min(processedBytes, tensorData.byteLength),
        tensorData.byteLength
      );
      return normalized;
    };
    const chunkResults = await mapWithConcurrency(chunks, concurrency, transformChunk);

    if (chunkResults.length === 0) {
      return transformTensorBytes(tensor, tensorData, transformContext);
    }

    // Every chunk must agree with the first on output dtype and layout.
    const outDtype = chunkResults[0].outDtype ?? tensor.dtype;
    const outLayout = chunkResults[0].outLayout ?? null;
    let totalOutputBytes = 0;
    for (const chunkResult of chunkResults) {
      if ((chunkResult.outDtype ?? tensor.dtype) !== outDtype) {
        throw new Error(`node convert: inconsistent chunk dtype for ${tensor.name}.`);
      }
      if ((chunkResult.outLayout ?? null) !== outLayout) {
        throw new Error(`node convert: inconsistent chunk layout for ${tensor.name}.`);
      }
      totalOutputBytes += chunkResult.tensorData.byteLength;
    }

    const combined = new Uint8Array(totalOutputBytes);
    let writeOffset = 0;
    for (const { tensorData: chunkBytes } of chunkResults) {
      combined.set(chunkBytes, writeOffset);
      writeOffset += chunkBytes.byteLength;
    }

    return {
      tensorData: combined,
      outDtype,
      outLayout,
    };
  };
}
752
+
753
/**
 * Convert the model found at `options.inputDir` and write the result to disk.
 *
 * The input is classified once and exactly one parser runs, checked in this order:
 *   1. a directory containing `model_index.json` -> `parseDiffusionModel`;
 *   2. a `.gguf` file, or a directory in which `resolveGgufPathFromDirectory`
 *      finds one -> `parseGGUFModel`;
 *   3. any other directory -> `parseTransformerModel`, after which the optional
 *      `tokenizer.json`, `tokenizer_config.json` and `tokenizer.model` are looked up.
 *
 * After parsing, the conversion plan and model id are resolved, the output directory
 * is created and passed through `clearExistingShardFiles`, tensors are converted by
 * `convertModel` (optionally through a GPU cast transformer and/or a worker pool),
 * tokenizer and diffusion auxiliary files are written, and the manifest is written last.
 *
 * @param {object} options
 * @param {string} options.inputDir - Source directory, or the path of a `.gguf` file.
 * @param {string} [options.outputDir] - Optional output directory override.
 * @param {object} [options.converterConfig] - Optional converter config override.
 * @param {object} [options.execution] - Optional execution settings (workers, GPU cast).
 * @param {string} [options.modelId] - Optional explicit model id.
 * @param {Function} [options.onProgress] - Progress callback; ignored unless a function.
 * @returns {Promise<{manifest: object, shardCount: number, tensorCount: number,
 *   presetId: *, modelType: *, outputDir: string}>}
 * @throws {Error} When the input is neither a directory nor a GGUF file, when no model
 *   id can be resolved, or when any parser, reader or writer step fails.
 */
export async function convertSafetensorsDirectory(options) {
  const inputDir = assertPath(options?.inputDir, 'inputDir');
  const outputDirOverride = readOptionalNonEmptyString(options?.outputDir);
  const converterConfigOverride = normalizeConverterConfigOverride(options?.converterConfig);
  const onProgress = typeof options?.onProgress === 'function' ? options.onProgress : null;

  // Classify the input path: a GGUF file directly, or a directory that may hold one.
  const inputStats = await getPathStats(inputDir, 'inputDir');
  const isInputDirectory = inputStats.isDirectory();
  let inputGgufPath = null;
  if (inputStats.isFile() && isGgufPath(inputDir)) {
    inputGgufPath = inputDir;
  } else if (isInputDirectory) {
    inputGgufPath = await resolveGgufPathFromDirectory(inputDir);
  }
  const isInputGgufFile = Boolean(inputGgufPath);

  installNodeFileFetchShim();
  const fileRangeReader = createFileRangeReader();
  const totalTimer = createStageTimer('Total');
  try {
    // Load every collaborating module in parallel; the array order matches the names below.
    const [
      safetensorsTypes,
      ggufTypes,
      converterCore,
      ggufParser,
      conversionPlanModule,
      diffusionParser,
      transformerParser,
      configSchema,
      shardManager,
    ] = await Promise.all([
      import('../formats/safetensors/types.js'),
      import('../formats/gguf/types.js'),
      import('../converter/core.js'),
      import('../converter/parsers/gguf.js'),
      import('../converter/conversion-plan.js'),
      import('../converter/parsers/diffusion.js'),
      import('../converter/parsers/transformer.js'),
      import('../config/schema/index.js'),
      import('../storage/shard-manager.js'),
    ]);
    const { parseSafetensorsHeader } = safetensorsTypes;
    const { parseGGUFHeader } = ggufTypes;
    const {
      convertModel,
      extractArchitecture,
      transformTensorBytes,
      resolveTensorTargetQuant,
      normalizeStorageQuant,
      shouldQuantize,
    } = converterCore;
    const { parseGGUFModel } = ggufParser;
    const {
      resolveConversionPlan,
      inferSourceWeightQuantization,
      resolveConvertedModelId,
    } = conversionPlanModule;
    const { parseDiffusionModel } = diffusionParser;
    const { parseTransformerModel } = transformerParser;
    const {
      createConverterConfig,
      HEADER_READ_SIZE,
      DEFAULT_CONVERTER_EXECUTION_CONFIG,
    } = configSchema;
    const { computeHash } = shardManager;

    // Small local helpers shared by the parser callbacks below.
    const describeError = (error) => (error instanceof Error ? error.message : String(error));
    const parseJsonText = (text, suffix, label) => {
      try {
        return JSON.parse(text);
      } catch (error) {
        const message = describeError(error);
        throw new Error(`Invalid JSON in ${label} (${suffix}): ${message}`);
      }
    };
    const readHeader = (filePath) => readSafetensorsHeader(
      filePath,
      parseSafetensorsHeader,
      fileRangeReader.readRange
    );
    const tagWithSource = (list, sourcePath) => list.map((tensor) => ({
      ...tensor,
      sourcePath,
    }));

    const converterConfig = createConverterConfig(converterConfigOverride ?? undefined);
    const executionConfig = normalizeExecutionConfig(
      options?.execution,
      DEFAULT_CONVERTER_EXECUTION_CONFIG
    );
    const executionPlan = resolveExecutionPlan(executionConfig);
    const diffusionIndexPath = isInputDirectory ? path.join(inputDir, 'model_index.json') : null;
    let isDiffusionInput = false;
    if (isInputDirectory && diffusionIndexPath) {
      isDiffusionInput = await fileExists(diffusionIndexPath);
    }

    // Parse results; each branch below fills in the subset it knows about.
    let config = null;
    let tensors = [];
    let architectureHint = '';
    let architecture = null;
    let modelKind = 'transformer';
    let sourceQuantization = null;
    let tokenizerJson = null;
    let tokenizerConfig = null;
    let hasTokenizerModel = false;
    let tokenizerModelPath = null;
    let diffusionAuxFiles = [];
    const parseTimer = createStageTimer('Parse input');

    if (isDiffusionInput) {
      // Diffusion layout: every read is validated against the up-front file listing.
      const relativeFiles = await listRelativeFiles(inputDir);
      const knownFiles = new Set(relativeFiles);
      const assertListed = (suffix, label) => {
        if (!knownFiles.has(suffix)) {
          throw new Error(`Missing ${label} (${suffix})`);
        }
      };
      const parsedDiffusion = await parseDiffusionModel({
        onProgress,
        findExistingSuffix(suffixes) {
          for (const candidate of suffixes || []) {
            if (knownFiles.has(candidate)) return candidate;
          }
          return null;
        },
        async readJson(suffix, label = 'json') {
          assertListed(suffix, label);
          const text = await fs.readFile(path.join(inputDir, suffix), 'utf8');
          return parseJsonText(text, suffix, label);
        },
        async readText(suffix, label = 'text') {
          assertListed(suffix, label);
          return fs.readFile(path.join(inputDir, suffix), 'utf8');
        },
        async readBinary(suffix, label = 'binary') {
          assertListed(suffix, label);
          const bytes = await fs.readFile(path.join(inputDir, suffix));
          // Copy out exactly this Buffer's window of the underlying ArrayBuffer.
          return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
        },
        async parseSingleSafetensors(suffix) {
          assertListed(suffix, 'safetensors file');
          const fullPath = path.join(inputDir, suffix);
          const header = await readHeader(fullPath);
          return { tensors: tagWithSource(header.tensors, fullPath) };
        },
        async parseShardedSafetensors(indexSuffix, indexJson, componentId) {
          const weightMap = indexJson?.weight_map || {};
          const shardNames = [...new Set(Object.values(weightMap))];
          if (shardNames.length === 0) {
            throw new Error(`No shards listed in ${componentId} index file`);
          }
          // Shard names are relative to the directory that holds the index file.
          const lastSlash = indexSuffix.lastIndexOf('/');
          const baseDir = lastSlash === -1 ? '' : indexSuffix.slice(0, lastSlash);
          const shardSuffixes = shardNames.map((name) => (baseDir ? `${baseDir}/${name}` : name));
          const missing = shardSuffixes.filter((suffix) => !knownFiles.has(suffix));
          if (missing.length > 0) {
            throw new Error(
              `Missing shard files for ${componentId} (${shardSuffixes.length - missing.length}/${shardSuffixes.length} found)`
            );
          }
          const shardHeaders = await Promise.all(
            shardSuffixes.map(async (shardSuffix) => {
              const fullPath = path.join(inputDir, shardSuffix);
              const header = await readHeader(fullPath);
              return { fullPath, shardTensors: header.tensors };
            })
          );
          return {
            tensors: shardHeaders.flatMap(({ fullPath, shardTensors }) => (
              tagWithSource(shardTensors, fullPath)
            )),
          };
        },
      });
      config = parsedDiffusion.config;
      tensors = parsedDiffusion.tensors;
      architectureHint = 'diffusion';
      modelKind = 'diffusion';
      diffusionAuxFiles = parsedDiffusion.auxFiles ?? [];
    } else if (isInputGgufFile) {
      // GGUF input: expose the file as a range-readable source for the parser.
      const ggufPath = inputGgufPath;
      const ggufStats = await getPathStats(ggufPath, 'GGUF file');
      const ggufName = path.basename(ggufPath);
      const ggufSize = ggufStats.size;
      const ggufSource = {
        sourceType: 'node-file',
        name: ggufName,
        size: ggufSize,
        file: {
          name: ggufName,
          size: ggufSize,
        },
        async readRange(offset, length) {
          return fileRangeReader.readRange(ggufPath, offset, length);
        },
      };
      // Accept any object that already looks like a range source; otherwise use ours.
      const normalizeTensorSource = (input) => {
        const looksLikeSource = input
          && typeof input.readRange === 'function'
          && Number.isFinite(input.size);
        return looksLikeSource ? input : ggufSource;
      };
      const parseGGUFHeaderFromSource = async (source) => {
        const target = normalizeTensorSource(source);
        const headerBytes = await target.readRange(0, Math.min(target.size, HEADER_READ_SIZE));
        return {
          ...parseGGUFHeader(headerBytes),
          fileSize: target.size,
        };
      };
      const parsedGGUF = await parseGGUFModel({
        file: ggufSource,
        parseGGUFHeaderFromSource,
        normalizeTensorSource,
        onProgress(update) {
          onProgress?.(toNodeProgress({
            stage: update?.stage ?? 'parsing',
            message: update?.message ?? null,
          }));
        },
        signal: null,
      });
      config = parsedGGUF.config;
      tensors = tagWithSource(parsedGGUF.tensors, ggufPath);
      architectureHint = parsedGGUF.architecture;
      sourceQuantization = parsedGGUF.quantization ?? null;
      architecture = extractArchitecture({}, parsedGGUF.config || {});
    } else {
      if (!isInputDirectory) {
        throw new Error(
          'node convert: inputDir must be a directory containing safetensors files or a .gguf file path.'
        );
      }
      const parsedTransformer = await parseTransformerModel({
        async readJson(suffix, label = 'json') {
          const filePath = path.join(inputDir, suffix);
          let text;
          try {
            text = await fs.readFile(filePath, 'utf8');
          } catch (error) {
            // A missing file gets its own message; every other read failure is wrapped.
            if (error?.code === 'ENOENT') {
              throw new Error(`Missing ${label} (${suffix})`);
            }
            const message = describeError(error);
            throw new Error(`Failed to read ${label} (${suffix}): ${message}`);
          }
          return parseJsonText(text, suffix, label);
        },
        async fileExists(suffix) {
          return fileExists(path.join(inputDir, suffix));
        },
        async loadSingleSafetensors(suffix) {
          const filePath = path.join(inputDir, suffix);
          const header = await readHeader(filePath);
          return tagWithSource(header.tensors, filePath);
        },
        async loadShardedSafetensors(indexJson) {
          const shardFiles = Array.from(new Set(Object.values(indexJson.weight_map || {})));
          const shardHeaders = await Promise.all(
            shardFiles.map(async (shardFile) => {
              const shardPath = path.join(inputDir, shardFile);
              const header = await readHeader(shardPath);
              return { shardPath, shardTensors: header.tensors };
            })
          );
          return shardHeaders.flatMap(({ shardPath, shardTensors }) => (
            tagWithSource(shardTensors, shardPath)
          ));
        },
      });
      config = parsedTransformer.config;
      tensors = parsedTransformer.tensors;
      architectureHint = parsedTransformer.architectureHint;
      architecture = extractArchitecture(config, null);

      // Tokenizer assets are optional and are looked up one after another.
      const tokenizerJsonPath = path.join(inputDir, 'tokenizer.json');
      tokenizerModelPath = path.join(inputDir, 'tokenizer.model');
      const tokenizerConfigPath = path.join(inputDir, 'tokenizer_config.json');
      tokenizerJson = await readOptionalJson(tokenizerJsonPath);
      tokenizerConfig = await readOptionalJson(tokenizerConfigPath);
      hasTokenizerModel = await fileExists(tokenizerModelPath);
    }
    parseTimer.stop(`${modelKind} tensors=${tensors.length}`);

    sortTensorsByDeterministicLocality(tensors);

    // Source quantization precedence: parser-reported, then config override, then inferred.
    const weightOverride = converterConfig.quantization?.weights ?? null;
    sourceQuantization = sourceQuantization || weightOverride || inferSourceWeightQuantization(tensors);
    const {
      modelType: resolvedModelType,
      manifestQuantization: targetQuantization,
      quantizationInfo,
      manifestInference: inference,
      presetId,
    } = resolveConversionPlan({
      rawConfig: config,
      tensors,
      converterConfig,
      sourceQuantization,
      modelKind,
      architectureHint,
      architectureConfig: architecture,
      includePresetOverrideHint: modelKind === 'transformer',
    });

    const explicitModelId = resolveConfiguredModelId(options?.modelId, converterConfig);
    if (!explicitModelId) {
      throw new Error(
        'node convert: modelId is required. ' +
        'Set converterConfig.output.modelBaseId.'
      );
    }
    const modelId = resolveConvertedModelId({
      explicitModelId,
      converterConfig,
      detectedModelId: explicitModelId,
      quantizationInfo,
    });
    if (!modelId) {
      throw new Error('node convert: failed to resolve modelId from converterConfig.output.modelBaseId.');
    }
    const outputDir = resolveOutputDir(outputDirOverride, converterConfig, modelId);

    await fs.mkdir(outputDir, { recursive: true });
    await clearExistingShardFiles(outputDir);

    // Only the fields listed here are forwarded from each parsed tensor.
    const model = {
      name: path.basename(inputDir),
      modelId,
      tensors: tensors.map(({ name, shape, dtype, size, offset, sourcePath }) => ({
        name,
        shape,
        dtype,
        size,
        offset,
        sourcePath,
      })),
      config,
      architecture: architectureHint || 'unknown',
      quantization: targetQuantization,
      tokenizerJson,
      tokenizerConfig,
      tokenizerModel: hasTokenizerModel ? 'tokenizer.model' : null,
    };

    const io = createNodeConvertIO(outputDir, {
      hashAlgorithm: converterConfig.manifest.hashAlgorithm,
      computeHash,
      readRange: fileRangeReader.readRange,
    });
    const manifestArchitecture = modelKind === 'diffusion' ? 'diffusion' : architecture;

    let workerPool = null;
    let result = null;
    try {
      let gpuTensorTransformer = null;
      if (executionPlan.useGpuCast) {
        const gpuRuntime = await loadNodeGpuCastRuntime();
        gpuTensorTransformer = createNodeGpuTensorTransformer({
          runtime: gpuRuntime,
          gpuCastMinTensorBytes: executionPlan.gpuCastMinTensorBytes,
          resolveTensorTargetQuant,
        });
      }

      let workerTensorTransformer = null;
      if (executionPlan.effectiveWorkers > 1) {
        workerPool = new NodeConvertWorkerPool({ size: executionPlan.effectiveWorkers });
        workerTensorTransformer = createNodeTensorTransformer({
          pool: workerPool,
          execution: executionPlan,
          transformTensorBytes,
          resolveTensorTargetQuant,
          normalizeStorageQuant,
          shouldQuantize,
        });
      }

      // Stays null when neither accelerator is configured, so convertModel gets no transformer.
      let tensorTransformer = null;
      if (gpuTensorTransformer || workerTensorTransformer) {
        tensorTransformer = async (input) => {
          // GPU first; a falsy GPU result means "not handled", so fall through.
          const gpuResult = gpuTensorTransformer ? await gpuTensorTransformer(input) : null;
          if (gpuResult) {
            return gpuResult;
          }
          if (workerTensorTransformer) {
            return workerTensorTransformer(input);
          }
          // Last resort: transform in-process on the calling thread.
          const tensor = input?.tensor;
          const tensorData = input?.tensorData;
          if (!tensor || !(tensorData instanceof Uint8Array)) {
            throw new Error('node convert: invalid tensor transform input.');
          }
          return transformTensorBytes(tensor, tensorData, input?.transformContext ?? {});
        };
      }

      if (onProgress) {
        onProgress(toNodeProgress({
          stage: 'writing',
          message: (
            `Convert execution workers: requested=${executionPlan.requestedWorkers}, ` +
            `effective=${executionPlan.effectiveWorkers}, available=${executionPlan.availableWorkers}, ` +
            `gpuCast=${executionPlan.useGpuCast ? 'on' : 'off'}`
          ),
        }));
      }

      const convertTimer = createStageTimer('Convert tensors');
      result = await convertModel(model, io, {
        modelId,
        modelType: resolvedModelType,
        quantization: targetQuantization,
        quantizationInfo,
        architecture: manifestArchitecture,
        inference,
        converterConfig,
        tensorTransformer,
        onProgress(update) {
          onProgress?.(toNodeProgress(update));
        },
      });
      convertTimer.stop(`tensors=${result.tensorCount}, shards=${result.shardCount}`);
    } finally {
      // The pool is closed whether conversion succeeded or threw.
      if (workerPool) {
        await workerPool.close();
      }
    }

    // Side files are written after tensor conversion and before the manifest.
    if (tokenizerJson) {
      await fs.writeFile(path.join(outputDir, 'tokenizer.json'), JSON.stringify(tokenizerJson), 'utf8');
    }
    if (hasTokenizerModel && tokenizerModelPath) {
      await fs.copyFile(tokenizerModelPath, path.join(outputDir, 'tokenizer.model'));
    }
    if (diffusionAuxFiles.length > 0) {
      for (const asset of diffusionAuxFiles) {
        const outPath = path.join(outputDir, asset.name);
        if (typeof asset.data === 'string') {
          await fs.writeFile(outPath, asset.data, 'utf8');
        } else {
          await fs.writeFile(outPath, Buffer.from(asset.data));
        }
      }
    }

    normalizeTokenizerManifest(result.manifest);
    await io.writeManifest(result.manifest);

    return {
      manifest: result.manifest,
      shardCount: result.shardCount,
      tensorCount: result.tensorCount,
      presetId,
      modelType: resolvedModelType,
      outputDir,
    };
  } finally {
    await fileRangeReader.closeAll();
    totalTimer.stop();
  }
}