@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,700 @@
1
+ /**
2
+ * Binary format for `cache/voice-preset-*.bin`.
3
+ *
4
+ * Two versions are supported:
5
+ *
6
+ * v1 (`magic='ELZ1', version=1`) — legacy two-section layout used by the
7
+ * initial Kokoro-style placeholder. Carries a Float32 speaker embedding +
8
+ * a phrase-cache seed list. Still read for back-compat (older bundles only
9
+ * contain v1).
10
+ *
11
+ * v2 (`magic='ELZ1', version=2`) — superset adopted for the OmniVoice
12
+ * freeze. Adds three OmniVoice-specific sections that the v1 layout had
13
+ * no room for: pre-encoded `ref_audio_tokens` (int32, shape
14
+ * `[K, ref_T]`), a UTF-8 `ref_text` transcript of the reference clip, and
15
+ * a closed-vocabulary `instruct` string (the resolved VoiceDesign
16
+ * attributes). v2 readers handle v1 files transparently (the new sections
17
+ * default to empty). A v1 reader applied to a v2 file fails fast on
18
+ * `truncated-header` because the v2 header is larger.
19
+ *
20
+ * Layout (little-endian throughout):
21
+ *
22
+ * v1 header (24 bytes):
23
+ * +0 4 bytes magic 'ELZ1' (0x315A4C45)
24
+ * +4 4 bytes format version (uint32) — 1
25
+ * +8 4 bytes speaker embedding offset (uint32)
26
+ * +12 4 bytes speaker embedding byte length (uint32)
27
+ * +16 4 bytes phrase cache seed offset (uint32)
28
+ * +20 4 bytes phrase cache seed byte length (uint32)
29
+ *
30
+ * v2 header (64 bytes — additive, all section descriptors are
31
+ * `(offset:uint32, length:uint32)` pairs):
32
+ * +0 4 bytes magic 'ELZ1' (0x315A4C45)
33
+ * +4 4 bytes format version (uint32) — 2
34
+ * +8 4 bytes speaker embedding offset
35
+ * +12 4 bytes speaker embedding byte length
36
+ * +16 4 bytes phrase cache seed offset
37
+ * +20 4 bytes phrase cache seed byte length
38
+ * +24 4 bytes ref_audio_tokens offset
39
+ * +28 4 bytes ref_audio_tokens byte length
40
+ * +32 4 bytes ref_text offset
41
+ * +36 4 bytes ref_text byte length
42
+ * +40 4 bytes instruct offset
43
+ * +44 4 bytes instruct byte length
44
+ * +48 4 bytes metadata offset
45
+ * +52 4 bytes metadata byte length
46
+ * +56 4 bytes reserved (must be 0)
47
+ * +60 4 bytes reserved (must be 0)
48
+ *
49
+ * `ref_audio_tokens` payload (v2):
50
+ * +0 4 bytes K — codebook count (uint32, OmniVoice = 8)
51
+ * +4 4 bytes ref_T — frames per codebook (uint32)
52
+ * +8 ... int32 LE codebook samples, row-major shape `[K, ref_T]`
53
+ *
54
+ * `ref_text` payload (v2): raw UTF-8 bytes (no NUL terminator).
55
+ * `instruct` payload (v2): raw UTF-8 bytes (closed VoiceDesign vocabulary).
56
+ * `metadata` payload (v2): raw UTF-8 JSON bytes (codec sha256, corpus
57
+ * hash, etc.); the runtime never relies on
58
+ * metadata for correctness.
59
+ *
60
+ * Phrase cache seed payload (v1 + v2, identical):
61
+ * uint32 LE N (phrase count)
62
+ * for each phrase:
63
+ * uint16 LE text_byte_len
64
+ * uint8[] canonicalized text (UTF-8)
65
+ * uint32 LE sample_rate
66
+ * uint32 LE pcm_byte_len
67
+ * uint8[] PCM (Float32 LE samples)
68
+ *
69
+ * Per-section invariants:
70
+ * - Section bounds may not overlap the header.
71
+ * - Section bounds must fit within the file length.
72
+ * - A `length=0` section is allowed (means "absent"); the corresponding
73
+ * output field is an empty `Float32Array` / `Int32Array` / empty string.
74
+ * - `embedding.length % 4 == 0` (Float32).
75
+ * - `ref_audio_tokens.length` ≥ 8 (the two header words K, ref_T) and the
76
+ * payload is `8 + K*ref_T*4` bytes.
77
+ */
78
+
79
+ export const VOICE_PRESET_MAGIC = 0x315a4c45; // 'ELZ1' when read as a little-endian uint32 (file bytes 45 4C 5A 31)
80
+
81
+ /** Header byte counts. */
82
+ export const VOICE_PRESET_HEADER_BYTES_V1 = 24; // 6 uint32 words: magic, version, 2 (offset, length) pairs
83
+ export const VOICE_PRESET_HEADER_BYTES_V2 = 64; // 16 uint32 words: magic, version, 6 (offset, length) pairs, 2 reserved
84
+
85
+ /** Supported format versions. v2 is the canonical write path. */
86
+ export const VOICE_PRESET_VERSION_V1 = 1;
87
+ export const VOICE_PRESET_VERSION_V2 = 2;
88
+ export const VOICE_PRESET_VERSION_CURRENT = VOICE_PRESET_VERSION_V2; // alias for the newest supported version
89
+
90
+ export interface VoicePresetSeedPhrase { // One record of the phrase-cache seed section.
91
+ /** Canonicalized text (lowercase, single-spaced, trimmed). Stored with a uint16 byte-length prefix, so the writers reject UTF-8 encodings longer than 65535 bytes. */
92
+ text: string;
93
+ sampleRate: number; // serialized as a uint32
94
+ pcm: Float32Array; // samples; copied to and from the file as raw Float32 bytes
95
+ }
96
+
97
+ /**
98
+ * OmniVoice reference-audio-tokens payload. `K` is the codebook count (=8 for
99
+ * OmniVoice / HiggsAudioV2) and `refT` is the number of frames per codebook.
100
+ * `tokens` is row-major: codebook `k`, frame `t` is at `tokens[k*refT + t]`.
101
+ * An empty payload (refT=0, K=0, tokens length 0) is valid and means "no
102
+ * reference audio bound to this preset" (instruct-only voice).
103
+ */
104
+ export interface RefAudioTokens {
105
+ K: number; // codebook count; serialized as a uint32
106
+ refT: number; // frames per codebook; serialized as a uint32
107
+ tokens: Int32Array; // length must equal K * refT (checked by both the reader and the v2 writer)
108
+ }
109
+
110
+ export interface VoicePresetFile { // Parsed, in-memory form of a voice-preset file.
111
+ version: number; // format version taken from the header (1 or 2)
112
+ embedding: Float32Array; // speaker embedding; empty when the section length is 0
113
+ phrases: ReadonlyArray<VoicePresetSeedPhrase>; // phrase-cache seed records; empty when the section length is 0
114
+ /** v2 only — empty for v1 files. */
115
+ refAudioTokens: RefAudioTokens;
116
+ /** v2 only — empty for v1 files. */
117
+ refText: string;
118
+ /** v2 only — empty for v1 files. */
119
+ instruct: string;
120
+ /** v2 only — parsed JSON object, empty `{}` for v1 files. */
121
+ metadata: Record<string, unknown>;
122
+ }
123
+
124
+ export class VoicePresetFormatError extends Error { // Error raised by the readers and writers in this file for malformed preset data.
125
+ constructor(
126
+ message: string,
127
+ readonly code: // machine-readable failure category, exposed as a readonly property
128
+ | "bad-magic"
129
+ | "bad-version"
130
+ | "truncated-header"
131
+ | "truncated-section"
132
+ | "bad-section-bounds"
133
+ | "bad-phrase-record"
134
+ | "bad-embedding-length"
135
+ | "bad-ref-tokens"
136
+ | "bad-metadata",
137
+ ) {
138
+ super(message);
139
+ this.name = "VoicePresetFormatError"; // overrides the default "Error" name
140
+ }
141
+ }
142
+
143
+ interface SectionView { // One (offset, length) section descriptor as stored in the header.
144
+ offset: number; // byte offset from the start of the file
145
+ length: number; // byte length; 0 means the section is absent
146
+ }
147
+
148
+ interface ParsedHeader { // Result of readHeader: version plus one bounds-checked descriptor per section.
149
+ version: number;
150
+ headerBytes: number; // 24 for v1, 64 for v2
151
+ embedding: SectionView;
152
+ phrases: SectionView;
153
+ refAudioTokens: SectionView; // EMPTY_SECTION for v1 files
154
+ refText: SectionView; // EMPTY_SECTION for v1 files
155
+ instruct: SectionView; // EMPTY_SECTION for v1 files
156
+ metadata: SectionView; // EMPTY_SECTION for v1 files
157
+ }
158
+
159
+ const EMPTY_SECTION: SectionView = Object.freeze({ offset: 0, length: 0 }); // shared, frozen "absent section" descriptor used for the v2-only sections of v1 files
160
+
161
+ function checkSectionBounds( // Validate one section descriptor; throws VoicePresetFormatError("bad-section-bounds") if it overlaps the header or runs past the file.
162
+ sec: SectionView,
163
+ fileLen: number,
164
+ headerBytes: number,
165
+ ): void {
166
+ if (sec.length === 0) return; // absent section: its offset is not inspected
167
+ if (sec.offset < headerBytes) {
168
+ throw new VoicePresetFormatError(
169
+ `voice preset section overlaps header (offset=${sec.offset} < header=${headerBytes})`,
170
+ "bad-section-bounds",
171
+ );
172
+ }
173
+ if (sec.offset + sec.length > fileLen) { // both operands come from uint32 reads, so the sum is exact in a JS number
174
+ throw new VoicePresetFormatError(
175
+ `voice preset section bounds exceed file length`,
176
+ "bad-section-bounds",
177
+ );
178
+ }
179
+ }
180
+
181
+ function readHeader(view: DataView): ParsedHeader { // Parse and validate the v1 or v2 header; returns the version, header size and bounds-checked section descriptors.
182
+ if (view.byteLength < VOICE_PRESET_HEADER_BYTES_V1) { // even the smallest (v1) header must be present before any field is read
183
+ throw new VoicePresetFormatError(
184
+ `voice preset file truncated: header needs ${VOICE_PRESET_HEADER_BYTES_V1} bytes, got ${view.byteLength}`,
185
+ "truncated-header",
186
+ );
187
+ }
188
+ const magic = view.getUint32(0, true); // `true` = little-endian, used for every header field
189
+ if (magic !== VOICE_PRESET_MAGIC) {
190
+ throw new VoicePresetFormatError(
191
+ `voice preset bad magic: expected 0x${VOICE_PRESET_MAGIC.toString(16)}, got 0x${magic.toString(16)}`,
192
+ "bad-magic",
193
+ );
194
+ }
195
+ const version = view.getUint32(4, true);
196
+ if (
197
+ version !== VOICE_PRESET_VERSION_V1 &&
198
+ version !== VOICE_PRESET_VERSION_V2
199
+ ) {
200
+ throw new VoicePresetFormatError(
201
+ `voice preset unsupported version: ${version} (this build supports 1 and 2)`,
202
+ "bad-version",
203
+ );
204
+ }
205
+ const headerBytes =
206
+ version === VOICE_PRESET_VERSION_V2
207
+ ? VOICE_PRESET_HEADER_BYTES_V2
208
+ : VOICE_PRESET_HEADER_BYTES_V1;
209
+ if (view.byteLength < headerBytes) { // second size check: a v2 file needs the full 64-byte header
210
+ throw new VoicePresetFormatError(
211
+ `voice preset file truncated: v${version} header needs ${headerBytes} bytes, got ${view.byteLength}`,
212
+ "truncated-header",
213
+ );
214
+ }
215
+
216
+ const embedding: SectionView = { // descriptors at +8..+23 are common to v1 and v2
217
+ offset: view.getUint32(8, true),
218
+ length: view.getUint32(12, true),
219
+ };
220
+ const phrases: SectionView = {
221
+ offset: view.getUint32(16, true),
222
+ length: view.getUint32(20, true),
223
+ };
224
+
225
+ let refAudioTokens = EMPTY_SECTION; // v2-only sections stay "absent" for v1 files
226
+ let refText = EMPTY_SECTION;
227
+ let instruct = EMPTY_SECTION;
228
+ let metadata = EMPTY_SECTION;
229
+ if (version === VOICE_PRESET_VERSION_V2) {
230
+ refAudioTokens = {
231
+ offset: view.getUint32(24, true),
232
+ length: view.getUint32(28, true),
233
+ };
234
+ refText = {
235
+ offset: view.getUint32(32, true),
236
+ length: view.getUint32(36, true),
237
+ };
238
+ instruct = {
239
+ offset: view.getUint32(40, true),
240
+ length: view.getUint32(44, true),
241
+ };
242
+ metadata = {
243
+ offset: view.getUint32(48, true),
244
+ length: view.getUint32(52, true),
245
+ };
246
+ // Reserved words must be zero — fail closed on accidental reuse.
247
+ const r0 = view.getUint32(56, true);
248
+ const r1 = view.getUint32(60, true);
249
+ if (r0 !== 0 || r1 !== 0) {
250
+ throw new VoicePresetFormatError(
251
+ `voice preset v2 reserved header words must be 0 (got ${r0}, ${r1})`,
252
+ "bad-section-bounds", // NOTE(review): reported under the section-bounds code; there is no dedicated code for reserved words
253
+ );
254
+ }
255
+ }
256
+
257
+ const fileLen = view.byteLength;
258
+ checkSectionBounds(embedding, fileLen, headerBytes); // each call throws on a bad descriptor; sections are not checked against each other for overlap
259
+ checkSectionBounds(phrases, fileLen, headerBytes);
260
+ checkSectionBounds(refAudioTokens, fileLen, headerBytes);
261
+ checkSectionBounds(refText, fileLen, headerBytes);
262
+ checkSectionBounds(instruct, fileLen, headerBytes);
263
+ checkSectionBounds(metadata, fileLen, headerBytes);
264
+
265
+ return {
266
+ version,
267
+ headerBytes,
268
+ embedding,
269
+ phrases,
270
+ refAudioTokens,
271
+ refText,
272
+ instruct,
273
+ metadata,
274
+ };
275
+ }
276
+
277
+ function copyFloat32( // Copy `byteLength` bytes out of `bytes` and return them reinterpreted as a Float32Array backed by its own buffer.
278
+ bytes: Uint8Array,
279
+ /** Offset relative to `bytes` (i.e. relative to bytes.byteOffset). */
280
+ relativeOffset: number,
281
+ byteLength: number,
282
+ ): Float32Array {
283
+ // The source byte offset is not guaranteed to be 4-aligned in the file
284
+ // buffer, so we copy raw bytes into a fresh ArrayBuffer first.
285
+ const aligned = new Uint8Array(byteLength);
286
+ aligned.set(bytes.subarray(relativeOffset, relativeOffset + byteLength));
287
+ return new Float32Array(aligned.buffer, 0, byteLength / 4); // callers in this file verify byteLength % 4 === 0 before calling
288
+ }
289
+
290
+ function copyInt32( // Int32 counterpart of copyFloat32: copies the bytes into a fresh buffer and views them as an Int32Array.
291
+ bytes: Uint8Array,
292
+ relativeOffset: number, // offset relative to `bytes`, not to the underlying ArrayBuffer
293
+ byteLength: number,
294
+ ): Int32Array {
295
+ const aligned = new Uint8Array(byteLength); // fresh buffer, so the typed-array view starts at offset 0
296
+ aligned.set(bytes.subarray(relativeOffset, relativeOffset + byteLength));
297
+ return new Int32Array(aligned.buffer, 0, byteLength / 4); // the only caller here checks byteLength % 4 === 0 first
298
+ }
299
+
300
+ function readEmbedding(bytes: Uint8Array, sec: SectionView): Float32Array { // Decode the speaker-embedding section into a standalone Float32Array.
301
+ if (sec.length === 0) return new Float32Array(0); // absent section
302
+ if (sec.length % 4 !== 0) { // must hold a whole number of Float32 values
303
+ throw new VoicePresetFormatError(
304
+ `voice preset embedding length ${sec.length} is not a multiple of 4`,
305
+ "bad-embedding-length",
306
+ );
307
+ }
308
+ return copyFloat32(bytes, sec.offset, sec.length);
309
+ }
310
+
311
+ function readRefAudioTokens( // Decode the ref_audio_tokens section: two uint32 words (K, ref_T) followed by K*ref_T int32 values.
312
+ bytes: Uint8Array,
313
+ sec: SectionView,
314
+ ): RefAudioTokens {
315
+ if (sec.length === 0) {
316
+ return { K: 0, refT: 0, tokens: new Int32Array(0) }; // absent section maps to the empty payload
317
+ }
318
+ if (sec.length < 8) { // not enough room for the K and ref_T words
319
+ throw new VoicePresetFormatError(
320
+ `voice preset ref_audio_tokens section truncated (need ≥ 8 bytes, got ${sec.length})`,
321
+ "bad-ref-tokens",
322
+ );
323
+ }
324
+ const view = new DataView(
325
+ bytes.buffer,
326
+ bytes.byteOffset + sec.offset, // DataView offsets are relative to the ArrayBuffer, so add the Uint8Array's own offset
327
+ sec.length,
328
+ );
329
+ const K = view.getUint32(0, true);
330
+ const refT = view.getUint32(4, true);
331
+ const tokenBytes = sec.length - 8;
332
+ if (tokenBytes % 4 !== 0) {
333
+ throw new VoicePresetFormatError(
334
+ `voice preset ref_audio_tokens payload bytes ${tokenBytes} is not a multiple of 4`,
335
+ "bad-ref-tokens",
336
+ );
337
+ }
338
+ const expected = K * refT * 4; // declared shape must account for every payload byte
339
+ if (tokenBytes !== expected) {
340
+ throw new VoicePresetFormatError(
341
+ `voice preset ref_audio_tokens shape mismatch: K=${K}, ref_T=${refT}, expected ${expected} bytes, got ${tokenBytes}`,
342
+ "bad-ref-tokens",
343
+ );
344
+ }
345
+ const tokens =
346
+ tokenBytes === 0
347
+ ? new Int32Array(0)
348
+ : copyInt32(bytes, sec.offset + 8, tokenBytes); // skip the 8-byte (K, ref_T) prefix
349
+ return { K, refT, tokens };
350
+ }
351
+
352
+ function readUtf8(bytes: Uint8Array, sec: SectionView): string { // Decode a section as UTF-8 text; an absent section yields "".
353
+ if (sec.length === 0) return "";
354
+ const slice = bytes.subarray(sec.offset, sec.offset + sec.length);
355
+ return new TextDecoder("utf-8", { fatal: true }).decode(slice); // NOTE(review): fatal mode throws a TypeError on invalid UTF-8; it is not wrapped in VoicePresetFormatError
356
+ }
357
+
358
+ function readMetadata( // Decode the metadata section as a JSON object; throws VoicePresetFormatError("bad-metadata") for invalid JSON or a non-object value.
359
+ bytes: Uint8Array,
360
+ sec: SectionView,
361
+ ): Record<string, unknown> {
362
+ if (sec.length === 0) return {}; // absent section
363
+ const text = readUtf8(bytes, sec); // NOTE(review): runs outside the try below, so invalid UTF-8 escapes as a TypeError
364
+ let parsed: unknown;
365
+ try {
366
+ parsed = JSON.parse(text);
367
+ } catch (err) {
368
+ throw new VoicePresetFormatError(
369
+ `voice preset metadata is not valid JSON: ${(err as Error).message}`,
370
+ "bad-metadata",
371
+ );
372
+ }
373
+ if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) { // only a plain JSON object is accepted: null, primitives and arrays are rejected
374
+ throw new VoicePresetFormatError(
375
+ `voice preset metadata must be a JSON object`,
376
+ "bad-metadata",
377
+ );
378
+ }
379
+ return parsed as Record<string, unknown>;
380
+ }
381
+
382
+ function readPhrases( // Decode the phrase-cache seed section: a uint32 count followed by that many (text, sample_rate, pcm) records.
383
+ bytes: Uint8Array,
384
+ sec: SectionView,
385
+ ): VoicePresetSeedPhrase[] {
386
+ if (sec.length === 0) return []; // absent section
387
+ const view = new DataView(
388
+ bytes.buffer,
389
+ bytes.byteOffset + sec.offset,
390
+ sec.length,
391
+ );
392
+ const decoder = new TextDecoder("utf-8", { fatal: true }); // NOTE(review): invalid UTF-8 in a phrase throws a TypeError, not VoicePresetFormatError
393
+ let pos = 0; // read cursor, relative to the start of the section
394
+ if (sec.length < 4) {
395
+ throw new VoicePresetFormatError(
396
+ "voice preset phrase section truncated before count",
397
+ "truncated-section",
398
+ );
399
+ }
400
+ const count = view.getUint32(pos, true);
401
+ pos += 4;
402
+ const out: VoicePresetSeedPhrase[] = [];
403
+ for (let i = 0; i < count; i++) { // every read below is preceded by a bounds check against the section length
404
+ if (pos + 2 > sec.length) {
405
+ throw new VoicePresetFormatError(
406
+ `voice preset phrase #${i}: truncated before text length`,
407
+ "bad-phrase-record",
408
+ );
409
+ }
410
+ const textLen = view.getUint16(pos, true);
411
+ pos += 2;
412
+ if (pos + textLen > sec.length) {
413
+ throw new VoicePresetFormatError(
414
+ `voice preset phrase #${i}: text overruns section`,
415
+ "bad-phrase-record",
416
+ );
417
+ }
418
+ const textBytes = new Uint8Array( // zero-copy view over the text bytes inside the original buffer
419
+ bytes.buffer,
420
+ bytes.byteOffset + sec.offset + pos,
421
+ textLen,
422
+ );
423
+ const text = decoder.decode(textBytes);
424
+ pos += textLen;
425
+ if (pos + 8 > sec.length) { // sample_rate (4 bytes) + pcm_byte_len (4 bytes)
426
+ throw new VoicePresetFormatError(
427
+ `voice preset phrase #${i}: truncated before sample_rate/pcm_len`,
428
+ "bad-phrase-record",
429
+ );
430
+ }
431
+ const sampleRate = view.getUint32(pos, true);
432
+ pos += 4;
433
+ const pcmByteLen = view.getUint32(pos, true);
434
+ pos += 4;
435
+ if (pcmByteLen % 4 !== 0) { // PCM must hold a whole number of Float32 samples
436
+ throw new VoicePresetFormatError(
437
+ `voice preset phrase #${i}: pcm byte length ${pcmByteLen} is not a multiple of 4`,
438
+ "bad-phrase-record",
439
+ );
440
+ }
441
+ if (pos + pcmByteLen > sec.length) {
442
+ throw new VoicePresetFormatError(
443
+ `voice preset phrase #${i}: pcm overruns section`,
444
+ "bad-phrase-record",
445
+ );
446
+ }
447
+ const pcm = copyFloat32(bytes, sec.offset + pos, pcmByteLen); // copied out, so the result does not alias the input buffer
448
+ pos += pcmByteLen;
449
+ out.push({ text, sampleRate, pcm });
450
+ }
451
+ return out; // bytes left in the section after the last record are ignored
452
+ }
453
+
454
+ /**
455
+ * Parse a voice-preset binary blob. Throws `VoicePresetFormatError` on
456
+ * structurally malformed input — this is the single defensive boundary for
457
+ * the format. NOTE(review): text is decoded with a fatal `TextDecoder`, so
458
+ * invalid UTF-8 surfaces as a `TypeError` instead. Supports both v1 and v2 files. For v1 files the v2-only fields are returned as their empty equivalents.
459
+ */
460
+ export function readVoicePresetFile(bytes: Uint8Array): VoicePresetFile {
461
+ const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); // view limited to this Uint8Array's window of the buffer
462
+ const header = readHeader(view); // validates magic, version and all section bounds before any payload is read
463
+ return {
464
+ version: header.version,
465
+ embedding: readEmbedding(bytes, header.embedding),
466
+ phrases: readPhrases(bytes, header.phrases),
467
+ refAudioTokens: readRefAudioTokens(bytes, header.refAudioTokens),
468
+ refText: readUtf8(bytes, header.refText),
469
+ instruct: readUtf8(bytes, header.instruct),
470
+ metadata: readMetadata(bytes, header.metadata),
471
+ };
472
+ }
473
+
474
+ /**
475
+ * Serialize a voice preset to the v1 binary format. The output is a fresh
476
+ * `Uint8Array` ready to be written to disk.
477
+ *
478
+ * Use this only when the caller deliberately wants the legacy v1 shape (e.g.
479
+ * the existing Kokoro-style placeholder builder). New code should call
480
+ * `writeVoicePresetFileV2`. Throws `VoicePresetFormatError` ("bad-phrase-record") when a phrase's UTF-8 text exceeds 65535 bytes.
481
+ */
482
+ export function writeVoicePresetFile(file: {
483
+ embedding: Float32Array;
484
+ phrases: ReadonlyArray<VoicePresetSeedPhrase>;
485
+ }): Uint8Array {
486
+ const encoder = new TextEncoder();
487
+ const encodedTexts = file.phrases.map((p) => encoder.encode(p.text)); // encode once; reused for sizing and for writing
488
+
489
+ const embBytes = file.embedding.byteLength;
490
+ let phrBytes = 4; // count
491
+ for (let i = 0; i < file.phrases.length; i++) {
492
+ const t = encodedTexts[i];
493
+ if (t.byteLength > 0xffff) { // the text length prefix is a uint16
494
+ throw new VoicePresetFormatError(
495
+ `phrase #${i} text too long (${t.byteLength} bytes, max 65535)`,
496
+ "bad-phrase-record",
497
+ );
498
+ }
499
+ phrBytes += 2 + t.byteLength + 4 + 4 + file.phrases[i].pcm.byteLength; // text_len + text + sample_rate + pcm_len + pcm
500
+ }
501
+
502
+ const embOff = VOICE_PRESET_HEADER_BYTES_V1; // embedding directly follows the header
503
+ const phrOff = embOff + embBytes; // phrase section directly follows the embedding
504
+ const total = phrOff + phrBytes;
505
+
506
+ const out = new Uint8Array(total);
507
+ const view = new DataView(out.buffer);
508
+ view.setUint32(0, VOICE_PRESET_MAGIC, true);
509
+ view.setUint32(4, VOICE_PRESET_VERSION_V1, true);
510
+ view.setUint32(8, embOff, true);
511
+ view.setUint32(12, embBytes, true);
512
+ view.setUint32(16, phrOff, true);
513
+ view.setUint32(20, phrBytes, true); // v1 always emits the phrase section (at least the 4-byte count)
514
+
515
+ // Embedding
516
+ out.set(
517
+ new Uint8Array( // byte view over exactly the embedding's window of its buffer
518
+ file.embedding.buffer,
519
+ file.embedding.byteOffset,
520
+ file.embedding.byteLength,
521
+ ),
522
+ embOff,
523
+ );
524
+
525
+ // Phrases
526
+ writePhraseSection(out, view, phrOff, file.phrases, encodedTexts);
527
+
528
+ return out;
529
+ }
530
+
531
+ function writePhraseSection( // Serialize the phrase count and every phrase record into `out`, starting at `startOff`.
532
+ out: Uint8Array,
533
+ view: DataView, // callers in this file pass a DataView over `out.buffer`
534
+ startOff: number,
535
+ phrases: ReadonlyArray<VoicePresetSeedPhrase>,
536
+ encodedTexts: Uint8Array[], // UTF-8 bytes of each phrase text, index-aligned with `phrases`
537
+ ): void {
538
+ let pos = startOff; // write cursor, absolute within `out`
539
+ view.setUint32(pos, phrases.length, true);
540
+ pos += 4;
541
+ for (let i = 0; i < phrases.length; i++) {
542
+ const t = encodedTexts[i];
543
+ const phrase = phrases[i];
544
+ view.setUint16(pos, t.byteLength, true); // no range check here: both visible callers reject texts over 0xffff bytes beforehand
545
+ pos += 2;
546
+ out.set(t, pos);
547
+ pos += t.byteLength;
548
+ view.setUint32(pos, phrase.sampleRate, true);
549
+ pos += 4;
550
+ view.setUint32(pos, phrase.pcm.byteLength, true);
551
+ pos += 4;
552
+ out.set(
553
+ new Uint8Array( // raw bytes of the PCM samples, honouring the Float32Array's own byteOffset
554
+ phrase.pcm.buffer,
555
+ phrase.pcm.byteOffset,
556
+ phrase.pcm.byteLength,
557
+ ),
558
+ pos,
559
+ );
560
+ pos += phrase.pcm.byteLength;
561
+ }
562
+ }
563
+
564
+ /**
565
+ * Write a voice preset in the v2 (additive) layout. Used by the OmniVoice
566
+ * freeze pipeline (`freeze-voice.mjs`) and other producers that need to
567
+ * persist `refAudioTokens` / `refText` / `instruct` alongside the v1
568
+ * embedding + phrase-seed sections.
569
+ *
570
+ * Any field that the caller doesn't need to persist can be omitted (or
571
+ * passed empty). The on-disk section is then written as length=0 and is
572
+ * read back as the empty equivalent.
573
+ */
574
+ export function writeVoicePresetFileV2(file: {
575
+ embedding?: Float32Array;
576
+ phrases?: ReadonlyArray<VoicePresetSeedPhrase>;
577
+ refAudioTokens?: RefAudioTokens;
578
+ refText?: string;
579
+ instruct?: string;
580
+ metadata?: Record<string, unknown>;
581
+ }): Uint8Array {
582
+ const embedding = file.embedding ?? new Float32Array(0);
583
+ const phrases = file.phrases ?? [];
584
+ const refAudioTokens = file.refAudioTokens ?? {
585
+ K: 0,
586
+ refT: 0,
587
+ tokens: new Int32Array(0),
588
+ };
589
+ const refText = file.refText ?? "";
590
+ const instruct = file.instruct ?? "";
591
+ const metadata = file.metadata ?? {};
592
+
593
+ if (refAudioTokens.K * refAudioTokens.refT !== refAudioTokens.tokens.length) {
594
+ throw new VoicePresetFormatError(
595
+ `ref_audio_tokens shape mismatch: K=${refAudioTokens.K}, ref_T=${refAudioTokens.refT}, but tokens.length=${refAudioTokens.tokens.length}`,
596
+ "bad-ref-tokens",
597
+ );
598
+ }
599
+
600
+ const encoder = new TextEncoder();
601
+ const encodedTexts = phrases.map((p) => encoder.encode(p.text));
602
+ const encodedRefText = encoder.encode(refText);
603
+ const encodedInstruct = encoder.encode(instruct);
604
+ const encodedMetadata =
605
+ Object.keys(metadata).length === 0
606
+ ? new Uint8Array(0)
607
+ : encoder.encode(JSON.stringify(metadata));
608
+
609
+ // Compute payload sizes up-front so we can lay out section offsets.
610
+ const embBytes = embedding.byteLength;
611
+ let phrBytes = phrases.length === 0 && encodedTexts.length === 0 ? 0 : 4;
612
+ if (phrBytes > 0) {
613
+ for (let i = 0; i < phrases.length; i++) {
614
+ const t = encodedTexts[i];
615
+ if (t.byteLength > 0xffff) {
616
+ throw new VoicePresetFormatError(
617
+ `phrase #${i} text too long (${t.byteLength} bytes, max 65535)`,
618
+ "bad-phrase-record",
619
+ );
620
+ }
621
+ phrBytes += 2 + t.byteLength + 4 + 4 + phrases[i].pcm.byteLength;
622
+ }
623
+ }
624
+ const refTokensBytes =
625
+ refAudioTokens.tokens.length === 0 && refAudioTokens.K === 0
626
+ ? 0
627
+ : 8 + refAudioTokens.tokens.byteLength;
628
+
629
+ // Lay out sections in declared order. Empty sections claim no space and
630
+ // are recorded as (offset=0, length=0).
631
+ let cursor = VOICE_PRESET_HEADER_BYTES_V2;
632
+ const embOff = embBytes > 0 ? cursor : 0;
633
+ cursor += embBytes;
634
+ const phrOff = phrBytes > 0 ? cursor : 0;
635
+ cursor += phrBytes;
636
+ const refTokensOff = refTokensBytes > 0 ? cursor : 0;
637
+ cursor += refTokensBytes;
638
+ const refTextOff = encodedRefText.byteLength > 0 ? cursor : 0;
639
+ cursor += encodedRefText.byteLength;
640
+ const instructOff = encodedInstruct.byteLength > 0 ? cursor : 0;
641
+ cursor += encodedInstruct.byteLength;
642
+ const metadataOff = encodedMetadata.byteLength > 0 ? cursor : 0;
643
+ cursor += encodedMetadata.byteLength;
644
+
645
+ const total = cursor;
646
+ const out = new Uint8Array(total);
647
+ const view = new DataView(out.buffer);
648
+
649
+ view.setUint32(0, VOICE_PRESET_MAGIC, true);
650
+ view.setUint32(4, VOICE_PRESET_VERSION_V2, true);
651
+ view.setUint32(8, embOff, true);
652
+ view.setUint32(12, embBytes, true);
653
+ view.setUint32(16, phrOff, true);
654
+ view.setUint32(20, phrBytes, true);
655
+ view.setUint32(24, refTokensOff, true);
656
+ view.setUint32(28, refTokensBytes, true);
657
+ view.setUint32(32, refTextOff, true);
658
+ view.setUint32(36, encodedRefText.byteLength, true);
659
+ view.setUint32(40, instructOff, true);
660
+ view.setUint32(44, encodedInstruct.byteLength, true);
661
+ view.setUint32(48, metadataOff, true);
662
+ view.setUint32(52, encodedMetadata.byteLength, true);
663
+ view.setUint32(56, 0, true);
664
+ view.setUint32(60, 0, true);
665
+
666
+ if (embBytes > 0) {
667
+ out.set(
668
+ new Uint8Array(embedding.buffer, embedding.byteOffset, embBytes),
669
+ embOff,
670
+ );
671
+ }
672
+ if (phrBytes > 0) {
673
+ writePhraseSection(out, view, phrOff, phrases, encodedTexts);
674
+ }
675
+ if (refTokensBytes > 0) {
676
+ view.setUint32(refTokensOff, refAudioTokens.K, true);
677
+ view.setUint32(refTokensOff + 4, refAudioTokens.refT, true);
678
+ if (refAudioTokens.tokens.byteLength > 0) {
679
+ out.set(
680
+ new Uint8Array(
681
+ refAudioTokens.tokens.buffer,
682
+ refAudioTokens.tokens.byteOffset,
683
+ refAudioTokens.tokens.byteLength,
684
+ ),
685
+ refTokensOff + 8,
686
+ );
687
+ }
688
+ }
689
+ if (encodedRefText.byteLength > 0) {
690
+ out.set(encodedRefText, refTextOff);
691
+ }
692
+ if (encodedInstruct.byteLength > 0) {
693
+ out.set(encodedInstruct, instructOff);
694
+ }
695
+ if (encodedMetadata.byteLength > 0) {
696
+ out.set(encodedMetadata, metadataOff);
697
+ }
698
+
699
+ return out;
700
+ }