@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,75 @@
1
+ /**
2
+ * pyannote-segmentation-3.0 shared types and pure segmentation logic.
3
+ *
4
+ * Diarization runs EXCLUSIVELY through the fused `libelizainference`
5
+ * `eliza_inference_diariz_*` ABI (`FusedDiarizer` in `diarizer-fused.ts`).
6
+ * The standalone `libvoice_classifier` binding has been removed — there is one
7
+ * on-device voice runtime.
8
+ *
9
+ * This file holds the shared types (`Diarizer`, `LocalSpeakerSegment`,
10
+ * `DiarizerOutput`), the model-id / window constants, the structured
11
+ * `DiarizerUnavailableError`, and the pure `classifyFramesToSegments` reducer
12
+ * the fused diarizer feeds its per-frame labels through.
13
+ */
14
+ export declare const PYANNOTE_SEGMENTATION_3_INT8_MODEL_ID: "pyannote-segmentation-3.0-int8";
15
+ export declare const PYANNOTE_SEGMENTATION_3_FP32_MODEL_ID: "pyannote-segmentation-3.0-fp32";
16
+ export type PyannoteDiarizerModelId = typeof PYANNOTE_SEGMENTATION_3_INT8_MODEL_ID | typeof PYANNOTE_SEGMENTATION_3_FP32_MODEL_ID;
17
+ /** pyannote 3.0 segmentation window length (seconds) — model-fixed. */
18
+ export declare const PYANNOTE_WINDOW_SECONDS = 5;
19
+ /** Required mono sample rate (matches upstream training config). */
20
+ export declare const PYANNOTE_SAMPLE_RATE = 16000;
21
+ /** Number of output frames per 5 s window (= 293 in the upstream export). */
22
+ export declare const PYANNOTE_FRAMES_PER_WINDOW = 293;
23
+ /** Per-frame stride in milliseconds (5_000ms / 293 frames ≈ 17.06 ms). */
24
+ export declare const PYANNOTE_FRAME_STRIDE_MS: number;
25
+ /** Output class count — 3 single + 3 overlap + 1 silence = 7. */
26
+ export declare const PYANNOTE_CLASS_COUNT = 7;
27
+ /**
28
+ * Powerset mapping of pyannote-3 segmentation classes. Each class is
29
+ * the set of local speaker indices active in that frame. Class 0 is the
30
+ * silence/no-speaker frame. This matches the upstream `Powerset` head
31
+ * with `max_speakers_per_chunk=3, max_speakers_per_frame=2`.
32
+ */
33
+ export declare const PYANNOTE_CLASS_TO_SPEAKERS: ReadonlyArray<ReadonlyArray<number>>;
34
+ /** Thrown when the diarizer cannot be constructed, or when it is handed input/tensors it cannot use (see `code`). */
35
+ export declare class DiarizerUnavailableError extends Error {
36
+ readonly code: "ort-missing" | "native-missing" | "library-missing" | "model-missing" | "model-unavailable" | "model-load-failed" | "model-shape-mismatch" | "forward-not-implemented" | "invalid-input";
37
+ constructor(code: DiarizerUnavailableError["code"], message: string);
38
+ }
39
+ /**
40
+ * One speaker-tagged span within a diarized window. `localSpeakerId` is
41
+ * **window-local** (0..2): the same physical speaker gets different
42
+ * local ids in different windows. The profile store re-clusters local
43
+ * ids into stable identities via the WeSpeaker embedding cosine.
44
+ */
45
+ export interface LocalSpeakerSegment {
46
+ startMs: number;
47
+ endMs: number;
48
+ localSpeakerId: number;
49
+ /** Mean winning-class softmax probability over the span's frames. */
50
+ confidence: number;
51
+ /** True if the span contains any overlap-class frames. */
52
+ hasOverlap: boolean;
53
+ }
54
+ export interface DiarizerOutput {
55
+ segments: LocalSpeakerSegment[];
56
+ /** Number of distinct local speakers observed in the window. */
57
+ localSpeakerCount: number;
58
+ /** Total speech (any-speaker) duration in milliseconds. */
59
+ speechMs: number;
60
+ }
61
+ export interface Diarizer {
62
+ readonly modelId: PyannoteDiarizerModelId;
63
+ readonly sampleRate: number;
64
+ /** Process one ~5 s window of PCM. */
65
+ diarizeWindow(pcm: Float32Array): Promise<DiarizerOutput>;
66
+ dispose(): Promise<void>;
67
+ }
68
+ /**
69
+ * Reduce a per-frame class probability tensor into one segment per
70
+ * (local speaker × contiguous frame run). Frames where the silence
71
+ * class wins are excluded; frames in overlap classes contribute to
72
+ * **all** speakers in that class.
73
+ */
74
+ export declare function classifyFramesToSegments(classProbs: Float32Array, frames: number, classCount: number, startMs: number, frameStrideMs: number): DiarizerOutput;
75
+ //# sourceMappingURL=diarizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"diarizer.d.ts","sourceRoot":"","sources":["diarizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,eAAO,MAAM,qCAAqC,EACjD,gCAAyC,CAAC;AAC3C,eAAO,MAAM,qCAAqC,EACjD,gCAAyC,CAAC;AAC3C,MAAM,MAAM,uBAAuB,GAChC,OAAO,qCAAqC,GAC5C,OAAO,qCAAqC,CAAC;AAEhD,uEAAuE;AACvE,eAAO,MAAM,uBAAuB,IAAI,CAAC;AACzC,oEAAoE;AACpE,eAAO,MAAM,oBAAoB,QAAS,CAAC;AAC3C,6EAA6E;AAC7E,eAAO,MAAM,0BAA0B,MAAM,CAAC;AAC9C,0EAA0E;AAC1E,eAAO,MAAM,wBAAwB,QAC0B,CAAC;AAChE,iEAAiE;AACjE,eAAO,MAAM,oBAAoB,IAAI,CAAC;AAEtC;;;;;GAKG;AACH,eAAO,MAAM,0BAA0B,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAS1E,CAAC;AAEH,sDAAsD;AACtD,qBAAa,wBAAyB,SAAQ,KAAK;IAClD,QAAQ,CAAC,IAAI,EACV,aAAa,GACb,gBAAgB,GAChB,iBAAiB,GACjB,eAAe,GACf,mBAAmB,GACnB,mBAAmB,GACnB,sBAAsB,GACtB,yBAAyB,GACzB,eAAe,CAAC;gBACP,IAAI,EAAE,wBAAwB,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,MAAM;CAKnE;AAED;;;;;GAKG;AACH,MAAM,WAAW,mBAAmB;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,EAAE,MAAM,CAAC;IACvB,yDAAyD;IACzD,UAAU,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,UAAU,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC9B,QAAQ,EAAE,mBAAmB,EAAE,CAAC;IAChC,gEAAgE;IAChE,iBAAiB,EAAE,MAAM,CAAC;IAC1B,2DAA2D;IAC3D,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,CAAC,OAAO,EAAE,uBAAuB,CAAC;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,sCAAsC;IACtC,aAAa,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IAC1D,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAmBD;;;;;GAKG;AACH,wBAAgB,wBAAwB,CACvC,UAAU,EAAE,YAAY,EACxB,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,MAAM,GACnB,cAAc,CAsFhB"}
@@ -0,0 +1,218 @@
1
+ /**
2
+ * pyannote-segmentation-3.0 shared types and pure segmentation logic.
3
+ *
4
+ * Diarization runs EXCLUSIVELY through the fused `libelizainference`
5
+ * `eliza_inference_diariz_*` ABI (`FusedDiarizer` in `diarizer-fused.ts`).
6
+ * The standalone `libvoice_classifier` binding has been removed — there is one
7
+ * on-device voice runtime.
8
+ *
9
+ * This file holds the shared types (`Diarizer`, `LocalSpeakerSegment`,
10
+ * `DiarizerOutput`), the model-id / window constants, the structured
11
+ * `DiarizerUnavailableError`, and the pure `classifyFramesToSegments` reducer
12
+ * the fused diarizer feeds its per-frame labels through.
13
+ */
14
+
15
/** Identifier of the int8 pyannote-segmentation-3.0 model variant. */
export const PYANNOTE_SEGMENTATION_3_INT8_MODEL_ID =
	"pyannote-segmentation-3.0-int8" as const;
/** Identifier of the fp32 pyannote-segmentation-3.0 model variant. */
export const PYANNOTE_SEGMENTATION_3_FP32_MODEL_ID =
	"pyannote-segmentation-3.0-fp32" as const;
/** Union of the model ids a `Diarizer` may report. */
export type PyannoteDiarizerModelId =
	| typeof PYANNOTE_SEGMENTATION_3_INT8_MODEL_ID
	| typeof PYANNOTE_SEGMENTATION_3_FP32_MODEL_ID;

/** Length of one segmentation window in seconds (fixed by the model). */
export const PYANNOTE_WINDOW_SECONDS = 5;
/** Mono sample rate in Hz the model expects (matches upstream training config). */
export const PYANNOTE_SAMPLE_RATE = 16_000;
/** Output frames produced per 5 s window (293 in the upstream export). */
export const PYANNOTE_FRAMES_PER_WINDOW = 293;
/** Milliseconds covered by one output frame: 5_000 ms / 293 frames ≈ 17.06 ms. */
export const PYANNOTE_FRAME_STRIDE_MS =
	(PYANNOTE_WINDOW_SECONDS * 1_000) / PYANNOTE_FRAMES_PER_WINDOW;
/** Number of output classes: 1 silence + 3 single-speaker + 3 two-speaker overlap. */
export const PYANNOTE_CLASS_COUNT = 7;

/**
 * Powerset decoding table for the pyannote-3 segmentation head: entry `c`
 * lists the local speaker indices that are active when class `c` wins a
 * frame. Entry 0 is the silence / no-speaker class. The layout matches the
 * upstream `Powerset` head configured with
 * `max_speakers_per_chunk=3, max_speakers_per_frame=2`.
 */
export const PYANNOTE_CLASS_TO_SPEAKERS: readonly (readonly number[])[] = [
	[], // class 0 — silence
	[0], // class 1 — speaker 0 alone
	[1], // class 2 — speaker 1 alone
	[2], // class 3 — speaker 2 alone
	[0, 1], // class 4 — speakers 0 and 1 overlapping
	[0, 2], // class 5 — speakers 0 and 2 overlapping
	[1, 2], // class 6 — speakers 1 and 2 overlapping
];
51
+
52
/**
 * Structured error raised by the diarization code.
 *
 * `code` is a machine-readable discriminator for the failure, `message`
 * carries the human-readable detail, and `name` is always
 * `"DiarizerUnavailableError"`.
 */
export class DiarizerUnavailableError extends Error {
	/**
	 * @param code Machine-readable failure category, exposed as the read-only
	 *   `code` property.
	 * @param message Human-readable description forwarded to `Error`.
	 */
	constructor(
		readonly code:
			| "ort-missing"
			| "native-missing"
			| "library-missing"
			| "model-missing"
			| "model-unavailable"
			| "model-load-failed"
			| "model-shape-mismatch"
			| "forward-not-implemented"
			| "invalid-input",
		message: string,
	) {
		super(message);
		this.name = "DiarizerUnavailableError";
	}
}
70
+
71
+ /**
72
+ * One speaker-tagged span within a diarized window. `localSpeakerId` is
73
+ * **window-local** (0..2): the same physical speaker gets different
74
+ * local ids in different windows. The profile store re-clusters local
75
+ * ids into stable identities via the WeSpeaker embedding cosine.
76
+ */
77
+ export interface LocalSpeakerSegment {
78
+ startMs: number;
79
+ endMs: number;
80
+ localSpeakerId: number;
81
+ /** Mean winning-class softmax probability over the span's frames. */
82
+ confidence: number;
83
+ /** True if the span contains any overlap-class frames. */
84
+ hasOverlap: boolean;
85
+ }
86
+
87
+ export interface DiarizerOutput {
88
+ segments: LocalSpeakerSegment[];
89
+ /** Number of distinct local speakers observed in the window. */
90
+ localSpeakerCount: number;
91
+ /** Total speech (any-speaker) duration in milliseconds. */
92
+ speechMs: number;
93
+ }
94
+
95
+ export interface Diarizer {
96
+ readonly modelId: PyannoteDiarizerModelId;
97
+ readonly sampleRate: number;
98
+ /** Process one ~5 s window of PCM. */
99
+ diarizeWindow(pcm: Float32Array): Promise<DiarizerOutput>;
100
+ dispose(): Promise<void>;
101
+ }
102
+
103
/**
 * Softmax of a single row, shifted by the row maximum before exponentiating
 * so large inputs do not overflow.
 *
 * @param row Scores for one frame.
 * @returns A new `Float32Array` of the same length. If the exponentials sum
 *   to zero (e.g. an empty row) the un-normalised buffer is returned as is.
 */
function softmax(row: Float32Array): Float32Array {
	let peak = Number.NEGATIVE_INFINITY;
	for (const value of row) {
		if (value > peak) peak = value;
	}

	const weights = new Float32Array(row.length);
	let total = 0;
	row.forEach((value, k) => {
		// Round to float32 first so the running sum sees exactly what is stored.
		const weight = Math.fround(Math.exp(value - peak));
		weights[k] = weight;
		total += weight;
	});

	// Typed-array `map` yields a fresh Float32Array, rounding each quotient to float32.
	return total === 0 ? weights : weights.map((weight) => weight / total);
}
119
+
120
/**
 * Reduce a per-frame class score tensor into one segment per
 * (local speaker × contiguous frame run).
 *
 * Each row of `classProbs` is passed through `softmax` and the arg-max class
 * is decoded via `PYANNOTE_CLASS_TO_SPEAKERS`. Frames whose winning class maps
 * to no speakers (silence, or a class index with no table entry) end every
 * open run. Frames in an overlap class extend the run of **every** speaker in
 * that class and mark those runs as overlapping.
 *
 * @param classProbs Row-major `frames × classCount` scores, one row per frame.
 * @param frames Number of frames (rows) in `classProbs`.
 * @param classCount Number of classes (columns) per frame.
 * @param startMs Timestamp of frame 0; added to every segment boundary.
 * @param frameStrideMs Duration of one frame in milliseconds.
 * @returns Segments sorted by `startMs` then `endMs`, the number of distinct
 *   local speakers seen, and the total duration of non-silence frames. Each
 *   segment's `confidence` is the mean winning-class probability over its
 *   frames.
 * @throws DiarizerUnavailableError with code `"model-shape-mismatch"` when
 *   `classProbs.length !== frames * classCount`.
 */
export function classifyFramesToSegments(
	classProbs: Float32Array,
	frames: number,
	classCount: number,
	startMs: number,
	frameStrideMs: number,
): DiarizerOutput {
	const expectedLength = frames * classCount;
	if (classProbs.length !== expectedLength) {
		// A wrong-sized tensor is a shape problem, not a load failure, so it is
		// reported with the dedicated shape-mismatch code.
		throw new DiarizerUnavailableError(
			"model-shape-mismatch",
			`[pyannote] frame×class tensor mismatch: have ${classProbs.length}, expected ${expectedLength}`,
		);
	}

	type Run = {
		startFrame: number;
		endFrame: number; // exclusive
		confSum: number;
		count: number;
		hasOverlap: boolean;
	};
	// Runs still being extended, keyed by local speaker id.
	const openRuns = new Map<number, Run>();
	// Runs that ended, in the order they were closed.
	const finishedRuns: Array<Run & { speakerId: number }> = [];
	let speechFrames = 0;

	for (let f = 0; f < frames; f += 1) {
		const offset = f * classCount;
		const probs = softmax(classProbs.subarray(offset, offset + classCount));

		// Arg-max over the row; ties keep the lowest class index.
		let winner = 0;
		let winnerProb = probs[0];
		for (let c = 1; c < classCount; c += 1) {
			if (probs[c] > winnerProb) {
				winner = c;
				winnerProb = probs[c];
			}
		}

		const activeSpeakers = PYANNOTE_CLASS_TO_SPEAKERS[winner] ?? [];
		const isOverlap = activeSpeakers.length > 1;
		if (activeSpeakers.length > 0) speechFrames += 1;

		// Close the run of every speaker that is not active in this frame.
		// Deleting from a Map while iterating it is well-defined in JS.
		for (const [sid, run] of openRuns.entries()) {
			if (!activeSpeakers.includes(sid)) {
				finishedRuns.push({ ...run, speakerId: sid });
				openRuns.delete(sid);
			}
		}

		// Extend the existing run, or start a new one, for each active speaker.
		for (const sid of activeSpeakers) {
			const current = openRuns.get(sid);
			if (current) {
				current.endFrame = f + 1;
				current.confSum += winnerProb;
				current.count += 1;
				current.hasOverlap = current.hasOverlap || isOverlap;
			} else {
				openRuns.set(sid, {
					startFrame: f,
					endFrame: f + 1,
					confSum: winnerProb,
					count: 1,
					hasOverlap: isOverlap,
				});
			}
		}
	}

	// Runs still open after the last frame end at the window boundary.
	for (const [sid, run] of openRuns.entries()) {
		finishedRuns.push({ ...run, speakerId: sid });
	}

	const segments = finishedRuns
		.map<LocalSpeakerSegment>((run) => ({
			startMs: Math.round(startMs + run.startFrame * frameStrideMs),
			endMs: Math.round(startMs + run.endFrame * frameStrideMs),
			localSpeakerId: run.speakerId,
			confidence: run.count > 0 ? run.confSum / run.count : 0,
			hasOverlap: run.hasOverlap,
		}))
		.sort((a, b) =>
			a.startMs !== b.startMs ? a.startMs - b.startMs : a.endMs - b.endMs,
		);

	const localSpeakers = new Set(segments.map((s) => s.localSpeakerId));
	return {
		segments,
		localSpeakerCount: localSpeakers.size,
		speechMs: Math.round(speechFrames * frameStrideMs),
	};
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Speaker-embedding encoder — fused `libelizainference` binding (ABI v6).
3
+ *
4
+ * The strategic on-device voice engine is the single fused-FFI
5
+ * `libelizainference` library (the merged llama.cpp fork — see
6
+ * `plugins/plugin-local-inference/native/CLAUDE.md` §1). This class drives the
7
+ * WeSpeaker ResNet34-LM speaker encoder through that one native handle via the
8
+ * `eliza_inference_speaker_*` ABI. This is the SOLE on-device speaker-encoder
9
+ * runtime — the same `ffi`/`ctx` pair powers VAD / wake-word / TTS / ASR, so the
10
+ * whole voice pipeline runs through one library.
11
+ *
12
+ * Shape mirrors the legacy `encoder.ts::SpeakerEncoder` contract exactly:
13
+ * - 16 kHz mono fp32 PCM in,
14
+ * - one L2-normalized 256-d embedding out,
15
+ * - `encode(pcm)` / `dispose()`.
16
+ *
17
+ * No silent fallback: when the fused build does not export the speaker ABI
18
+ * (`eliza_inference_speaker_supported() == 0`) `load()` throws a structured
19
+ * `SpeakerEncoderGgmlUnavailableError` (AGENTS.md §3 — no synthetic
20
+ * embeddings, no standalone-lib fallback).
21
+ */
22
+ import type { ElizaInferenceContextHandle, ElizaInferenceFfi } from "../ffi-bindings";
23
+ import type { SpeakerEncoder } from "./encoder";
24
+ export interface FusedSpeakerEncoderOptions {
25
+ ffi: ElizaInferenceFfi;
26
+ ctx: ElizaInferenceContextHandle | (() => ElizaInferenceContextHandle);
27
+ /**
28
+ * Optional explicit WeSpeaker GGUF path. `null` lets the native runtime
29
+ * resolve the bundle's `speaker/` dir (the default).
30
+ */
31
+ ggufPath?: string | null;
32
+ }
33
+ /**
34
+ * Fused-`libelizainference` WeSpeaker speaker encoder. Owns one
35
+ * `eliza_inference_speaker_*` session; `encode()` runs one forward pass over
36
+ * the supplied 16 kHz PCM and returns the normalized 256-d embedding. The
37
+ * native side owns the model graph; this class is a thin handle.
38
+ */
39
+ export declare class FusedSpeakerEncoder implements SpeakerEncoder {
40
+ private readonly ffi;
41
+ private readonly handle;
42
+ readonly embeddingDim = 256;
43
+ readonly sampleRate = 16000;
44
+ readonly modelId: "wespeaker-resnet34-lm-int8";
45
+ private disposed;
46
+ private constructor();
47
+ /**
48
+ * True only when the fused `libelizainference` build exports the speaker
49
+ * ABI and advertises support at runtime.
50
+ */
51
+ static isSupported(ffi: ElizaInferenceFfi | null | undefined): boolean;
52
+ /**
53
+ * Open a native speaker-encoder session. Throws
54
+ * `SpeakerEncoderGgmlUnavailableError` when the runtime is not present.
55
+ */
56
+ static load(opts: FusedSpeakerEncoderOptions): Promise<FusedSpeakerEncoder>;
57
+ encode(pcm: Float32Array): Promise<Float32Array>;
58
+ dispose(): Promise<void>;
59
+ }
60
+ //# sourceMappingURL=encoder-fused.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encoder-fused.d.ts","sourceRoot":"","sources":["encoder-fused.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EACX,2BAA2B,EAC3B,iBAAiB,EAEjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAShD,MAAM,WAAW,0BAA0B;IAC1C,GAAG,EAAE,iBAAiB,CAAC;IACvB,GAAG,EAAE,2BAA2B,GAAG,CAAC,MAAM,2BAA2B,CAAC,CAAC;IACvE;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACzB;AAED;;;;;GAKG;AACH,qBAAa,mBAAoB,YAAW,cAAc;IAOxD,OAAO,CAAC,QAAQ,CAAC,GAAG;IACpB,OAAO,CAAC,QAAQ,CAAC,MAAM;IAPxB,QAAQ,CAAC,YAAY,OAA8B;IACnD,QAAQ,CAAC,UAAU,SAA4B;IAC/C,QAAQ,CAAC,OAAO,+BAAuC;IACvD,OAAO,CAAC,QAAQ,CAAS;IAEzB,OAAO;IAKP;;;OAGG;IACH,MAAM,CAAC,WAAW,CAAC,GAAG,EAAE,iBAAiB,GAAG,IAAI,GAAG,SAAS,GAAG,OAAO;IAKtE;;;OAGG;WACU,IAAI,CAChB,IAAI,EAAE,0BAA0B,GAC9B,OAAO,CAAC,mBAAmB,CAAC;IAyBzB,MAAM,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IA6BhD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAK9B"}
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Real-FFI tests for `FusedSpeakerEncoder`: run against the ACTUAL fused
3
+ * `libelizainference` — loaded, `create`d, and probed for `speakerSupported()`
4
+ * — never a stub. The speaker encoder is the SOLE on-device speaker runtime
5
+ * (the `eliza_inference_speaker_*` ABI off the one fused handle).
6
+ *
7
+ * Skipped (not faked) when the fused lib is not resolvable, or when it does not
8
+ * link the WeSpeaker speaker graph. To run them, point `ELIZA_INFERENCE_LIBRARY`
9
+ * (or `ELIZA_INFERENCE_LIB_DIR`) at a built `libelizainference` with the speaker
10
+ * ABI, or build one via `packages/app-core/scripts/build-llama-cpp-mtp.mjs`.
11
+ * Runs in the post-merge `bun test` lane (`*.real.test.ts` is excluded from the
12
+ * default lane in `vitest.config.ts`).
13
+ */
14
+
15
+ import { existsSync, mkdtempSync, rmSync } from "node:fs";
16
+ import os from "node:os";
17
+ import path from "node:path";
18
+ import {
19
+ afterAll,
20
+ afterEach,
21
+ beforeAll,
22
+ beforeEach,
23
+ describe,
24
+ expect,
25
+ it,
26
+ } from "vitest";
27
+
28
+ import { resolveFusedLibraryPath } from "../../desktop-fused-ffi-backend-runtime";
29
+ import {
30
+ type ElizaInferenceContextHandle,
31
+ type ElizaInferenceFfi,
32
+ loadElizaInferenceFfi,
33
+ } from "../ffi-bindings";
34
+ import { FusedSpeakerEncoder } from "./encoder-fused";
35
+
36
/** Embedding width the encoder is expected to report and return. */
const EMB_DIM = 256;
/** Sample rate (Hz) the encoder is expected to report; also the rate of the test tone. */
const SAMPLE_RATE_HZ = 16_000;
/** One second of audio at `SAMPLE_RATE_HZ` — the length of the valid test input. */
const MIN_SAMPLES = SAMPLE_RATE_HZ;

const isBun = typeof (globalThis as { Bun?: unknown }).Bun !== "undefined";
const LIB_PATH = resolveFusedLibraryPath(null, process.env);
// The native speaker_open needs a WeSpeaker GGUF. Provide one via
// ELIZA_TEST_SPEAKER_GGUF (e.g. wespeaker-resnet34-lm.gguf); the encode
// assertions skip honestly when it isn't supplied — they are never faked.
const SPEAKER_GGUF = process.env.ELIZA_TEST_SPEAKER_GGUF?.trim();
const HAVE_MODEL = !!SPEAKER_GGUF && existsSync(SPEAKER_GGUF);

// The whole suite is skipped outside Bun or when no fused library resolves.
describe.skipIf(!isBun || !LIB_PATH)("FusedSpeakerEncoder — real FFI", () => {
	let ffi: ElizaInferenceFfi;
	let ctx: ElizaInferenceContextHandle;
	let tmp: string;

	beforeAll(() => {
		// LIB_PATH is non-null inside the skipIf-guarded block.
		ffi = loadElizaInferenceFfi(LIB_PATH as string);
	});
	afterAll(() => {
		ffi?.close();
	});
	// Every test gets its own context rooted at a fresh temp directory.
	beforeEach(() => {
		tmp = mkdtempSync(path.join(os.tmpdir(), "speaker-fused-real-"));
		ctx = ffi.create(tmp);
	});
	afterEach(() => {
		ffi.destroy(ctx);
		rmSync(tmp, { recursive: true, force: true });
	});

	it("isSupported() reflects the loaded build's speaker ABI", () => {
		expect(typeof FusedSpeakerEncoder.isSupported(ffi)).toBe("boolean");
	});

	it.skipIf(!HAVE_MODEL)(
		"encode() returns a finite 256-d embedding off the real WeSpeaker graph",
		async () => {
			const enc = await FusedSpeakerEncoder.load({
				ffi,
				ctx,
				ggufPath: SPEAKER_GGUF,
			});
			// Dispose in `finally` so a failed assertion does not leave the
			// encoder session open when `afterEach` destroys the context.
			try {
				expect(enc.embeddingDim).toBe(EMB_DIM);
				expect(enc.sampleRate).toBe(SAMPLE_RATE_HZ);
				// 1 s of a 220 Hz tone — a real, finite input the native graph accepts.
				const pcm = new Float32Array(MIN_SAMPLES);
				for (let i = 0; i < pcm.length; i += 1) {
					pcm[i] = 0.2 * Math.sin((2 * Math.PI * 220 * i) / SAMPLE_RATE_HZ);
				}
				const emb = await enc.encode(pcm);
				expect(emb.length).toBe(EMB_DIM);
				expect(emb.every((v) => Number.isFinite(v))).toBe(true);
				// A non-degenerate embedding has real magnitude.
				let norm = 0;
				for (const v of emb) norm += v * v;
				expect(Math.sqrt(norm)).toBeGreaterThan(0);
			} finally {
				await enc.dispose();
			}
		},
	);

	it.skipIf(!HAVE_MODEL)(
		"rejects pcm shorter than the minimum window before hitting the native graph",
		async () => {
			const enc = await FusedSpeakerEncoder.load({
				ffi,
				ctx,
				ggufPath: SPEAKER_GGUF,
			});
			try {
				await expect(enc.encode(new Float32Array(100))).rejects.toMatchObject({
					name: "SpeakerEncoderGgmlUnavailableError",
					code: "invalid-input",
				});
			} finally {
				await enc.dispose();
			}
		},
	);
});
@@ -0,0 +1,138 @@
1
+ /**
2
+ * Speaker-embedding encoder — fused `libelizainference` binding (ABI v6).
3
+ *
4
+ * The strategic on-device voice engine is the single fused-FFI
5
+ * `libelizainference` library (the merged llama.cpp fork — see
6
+ * `plugins/plugin-local-inference/native/CLAUDE.md` §1). This class drives the
7
+ * WeSpeaker ResNet34-LM speaker encoder through that one native handle via the
8
+ * `eliza_inference_speaker_*` ABI. This is the SOLE on-device speaker-encoder
9
+ * runtime — the same `ffi`/`ctx` pair powers VAD / wake-word / TTS / ASR, so the
10
+ * whole voice pipeline runs through one library.
11
+ *
12
+ * Shape mirrors the legacy `encoder.ts::SpeakerEncoder` contract exactly:
13
+ * - 16 kHz mono fp32 PCM in,
14
+ * - one L2-normalized 256-d embedding out,
15
+ * - `encode(pcm)` / `dispose()`.
16
+ *
17
+ * No silent fallback: when the fused build does not export the speaker ABI
18
+ * (`eliza_inference_speaker_supported() == 0`) `load()` throws a structured
19
+ * `SpeakerEncoderGgmlUnavailableError` (AGENTS.md §3 — no synthetic
20
+ * embeddings, no standalone-lib fallback).
21
+ */
22
+
23
+ import type {
24
+ ElizaInferenceContextHandle,
25
+ ElizaInferenceFfi,
26
+ NativeSpeakerHandle,
27
+ } from "../ffi-bindings";
28
+ import type { SpeakerEncoder } from "./encoder";
29
+ import { WESPEAKER_RESNET34_LM_INT8_MODEL_ID } from "./encoder";
30
+ import {
31
+ SPEAKER_GGML_EMBEDDING_DIM,
32
+ SPEAKER_GGML_MIN_SAMPLES,
33
+ SPEAKER_GGML_SAMPLE_RATE,
34
+ SpeakerEncoderGgmlUnavailableError,
35
+ } from "./encoder-ggml";
36
+
37
/**
 * Inputs accepted by `FusedSpeakerEncoder.load()`.
 */
export interface FusedSpeakerEncoderOptions {
	/** Fused-library binding whose speaker methods the encoder calls. */
	ffi: ElizaInferenceFfi;
	/**
	 * Native context handle, or a zero-argument thunk producing one. A thunk
	 * is invoked once, inside `load()`, just before the session is opened.
	 */
	ctx: (() => ElizaInferenceContextHandle) | ElizaInferenceContextHandle;
	/**
	 * Explicit WeSpeaker GGUF path (optional). Omitting it or passing `null`
	 * hands `null` to the native runtime, which then resolves the bundle's
	 * `speaker/` directory on its own (the default).
	 */
	ggufPath?: null | string;
}
46
+
47
+ /**
48
+ * Fused-`libelizainference` WeSpeaker speaker encoder. Owns one
49
+ * `eliza_inference_speaker_*` session; `encode()` runs one forward pass over
50
+ * the supplied 16 kHz PCM and returns the normalized 256-d embedding. The
51
+ * native side owns the model graph; this class is a thin handle.
52
+ */
53
export class FusedSpeakerEncoder implements SpeakerEncoder {
	readonly embeddingDim = SPEAKER_GGML_EMBEDDING_DIM;
	readonly sampleRate = SPEAKER_GGML_SAMPLE_RATE;
	readonly modelId = WESPEAKER_RESNET34_LM_INT8_MODEL_ID;
	/** Set by `dispose()`; once true, `encode()` refuses to run. */
	private disposed = false;

	/** Instances are created through `load()` only. */
	private constructor(
		private readonly ffi: ElizaInferenceFfi,
		private readonly handle: NativeSpeakerHandle,
	) {}

	/**
	 * True only when a binding is supplied, it exposes a `speakerSupported`
	 * function, and that probe returns true.
	 *
	 * @param ffi - Loaded binding, or `null`/`undefined` when none is available.
	 */
	static isSupported(ffi: ElizaInferenceFfi | null | undefined): boolean {
		if (!ffi || typeof ffi.speakerSupported !== "function") return false;
		return ffi.speakerSupported();
	}

	/**
	 * Open a native speaker-encoder session.
	 *
	 * @param opts - Binding, context (handle or thunk) and optional GGUF path.
	 * @returns An encoder wrapping the handle returned by `speakerOpen`.
	 * @throws SpeakerEncoderGgmlUnavailableError with code `native-missing`
	 *   when the support probe fails, or `model-load-failed` when the probe
	 *   passes but `speakerOpen` / `speakerEmbed` / `speakerClose` is absent.
	 */
	static async load(
		opts: FusedSpeakerEncoderOptions,
	): Promise<FusedSpeakerEncoder> {
		if (!FusedSpeakerEncoder.isSupported(opts.ffi)) {
			throw new SpeakerEncoderGgmlUnavailableError(
				"native-missing",
				"[speaker-fused] The native speaker encoder is not present in this libelizainference build. Rebuild with the WeSpeaker forward graph linked in (eliza_inference_speaker_* symbols).",
			);
		}
		if (
			!opts.ffi.speakerOpen ||
			!opts.ffi.speakerEmbed ||
			!opts.ffi.speakerClose
		) {
			throw new SpeakerEncoderGgmlUnavailableError(
				"model-load-failed",
				"[speaker-fused] Speaker support probe succeeded, but the required FFI methods are missing on the binding.",
			);
		}
		// The context may be supplied lazily; resolve it right before opening.
		const ctx = typeof opts.ctx === "function" ? opts.ctx() : opts.ctx;
		const handle = opts.ffi.speakerOpen({
			ctx,
			ggufPath: opts.ggufPath ?? null,
		});
		return new FusedSpeakerEncoder(opts.ffi, handle);
	}

	/**
	 * Run the native embed call over `pcm` and return its result.
	 *
	 * @param pcm - Audio samples; must be a `Float32Array` holding at least
	 *   `SPEAKER_GGML_MIN_SAMPLES` samples.
	 * @returns Whatever `speakerEmbed` produces for this session's handle.
	 * @throws SpeakerEncoderGgmlUnavailableError with code `model-load-failed`
	 *   after `dispose()` or when `speakerEmbed` is missing, and
	 *   `invalid-input` for a non-`Float32Array` or too-short buffer.
	 */
	async encode(pcm: Float32Array): Promise<Float32Array> {
		if (this.disposed) {
			throw new SpeakerEncoderGgmlUnavailableError(
				"model-load-failed",
				"[speaker-fused] encode called after dispose()",
			);
		}
		if (!(pcm instanceof Float32Array)) {
			throw new SpeakerEncoderGgmlUnavailableError(
				"invalid-input",
				"[speaker-fused] pcm must be a Float32Array",
			);
		}
		if (pcm.length < SPEAKER_GGML_MIN_SAMPLES) {
			throw new SpeakerEncoderGgmlUnavailableError(
				"invalid-input",
				`[speaker-fused] pcm too short: ${pcm.length} samples < ${SPEAKER_GGML_MIN_SAMPLES}`,
			);
		}
		if (!this.ffi.speakerEmbed) {
			throw new SpeakerEncoderGgmlUnavailableError(
				"model-load-failed",
				"[speaker-fused] encode missing FFI method",
			);
		}
		// Invoke as a method on the binding (as load()/dispose() do) so that a
		// binding whose speakerEmbed relies on `this` keeps its receiver.
		return this.ffi.speakerEmbed({ speaker: this.handle, pcm });
	}

	/**
	 * Close the native session. Idempotent: only the first call reaches
	 * `speakerClose`; later calls return immediately.
	 */
	async dispose(): Promise<void> {
		if (this.disposed) return;
		// Flip the flag first so the handle is never closed twice, even if
		// speakerClose throws.
		this.disposed = true;
		this.ffi.speakerClose?.(this.handle);
	}
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Speaker-embedding encoder — shared constants, error class, and the
3
+ * embedding-distance helper.
4
+ *
5
+ * The speaker encoder runs EXCLUSIVELY through the fused `libelizainference`
6
+ * `eliza_inference_speaker_*` ABI (`FusedSpeakerEncoder` in `encoder-fused.ts`).
7
+ * The standalone `libvoice_classifier` binding that previously lived here has
8
+ * been removed — there is one on-device voice runtime.
9
+ *
10
+ * This module retains the pieces the fused path shares:
11
+ * - the canonical dims (`SPEAKER_GGML_*`), pinned at 256 to match the C-side
12
+ * `VOICE_SPEAKER_EMBEDDING_DIM` and the WeSpeaker ResNet34-LM head,
13
+ * - the structured `SpeakerEncoderGgmlUnavailableError` the fused encoder
14
+ * throws (no synthetic embedding fallback),
15
+ * - the pure `voiceSpeakerDistance` cosine-distance helper.
16
+ */
17
+ /** Output embedding dim. Matches `VOICE_SPEAKER_EMBEDDING_DIM`. */
18
+ export declare const SPEAKER_GGML_EMBEDDING_DIM = 256;
19
+ /** Required input sample rate. */
20
+ export declare const SPEAKER_GGML_SAMPLE_RATE = 16000;
21
+ /** Minimum useful audio window (~1.0 s). */
22
+ export declare const SPEAKER_GGML_MIN_SAMPLES = 16000;
23
+ export declare class SpeakerEncoderGgmlUnavailableError extends Error {
24
+ readonly code: "native-missing" | "library-missing" | "model-missing" | "model-load-failed" | "model-shape-mismatch" | "forward-not-implemented" | "invalid-input";
25
+ constructor(code: SpeakerEncoderGgmlUnavailableError["code"], message: string);
26
+ }
27
+ /**
28
+ * Cosine distance between two 256-dim speaker embeddings. Defined as
29
+ * `1 - cos_similarity(a, b)`, range [0, 2]. Mirrors the C-side
30
+ * `voice_speaker_distance` helper exactly.
31
+ */
32
+ export declare function voiceSpeakerDistance(a: Float32Array, b: Float32Array): number;
33
+ //# sourceMappingURL=encoder-ggml.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encoder-ggml.d.ts","sourceRoot":"","sources":["encoder-ggml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,mEAAmE;AACnE,eAAO,MAAM,0BAA0B,MAAM,CAAC;AAE9C,kCAAkC;AAClC,eAAO,MAAM,wBAAwB,QAAS,CAAC;AAE/C,4CAA4C;AAC5C,eAAO,MAAM,wBAAwB,QAAS,CAAC;AAE/C,qBAAa,kCAAmC,SAAQ,KAAK;IAC5D,QAAQ,CAAC,IAAI,EACV,gBAAgB,GAChB,iBAAiB,GACjB,eAAe,GACf,mBAAmB,GACnB,sBAAsB,GACtB,yBAAyB,GACzB,eAAe,CAAC;gBAElB,IAAI,EAAE,kCAAkC,CAAC,MAAM,CAAC,EAChD,OAAO,EAAE,MAAM;CAMhB;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CA4B7E"}