@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,827 @@
1
+ /**
2
+ * Voice activity detection — the audio front-end's two-tier gate.
3
+ *
4
+ * Tier 1 — `RmsEnergyGate`. A frame-level RMS threshold with hysteresis.
5
+ * Sub-frame latency, no model. Its rising edge is the "wake the
6
+ * response pipeline" signal (KV-prefill the response prompt,
7
+ * preload the drafter, pre-generate the first filler). It NEVER
8
+ * substitutes for the model VAD — it only decides "is there
9
+ * acoustic activity right now".
10
+ *
11
+ * Tier 2 — a model VAD provider. Resolver order is an optional injected
12
+ * Qwen toolkit adapter when supplied, otherwise the fused
13
+ * `libelizainference` Silero v5 VAD ABI (`eliza_inference_vad_*`,
14
+ * backend id `silero-ggml`). 512-sample windows at 16 kHz (32 ms
15
+ * hop), one speech probability per window. This is the
16
+ * *authoritative* speech/no-speech signal — it gates ASR and drives
17
+ * turn-taking. The fused engine is the sole on-device VAD runtime;
18
+ * there is no standalone VAD library.
19
+ *
20
+ * `VadDetector` wires both together and emits the `VadEvent` stream
21
+ * (`speech-start` / `speech-active` / `speech-pause` / `speech-end` /
22
+ * `blip`) plus the raw `EnergyGateEvent` stream.
23
+ *
24
+ * No fallback sludge: if the fused VAD ABI is unavailable (and no injected
25
+ * adapter is supplied), `createVadDetector()` throws `VadUnavailableError`. The
26
+ * caller surfaces "VAD unavailable — voice features degrade" — there is no
27
+ * silent downgrade to the RMS gate, and no standalone-library fallback
28
+ * (AGENTS.md §3).
29
+ */
30
+
31
+ import { existsSync } from "node:fs";
32
+ import path from "node:path";
33
+ import { localInferenceRoot } from "../paths";
34
+ import type {
35
+ ElizaInferenceContextHandle,
36
+ ElizaInferenceFfi,
37
+ NativeVadHandle,
38
+ } from "./ffi-bindings";
39
+ import type {
40
+ EnergyGateEvent,
41
+ EnergyGateListener,
42
+ PcmFrame,
43
+ VadEvent,
44
+ VadEventListener,
45
+ } from "./types";
46
+
47
/** Machine-readable reason a VAD backend could not be brought up. */
type VadUnavailableCode =
  | "ffi-missing"
  | "model-missing"
  | "model-load-failed"
  | "provider-missing";

/**
 * Raised when the Silero VAD backend cannot be loaded: the native VAD FFI is
 * missing or ABI-only, the model file is absent, or the model is corrupt.
 * No fallback exists, so voice features that depend on VAD must surface this
 * error rather than degrade silently.
 */
export class VadUnavailableError extends Error {
  /** Why the VAD is unavailable; stable identifier for programmatic checks. */
  readonly code: VadUnavailableCode;

  /**
   * @param code    Machine-readable failure reason.
   * @param message Human-readable detail, forwarded to `Error`.
   */
  constructor(code: VadUnavailableCode, message: string) {
    super(message);
    this.name = "VadUnavailableError";
    this.code = code;
  }
}
63
+
64
/** Relative path of the fused Silero v5 GGML VAD model inside an Eliza-1
 *  bundle. The file is read by `libelizainference`'s native VAD ABI. */
const SILERO_VAD_GGML_REL_PATH = path.join("vad", "silero-vad-v5.1.2.ggml.bin");

/**
 * Locate the fused-libelizainference Silero GGML VAD model on disk.
 *
 * When `modelPath` is supplied it is the only location considered: the
 * resolved absolute path is returned if it exists, otherwise `null` (a
 * different model is never substituted silently).
 *
 * Without `modelPath` the first existing candidate wins, in this order:
 *   1. `<bundleRoot>/vad/silero-vad-v5.1.2.ggml.bin` (only when `bundleRoot` is set)
 *   2. `<state-dir>/local-inference/vad/silero-vad-v5.1.2.ggml.bin`
 *   3. `$ELIZA_VAD_MODEL_PATH` (trimmed; ignored when empty)
 *
 * @returns The absolute path of the model, or `null` when nothing exists.
 */
export function resolveSileroVadPath(opts: {
  modelPath?: string;
  bundleRoot?: string;
}): string | null {
  const { modelPath, bundleRoot } = opts;

  if (modelPath) {
    if (!existsSync(modelPath)) return null;
    return path.resolve(modelPath);
  }

  // Build the full search list up front, then probe it in priority order.
  const searchPaths: string[] = [];
  if (bundleRoot) {
    searchPaths.push(path.join(bundleRoot, SILERO_VAD_GGML_REL_PATH));
  }
  searchPaths.push(path.join(localInferenceRoot(), SILERO_VAD_GGML_REL_PATH));
  const fromEnv = process.env.ELIZA_VAD_MODEL_PATH?.trim();
  if (fromEnv) {
    searchPaths.push(fromEnv);
  }

  const found = searchPaths.find((candidate) => existsSync(candidate));
  return found === undefined ? null : path.resolve(found);
}
97
+
98
const SILERO_WINDOW_16K = 512; // samples per inference window @ 16 kHz

/**
 * Reject any sample rate other than 16 kHz.
 *
 * @param sampleRate Requested sample rate in Hz.
 * @throws VadUnavailableError with code `"model-load-failed"` when
 *         `sampleRate` is not exactly 16 000.
 */
function validateSileroSampleRate(sampleRate: number): void {
  if (sampleRate === 16_000) return;
  throw new VadUnavailableError(
    "model-load-failed",
    `[voice] Silero VAD v5 only supports 16 kHz; got ${sampleRate}. Resample the mic stream to 16 kHz before the VAD.`,
  );
}
108
+
109
/**
 * Silero v5 GGML VAD backed by the fused `libelizainference` engine — the
 * sole on-device VAD runtime. The model (`silero-vad-v5.1.2.ggml.bin`) is
 * loaded by the shared ggml context owned by the FFI. `process()` pushes one
 * 512-sample 16 kHz window through the native VAD and yields its speech
 * probability; `reset()` clears the recurrent state at utterance boundaries;
 * `close()` releases the native handle.
 *
 * Instances are created through `GgmlSileroVad.load()` only.
 */
export class GgmlSileroVad {
  /** Sample rate (Hz) the native session was opened with. */
  readonly sampleRate: number;
  /** Exact number of samples `process()` requires per call. */
  readonly windowSamples = SILERO_WINDOW_16K;
  private closed = false;

  private constructor(
    private readonly ffi: ElizaInferenceFfi,
    private readonly handle: NativeVadHandle,
    sampleRate: number,
  ) {
    this.sampleRate = sampleRate;
  }

  /**
   * Whether this libelizainference build exports the native VAD ABI and
   * advertises support. False for a missing FFI, an FFI without a
   * `vadSupported` probe, ABI-only builds, or builds whose C++ side was not
   * linked against the GGML Silero VAD runtime.
   */
  static isSupported(ffi: ElizaInferenceFfi | null | undefined): boolean {
    return typeof ffi?.vadSupported === "function" ? ffi.vadSupported() : false;
  }

  /**
   * Open a native VAD session.
   *
   * @param opts.ffi        libelizainference bindings.
   * @param opts.ctx        Inference context handle, or a thunk producing one
   *                        (invoked only after all support checks pass).
   * @param opts.sampleRate Sample rate in Hz; defaults to 16 000.
   * @throws VadUnavailableError `"model-load-failed"` for a non-16 kHz rate or
   *         when the support probe passes but a VAD FFI method is absent;
   *         `"ffi-missing"` when the build does not support the native VAD.
   */
  static async load(opts: {
    ffi: ElizaInferenceFfi;
    ctx: ElizaInferenceContextHandle | (() => ElizaInferenceContextHandle);
    sampleRate?: number;
  }): Promise<GgmlSileroVad> {
    const { ffi } = opts;
    const sampleRate = opts.sampleRate ?? 16_000;
    validateSileroSampleRate(sampleRate);

    if (!GgmlSileroVad.isSupported(ffi)) {
      throw new VadUnavailableError(
        "ffi-missing",
        "[voice] Native GGML Silero VAD is not supported by this libelizainference build. Rebuild with the GGML VAD runtime linked in (eliza_inference_vad_* symbols).",
      );
    }

    // The probe only says "supported"; every entry point must also be bound.
    if (!(ffi.vadOpen && ffi.vadProcess && ffi.vadReset && ffi.vadClose)) {
      throw new VadUnavailableError(
        "model-load-failed",
        "[voice] Native GGML Silero VAD support probe succeeded, but the required VAD FFI methods are missing.",
      );
    }

    const ctx = typeof opts.ctx === "function" ? opts.ctx() : opts.ctx;
    const handle = ffi.vadOpen({ ctx, sampleRateHz: sampleRate });
    return new GgmlSileroVad(ffi, handle, sampleRate);
  }

  /**
   * Run one window through the native VAD.
   *
   * @param window Exactly `windowSamples` (512) samples.
   * @returns Whatever the native `vadProcess` call returns (the speech
   *          probability for the window).
   * @throws Error (as a rejection) after `close()`, on a wrong window length,
   *         or when the FFI method is missing.
   */
  async process(window: Float32Array): Promise<number> {
    if (this.closed) {
      throw new Error("[voice] GgmlSileroVad.process called after close()");
    }
    const got = window.length;
    if (got !== SILERO_WINDOW_16K) {
      throw new Error(
        `[voice] GgmlSileroVad.process expects a ${SILERO_WINDOW_16K}-sample window; got ${got}`,
      );
    }
    const { vadProcess } = this.ffi;
    if (!vadProcess) {
      throw new Error("[voice] GgmlSileroVad.process missing FFI method");
    }
    return vadProcess({ vad: this.handle, pcm: window });
  }

  /** Clear the native recurrent state. A no-op once the session is closed. */
  reset(): void {
    if (this.closed) return;
    const { vadReset } = this.ffi;
    if (!vadReset) {
      throw new Error("[voice] GgmlSileroVad.reset missing FFI method");
    }
    vadReset(this.handle);
  }

  /** Release the native session. Idempotent: later calls do nothing. */
  close(): void {
    if (this.closed) return;
    // Flip the flag first so a repeated close() never reaches the FFI again.
    this.closed = true;
    const { vadClose } = this.ffi;
    if (!vadClose) {
      throw new Error("[voice] GgmlSileroVad.close missing FFI method");
    }
    vadClose(this.handle);
  }
}

/** @deprecated Use `GgmlSileroVad`. Kept as an alias while callers migrate
 *  off the legacy ONNX-era name. */
export const NativeSileroVad = GgmlSileroVad;
export type NativeSileroVad = GgmlSileroVad;
206
+
207
+ // ---------------------------------------------------------------------------
208
+ // Tier 1: cheap always-on RMS energy gate.
209
+ // ---------------------------------------------------------------------------
210
+
211
/** Tuning knobs for `RmsEnergyGate`. Every field is optional. */
export interface RmsEnergyGateConfig {
  /** A frame whose RMS is at or above this value counts as activity and
   *  triggers `energy-rise`. Default 0.012 — between the 0.01 the vision
   *  capture stream uses and the 0.05 Discord uses for speaking. */
  riseThreshold?: number;
  /** RMS must drop below this to count as quiet (hysteresis). Default
   *  `0.6 * riseThreshold`, computed from the effective rise threshold. */
  fallThreshold?: number;
  /** Consecutive ms below `fallThreshold` before emitting `energy-fall`,
   *  measured between frame timestamps. Default 200 ms. */
  fallHoldMs?: number;
}
222
+
223
/**
 * Root-mean-square level of a PCM buffer.
 *
 * @param pcm Samples to measure.
 * @returns `sqrt(mean(sample²))`, or 0 for an empty buffer.
 */
export function rms(pcm: Float32Array): number {
  const count = pcm.length;
  if (count === 0) return 0;
  let sumOfSquares = 0;
  for (const sample of pcm) {
    sumOfSquares += sample * sample;
  }
  return Math.sqrt(sumOfSquares / count);
}
229
+
230
/**
 * RMS gate with hysteresis. Feed it `PcmFrame`s: it emits `energy-rise` on
 * the first frame at or above `riseThreshold`, and `energy-fall` once the RMS
 * has stayed below `fallThreshold` for `fallHoldMs` (measured between frame
 * timestamps). It is the fast pre-warm trigger — not a speech detector.
 */
export class RmsEnergyGate {
  private readonly riseThreshold: number;
  private readonly fallThreshold: number;
  private readonly fallHoldMs: number;
  private active = false;
  /** Timestamp of the first frame of the current quiet run, if any. */
  private quietSinceMs: number | null = null;
  private readonly listeners = new Set<EnergyGateListener>();

  constructor(config: RmsEnergyGateConfig = {}) {
    const rise = config.riseThreshold ?? 0.012;
    this.riseThreshold = rise;
    this.fallThreshold = config.fallThreshold ?? rise * 0.6;
    this.fallHoldMs = config.fallHoldMs ?? 200;
  }

  /** True between an `energy-rise` and the matching `energy-fall`. */
  get isActive(): boolean {
    return this.active;
  }

  /** Subscribe to gate events; the returned function unsubscribes. */
  onEvent(listener: EnergyGateListener): () => void {
    const { listeners } = this;
    listeners.add(listener);
    return () => listeners.delete(listener);
  }

  /**
   * Ingest one frame, updating the gate and emitting at most one event.
   *
   * @returns The frame RMS so callers can reuse it.
   */
  push(frame: PcmFrame): number {
    const level = rms(frame.pcm);
    const now = frame.timestampMs;

    if (this.active) {
      if (level < this.fallThreshold) {
        // Start (or continue) the quiet run and see whether it has lasted
        // long enough to close the gate.
        const quietStart = this.quietSinceMs ?? now;
        this.quietSinceMs = quietStart;
        const quietMs = now - quietStart;
        if (quietMs >= this.fallHoldMs) {
          this.active = false;
          this.quietSinceMs = null;
          this.emit({
            type: "energy-fall",
            timestampMs: now,
            quietMs,
          });
        }
      } else {
        // Any frame that is not quiet cancels the pending fall.
        this.quietSinceMs = null;
      }
    } else if (level >= this.riseThreshold) {
      this.active = true;
      this.quietSinceMs = null;
      this.emit({
        type: "energy-rise",
        timestampMs: now,
        rms: level,
      });
    }

    return level;
  }

  /** Return to the inactive state without emitting an event. */
  reset(): void {
    this.quietSinceMs = null;
    this.active = false;
  }

  private emit(event: EnergyGateEvent): void {
    this.listeners.forEach((listener) => {
      listener(event);
    });
  }
}
302
+
303
+ // ---------------------------------------------------------------------------
304
+ // Tier 2 driver: VadDetector — the Silero speech state machine.
305
+ // ---------------------------------------------------------------------------
306
+
307
/** Tuning knobs for `VadDetector`. Every field is optional. */
export interface VadDetectorConfig {
  /** Mic sample rate (Hz). MUST be 16 000 — Silero v5 is 16 kHz only.
   *  `VadDetector` defaults this to the model's own rate and throws when the
   *  two differ. */
  sampleRate?: number;
  /** Speech probability above this opens a speech segment. Default 0.5. */
  onsetThreshold?: number;
  /** Speech probability must drop below this to count toward end-of-speech.
   *  Default `max(0.1, onsetThreshold - 0.15)`. Below the onset to avoid
   *  flapping. */
  offsetThreshold?: number;
  /** Consecutive ms of speech-prob below `offsetThreshold` before the
   *  segment is considered *paused* (kick speculative response). Default
   *  100 ms (lowered from 220ms; further reduction gated on semantic EOT
   *  classifier V2). `ELIZA_PAUSE_HANGOVER_MS` overrides the default, but an
   *  explicit value here takes precedence over the env var. */
  pauseHangoverMs?: number;
  /**
   * V1 — "fast endpoint" pause hangover, used when `fastEndpointEnabled`
   * is true. Short enough that a clean trailing-off end-of-utterance hits
   * the speculative path quickly, but long enough to ride out mid-sentence
   * micro-pauses. Gated by the flag so callers can opt in once they've
   * validated the false-positive rate on their hardware. Default 100 ms.
   */
  fastPauseHangoverMs?: number;
  /**
   * V1 — when true, use `fastPauseHangoverMs` instead of `pauseHangoverMs`.
   * Default false until the streaming-ASR fast path (V2) ships.
   */
  fastEndpointEnabled?: boolean;
  /** Consecutive ms paused before the segment *ends* (finalize the turn).
   *  Default 700 ms. A smaller value is raised to the pause hangover in use
   *  (`fastPauseHangoverMs` when `fastEndpointEnabled`, else
   *  `pauseHangoverMs`). */
  endHangoverMs?: number;
  /** A segment shorter than this (from onset to end) is reclassified as a
   *  `blip` rather than `speech-end`. Default 250 ms. */
  minSpeechMs?: number;
  /** Interval between `speech-active` heartbeats while speaking. Default
   *  200 ms. */
  activeHeartbeatMs?: number;
  /**
   * V4 — adaptive pause hangover. When the windowed RMS is in a sharp
   * downward trend across the last few frames (the user audibly trailed
   * off rather than stopping mid-thought), the hangover used to detect a
   * pause is multiplied by this factor (and never drops below
   * `adaptiveHangoverFloorMs`). Default 0.5 (halve the hangover); the value
   * is clamped to [0.1, 1], and 1.0 or more disables the adaptation.
   */
  adaptiveHangoverScaleOnDrop?: number;
  /**
   * V4 — minimum hangover the adaptive scale is allowed to produce, ms.
   * Default 50 ms; negative values are treated as 0. Prevents a steep drop
   * from collapsing the hangover to zero and emitting a pause on a single
   * quiet frame.
   */
  adaptiveHangoverFloorMs?: number;
  /**
   * V4 — energy derivative (average ΔRMS per window across the RMS history)
   * at or below this value, combined with the latest RMS below
   * `offsetThreshold`, counts as "audibly trailed off". Default -0.02
   * (negative slope: RMS dropping at least 0.02 / window).
   */
  adaptiveHangoverDropThreshold?: number;
  /** RMS gate config (tier 1). */
  energyGate?: RmsEnergyGateConfig;
}
367
+
368
+ type SegmentPhase = "idle" | "speaking" | "paused";
369
+
370
+ export type { VadLike } from "./types.js";
371
+
372
+ import type { VadLike } from "./types.js";
373
+
374
+ export type VadProviderId = "qwen-toolkit" | "silero-ggml";
375
+ export type VadProviderPreference = "auto" | VadProviderId;
376
+
377
/**
 * Caller-injected adapter for the optional `qwen-toolkit` VAD provider.
 * `resolveVadProvider` consults it only when one is supplied.
 */
export interface QwenToolkitVadAdapter {
  /** Optional availability probe (sync or async). When the method is absent
   *  the adapter is treated as available. */
  isAvailable?(): boolean | Promise<boolean>;
  /** Load the VAD for the requested sample rate (Hz). */
  loadVad(opts: { sampleRate: number }): Promise<VadLike>;
}
381
+
382
/** Result of `resolveVadProvider`: which provider won, plus its loaded VAD. */
export interface ResolvedVadProvider {
  /** Identifier of the provider that produced `vad`. */
  id: VadProviderId;
  /** The loaded model VAD. */
  vad: VadLike;
}
386
+
387
/** Inputs for resolving a VAD provider. Every field is optional. */
export interface CreateVadDetectorOptions {
  /** Explicit Silero model file. Forwarded to `resolveSileroVadPath`, which
   *  honors it exactly and searches nowhere else when it is set. */
  modelPath?: string;
  /** Bundle root searched for the Silero model when `modelPath` is unset. */
  bundleRoot?: string;
  /** libelizainference bindings. Required, together with `ctx`, for the
   *  `silero-ggml` provider. */
  ffi?: ElizaInferenceFfi | null;
  /** Inference context handle, or a thunk producing one. Required, together
   *  with `ffi`, for the `silero-ggml` provider. */
  ctx?: ElizaInferenceContextHandle | (() => ElizaInferenceContextHandle);
  /** Optional adapter enabling the `qwen-toolkit` provider. */
  qwenToolkitVad?: QwenToolkitVadAdapter | null;
  /** Detector tuning. `resolveVadProvider` reads only `sampleRate` from it
   *  (default 16 000). NOTE(review): presumably the rest is handed to
   *  `VadDetector` by `createVadDetector` — confirm against that function. */
  config?: VadDetectorConfig;
  /** Provider preference passed to `vadProviderOrder`; `"auto"` when unset. */
  prefer?: VadProviderPreference;
}
396
+
397
/**
 * Ordered list of VAD providers to attempt for a given preference.
 *
 * @param prefer A concrete provider id pins the list to that single provider;
 *               `"auto"` (the default) yields the built-in order.
 * @returns A fresh array of provider ids, most preferred first.
 */
export function vadProviderOrder(
  prefer: VadProviderPreference = "auto",
): VadProviderId[] {
  // `silero-ggml` is the fused `libelizainference` VAD ABI — the sole
  // on-device VAD runtime. The optional injected `qwen-toolkit` adapter is
  // tried first only when a caller supplies one; otherwise the fused engine
  // is the single path, and an unavailable fused VAD fails fast.
  return prefer === "auto" ? ["qwen-toolkit", "silero-ggml"] : [prefer];
}
407
+
408
/**
 * Pick and load the model VAD provider.
 *
 * Providers are attempted in `vadProviderOrder(opts.prefer)` order. A
 * provider that cannot even be attempted (no adapter, no FFI/context, build
 * without the VAD ABI) is skipped and its reason recorded; the first provider
 * that can be attempted is loaded and returned. Load failures are not caught,
 * so they propagate instead of falling through to the next provider.
 *
 * @param opts See `CreateVadDetectorOptions`. `config.sampleRate` defaults to
 *             16 000.
 * @returns The winning provider id together with its loaded VAD.
 * @throws VadUnavailableError `"model-missing"` when the `silero-ggml`
 *         provider is usable but its model file is not on disk;
 *         `"provider-missing"` when every provider was skipped.
 */
export async function resolveVadProvider(
  opts: CreateVadDetectorOptions = {},
): Promise<ResolvedVadProvider> {
  const sampleRate = opts.config?.sampleRate ?? 16_000;
  const tried: string[] = [];
  const reasons: string[] = [];

  for (const provider of vadProviderOrder(opts.prefer)) {
    switch (provider) {
      case "qwen-toolkit": {
        tried.push(provider);
        if (!opts.qwenToolkitVad) {
          reasons.push("qwen-toolkit: no adapter supplied");
          break;
        }
        // An adapter without an availability probe is assumed available.
        const available = (await opts.qwenToolkitVad.isAvailable?.()) ?? true;
        if (!available) {
          reasons.push("qwen-toolkit: adapter reported unavailable");
          break;
        }
        return {
          id: provider,
          vad: await opts.qwenToolkitVad.loadVad({ sampleRate }),
        };
      }
      case "silero-ggml": {
        tried.push(provider);
        if (!opts.ffi || !opts.ctx) {
          reasons.push(
            "silero-ggml: libelizainference FFI / context not supplied",
          );
          break;
        }
        if (!GgmlSileroVad.isSupported(opts.ffi)) {
          reasons.push(
            "silero-ggml: libelizainference build does not export the VAD ABI (eliza_inference_vad_supported() == 0)",
          );
          break;
        }
        // Ensure the fused GGML model is on disk before opening the
        // native session. This keeps the failure mode "no model file"
        // distinct from a build with an ABI-only VAD.
        const modelPath = resolveSileroVadPath({
          modelPath: opts.modelPath,
          bundleRoot: opts.bundleRoot,
        });
        if (!modelPath) {
          // An explicit `modelPath` is the only location probed, so the
          // message must not claim the bundle / state dir were searched.
          const detail = opts.modelPath
            ? `The explicit modelPath ${opts.modelPath} does not exist; an explicit path is honored exactly and no other location is searched.`
            : `Looked for ${SILERO_VAD_GGML_REL_PATH} in the Eliza-1 bundle and under ${localInferenceRoot()}, or set ELIZA_VAD_MODEL_PATH.`;
          throw new VadUnavailableError(
            "model-missing",
            `[voice] Fused Silero v5 GGML VAD model not found. ${detail}`,
          );
        }
        return {
          id: provider,
          vad: await GgmlSileroVad.load({
            ffi: opts.ffi,
            ctx: opts.ctx,
            sampleRate,
          }),
        };
      }
    }
  }

  throw new VadUnavailableError(
    "provider-missing",
    `[voice] No VAD provider available. Tried: ${tried.join(", ")}. Reasons: ${reasons.join("; ") || "none reported"}.`,
  );
}
477
+
478
+ /**
479
+ * The authoritative VAD. Owns a model VAD provider (or any `VadLike` for tests),
480
+ * an `RmsEnergyGate`, and the speech state machine. `pushFrame()` accepts
481
+ * mic frames of any length ≥ 1 sample; internally it re-windows to the
482
+ * provider's fixed sample window. Emits `VadEvent`s on the VAD timeline and
483
+ * `EnergyGateEvent`s on the fast timeline.
484
+ *
485
+ * Frame ingestion is serialized (`pushFrame` awaits the model forward pass)
486
+ * so events stay in order; callers that can't await may fire-and-forget — a
487
+ * dropped-frame counter (`droppedFrames`) records overruns.
488
+ */
489
+ export class VadDetector {
490
+ readonly silero: VadLike;
491
+ readonly energyGate: RmsEnergyGate;
492
+ private readonly sampleRate: number;
493
+ private readonly onsetThreshold: number;
494
+ private readonly offsetThreshold: number;
495
+ private readonly pauseHangoverMs: number;
496
+ private readonly fastPauseHangoverMs: number;
497
+ private readonly fastEndpointEnabled: boolean;
498
+ private readonly endHangoverMs: number;
499
+ private readonly minSpeechMs: number;
500
+ private readonly activeHeartbeatMs: number;
501
+ // V4 — adaptive hangover state.
502
+ private readonly adaptiveHangoverScaleOnDrop: number;
503
+ private readonly adaptiveHangoverFloorMs: number;
504
+ private readonly adaptiveHangoverDropThreshold: number;
505
+ // Rolling RMS history (last 3 windows ≈ 96 ms @ 16 kHz / 512). The
506
+ // sample-rate-of-drop check reads from this each window.
507
+ private readonly recentRms: number[] = [];
508
+ private static readonly RECENT_RMS_HISTORY = 3;
509
+
510
+ private readonly vadListeners = new Set<VadEventListener>();
511
+
512
+ private pending: Float32Array = new Float32Array(0);
513
+ private windowDurationMs: number;
514
+ private clockMs = 0; // timestamp of the *next* unconsumed sample
515
+ private busy: Promise<void> = Promise.resolve();
516
+ droppedFrames = 0;
517
+
518
+ private phase: SegmentPhase = "idle";
519
+ private speechStartMs = 0;
520
+ private lastSpeechMs = 0; // last window whose prob ≥ offsetThreshold
521
+ private pauseStartedMs = 0;
522
+ private lastHeartbeatMs = 0;
523
+ private peakRmsInSegment = 0;
524
+
525
+ constructor(silero: VadLike, config: VadDetectorConfig = {}) {
526
+ this.silero = silero;
527
+ this.sampleRate = config.sampleRate ?? silero.sampleRate;
528
+ if (this.sampleRate !== silero.sampleRate) {
529
+ throw new Error(
530
+ `[voice] VadDetector sample rate ${this.sampleRate} != Silero model rate ${silero.sampleRate}`,
531
+ );
532
+ }
533
+ this.onsetThreshold = config.onsetThreshold ?? 0.5;
534
+ this.offsetThreshold =
535
+ config.offsetThreshold ?? Math.max(0.1, this.onsetThreshold - 0.15);
536
+ // Lowered from 220ms; further reduction gated on semantic EOT classifier (V2).
537
+ // Override via ELIZA_PAUSE_HANGOVER_MS env var.
538
+ this.pauseHangoverMs =
539
+ config.pauseHangoverMs ?? readPauseHangoverMsEnv() ?? 100;
540
+ this.fastPauseHangoverMs = config.fastPauseHangoverMs ?? 100;
541
+ this.fastEndpointEnabled = config.fastEndpointEnabled ?? false;
542
+ this.endHangoverMs = Math.max(
543
+ this.fastEndpointEnabled
544
+ ? this.fastPauseHangoverMs
545
+ : this.pauseHangoverMs,
546
+ config.endHangoverMs ?? 700,
547
+ );
548
+ this.minSpeechMs = config.minSpeechMs ?? 250;
549
+ this.activeHeartbeatMs = config.activeHeartbeatMs ?? 200;
550
+ this.adaptiveHangoverScaleOnDrop = Math.max(
551
+ 0.1,
552
+ Math.min(1, config.adaptiveHangoverScaleOnDrop ?? 0.5),
553
+ );
554
+ this.adaptiveHangoverFloorMs = Math.max(
555
+ 0,
556
+ config.adaptiveHangoverFloorMs ?? 50,
557
+ );
558
+ this.adaptiveHangoverDropThreshold =
559
+ config.adaptiveHangoverDropThreshold ?? -0.02;
560
+ this.energyGate = new RmsEnergyGate(config.energyGate);
561
+ this.windowDurationMs = (silero.windowSamples / this.sampleRate) * 1000;
562
+ }
563
+
564
/**
 * Pause hangover to apply to the current window.
 *
 * The base value is `fastPauseHangoverMs` when `fastEndpointEnabled` is set
 * (V1), otherwise `pauseHangoverMs`. When the rolling RMS history shows a
 * falling trajectory (V4), the base is multiplied by
 * `adaptiveHangoverScaleOnDrop`, but never drops below
 * `adaptiveHangoverFloorMs`.
 *
 * @returns The hangover in milliseconds.
 */
private effectivePauseHangoverMs(): number {
  const baseMs = this.fastEndpointEnabled
    ? this.fastPauseHangoverMs
    : this.pauseHangoverMs;
  const history = this.recentRms;
  const samples = history.length;
  // A scale of 1 disables the adaptation; V4 also needs at least two samples
  // to compute a slope.
  if (this.adaptiveHangoverScaleOnDrop >= 1 || samples < 2) {
    return baseMs;
  }

  const oldest = history[0];
  const newest = history[samples - 1];
  // Average change per window across the history; negative = trailing off.
  const slopePerWindow = (newest - oldest) / (samples - 1);
  // NOTE(review): `newest` is an RMS amplitude, while `offsetThreshold` is
  // compared against the model probability in `processWindow` — confirm that
  // sharing this threshold is intentional.
  const trailingOff =
    slopePerWindow <= this.adaptiveHangoverDropThreshold &&
    newest < this.offsetThreshold;
  if (!trailingOff) {
    return baseMs;
  }
  return Math.max(
    this.adaptiveHangoverFloorMs,
    baseMs * this.adaptiveHangoverScaleOnDrop,
  );
}
590
+
591
/**
 * Subscribe to segment-level VAD events.
 *
 * @param listener Callback invoked for every emitted event.
 * @returns A function that removes the listener again.
 */
onVadEvent(listener: VadEventListener): () => void {
  const listeners = this.vadListeners;
  listeners.add(listener);
  return () => listeners.delete(listener);
}
595
+
596
/**
 * Subscribe to events from the RMS energy gate.
 *
 * @param listener Callback forwarded to the energy gate.
 * @returns The unsubscribe function handed back by the gate.
 */
onEnergyEvent(listener: EnergyGateListener): () => void {
  const unsubscribe = this.energyGate.onEvent(listener);
  return unsubscribe;
}
599
+
600
/**
 * Whether a speech segment is currently open. The pause hangover counts as
 * open: only the "idle" phase reports `false`.
 */
get inSpeech(): boolean {
  const idle = this.phase === "idle";
  return !idle;
}
604
+
605
/**
 * Feed one mic frame into the detector.
 *
 * The RMS energy gate is updated synchronously. Model processing is queued
 * on the serialized `busy` chain, so frames are handled strictly in call
 * order even when callers do not await the result.
 *
 * @param frame PCM frame; its sample rate must equal the detector's.
 * @returns A promise that settles once every full window available after
 *   appending this frame has been processed. It rejects when the sample
 *   rate mismatches or when processing a window throws.
 */
pushFrame(frame: PcmFrame): Promise<void> {
  if (frame.sampleRate !== this.sampleRate) {
    return Promise.reject(
      new Error(
        `[voice] VadDetector expects ${this.sampleRate} Hz frames; got ${frame.sampleRate}. Resample upstream of the VAD.`,
      ),
    );
  }

  // Tier 1: synchronous, no model.
  this.energyGate.push(frame);

  // Snapshot the samples and timestamp now; the queued step runs later.
  const samples = frame.pcm.slice();
  const frameTimestampMs = frame.timestampMs;

  const ingest = async (): Promise<void> => {
    // Anchor the clock to the first frame so timestamps are mic-domain.
    if (this.pending.length === 0 && this.clockMs === 0) {
      this.clockMs = frameTimestampMs;
    }
    // Grow the re-windowing buffer only while holding the serialized chain,
    // so the shared buffer advances one frame at a time.
    const carried = this.pending;
    const combined = new Float32Array(carried.length + samples.length);
    combined.set(carried, 0);
    combined.set(samples, carried.length);
    this.pending = combined;
    await this.drainWindows();
  };

  const settled = this.busy.then(ingest);
  // A failed frame must not break the chain for later frames. The failure is
  // counted here and still surfaces to the caller through `settled`.
  this.busy = settled.catch(() => {
    this.droppedFrames++;
  });
  return settled;
}
645
+
646
/**
 * End-of-stream hook. Any leftover samples that do not fill a window are
 * zero-padded to a full window and processed, then a still-open segment is
 * closed at the current clock.
 *
 * The work is queued behind frames already pushed.
 *
 * @returns A promise that settles when the flush has run; it rejects if
 *   processing the final window throws.
 */
flush(): Promise<void> {
  const finalize = async (): Promise<void> => {
    const leftover = this.pending;
    if (leftover.length > 0) {
      // A fresh Float32Array is zero-filled, which provides the padding.
      const padded = new Float32Array(this.silero.windowSamples);
      padded.set(leftover.subarray(0, this.silero.windowSamples));
      this.pending = new Float32Array(0);
      await this.processWindow(padded);
    }
    if (this.phase !== "idle") {
      this.endSegment(this.clockMs);
    }
  };

  const settled = this.busy.then(finalize);
  // Keep the chain usable after a failure; the failure is counted.
  this.busy = settled.catch(() => {
    this.droppedFrames++;
  });
  return settled;
}
665
+
666
/**
 * Return the detector to its initial state: buffered samples, the clock,
 * the phase, the segment peak and the RMS history are cleared, and both the
 * model VAD and the energy gate are reset.
 */
reset(): void {
  // Segment bookkeeping.
  this.phase = "idle";
  this.peakRmsInSegment = 0;
  this.recentRms.length = 0;
  // Buffering and clock.
  this.pending = new Float32Array(0);
  this.clockMs = 0;
  // Collaborators.
  this.silero.reset();
  this.energyGate.reset();
}
675
+
676
/**
 * Process every complete model window currently buffered in `pending`,
 * oldest first, leaving any partial remainder in the buffer.
 */
private async drainWindows(): Promise<void> {
  const size = this.silero.windowSamples;
  while (this.pending.length >= size) {
    // `slice` copies, so the chunk stays stable across the await below.
    const chunk = this.pending.slice(0, size);
    this.pending = this.pending.subarray(size);
    await this.processWindow(chunk);
  }
}
686
+
687
/**
 * Run one full window through the model and advance the segment state
 * machine (idle → speaking → paused → idle), emitting the matching events.
 *
 * @param window Exactly one model window of samples.
 */
private async processWindow(window: Float32Array): Promise<void> {
  const prob = await this.silero.process(window);
  const windowRms = rms(window);

  // V4 — keep a short rolling RMS history for the energy-rate-of-drop
  // adaptive hangover. Three windows ≈ 96 ms at 16 kHz / 512 samples.
  const history = this.recentRms;
  history.push(windowRms);
  if (history.length > VadDetector.RECENT_RMS_HISTORY) {
    history.shift();
  }

  // The clock marks the *end* of this window.
  this.clockMs += this.windowDurationMs;
  const now = this.clockMs;
  const onsetHit = prob >= this.onsetThreshold;
  const offsetHit = prob >= this.offsetThreshold;

  if (this.phase === "idle") {
    if (!onsetHit) {
      return;
    }
    // Speech onset: the segment starts at the beginning of this window.
    this.phase = "speaking";
    this.speechStartMs = now - this.windowDurationMs;
    this.lastSpeechMs = now;
    this.lastHeartbeatMs = now;
    this.peakRmsInSegment = windowRms;
    this.emit({
      type: "speech-start",
      timestampMs: this.speechStartMs,
      probability: prob,
    });
    return;
  }

  if (this.phase === "speaking") {
    this.peakRmsInSegment = Math.max(this.peakRmsInSegment, windowRms);
    if (offsetHit) {
      this.lastSpeechMs = now;
    }
    const quietMs = now - this.lastSpeechMs;
    if (quietMs >= this.effectivePauseHangoverMs()) {
      // Quiet for long enough: enter the pause, dated from the last window
      // that was still above the offset threshold.
      this.phase = "paused";
      this.pauseStartedMs = this.lastSpeechMs;
      this.emit({
        type: "speech-pause",
        timestampMs: now,
        pauseDurationMs: quietMs,
      });
      return;
    }
    if (now - this.lastHeartbeatMs >= this.activeHeartbeatMs) {
      this.lastHeartbeatMs = now;
      this.emit({
        type: "speech-active",
        timestampMs: now,
        probability: prob,
        speechDurationMs: now - this.speechStartMs,
      });
    }
    return;
  }

  if (this.phase === "paused") {
    this.peakRmsInSegment = Math.max(this.peakRmsInSegment, windowRms);
    if (onsetHit) {
      // Speech resumed before end-of-utterance.
      this.phase = "speaking";
      this.lastSpeechMs = now;
      this.lastHeartbeatMs = now;
      this.emit({
        type: "speech-active",
        timestampMs: now,
        probability: prob,
        speechDurationMs: now - this.speechStartMs,
      });
      return;
    }
    const pauseMs = now - this.pauseStartedMs;
    if (pauseMs >= this.endHangoverMs) {
      this.endSegment(now);
      return;
    }
    // Still inside the end hangover: report the growing pause.
    this.emit({
      type: "speech-pause",
      timestampMs: now,
      pauseDurationMs: pauseMs,
    });
  }
}
772
+
773
/**
 * Close the open segment. State is returned to idle and the model VAD is
 * reset before the closing event is emitted. Segments shorter than
 * `minSpeechMs` are reported as a "blip"; all others as "speech-end".
 *
 * @param now Timestamp (ms) to stamp on the emitted event.
 */
private endSegment(now: number): void {
  // Capture segment stats before they are cleared.
  const speechDurationMs = this.lastSpeechMs - this.speechStartMs;
  const peakRms = this.peakRmsInSegment;

  this.phase = "idle";
  this.peakRmsInSegment = 0;
  this.silero.reset();

  const longEnough = speechDurationMs >= this.minSpeechMs;
  if (longEnough) {
    this.emit({ type: "speech-end", timestampMs: now, speechDurationMs });
    return;
  }
  this.emit({
    type: "blip",
    timestampMs: now,
    // Clamp: the computed duration may be negative.
    durationMs: Math.max(0, speechDurationMs),
    peakRms,
  });
}
790
+
791
/**
 * Deliver an event to every registered VAD listener, in iteration order.
 *
 * @param event The event to broadcast.
 */
private emit(event: VadEvent): void {
  for (const notify of this.vadListeners) {
    notify(event);
  }
}
794
+ }
795
+
796
/**
 * Legacy alias kept for callers that still import
 * `createSileroVadDetector`. It delegates to `createVadDetector`, so it uses
 * the same provider resolution.
 *
 * @param opts Options forwarded unchanged to `createVadDetector`.
 * @returns The detector built by `createVadDetector`.
 */
export async function createSileroVadDetector(
  opts: CreateVadDetectorOptions = {},
): Promise<VadDetector> {
  const detector = createVadDetector(opts);
  return detector;
}
806
+
807
/**
 * Convenience factory: resolve a model VAD provider via `resolveVadProvider`
 * and wrap it in a `VadDetector` configured with `opts.config`.
 *
 * @param opts Provider-resolution options plus optional detector config.
 * @returns A detector bound to the resolved provider.
 */
export async function createVadDetector(
  opts: CreateVadDetectorOptions = {},
): Promise<VadDetector> {
  const resolved = await resolveVadProvider(opts);
  return new VadDetector(resolved.vad, opts.config);
}
817
+
818
/**
 * Read `ELIZA_PAUSE_HANGOVER_MS` from the environment.
 *
 * The value must consist solely of decimal digits (surrounding whitespace is
 * ignored) and be greater than zero. Anything else — unset, empty, `"1e3"`,
 * `"100abc"`, `"12.5"`, negative numbers, zero — is treated as invalid so the
 * caller falls back to its default instead of a silently truncated number.
 *
 * @returns The positive integer value, or `undefined` when the variable is
 *   unset or invalid, or when no `process` global exists.
 */
function readPauseHangoverMsEnv(): number | undefined {
  // Runtimes without a Node `process` global would otherwise throw a
  // ReferenceError here.
  if (typeof process === "undefined") return undefined;
  const raw = process.env?.ELIZA_PAUSE_HANGOVER_MS?.trim();
  // `Number.parseInt` stops at the first non-digit ("1e3" → 1), so validate
  // the whole string before parsing.
  if (!raw || !/^\d+$/.test(raw)) return undefined;
  const value = Number.parseInt(raw, 10);
  return Number.isSafeInteger(value) && value > 0 ? value : undefined;
}