@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,3387 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Node/Bun FFI binding to `libelizainference.{dylib,so,dll}`.
|
|
3
|
+
*
|
|
4
|
+
* The fused omnivoice + llama.cpp build (see
|
|
5
|
+
* `packages/app-core/scripts/omnivoice-fuse/`) produces ONE shared
|
|
6
|
+
* library that exports both `llama_*` and `omnivoice_*` symbols plus
|
|
7
|
+
* the C ABI declared in `scripts/omnivoice-fuse/ffi.h`. This module is
|
|
8
|
+
* the JS-side proxy for that ABI — it loads the library, binds every
|
|
9
|
+
* `eliza_inference_*` symbol declared in `ffi.h`, and exposes a typed
|
|
10
|
+
* handle (`ElizaInferenceFfi`) the voice lifecycle calls into.
|
|
11
|
+
*
|
|
12
|
+
* Runtime: production runs under Bun (Electrobun shell, Capacitor
|
|
13
|
+
* bridge), so the loader uses `bun:ffi`. Tests that need to actually
|
|
14
|
+
* load a `.dylib` against a stub library spawn a `bun` subprocess —
|
|
15
|
+
* see `ffi-bindings.test.ts`. Calling this loader from a non-Bun
|
|
16
|
+
* runtime (e.g. plain Node) throws `VoiceLifecycleError({code:
|
|
17
|
+
* "missing-ffi"})` with a diagnostic explaining why.
|
|
18
|
+
*
|
|
19
|
+
* No defensive try/catch on the success path. Any dlopen failure,
|
|
20
|
+
* symbol-resolution failure, or ABI mismatch is a structured throw
|
|
21
|
+
* (AGENTS.md §3 + §9). The caller — `voice/lifecycle.ts` and
|
|
22
|
+
* `voice/engine-bridge.ts` — surfaces it as a `VoiceLifecycleError` to
|
|
23
|
+
* the UI.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import path from "node:path";
|
|
27
|
+
|
|
28
|
+
import { VoiceLifecycleError } from "./lifecycle";
|
|
29
|
+
|
|
30
|
+
/**
 * Make a directory discoverable by the Win32 DLL loader for this process by
 * prepending it to PATH (step 6 of the standard DLL search order).
 *
 * The fused lib's sibling backends (`ggml*.dll`, `llama*.dll`, `mtmd.dll`) are
 * staged NEXT TO `elizainference.dll`, but when a DLL is opened by absolute
 * path the Win32 loader does NOT search that DLL's own directory for its
 * dependencies — it searches the host EXE's dir, the system dirs, and PATH. So
 * `dlopen` fails with "error code 126" (a dependent DLL could not be found)
 * even though the siblings are right there. Linux/macOS don't need this:
 * `stage-desktop-fused-lib.mjs` bakes a relative rpath (`$ORIGIN` /
 * `@loader_path`) at link time so the loader resolves siblings from the lib's
 * own dir.
 *
 * Idempotent; a no-op off win32, when `dir` is empty, and when `dir` is
 * already on PATH. The "already on PATH" check compares resolved paths
 * case-insensitively: Windows paths are case-insensitive, so `C:\App\Lib` and
 * `c:\app\lib` name the same directory and must not be prepended twice.
 *
 * @param dir Directory to expose to the loader. It is passed through
 *   `path.resolve` first, so a relative path is taken relative to the current
 *   working directory. The resolved form (original casing) is what gets
 *   prepended.
 */
function ensureWin32DllSearchDir(dir: string): void {
  if (process.platform !== "win32" || !dir) return;

  const current = process.env.PATH ?? "";
  const resolved = path.resolve(dir);
  // Fold case only for the comparison; the entry written to PATH keeps the
  // caller's casing.
  const wanted = resolved.toLowerCase();

  const already = current
    .split(path.delimiter)
    // Empty segments (leading/trailing/doubled delimiters) are skipped —
    // `path.resolve("")` would otherwise resolve to the working directory.
    .some((seg) => seg !== "" && path.resolve(seg).toLowerCase() === wanted);
  if (already) return;

  process.env.PATH = current
    ? `${resolved}${path.delimiter}${current}`
    : resolved;
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* ABI version the JS binding was authored against. Must match the value
|
|
59
|
+
* `eliza_inference_abi_version()` returns at runtime — a mismatch is a
|
|
60
|
+
* hard error (AGENTS.md §3, §9: no silent compatibility shims).
|
|
61
|
+
*
|
|
62
|
+
* Bump in lockstep with `ELIZA_INFERENCE_ABI_VERSION` in
|
|
63
|
+
* `scripts/omnivoice-fuse/ffi.h` whenever the C surface changes shape.
|
|
64
|
+
*
|
|
65
|
+
* v4: the FFI bridge resolves `speaker_preset_id` against the bundle's
|
|
66
|
+
* `cache/voice-preset-<id>.bin` (ELZ2 v2) and applies the
|
|
67
|
+
* `(instruct, ref_audio_tokens, ref_T, ref_text)` triple to
|
|
68
|
+
* `ov_tts_params` before calling `ov_synthesize`. Adds the
|
|
69
|
+
* `eliza_inference_encode_reference` entrypoint that the freeze CLI
|
|
70
|
+
* uses to pre-encode reference WAVs into the preset file. A v3 caller
|
|
71
|
+
* remains source-compatible: every v3 entry point keeps its v3 shape.
|
|
72
|
+
*
|
|
73
|
+
* v5: the FFI bridge gains the native openWakeWord surface
|
|
74
|
+
* (`eliza_inference_wakeword_supported/open/score/reset/close`). It
|
|
75
|
+
* replaces the previous `onnxruntime-node`-backed wake-word path —
|
|
76
|
+
* the JS binding routes wake-word detection exclusively through this
|
|
77
|
+
* ABI with no ONNX fallback (AGENTS.md §3, §8). v4 callers that
|
|
78
|
+
* never touched the wake-word entries are source-compatible.
|
|
79
|
+
*
|
|
80
|
+
* v6: the FFI bridge gains the native speaker-encoder + diarizer
|
|
81
|
+
* surfaces (`eliza_inference_speaker_supported/open/embed/free/close`
|
|
82
|
+
* and `eliza_inference_diariz_supported/open/segment/close`). These
|
|
83
|
+
* fuse the remaining standalone `libvoice_classifier` voice
|
|
84
|
+
* classifiers into the one `libelizainference` handle so the whole
|
|
85
|
+
* voice pipeline runs through a single native lib. v5 callers that
|
|
86
|
+
* never touched the speaker/diarizer entries are source-compatible.
|
|
87
|
+
*
|
|
88
|
+
* v9: the last text-adjacent modalities move onto the fused handle. Three
|
|
89
|
+
* additive surfaces + probes: text embeddings (`embed` / `embedSupported`),
|
|
90
|
+
* mmproj vision describe (`describeImage` / `visionSupported`), and the
|
|
91
|
+
* tokenizer (`tokenize` / `detokenize` / `tokenizeSupported`). With these,
|
|
92
|
+
* libllama is fully retired: text, embeddings, vision, and tokenization all
|
|
93
|
+
* run through the fused handle. A pre-v9 library lacks these symbols, so the
|
|
94
|
+
* probes report unsupported and the fused runtime refuses (there is no
|
|
95
|
+
* libllama fallback). v8 callers that never touched the new entries remain
|
|
96
|
+
* source-compatible (the new probes simply return false on a v8 lib).
|
|
97
|
+
*
|
|
98
|
+
* v10: Kokoro-82M TTS folded in-process. The fused handle gains
|
|
99
|
+
* `eliza_inference_kokoro_supported/load/synthesize/sample_rate` so the
|
|
100
|
+
* mobile Kokoro path synthesizes through the same dlopen()-ed
|
|
101
|
+
* libelizainference as OmniVoice instead of POSTing to the local-TCP
|
|
102
|
+
* `llama-server /v1/audio/speech` route (forbidden on iOS / Google Play).
|
|
103
|
+
* The four symbols are additive — a v9 library lacks them, so the
|
|
104
|
+
* `kokoroSupported()` probe reports false and the Kokoro FFI runtime
|
|
105
|
+
* refuses (no TCP fallback on mobile). A v9 library is still accepted at
|
|
106
|
+
* degraded capability: its voice/ASR/VAD/LLM/text surface is unchanged and
|
|
107
|
+
* Kokoro just probes unsupported on it.
|
|
108
|
+
*/
|
|
109
|
+
export const ELIZA_INFERENCE_ABI_VERSION = 13 as const;
// NOTE(review): the version notes preceding this constant cover v4, v5, v6, v9 and v10 only, while the bound ABI is 13 — the v7, v8 and v11–v13 changes are not described here. Confirm against `ELIZA_INFERENCE_ABI_VERSION` in `ffi.h` and extend the notes.
|
|
110
|
+
|
|
111
|
+
/**
 * A single recognised word paired with its timing, in milliseconds measured
 * from the start of the utterance (playback-synced).
 */
export interface AsrWordTiming {
  /** The word exactly as it appears in the transcript. */
  text: string;
  /** Offset at which the word begins. */
  startMs: number;
  /** Offset at which the word ends. */
  endMs: number;
}

/**
 * Rebuild the `{ text, startMs, endMs }` list for a v12 timed-ASR result.
 *
 * The native `eliza_inference_asr_transcribe_timed` sizes its `startMs` /
 * `endMs` arrays by splitting the transcript on ASCII whitespace: `std::isspace`
 * in the C locale matches EXACTLY ` \t\n\v\f\r`. The split here has to be the
 * same one, byte-for-byte. A Unicode-aware `\s` split would also break on NBSP
 * and ideographic space (U+00A0, U+3000, …), which the native side keeps inside
 * a word; the word list would then no longer line up one-to-one with the timing
 * arrays and each word would be zipped with another word's timing — a desync
 * that `validateAsrWordTimings` cannot detect because it never compares text
 * against count.
 *
 * `count` is only smaller than the real word count when the caller's
 * `maxWords` cap was reached; the trailing, untimed words are then dropped.
 *
 * @param text Full transcript returned by the native call.
 * @param count Number of timed words the native call reported.
 * @param startMs Per-word start offsets; a missing slot reads as `0`.
 * @param endMs Per-word end offsets; a missing slot reads as `0`.
 * @returns One entry per timed word, in transcript order. The length never
 *   exceeds the number of whitespace-separated words in `text`.
 */
export function recoverAsrWords(
  text: string,
  count: number,
  startMs: Int32Array,
  endMs: Int32Array,
): AsrWordTiming[] {
  // ASCII-only whitespace class, mirroring the native split; empty pieces from
  // leading/trailing whitespace are discarded.
  const pieces = text.split(/[ \t\n\v\f\r]+/).filter(Boolean);
  const limit = Math.min(count, pieces.length);

  const timed: AsrWordTiming[] = [];
  let index = 0;
  for (const piece of pieces) {
    // Stop once the number of timed words has been reached.
    if (!(index < limit)) break;
    timed.push({
      text: piece,
      startMs: startMs[index] ?? 0,
      endMs: endMs[index] ?? 0,
    });
    index += 1;
  }
  return timed;
}
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Pooling strategies for `embed`. Mirror `enum llama_pooling_type` and the
|
|
153
|
+
* `ELIZA_POOLING_*` constants in `eliza-inference-ffi.h`.
|
|
154
|
+
*/
|
|
155
|
+
export const ELIZA_POOLING_MEAN = 1;
|
|
156
|
+
export const ELIZA_POOLING_CLS = 2;
|
|
157
|
+
export const ELIZA_POOLING_LAST = 3;
|
|
158
|
+
|
|
159
|
+
/** Status codes mirrored from `ffi.h`. Negative = failure. */
|
|
160
|
+
export const ELIZA_OK = 0;
|
|
161
|
+
export const ELIZA_ERR_NOT_IMPLEMENTED = -1;
|
|
162
|
+
export const ELIZA_ERR_INVALID_ARG = -2;
|
|
163
|
+
export const ELIZA_ERR_BUNDLE_INVALID = -3;
|
|
164
|
+
export const ELIZA_ERR_FFI_FAULT = -4;
|
|
165
|
+
export const ELIZA_ERR_OOM = -5;
|
|
166
|
+
export const ELIZA_ERR_ABI_MISMATCH = -6;
|
|
167
|
+
export const ELIZA_ERR_CANCELLED = -7;
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* WeSpeaker ResNet34-LM embedding dimension. The native
|
|
171
|
+
* `eliza_inference_speaker_embed` writes exactly this many L2-normalized
|
|
172
|
+
* fp32 values into the caller-owned output buffer. Mirrors the C-side
|
|
173
|
+
* `VOICE_SPEAKER_EMBEDDING_DIM` and `SPEAKER_GGML_EMBEDDING_DIM`.
|
|
174
|
+
*/
|
|
175
|
+
const SPEAKER_EMBEDDING_DIM = 256;
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* Upper bound on the per-window diarizer label count. pyannote-3 emits 293
|
|
179
|
+
* int8 frame labels per 5 s window; the caller passes a generous capacity and
|
|
180
|
+
* the library reports the real count back via `*io_n_labels`.
|
|
181
|
+
*/
|
|
182
|
+
const DIARIZ_LABELS_CAPACITY = 2048;
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Region names the lifecycle hands to `mmap_acquire` / `mmap_evict`.
|
|
186
|
+
* Mirrors the set the C stub validates in `ffi-stub.c::valid_region`.
|
|
187
|
+
*/
|
|
188
|
+
export type ElizaInferenceRegion =
|
|
189
|
+
| "tts"
|
|
190
|
+
| "asr"
|
|
191
|
+
| "text"
|
|
192
|
+
| "mtp"
|
|
193
|
+
| "vad"
|
|
194
|
+
| "wakeword";
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Opaque pointer to the C-side `EliInferenceContext`. Numeric on Bun
|
|
198
|
+
* (FFI returns the raw pointer as `bigint`); never inspected on the JS
|
|
199
|
+
* side beyond passing it back through the binding.
|
|
200
|
+
*/
|
|
201
|
+
export type ElizaInferenceContextHandle = bigint;
// NOTE(review): bun:ffi surfaces `FFIType.ptr` values as `number`; a `bigint` handle presumably means the context-returning symbols are declared with a 64-bit integer FFI type — confirm against the symbol table in this file.
|
|
202
|
+
|
|
203
|
+
/** Opaque pointer to a native Silero VAD session. */
|
|
204
|
+
export type NativeVadHandle = bigint;
|
|
205
|
+
|
|
206
|
+
/** Opaque pointer to a native openWakeWord session. */
|
|
207
|
+
export type NativeWakeWordHandle = bigint;
|
|
208
|
+
|
|
209
|
+
/** Opaque pointer to a native WeSpeaker speaker-encoder session. */
|
|
210
|
+
export type NativeSpeakerHandle = bigint;
|
|
211
|
+
|
|
212
|
+
/** Opaque pointer to a native pyannote diarizer session. */
|
|
213
|
+
export type NativeDiarizHandle = bigint;
|
|
214
|
+
|
|
215
|
+
/** Opaque pointer to a streaming-LLM session. */
|
|
216
|
+
export type LlmStreamHandle = bigint;
|
|
217
|
+
|
|
218
|
+
/**
 * Per-session configuration passed to `llmStreamOpen`.
 *
 * Mirrors `eliza_llm_stream_config_t` declared in
 * `native/llama.cpp/tools/omnivoice/include/eliza-inference-ffi.h` (ABI v9).
 */
export interface LlmStreamConfig {
  maxTokens: number;
  temperature: number;
  topP: number;
  topK: number;
  repeatPenalty: number;
  /** Pinned slot id; -1 disables pinning. */
  slotId: number;
  /** Optional prompt cache key used to derive a slot when `slotId === -1`. */
  promptCacheKey: string | null;
  /** MTP drafter bounds; `0` for either disables speculative decoding. */
  draftMin: number;
  draftMax: number;
  /** Absolute MTP drafter GGUF path; null disables drafter-backed MTP. */
  draftModelPath: string | null;
  /**
   * GBNF grammar source. When set, the native session installs a grammar
   * sampler FIRST in the chain so every sampled token is constrained — this
   * is how the structured-reply envelope is forced on the in-process FFI
   * path. `null`/empty disables the grammar constraint.
   */
  gbnfGrammar?: string | null;
  /** Thinking-tag suppression passthrough (v1 no-op). */
  disableThinking?: boolean;
  /**
   * Per-load GPU offload (ABI v8): the number of model layers to place on
   * the GPU. `undefined`/-1 selects the runtime default (all layers); 0
   * forces CPU. The model is loaded once per ctx, so the FIRST session's
   * value wins.
   */
  gpuLayers?: number;
  /**
   * KV-cache K quant type name (ABI v8), e.g. "f16", "q8_0", "qjl1_256".
   * `undefined`/null leaves the f16 default. Mapped to `ggml_type` by the
   * fused lib's `eliza_llm_stream_config_t.cache_type_k`.
   */
  cacheTypeK?: string | null;
  /** KV-cache V quant type name (ABI v8); see `cacheTypeK`. */
  cacheTypeV?: string | null;
  /** Runtime context window in tokens (ABI v9). `undefined`/0 uses native fallback. */
  contextSize?: number;
}
|
|
264
|
+
|
|
265
|
+
/**
 * One step of streaming LLM output.
 *
 * - `tokens`: the batch of accepted text-model token ids the runtime
 *   committed this step (>= 1; > 1 only when the MTP drafter is active and
 *   the verifier accepted multiple drafts).
 * - `text`: the detokenized text for those tokens.
 * - `done`: `true` only on the final step (EOS reached).
 * - `drafterDrafted` / `drafterAccepted`: populated when the drafter is
 *   active.
 */
export interface LlmStreamStep {
  tokens: number[];
  text: string;
  done: boolean;
  drafterDrafted: number;
  drafterAccepted: number;
}
|
|
280
|
+
|
|
281
|
+
/**
 * One streaming-TTS chunk delivered to the `onChunk` callback given to
 * `ttsSynthesizeStream`.
 *
 * `pcm` is a *view* over the library's buffer: it is valid only while the
 * callback is running, so copy it before returning. `isFinal` marks the
 * zero-length tail chunk that closes the utterance. A callback that returns
 * `true` requests cancellation at the next kernel boundary.
 */
export interface TtsStreamChunk {
  pcm: Float32Array;
  isFinal: boolean;
}
|
|
293
|
+
|
|
294
|
+
/**
 * A native MTP speculative-step event, as delivered through
 * `eliza_inference_set_verifier_callback`.
 *
 * Token indices are in the generated-output stream's domain (token 0 is the
 * first generated token), matching `RejectedTokenRange`.
 * `rejectedFrom`/`rejectedTo` are -1 when nothing was rejected this step.
 */
export interface NativeVerifierEvent {
  acceptedTokenIds: number[];
  rejectedFrom: number;
  rejectedTo: number;
  correctedTokenIds: number[];
}
|
|
307
|
+
|
|
308
|
+
/**
 * Typed handle returned by `loadElizaInferenceFfi`.
 *
 * Each method maps 1:1 to a symbol declared in `ffi.h`. Methods that
 * allocate a context return the opaque pointer; methods that consume one
 * take it as the first argument (or as `ctx` in the args object). Failures
 * throw `VoiceLifecycleError` with the structured code derived from the C
 * return value.
 *
 * Members declared optional (`name?`) belong to later ABI revisions; the
 * matching `*Supported()` probe reports whether the loaded build provides
 * them.
 */
export interface ElizaInferenceFfi {
  /** Library path the binding was loaded from (for diagnostics). */
  readonly libraryPath: string;
  /** ABI version reported by the loaded library. */
  readonly libraryAbiVersion: string;
  /** Create a fresh context anchored at `bundleDir`. */
  create(bundleDir: string): ElizaInferenceContextHandle;
  /** Destroy a previously-created context. Idempotent on already-freed handles. */
  destroy(ctx: ElizaInferenceContextHandle): void;
  /** Map / re-page weights for a region. */
  mmapAcquire(
    ctx: ElizaInferenceContextHandle,
    region: ElizaInferenceRegion,
  ): void;
  /**
   * Release or evict a voice-only region after the lifecycle leaves
   * voice-on. Implementations may madvise mapped pages or unload the
   * ASR/TTS runtime state entirely; callers must treat the region as
   * unavailable until the next `mmapAcquire`.
   */
  mmapEvict(
    ctx: ElizaInferenceContextHandle,
    region: ElizaInferenceRegion,
  ): void;
  /**
   * Synchronous TTS forward. The caller provides the output buffer; the
   * library fills it up to its capacity and returns the number of samples
   * written.
   */
  ttsSynthesize(args: {
    ctx: ElizaInferenceContextHandle;
    text: string;
    speakerPresetId: string | null;
    out: Float32Array;
  }): number;
  /**
   * Synchronous ASR forward. Returns the decoded transcript as a UTF-8
   * string (allocated by the JS side, sized to fit the library's max
   * write).
   */
  asrTranscribe(args: {
    ctx: ElizaInferenceContextHandle;
    pcm: Float32Array;
    sampleRateHz: number;
    maxTextBytes?: number;
  }): string;

  /* ---- ASR word timestamps (ABI v12) --------------------------- */

  /**
   * True when this build can emit per-word ASR timestamps (v12+). v11 and
   * older report false — callers fall back to the text-only `asrTranscribe`.
   */
  timedAsrSupported(): boolean;
  /**
   * Transcribe like `asrTranscribe` AND return per-word `[startMs,endMs)`
   * spans (duration-proportional, char-weighted, monotonic — the honest
   * single-model signal; see the v12 ABI changelog). The word texts come
   * from a whitespace split of the transcript, zipped with the native timing.
   */
  asrTranscribeTimed(args: {
    ctx: ElizaInferenceContextHandle;
    pcm: Float32Array;
    sampleRateHz: number;
    maxTextBytes?: number;
    maxWords?: number;
  }): { text: string; words: AsrWordTiming[] };

  /* ---- Streaming TTS + verifier callback (ABI v2) --------------- */

  /**
   * True when this build implements streaming TTS (false for the stub /
   * a TTS-disabled build). Callers pick the streaming path vs the batch
   * `ttsSynthesize` off this flag — no probe-and-catch.
   */
  ttsStreamSupported(): boolean;
  /**
   * Chunked synthesis. `onChunk` is invoked for each decoded PCM segment
   * as it arrives, then once more with `isFinal: true` (zero-length
   * tail). Returning `true` from `onChunk` requests cancellation; the
   * call then returns `cancelled: true` after the final-chunk callback.
   * Any negative library return is a thrown `VoiceLifecycleError`.
   */
  ttsSynthesizeStream(args: {
    ctx: ElizaInferenceContextHandle;
    text: string;
    speakerPresetId: string | null;
    onChunk: (chunk: TtsStreamChunk) => boolean | undefined;
  }): { cancelled: boolean };
  /**
   * Hard-cancel any in-flight TTS forward pass on `ctx` (started on
   * another thread by `ttsSynthesize` / `ttsSynthesizeStream`). The
   * in-flight call returns `ELIZA_ERR_CANCELLED` at the next kernel
   * boundary. Cancelling nothing is not an error.
   */
  cancelTts(ctx: ElizaInferenceContextHandle): void;
  /**
   * Register (or, with `cb: null`, clear) the native MTP verifier
   * callback. The runtime fires `cb` for every speculative accept/reject
   * step from the in-process drafter↔target loop.
   *
   * The returned handle MUST be kept alive for as long as the callback is
   * registered, and its `close()` called when the callback is cleared (or
   * on dispose) — Bun's `JSCallback` is GC'd otherwise and the native side
   * dereferences a dead pointer.
   */
  setVerifierCallback(
    ctx: ElizaInferenceContextHandle,
    cb: ((event: NativeVerifierEvent) => void) | null,
  ): { close(): void };

  /* ---- OmniVoice reference encode (ABI v4) ---------------------- */

  /**
   * True when this build exports the OmniVoice reference-encode symbols
   * (`eliza_inference_encode_reference`). The freeze CLI uses this to
   * pre-encode the reference audio into the persisted voice preset;
   * the runtime synthesis path never calls it (it reads pre-encoded
   * tokens from the preset file).
   */
  encodeReferenceSupported?(): boolean;
  /**
   * Run the encode-only half of the TTS pipeline (HuBERT semantic + RVQ
   * codec) on a 24 kHz mono fp32 PCM buffer and return the resulting
   * reference-audio-token tensor `[K=8, ref_T]` as a row-major
   * `Int32Array` (`tokens[k*ref_T + t]`). The library allocates and the
   * binding takes care of freeing the native buffer via
   * `eliza_inference_free_tokens` before this returns.
   *
   * The TTS region must have been acquired (`mmapAcquire("tts")`)
   * before the call. `sampleRateHz` must be 24000; the entrypoint does
   * NOT resample, by design — the freeze artifact must be deterministic.
   */
  encodeReference?(args: {
    ctx: ElizaInferenceContextHandle;
    pcm: Float32Array;
    sampleRateHz: number;
  }): { K: number; refT: number; tokens: Int32Array };

  /* ---- Native VAD (ABI v3) -------------------------------------- */

  /** True when this build exports and enables the native Silero VAD backend. */
  vadSupported?(): boolean;
  /** Open a native VAD session. The ABI-compatible sample rate is 16 kHz. */
  vadOpen?(args: {
    ctx: ElizaInferenceContextHandle;
    sampleRateHz: number;
  }): NativeVadHandle;
  /** Process one 512-sample fp32 mono window and return P(speech). */
  vadProcess?(args: { vad: NativeVadHandle; pcm: Float32Array }): number;
  /** Clear native VAD recurrent state at utterance boundaries. */
  vadReset?(vad: NativeVadHandle): void;
  /** Close + free a native VAD session. Idempotent on already-closed handles. */
  vadClose?(vad: NativeVadHandle): void;

  /* ---- Native wake-word (ABI v5) -------------------------------- */

  /**
   * True when this build exports and enables the native openWakeWord
   * backend. The JS binding routes wake-word detection exclusively
   * through this surface; when this returns false, the wake-word path
   * throws a structured "runtime not ready" error — no ONNX fallback
   * (AGENTS.md §3, §8).
   */
  wakewordSupported?(): boolean;
  /**
   * Open a native wake-word session. `sampleRateHz` must be 16000;
   * `headName` selects the classifier head inside the bundle's combined
   * wake-word GGUF (e.g. "hey-eliza").
   */
  wakewordOpen?(args: {
    ctx: ElizaInferenceContextHandle;
    sampleRateHz: number;
    headName: string;
  }): NativeWakeWordHandle;
  /**
   * Score one 1280-sample (80 ms @ 16 kHz) fp32 mono frame and return
   * the latest P(wake) in [0, 1]. Early calls before enough context
   * accumulates return 0.
   */
  wakewordScore?(args: {
    wake: NativeWakeWordHandle;
    pcm: Float32Array;
  }): number;
  /** Clear all streaming state (audio tail, mel ring, embedding ring). */
  wakewordReset?(wake: NativeWakeWordHandle): void;
  /** Close + free a native wake-word session. Idempotent on already-closed handles. */
  wakewordClose?(wake: NativeWakeWordHandle): void;

  /* ---- Native speaker encoder (ABI v6) -------------------------- */

  /** True when this build exports and enables the native WeSpeaker encoder. */
  speakerSupported?(): boolean;
  /**
   * Open a native speaker-encoder session. `ggufPath` may be null to
   * resolve the bundle's `speaker/` dir, or an absolute path to a
   * WeSpeaker GGUF.
   */
  speakerOpen?(args: {
    ctx: ElizaInferenceContextHandle;
    ggufPath: string | null;
  }): NativeSpeakerHandle;
  /**
   * Embed `pcm` (16 kHz mono fp32) into a 256-d L2-normalized speaker
   * embedding. Returns a freshly-allocated `Float32Array` of length 256.
   */
  speakerEmbed?(args: {
    speaker: NativeSpeakerHandle;
    pcm: Float32Array;
  }): Float32Array;
  /** Close + free a native speaker-encoder session. Idempotent on already-closed handles. */
  speakerClose?(speaker: NativeSpeakerHandle): void;

  /* ---- Native diarizer (ABI v6) --------------------------------- */

  /** True when this build exports and enables the native pyannote diarizer. */
  diarizSupported?(): boolean;
  /**
   * Open a native diarizer session. `ggufPath` may be null to resolve the
   * bundle's `diariz/` dir, or an absolute path to a pyannote GGUF.
   */
  diarizOpen?(args: {
    ctx: ElizaInferenceContextHandle;
    ggufPath: string | null;
  }): NativeDiarizHandle;
  /**
   * Segment one 80000-sample (5 s @ 16 kHz) mono fp32 window into a
   * per-frame powerset-label sequence. Returns the `Int8Array` of frame
   * labels (293 for pyannote-segmentation-3.0), each in `[0, 7)`.
   */
  diarizSegment?(args: {
    diariz: NativeDiarizHandle;
    pcm: Float32Array;
  }): Int8Array;
  /** Close + free a native diarizer session. Idempotent on already-closed handles. */
  diarizClose?(diariz: NativeDiarizHandle): void;

  /* ---- Streaming ASR (ABI v2) ----------------------------------- */

  /**
   * True when this build has a working streaming ASR decoder (false for
   * the stub / an ASR-disabled build). Callers pick the fused streaming
   * path vs the fused batch interim adapter off this flag — they do not
   * have to open a session and catch `ELIZA_ERR_NOT_IMPLEMENTED`.
   */
  asrStreamSupported(): boolean;
  /** Open a streaming ASR session. The handle is closed via `asrStreamClose`. */
  asrStreamOpen(args: {
    ctx: ElizaInferenceContextHandle;
    sampleRateHz: number;
  }): bigint;
  /** Feed one PCM frame at the session's sample rate. */
  asrStreamFeed(args: { stream: bigint; pcm: Float32Array }): void;
  /** Read the current running partial transcript (and token ids when available). */
  asrStreamPartial(args: {
    stream: bigint;
    maxTextBytes?: number;
    maxTokens?: number;
  }): { partial: string; tokens?: number[] };
  /** Force-finalize: drain buffered audio, run a final decode, return the final transcript. */
  asrStreamFinish(args: {
    stream: bigint;
    maxTextBytes?: number;
    maxTokens?: number;
  }): { partial: string; tokens?: number[] };
  /** Close + free a streaming ASR session. Idempotent on already-closed handles. */
  asrStreamClose(stream: bigint): void;

  /* ---- Streaming LLM (additive on top of ABI v3) ---------------- */

  /**
   * True when this build exports the streaming LLM symbols
   * (`eliza_inference_llm_stream_*`). Transitional builds may load
   * without them; the runner checks this flag before taking the FFI
   * streaming path.
   */
  llmStreamSupported?(): boolean;
  /**
   * True when this build wires same-file / separate-drafter MTP
   * speculative decoding into the streaming-LLM text path (ABI v8). A v7
   * library returns `false` here (the symbol is absent), so the fused TEXT
   * path can refuse to route through it without a speculative-decode
   * regression. Anti-regression guard — see ABI v8 changelog.
   */
  llmMtpSupported?(): boolean;
  /**
   * True when this build maps + applies KV-cache quant types in the
   * streaming-LLM text path (ABI v8). A v7 library returns `false` (symbol
   * absent); the fused TEXT path refuses it to avoid a silent fallback to
   * f16 KV when a quantized cache was requested.
   */
  llmKvQuantSupported?(): boolean;
  /**
   * Open a streaming-LLM session against `ctx`. Failure throws
   * `VoiceLifecycleError`. Close exactly once via `llmStreamClose`.
   */
  llmStreamOpen?(args: {
    ctx: ElizaInferenceContextHandle;
    config: LlmStreamConfig;
  }): LlmStreamHandle;
  /** Feed a batch of pre-tokenized prompt tokens before the first `next`. */
  llmStreamPrefill?(args: {
    stream: LlmStreamHandle;
    tokens: Int32Array;
  }): void;
  /**
   * Pull the next streaming step. `step.done === true` is the final step.
   *
   * NOTE(review): earlier wording of this comment said the call returns
   * `null` when the runtime declined to emit tokens (rare — drafter
   * rejected everything and the verifier had nothing to commit) and that
   * the caller should poll again. The declared return type is the
   * non-nullable `LlmStreamStep`, so the two disagree — confirm against the
   * binding implementation which one is authoritative.
   */
  llmStreamNext?(args: {
    stream: LlmStreamHandle;
    maxTokensPerStep?: number;
    maxTextBytes?: number;
  }): LlmStreamStep;
  /** Cancel in-flight generation; the next `_next` returns CANCELLED. */
  llmStreamCancel?(stream: LlmStreamHandle): void;
  /** Persist the session's slot KV state to disk. */
  llmStreamSaveSlot?(args: { stream: LlmStreamHandle; filename: string }): void;
  /** Restore a previously-saved slot KV file. Call before the first prefill/next. */
  llmStreamRestoreSlot?(args: {
    stream: LlmStreamHandle;
    filename: string;
  }): void;
  /** Close + free a streaming-LLM session. Idempotent on already-closed handles. */
  llmStreamClose?(stream: LlmStreamHandle): void;

  /* ---- Text embeddings (ABI v9) -------------------------------- */

  /**
   * True when this build wires the fused text-embedding path
   * (`eliza_inference_embed`). A v8 library returns false (symbol absent),
   * so the default TEXT_EMBEDDING handler keeps the node-llama-cpp /
   * libllama path.
   */
  embedSupported?(): boolean;
  /**
   * Compute a pooled, L2-normalized sentence embedding for `text` over the
   * bundle's text model. `pooling` selects the strategy (default MEAN — the
   * gte-small convention). Returns a `Float32Array` of length `n_embd`.
   */
  embed?(args: {
    ctx: ElizaInferenceContextHandle;
    text: string;
    pooling?: number;
  }): Float32Array;

  /* ---- mmproj vision describe (ABI v9) ------------------------- */

  /**
   * True when this build was compiled with vision (`-DELIZA_ENABLE_VISION`)
   * and exports `eliza_inference_describe_image`. A v8 / vision-off library
   * returns false, so the IMAGE_DESCRIPTION handler keeps the libllama mtmd
   * path.
   */
  visionSupported?(): boolean;
  /**
   * Describe `imageBytes` (raw PNG/JPEG/WebP) through the text model's
   * mmproj projector at `mmprojPath`. `prompt` defaults to a generic
   * caption request. Returns the description text.
   */
  describeImage?(args: {
    ctx: ElizaInferenceContextHandle;
    imageBytes: Uint8Array;
    mmprojPath: string;
    prompt?: string;
    maxTextBytes?: number;
  }): string;

  /* ---- Streaming mmproj vision describe (ABI v13) -------------- */

  /**
   * True when this build wires token-by-token vision describe
   * (`eliza_inference_describe_image_stream_open`). A <=v12 / vision-off
   * library returns false, so the IMAGE_DESCRIPTION handler falls back to the
   * buffered {@link describeImage}.
   */
  visionStreamSupported?(): boolean;
  /**
   * Open a streaming vision-describe session: prime an `LlmStreamHandle`'s KV
   * with `imageBytes` (raw PNG/JPEG/WebP) + `prompt` through the mmproj at
   * `mmprojPath`, then PULL tokens with the existing {@link llmStreamNext} loop
   * and release via {@link llmStreamClose} — the same machinery as chat text.
   * Throws `VoiceLifecycleError` when the build lacks vision streaming.
   */
  describeImageStreamOpen?(args: {
    ctx: ElizaInferenceContextHandle;
    imageBytes: Uint8Array;
    mmprojPath: string;
    prompt?: string;
  }): LlmStreamHandle;

  /* ---- Tokenizer (ABI v9) -------------------------------------- */

  /**
   * True when this build exposes the tokenizer over the loaded text vocab
   * (`eliza_inference_tokenize`). A pre-v9 library returns false, so the
   * desktop fused runtime refuses (libllama is retired — no tokenizer sidecar).
   */
  tokenizeSupported?(): boolean;
  /**
   * Tokenize `text` against the loaded text model's vocab. `addSpecial`
   * (default true) adds BOS/EOS; `parseSpecial` (default false) renders
   * special tokens from the input. Returns the token ids as an `Int32Array`.
   */
  tokenize?(args: {
    ctx: ElizaInferenceContextHandle;
    text: string;
    addSpecial?: boolean;
    parseSpecial?: boolean;
  }): Int32Array;
  /**
   * Detokenize `tokens` back to text against the loaded text model's vocab.
   * `removeSpecial` (default false) strips BOS/EOS; `unparseSpecial`
   * (default false) renders special tokens.
   */
  detokenize?(args: {
    ctx: ElizaInferenceContextHandle;
    tokens: Int32Array;
    removeSpecial?: boolean;
    unparseSpecial?: boolean;
    maxTextBytes?: number;
  }): string;

  /* ---- End-of-turn scoring (ABI v11) -------------------------- */

  /**
   * True when this build wires the fused end-of-turn scorer
   * (`eliza_inference_llm_eot_score`). A v10 library returns false (symbol
   * absent), so the composite EOT classifier uses the heuristic-only signal.
   */
  eotSupported?(): boolean;
  /**
   * Single causal forward pass over `tokens` (a tokenized partial transcript)
   * returning the next-token softmax probability of `targetTokenId` (the
   * end-of-turn marker, e.g. `<end_of_turn>`), plus the argmax next token and
   * its probability. Runs on a dedicated scoring context over the loaded text
   * model; KV is cleared per call so scores are independent.
   */
  eotScore?(args: {
    ctx: ElizaInferenceContextHandle;
    tokens: Int32Array;
    targetTokenId: number;
  }): { targetProb: number; topToken: number; topProb: number };

  /* ---- Kokoro TTS (ABI v10) ----------------------------------- */

  /**
   * True when this build linked Eliza-1's in-process Kokoro engine
   * (`eliza_inference_kokoro_*`). A v9 library returns false (symbols
   * absent), so the Kokoro FFI runtime refuses rather than falling back to
   * the local-TCP `llama-server` route (forbidden on iOS / Google Play).
   */
  kokoroSupported?(): boolean;
  /**
   * Load the Kokoro GGUF at `ggufPath` and the voice preset `.bin` at
   * `voiceBinPath` (raw fp32 ref_s, `styleDim` inner dim — 256 for v1.0)
   * into `ctx`. Replaces any previously-loaded Kokoro model on the ctx.
   * Throws `VoiceLifecycleError` on a negative return with the C diagnostic.
   */
  kokoroLoad?(args: {
    ctx: ElizaInferenceContextHandle;
    ggufPath: string;
    voiceBinPath: string;
    styleDim?: number;
  }): void;
  /**
   * Synthesize `text` through the loaded Kokoro model+voice at the model's
   * native rate (24 kHz for v1.0). `speed` scales predicted durations
   * (default 1.0). Allocates an output buffer of `maxSamples` fp32 samples,
   * reads back the count the library wrote, and returns that slice.
   */
  kokoroSynthesize?(args: {
    ctx: ElizaInferenceContextHandle;
    text: string;
    speed?: number;
    maxSamples: number;
  }): Float32Array;
  /** The loaded Kokoro model's audio sample rate (24000 for v1.0). */
  kokoroSampleRate?(ctx: ElizaInferenceContextHandle): number;

  /** Best-effort dispose for the binding itself (closes the dlopen handle). */
  close(): void;
}
|
|
794
|
+
|
|
795
|
+
/* ---------------------------------------------------------------- */
|
|
796
|
+
/* Loader */
|
|
797
|
+
/* ---------------------------------------------------------------- */
|
|
798
|
+
|
|
799
|
+
/**
 * Runtime detector.
 *
 * @returns `true` when a `Bun` global is defined on `globalThis`, which is
 *   how this module recognises that it is running under Bun.
 */
function isBunRuntime(): boolean {
  const host = globalThis as { Bun?: unknown };
  return host.Bun !== undefined;
}
|
|
803
|
+
|
|
804
|
+
/**
 * Load `libelizainference` at `dylibPath` and bind every symbol
 * declared in `ffi.h`. The returned handle's methods delegate directly
 * to the library; they throw `VoiceLifecycleError` on any negative
 * return value or runtime fault.
 *
 * Throws synchronously (no Promise) when:
 *   - the JS runtime is not Bun (no FFI primitive available),
 *   - `dylibPath` is empty,
 *   - `dlopen` cannot find or open the library,
 *   - the library's reported ABI version does not match
 *     `ELIZA_INFERENCE_ABI_VERSION`.
 *
 * The first two checks happen here; the actual load is delegated to
 * `bindWithBunFfi`.
 *
 * @param dylibPath Path of the shared library to load.
 * @returns The typed binding over the loaded library.
 * @throws VoiceLifecycleError with code `"kernel-missing"` for the runtime
 *   and path checks performed in this function.
 */
export function loadElizaInferenceFfi(dylibPath: string): ElizaInferenceFfi {
  if (!isBunRuntime()) {
    // `process` is not guaranteed to exist on a non-Bun host (e.g. a browser
    // or WebView). Guard the lookup so the caller still receives the
    // structured error rather than a ReferenceError from the message itself.
    const runtimeVersions =
      typeof process !== "undefined" ? process.versions : undefined;
    throw new VoiceLifecycleError(
      "kernel-missing",
      `[ffi-bindings] Cannot load libelizainference: current runtime is not Bun. ` +
        `The fused omnivoice FFI uses bun:ffi (production runs under Bun via Electrobun + Capacitor). ` +
        `process.versions=${JSON.stringify(runtimeVersions)}`,
    );
  }
  if (!dylibPath || dylibPath.length === 0) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] loadElizaInferenceFfi: dylibPath is required",
    );
  }
  return bindWithBunFfi(dylibPath);
}
|
|
833
|
+
|
|
834
|
+
/* ---------------------------------------------------------------- */
|
|
835
|
+
/* Bun:ffi binding */
|
|
836
|
+
/* ---------------------------------------------------------------- */
|
|
837
|
+
|
|
838
|
+
/**
 * Call signatures of the `libelizainference` symbols as seen through the
 * bound library's `symbols` table.
 *
 * Required members belong to the core surface that is always bound; members
 * marked optional (`?`) belong to additive symbol families that may be absent
 * from the loaded library, so callers must check for them before calling.
 * Pointer-like arguments and results are typed loosely (`unknown`, or
 * `bigint | number` for raw address values).
 */
interface BunFfiSymbols {
  /* ---- Core: version, context lifecycle, mmap regions ---- */
  eliza_inference_abi_version: () => unknown;
  eliza_inference_create: (bundleDir: unknown, outErr: unknown) => unknown;
  eliza_inference_destroy: (ctx: bigint) => void;
  eliza_inference_mmap_acquire: (ctx: bigint, region: unknown, outErr: unknown) => number;
  eliza_inference_mmap_evict: (ctx: bigint, region: unknown, outErr: unknown) => number;

  /* ---- Core: buffered TTS / ASR ---- */
  eliza_inference_tts_synthesize: (
    ctx: bigint,
    text: unknown,
    textLen: bigint | number,
    speaker: unknown,
    outPcm: unknown,
    maxSamples: bigint | number,
    outErr: unknown,
  ) => number;
  eliza_inference_asr_transcribe: (
    ctx: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    sampleRateHz: number,
    outText: unknown,
    maxTextBytes: bigint | number,
    outErr: unknown,
  ) => number;

  /* ---- ASR word timestamps (ABI v12). Optional. ---- */
  eliza_inference_asr_timestamps_supported?: () => number;
  eliza_inference_asr_transcribe_timed?: (
    ctx: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    sampleRateHz: number,
    outText: unknown,
    maxTextBytes: bigint | number,
    outWordStartMs: unknown,
    outWordEndMs: unknown,
    ioNWords: unknown,
    outErr: unknown,
  ) => number;

  /* ---- Streaming TTS + native verifier callback (ABI v2) ---- */
  eliza_inference_tts_stream_supported: () => number;
  eliza_inference_tts_synthesize_stream: (
    ctx: bigint,
    text: unknown,
    textLen: bigint | number,
    speaker: unknown,
    onChunk: unknown,
    userData: bigint | number,
    outErr: unknown,
  ) => number;
  eliza_inference_cancel_tts: (ctx: bigint, outErr: unknown) => number;
  eliza_inference_set_verifier_callback: (
    ctx: bigint,
    cb: unknown,
    userData: bigint | number,
    outErr: unknown,
  ) => number;

  /* ---- OmniVoice reference encode (ABI v4). Optional. ---- */
  eliza_inference_encode_reference?: (
    ctx: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    sampleRateHz: number,
    outK: unknown,
    outRefT: unknown,
    outTokens: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_free_tokens?: (tokens: bigint | number) => void;

  /* ---- Native Silero VAD (ABI v3). Optional. ---- */
  eliza_inference_vad_supported?: () => number;
  eliza_inference_vad_open?: (ctx: bigint, sampleRateHz: number, outErr: unknown) => unknown;
  eliza_inference_vad_process?: (
    vad: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    outProbability: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_vad_reset?: (vad: bigint, outErr: unknown) => number;
  eliza_inference_vad_close?: (vad: bigint) => void;

  /* ---- Native openWakeWord (ABI v5). Optional. ---- */
  eliza_inference_wakeword_supported?: () => number;
  eliza_inference_wakeword_open?: (
    ctx: bigint,
    sampleRateHz: number,
    headName: unknown,
    outErr: unknown,
  ) => unknown;
  eliza_inference_wakeword_score?: (
    wake: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    outProbability: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_wakeword_reset?: (wake: bigint, outErr: unknown) => number;
  eliza_inference_wakeword_close?: (wake: bigint) => void;

  /* ---- Voice classifiers (ABI v6): speaker encoder. Optional. ---- */
  eliza_inference_speaker_supported?: () => number;
  eliza_inference_speaker_open?: (ctx: bigint, ggufPath: unknown, outErr: unknown) => unknown;
  eliza_inference_speaker_embed?: (
    speaker: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    outEmbedding: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_speaker_close?: (speaker: bigint) => void;

  /* ---- Voice classifiers (ABI v6): diarizer. Optional. ---- */
  eliza_inference_diariz_supported?: () => number;
  eliza_inference_diariz_open?: (ctx: bigint, ggufPath: unknown, outErr: unknown) => unknown;
  eliza_inference_diariz_segment?: (
    diariz: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    outLabels: unknown,
    ioNLabels: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_diariz_close?: (diariz: bigint) => void;

  /* ---- Streaming ASR (ABI v2) ---- */
  eliza_inference_asr_stream_supported: () => number;
  eliza_inference_asr_stream_open: (ctx: bigint, sampleRateHz: number, outErr: unknown) => unknown;
  eliza_inference_asr_stream_feed: (
    stream: bigint,
    pcm: unknown,
    nSamples: bigint | number,
    outErr: unknown,
  ) => number;
  eliza_inference_asr_stream_partial: (
    stream: bigint,
    outText: unknown,
    maxTextBytes: bigint | number,
    outTokens: unknown,
    ioNTokens: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_asr_stream_finish: (
    stream: bigint,
    outText: unknown,
    maxTextBytes: bigint | number,
    outTokens: unknown,
    ioNTokens: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_asr_stream_close: (stream: bigint) => void;

  /* ---- Core: release a library-owned string ---- */
  eliza_inference_free_string: (str: bigint | number) => void;

  /* ---- ABI v8 capability probes. Absent on v7 (treated as unsupported). ---- */
  eliza_inference_llm_mtp_supported?: () => number;
  eliza_inference_llm_kv_quant_supported?: () => number;

  /* ---- Streaming LLM (additive). Optional; transitional builds may omit. ---- */
  eliza_inference_llm_stream_open?: (ctx: bigint, cfg: unknown, outErr: unknown) => unknown;
  eliza_inference_llm_stream_prefill?: (
    stream: bigint,
    tokens: unknown,
    nTokens: bigint | number,
    outErr: unknown,
  ) => number;
  eliza_inference_llm_stream_next?: (
    stream: bigint,
    tokensOut: unknown,
    tokensCapacity: bigint | number,
    numTokensOut: unknown,
    textOut: unknown,
    textCapacity: bigint | number,
    drafterDraftedOut: unknown,
    drafterAcceptedOut: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_llm_stream_cancel?: (stream: bigint) => number;
  eliza_inference_llm_stream_save_slot?: (
    stream: bigint,
    filename: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_llm_stream_restore_slot?: (
    stream: bigint,
    filename: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_llm_stream_close?: (stream: bigint) => void;

  /* ---- Text embeddings (ABI v9). Optional; absent on v8 builds. ---- */
  eliza_inference_embed_supported?: () => number;
  eliza_inference_embed?: (
    ctx: bigint,
    text: unknown,
    textLen: bigint | number,
    pooling: number,
    outEmbedding: unknown,
    outCapacity: bigint | number,
    outDim: unknown,
    outErr: unknown,
  ) => number;

  /* ---- mmproj vision describe (ABI v9). Optional; absent on v8 / vision-off builds. ---- */
  eliza_inference_vision_supported?: () => number;
  eliza_inference_describe_image?: (
    ctx: bigint,
    imageBytes: unknown,
    nBytes: bigint | number,
    mmprojPath: unknown,
    prompt: unknown,
    outText: unknown,
    maxTextBytes: bigint | number,
    outErr: unknown,
  ) => number;

  /*
   * ---- Streaming mmproj vision describe (ABI v13). Optional. ----
   * Absent on <=v12 builds; the probe then reports unsupported and
   * IMAGE_DESCRIPTION falls back to the buffered
   * `eliza_inference_describe_image`. `_stream_open` returns an
   * EliLlmStream* (as a pointer/bigint) primed with the image+prompt KV; the
   * caller drives the existing `eliza_inference_llm_stream_next` loop and
   * frees via `eliza_inference_llm_stream_close`.
   */
  eliza_inference_vision_stream_supported?: () => number;
  eliza_inference_describe_image_stream_open?: (
    ctx: bigint,
    imageBytes: unknown,
    nBytes: bigint | number,
    mmprojPath: unknown,
    prompt: unknown,
    outErr: unknown,
  ) => bigint;

  /* ---- Tokenizer (ABI v9). Optional; absent on v8 builds. ---- */
  eliza_inference_tokenize_supported?: () => number;
  eliza_inference_tokenize?: (
    ctx: bigint,
    text: unknown,
    textLen: bigint | number,
    addSpecial: number,
    parseSpecial: number,
    outTokens: unknown,
    outN: unknown,
    outErr: unknown,
  ) => number;
  eliza_inference_detokenize?: (
    ctx: bigint,
    tokens: unknown,
    nTokens: bigint | number,
    removeSpecial: number,
    unparseSpecial: number,
    outText: unknown,
    maxTextBytes: bigint | number,
    outErr: unknown,
  ) => number;

  /*
   * ---- End-of-turn scoring (ABI v11). Optional. ----
   * Absent on v10 builds; the probe then reports unsupported and the
   * composite EOT classifier uses the heuristic-only signal.
   */
  eliza_inference_llm_eot_supported?: () => number;
  eliza_inference_llm_eot_score?: (
    ctx: bigint,
    tokenIds: unknown,
    numTokens: bigint | number,
    targetTokenId: number,
    outTargetProb: unknown,
    outTopToken: unknown,
    outTopProb: unknown,
    outErr: unknown,
  ) => number;

  /*
   * ---- Kokoro TTS (ABI v10). Optional. ----
   * Absent on v9 builds; the probe then reports unsupported and the Kokoro
   * FFI runtime refuses.
   */
  eliza_inference_kokoro_supported?: () => number;
  eliza_inference_kokoro_load?: (
    ctx: bigint,
    ggufPath: unknown,
    voiceBinPath: unknown,
    styleDim: number,
    outErr: unknown,
  ) => number;
  eliza_inference_kokoro_synthesize?: (
    ctx: bigint,
    text: unknown,
    textLen: bigint | number,
    speed: number,
    outPcm: unknown,
    maxSamples: bigint | number,
    outErr: unknown,
  ) => number;
  eliza_inference_kokoro_sample_rate?: (ctx: bigint) => number;
}
|
|
1135
|
+
|
|
1136
|
+
/**
 * Shape of the library object returned by `dlopen`: the bound symbol table
 * plus a `close()` method.
 */
interface BunFfiLib {
  close(): void;
  symbols: BunFfiSymbols;
}
|
|
1140
|
+
|
|
1141
|
+
/**
 * Shape of a `JSCallback` instance as this file uses it: a pointer value
 * (`ptr`) and a `close()` method.
 */
interface BunFfiJSCallback {
  close(): void;
  readonly ptr: number | bigint;
}
|
|
1145
|
+
|
|
1146
|
+
/**
 * Structural type this file casts the `bun:ffi` module to. It is declared
 * locally rather than imported, so the file carries no static dependency on
 * `bun:ffi`.
 */
interface BunFfiModule {
  /** Table of FFI type tags, indexed by name (e.g. `FFIType.ptr`, `FFIType.i32`). */
  FFIType: Record<string, number>;

  /** Open the library at `path` and bind the symbols described by `defs`. */
  dlopen(path: string, defs: Record<string, unknown>): BunFfiLib;

  /** Produce a pointer value for a typed array / view. */
  ptr(value: ArrayBufferView): unknown;

  /** Wrapper constructed from a pointer; `toString()` yields the string. */
  CString: new (ptr: unknown) => { toString(): string };

  /** Typed readers taking a buffer/pointer and an optional byte offset. */
  read: {
    ptr(buf: unknown, offset?: number): bigint;
    i32(buf: unknown, offset?: number): number;
    u64(buf: unknown, offset?: number): bigint;
  };

  /** Produce an `ArrayBuffer` from a pointer, with optional offset and length. */
  toArrayBuffer(ptr: bigint | number, byteOffset?: number, byteLength?: number): ArrayBuffer;

  /** Wrap a JS function, with its argument/return type tags, as a callback object. */
  JSCallback: new <F extends (...args: never[]) => unknown>(
    fn: F,
    def: { args: number[]; returns: number },
  ) => BunFfiJSCallback;
}
|
|
1166
|
+
|
|
1167
|
+
/**
 * Obtain the `bun:ffi` module synchronously, without a static import.
 *
 * Bun exposes a CJS-style `require` even to ESM modules, and `bun:ffi` is a
 * built-in that can be loaded that way. Resolving it at call time (instead of
 * via a top-level `import "bun:ffi"`) keeps this module loadable under plain
 * Node for the parts of the test suite that never touch the FFI.
 *
 * @returns The `bun:ffi` module, cast to the local `BunFfiModule` shape.
 * @throws Whatever the underlying `require` throws when `bun:ffi` cannot be
 *   resolved.
 */
function loadBunFfiModule(): BunFfiModule {
  type RequireFn = (id: string) => unknown;
  const bunGlobal = (globalThis as { Bun?: { __require?: RequireFn } }).Bun;
  const bunRequire: RequireFn | undefined = bunGlobal?.__require;

  if (typeof bunRequire !== "function") {
    // No `Bun.__require` available: build a require function anchored at this
    // file through `module.createRequire`. This path is rare on current Bun,
    // but it is kept explicit so a failure surfaces as `MODULE_NOT_FOUND`
    // (a real error) rather than a silent fall-through.
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const nodeModule = require("node:module") as {
      createRequire: (filename: string) => RequireFn;
    };
    const localRequire = nodeModule.createRequire(import.meta.url);
    return localRequire("bun:ffi") as BunFfiModule;
  }

  return bunRequire("bun:ffi") as BunFfiModule;
}
|
|
1193
|
+
|
|
1194
|
+
function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
1195
|
+
let ffi: BunFfiModule;
|
|
1196
|
+
try {
|
|
1197
|
+
ffi = loadBunFfiModule();
|
|
1198
|
+
} catch (err) {
|
|
1199
|
+
throw new VoiceLifecycleError(
|
|
1200
|
+
"kernel-missing",
|
|
1201
|
+
`[ffi-bindings] Cannot load bun:ffi while opening ${dylibPath}: ${formatFfiError(err)}`,
|
|
1202
|
+
);
|
|
1203
|
+
}
|
|
1204
|
+
const T = ffi.FFIType;
|
|
1205
|
+
|
|
1206
|
+
// Windows-only: make the fused lib's co-located backends (ggml*/llama*/mtmd
|
|
1207
|
+
// .dll) resolvable before dlopen, which otherwise fails with error 126. See
|
|
1208
|
+
// ensureWin32DllSearchDir for the full rationale.
|
|
1209
|
+
ensureWin32DllSearchDir(path.dirname(dylibPath));
|
|
1210
|
+
|
|
1211
|
+
// All `char *` arguments are typed as T.ptr — Bun's `T.cstring` is a
|
|
1212
|
+
// RETURN-only type for "library hands back a NUL-terminated string".
|
|
1213
|
+
// For inputs we encode UTF-8 to a NUL-terminated Buffer on the JS
|
|
1214
|
+
// side and pass `ffi.ptr(buffer)`.
|
|
1215
|
+
let lib: BunFfiLib | null = null;
|
|
1216
|
+
let nativeVadSymbolsAvailable = true;
|
|
1217
|
+
const nativeVadDefs = {
|
|
1218
|
+
// Native Silero VAD (ABI v3). These are additive; some transitional
|
|
1219
|
+
// builds may report ABI v3 before carrying the VAD symbols, so bind
|
|
1220
|
+
// them opportunistically and advertise unsupported if absent.
|
|
1221
|
+
eliza_inference_vad_supported: { args: [], returns: T.i32 },
|
|
1222
|
+
eliza_inference_vad_open: {
|
|
1223
|
+
args: [T.ptr, T.i32, T.ptr],
|
|
1224
|
+
returns: T.ptr,
|
|
1225
|
+
},
|
|
1226
|
+
eliza_inference_vad_process: {
|
|
1227
|
+
args: [T.usize, T.ptr, T.usize, T.ptr, T.ptr],
|
|
1228
|
+
returns: T.i32,
|
|
1229
|
+
},
|
|
1230
|
+
eliza_inference_vad_reset: { args: [T.usize, T.ptr], returns: T.i32 },
|
|
1231
|
+
eliza_inference_vad_close: { args: [T.usize], returns: T.void },
|
|
1232
|
+
};
|
|
1233
|
+
// Native openWakeWord (ABI v5). Additive; transitional builds may report
|
|
1234
|
+
// v5 before the wake-word symbols ship, so bind opportunistically and
|
|
1235
|
+
// advertise unsupported when absent. The wake-word path throws a
|
|
1236
|
+
// structured "runtime not ready" error in that case (no ONNX fallback).
|
|
1237
|
+
let wakewordSymbolsAvailable = true;
|
|
1238
|
+
const wakewordDefs = {
|
|
1239
|
+
eliza_inference_wakeword_supported: { args: [], returns: T.i32 },
|
|
1240
|
+
eliza_inference_wakeword_open: {
|
|
1241
|
+
// ctx, sample_rate_hz, head_name (cstr), out_error
|
|
1242
|
+
args: [T.ptr, T.i32, T.ptr, T.ptr],
|
|
1243
|
+
returns: T.ptr,
|
|
1244
|
+
},
|
|
1245
|
+
eliza_inference_wakeword_score: {
|
|
1246
|
+
// wake (usize), pcm (ptr), n_samples (usize), out_prob (ptr),
|
|
1247
|
+
// out_error (ptr)
|
|
1248
|
+
args: [T.usize, T.ptr, T.usize, T.ptr, T.ptr],
|
|
1249
|
+
returns: T.i32,
|
|
1250
|
+
},
|
|
1251
|
+
eliza_inference_wakeword_reset: {
|
|
1252
|
+
args: [T.usize, T.ptr],
|
|
1253
|
+
returns: T.i32,
|
|
1254
|
+
},
|
|
1255
|
+
eliza_inference_wakeword_close: {
|
|
1256
|
+
args: [T.usize],
|
|
1257
|
+
returns: T.void,
|
|
1258
|
+
},
|
|
1259
|
+
};
|
|
1260
|
+
// Native voice classifiers (ABI v6): WeSpeaker speaker encoder + pyannote
|
|
1261
|
+
// diarizer, fused into the one libelizainference handle. Additive;
|
|
1262
|
+
// transitional builds may report v6 before the classifier symbols ship, so
|
|
1263
|
+
// bind opportunistically and advertise unsupported when absent (the
|
|
1264
|
+
// fused encoder/diarizer classes throw a structured error in that case —
|
|
1265
|
+
// no standalone libvoice_classifier fallback).
|
|
1266
|
+
let speakerSymbolsAvailable = true;
|
|
1267
|
+
const speakerDefs = {
|
|
1268
|
+
eliza_inference_speaker_supported: { args: [], returns: T.i32 },
|
|
1269
|
+
eliza_inference_speaker_open: {
|
|
1270
|
+
// ctx, gguf_path (cstr or NULL), out_error
|
|
1271
|
+
args: [T.ptr, T.ptr, T.ptr],
|
|
1272
|
+
returns: T.ptr,
|
|
1273
|
+
},
|
|
1274
|
+
eliza_inference_speaker_embed: {
|
|
1275
|
+
// speaker (usize), pcm (ptr), n_samples (usize), out_embedding (ptr),
|
|
1276
|
+
// out_error (ptr)
|
|
1277
|
+
args: [T.usize, T.ptr, T.usize, T.ptr, T.ptr],
|
|
1278
|
+
returns: T.i32,
|
|
1279
|
+
},
|
|
1280
|
+
eliza_inference_speaker_close: {
|
|
1281
|
+
args: [T.usize],
|
|
1282
|
+
returns: T.void,
|
|
1283
|
+
},
|
|
1284
|
+
};
|
|
1285
|
+
let diarizSymbolsAvailable = true;
|
|
1286
|
+
const diarizDefs = {
|
|
1287
|
+
eliza_inference_diariz_supported: { args: [], returns: T.i32 },
|
|
1288
|
+
eliza_inference_diariz_open: {
|
|
1289
|
+
// ctx, gguf_path (cstr or NULL), out_error
|
|
1290
|
+
args: [T.ptr, T.ptr, T.ptr],
|
|
1291
|
+
returns: T.ptr,
|
|
1292
|
+
},
|
|
1293
|
+
eliza_inference_diariz_segment: {
|
|
1294
|
+
// diariz (usize), pcm (ptr), n_samples (usize), out_labels (ptr),
|
|
1295
|
+
// io_n_labels (ptr), out_error (ptr)
|
|
1296
|
+
args: [T.usize, T.ptr, T.usize, T.ptr, T.ptr, T.ptr],
|
|
1297
|
+
returns: T.i32,
|
|
1298
|
+
},
|
|
1299
|
+
eliza_inference_diariz_close: {
|
|
1300
|
+
args: [T.usize],
|
|
1301
|
+
returns: T.void,
|
|
1302
|
+
},
|
|
1303
|
+
};
|
|
1304
|
+
// Streaming LLM (additive on top of v3). Bound opportunistically — when
|
|
1305
|
+
// absent the runner reports native streaming as unsupported.
|
|
1306
|
+
let llmStreamSymbolsAvailable = true;
|
|
1307
|
+
// ABI v8 streaming-LLM capability probes. Bound as their own family so a
|
|
1308
|
+
// v7 library (which has the `llm_stream_*` symbols but not these probes)
|
|
1309
|
+
// still binds `llmStreamDefs` while reporting MTP / KV-quant unsupported.
|
|
1310
|
+
let llmCapabilitySymbolsAvailable = true;
|
|
1311
|
+
const llmCapabilityDefs = {
|
|
1312
|
+
eliza_inference_llm_mtp_supported: { args: [], returns: T.i32 },
|
|
1313
|
+
eliza_inference_llm_kv_quant_supported: { args: [], returns: T.i32 },
|
|
1314
|
+
};
|
|
1315
|
+
const llmStreamDefs = {
|
|
1316
|
+
eliza_inference_llm_stream_open: {
|
|
1317
|
+
// ctx (ptr), cfg (ptr to eliza_llm_stream_config_t), out_error (ptr)
|
|
1318
|
+
args: [T.ptr, T.ptr, T.ptr],
|
|
1319
|
+
returns: T.ptr,
|
|
1320
|
+
},
|
|
1321
|
+
eliza_inference_llm_stream_prefill: {
|
|
1322
|
+
args: [T.usize, T.ptr, T.usize, T.ptr],
|
|
1323
|
+
returns: T.i32,
|
|
1324
|
+
},
|
|
1325
|
+
eliza_inference_llm_stream_next: {
|
|
1326
|
+
// stream, tokens_out, tokens_cap, num_tokens_out, text_out,
|
|
1327
|
+
// text_cap, drafter_drafted_out, drafter_accepted_out, out_error
|
|
1328
|
+
args: [
|
|
1329
|
+
T.usize,
|
|
1330
|
+
T.ptr,
|
|
1331
|
+
T.usize,
|
|
1332
|
+
T.ptr,
|
|
1333
|
+
T.ptr,
|
|
1334
|
+
T.usize,
|
|
1335
|
+
T.ptr,
|
|
1336
|
+
T.ptr,
|
|
1337
|
+
T.ptr,
|
|
1338
|
+
],
|
|
1339
|
+
returns: T.i32,
|
|
1340
|
+
},
|
|
1341
|
+
eliza_inference_llm_stream_cancel: {
|
|
1342
|
+
args: [T.usize],
|
|
1343
|
+
returns: T.i32,
|
|
1344
|
+
},
|
|
1345
|
+
eliza_inference_llm_stream_save_slot: {
|
|
1346
|
+
args: [T.usize, T.ptr, T.ptr],
|
|
1347
|
+
returns: T.i32,
|
|
1348
|
+
},
|
|
1349
|
+
eliza_inference_llm_stream_restore_slot: {
|
|
1350
|
+
args: [T.usize, T.ptr, T.ptr],
|
|
1351
|
+
returns: T.i32,
|
|
1352
|
+
},
|
|
1353
|
+
eliza_inference_llm_stream_close: {
|
|
1354
|
+
args: [T.usize],
|
|
1355
|
+
returns: T.void,
|
|
1356
|
+
},
|
|
1357
|
+
};
|
|
1358
|
+
const referenceEncodeDefs = {
|
|
1359
|
+
// OmniVoice reference encode (ABI v4) — optional for transitional
|
|
1360
|
+
// fused libraries. Default TTS/ASR must still load when reference-clone
|
|
1361
|
+
// freezing is unavailable; encodeReferenceSupported() exposes that state.
|
|
1362
|
+
eliza_inference_encode_reference: {
|
|
1363
|
+
// ctx, pcm, n_samples, sample_rate_hz, out_K, out_ref_T, out_tokens (int**), out_error
|
|
1364
|
+
args: [T.ptr, T.ptr, T.usize, T.i32, T.ptr, T.ptr, T.ptr, T.ptr],
|
|
1365
|
+
returns: T.i32,
|
|
1366
|
+
},
|
|
1367
|
+
eliza_inference_free_tokens: { args: [T.usize], returns: T.void },
|
|
1368
|
+
};
|
|
1369
|
+
let referenceEncodeSymbolsAvailable = true;
|
|
1370
|
+
// Text-adjacent modalities (ABI v9): embeddings, mmproj vision describe, and
|
|
1371
|
+
// the tokenizer over the loaded text vocab. They ship together in a v9
|
|
1372
|
+
// build; bound and gated as one block layered on top of the v8 surface so
|
|
1373
|
+
// the cascade peels them when a v8 library is loaded. `free_tokens` is
|
|
1374
|
+
// re-listed here (a v9 build that lacks reference-encode still needs it for
|
|
1375
|
+
// `tokenize`'s buffer); identical defs merge harmlessly.
|
|
1376
|
+
let textModalitiesSymbolsAvailable = true;
|
|
1377
|
+
const textModalitiesDefs = {
|
|
1378
|
+
eliza_inference_embed_supported: { args: [], returns: T.i32 },
|
|
1379
|
+
eliza_inference_embed: {
|
|
1380
|
+
// ctx, text, text_len, pooling, out_embedding, out_capacity, out_dim, out_error
|
|
1381
|
+
args: [T.ptr, T.ptr, T.usize, T.i32, T.ptr, T.usize, T.ptr, T.ptr],
|
|
1382
|
+
returns: T.i32,
|
|
1383
|
+
},
|
|
1384
|
+
eliza_inference_vision_supported: { args: [], returns: T.i32 },
|
|
1385
|
+
eliza_inference_describe_image: {
|
|
1386
|
+
// ctx, image_bytes, n_bytes, mmproj_path, prompt, out_text, max_text_bytes, out_error
|
|
1387
|
+
args: [T.ptr, T.ptr, T.usize, T.ptr, T.ptr, T.ptr, T.usize, T.ptr],
|
|
1388
|
+
returns: T.i32,
|
|
1389
|
+
},
|
|
1390
|
+
eliza_inference_tokenize_supported: { args: [], returns: T.i32 },
|
|
1391
|
+
eliza_inference_tokenize: {
|
|
1392
|
+
// ctx, text, text_len, add_special, parse_special, out_tokens (int**), out_n, out_error
|
|
1393
|
+
args: [T.ptr, T.ptr, T.usize, T.i32, T.i32, T.ptr, T.ptr, T.ptr],
|
|
1394
|
+
returns: T.i32,
|
|
1395
|
+
},
|
|
1396
|
+
eliza_inference_detokenize: {
|
|
1397
|
+
// ctx, tokens, n_tokens, remove_special, unparse_special, out_text, max_text_bytes, out_error
|
|
1398
|
+
args: [T.ptr, T.ptr, T.usize, T.i32, T.i32, T.ptr, T.usize, T.ptr],
|
|
1399
|
+
returns: T.i32,
|
|
1400
|
+
},
|
|
1401
|
+
eliza_inference_free_tokens: { args: [T.usize], returns: T.void },
|
|
1402
|
+
};
|
|
1403
|
+
// Kokoro TTS (ABI v10): the in-process Kokoro engine folded into the fused
|
|
1404
|
+
// handle so the mobile path stops POSTing to the local-TCP llama-server
|
|
1405
|
+
// route. Bound as its own family layered on top of the v9 surface; the
|
|
1406
|
+
// cascade peels it when a v9 library is loaded. `kokoroSupported()` reports
|
|
1407
|
+
// false in that case and the Kokoro FFI runtime refuses (no TCP fallback).
|
|
1408
|
+
let kokoroSymbolsAvailable = true;
|
|
1409
|
+
const kokoroDefs = {
|
|
1410
|
+
eliza_inference_kokoro_supported: { args: [], returns: T.i32 },
|
|
1411
|
+
eliza_inference_kokoro_load: {
|
|
1412
|
+
// ctx, gguf_path, voice_bin_path, style_dim, out_error
|
|
1413
|
+
args: [T.ptr, T.ptr, T.ptr, T.i32, T.ptr],
|
|
1414
|
+
returns: T.i32,
|
|
1415
|
+
},
|
|
1416
|
+
eliza_inference_kokoro_synthesize: {
|
|
1417
|
+
// ctx, text, text_len, speed, out_pcm, max_samples, out_error
|
|
1418
|
+
args: [T.ptr, T.ptr, T.usize, T.f32, T.ptr, T.usize, T.ptr],
|
|
1419
|
+
returns: T.i32,
|
|
1420
|
+
},
|
|
1421
|
+
eliza_inference_kokoro_sample_rate: { args: [T.ptr], returns: T.i32 },
|
|
1422
|
+
};
|
|
1423
|
+
// End-of-turn scoring (ABI v11): a single causal forward pass over a
|
|
1424
|
+
// pre-tokenized partial transcript returns P(end-of-turn token). Layered on
|
|
1425
|
+
// top of the v10 surface; the cascade peels it when a v10 library is loaded
|
|
1426
|
+
// (the `eotSupported()` probe then reports false and the composite EOT
|
|
1427
|
+
// classifier falls back to the heuristic-only signal).
|
|
1428
|
+
let eotSymbolsAvailable = true;
|
|
1429
|
+
const eotDefs = {
|
|
1430
|
+
eliza_inference_llm_eot_supported: { args: [], returns: T.i32 },
|
|
1431
|
+
eliza_inference_llm_eot_score: {
|
|
1432
|
+
// ctx, token_ids, num_tokens, target_token_id,
|
|
1433
|
+
// out_target_prob, out_top_token, out_top_prob, out_error
|
|
1434
|
+
args: [T.ptr, T.ptr, T.usize, T.i32, T.ptr, T.ptr, T.ptr, T.ptr],
|
|
1435
|
+
returns: T.i32,
|
|
1436
|
+
},
|
|
1437
|
+
};
|
|
1438
|
+
// ABI v12 — fused ASR word timestamps.
|
|
1439
|
+
let timedAsrSymbolsAvailable = true;
|
|
1440
|
+
const timedAsrDefs = {
|
|
1441
|
+
eliza_inference_asr_timestamps_supported: { args: [], returns: T.i32 },
|
|
1442
|
+
eliza_inference_asr_transcribe_timed: {
|
|
1443
|
+
// ctx, pcm, n_samples, sr, out_text, max_text_bytes,
|
|
1444
|
+
// out_word_start_ms, out_word_end_ms, io_n_words, out_error
|
|
1445
|
+
args: [
|
|
1446
|
+
T.ptr,
|
|
1447
|
+
T.ptr,
|
|
1448
|
+
T.usize,
|
|
1449
|
+
T.i32,
|
|
1450
|
+
T.ptr,
|
|
1451
|
+
T.usize,
|
|
1452
|
+
T.ptr,
|
|
1453
|
+
T.ptr,
|
|
1454
|
+
T.ptr,
|
|
1455
|
+
T.ptr,
|
|
1456
|
+
],
|
|
1457
|
+
returns: T.i32,
|
|
1458
|
+
},
|
|
1459
|
+
};
|
|
1460
|
+
// Streaming mmproj vision describe (ABI v13): open returns an EliLlmStream*
|
|
1461
|
+
// primed with the image+prompt KV; the caller drives the existing
|
|
1462
|
+
// `eliza_inference_llm_stream_next` loop. Layered on top of the v12 surface;
|
|
1463
|
+
// the cascade peels it when a <=v12 library is loaded (the
|
|
1464
|
+
// `visionStreamSupported()` probe then reports false and IMAGE_DESCRIPTION
|
|
1465
|
+
// falls back to the buffered `eliza_inference_describe_image`).
|
|
1466
|
+
let visionStreamSymbolsAvailable = true;
|
|
1467
|
+
const visionStreamDefs = {
|
|
1468
|
+
eliza_inference_vision_stream_supported: { args: [], returns: T.i32 },
|
|
1469
|
+
eliza_inference_describe_image_stream_open: {
|
|
1470
|
+
// ctx, image_bytes, n_bytes, mmproj_path, prompt, out_error -> EliLlmStream*
|
|
1471
|
+
args: [T.ptr, T.ptr, T.usize, T.ptr, T.ptr, T.ptr],
|
|
1472
|
+
returns: T.ptr,
|
|
1473
|
+
},
|
|
1474
|
+
};
|
|
1475
|
+
const coreDefs = {
|
|
1476
|
+
eliza_inference_abi_version: { args: [], returns: T.cstring },
|
|
1477
|
+
eliza_inference_create: {
|
|
1478
|
+
args: [T.ptr, T.ptr],
|
|
1479
|
+
returns: T.ptr,
|
|
1480
|
+
},
|
|
1481
|
+
eliza_inference_destroy: { args: [T.ptr], returns: T.void },
|
|
1482
|
+
eliza_inference_mmap_acquire: {
|
|
1483
|
+
args: [T.ptr, T.ptr, T.ptr],
|
|
1484
|
+
returns: T.i32,
|
|
1485
|
+
},
|
|
1486
|
+
eliza_inference_mmap_evict: {
|
|
1487
|
+
args: [T.ptr, T.ptr, T.ptr],
|
|
1488
|
+
returns: T.i32,
|
|
1489
|
+
},
|
|
1490
|
+
eliza_inference_tts_synthesize: {
|
|
1491
|
+
args: [T.ptr, T.ptr, T.usize, T.ptr, T.ptr, T.usize, T.ptr],
|
|
1492
|
+
returns: T.i32,
|
|
1493
|
+
},
|
|
1494
|
+
eliza_inference_asr_transcribe: {
|
|
1495
|
+
args: [T.ptr, T.ptr, T.usize, T.i32, T.ptr, T.usize, T.ptr],
|
|
1496
|
+
returns: T.i32,
|
|
1497
|
+
},
|
|
1498
|
+
// Streaming TTS + native verifier callback (ABI v2). The
|
|
1499
|
+
// function-pointer args are passed as raw pointer values
|
|
1500
|
+
// (`JSCallback.ptr`, or 0n to clear) so this binding owns the
|
|
1501
|
+
// JSCallback lifetime explicitly — see `ttsSynthesizeStream` /
|
|
1502
|
+
// `setVerifierCallback` below.
|
|
1503
|
+
eliza_inference_tts_stream_supported: { args: [], returns: T.i32 },
|
|
1504
|
+
eliza_inference_tts_synthesize_stream: {
|
|
1505
|
+
// ctx, text, text_len, speaker, on_chunk (fn ptr), user_data, out_error
|
|
1506
|
+
args: [T.ptr, T.ptr, T.usize, T.ptr, T.usize, T.usize, T.ptr],
|
|
1507
|
+
returns: T.i32,
|
|
1508
|
+
},
|
|
1509
|
+
eliza_inference_cancel_tts: { args: [T.ptr, T.ptr], returns: T.i32 },
|
|
1510
|
+
eliza_inference_set_verifier_callback: {
|
|
1511
|
+
// ctx, cb (fn ptr — 0 to clear), user_data, out_error
|
|
1512
|
+
args: [T.ptr, T.usize, T.usize, T.ptr],
|
|
1513
|
+
returns: T.i32,
|
|
1514
|
+
},
|
|
1515
|
+
// Streaming ASR (ABI v2).
|
|
1516
|
+
eliza_inference_asr_stream_supported: { args: [], returns: T.i32 },
|
|
1517
|
+
eliza_inference_asr_stream_open: {
|
|
1518
|
+
args: [T.ptr, T.i32, T.ptr],
|
|
1519
|
+
returns: T.ptr,
|
|
1520
|
+
},
|
|
1521
|
+
eliza_inference_asr_stream_feed: {
|
|
1522
|
+
// stream handle is a raw C pointer → pass as usize.
|
|
1523
|
+
args: [T.usize, T.ptr, T.usize, T.ptr],
|
|
1524
|
+
returns: T.i32,
|
|
1525
|
+
},
|
|
1526
|
+
eliza_inference_asr_stream_partial: {
|
|
1527
|
+
args: [T.usize, T.ptr, T.usize, T.ptr, T.ptr, T.ptr],
|
|
1528
|
+
returns: T.i32,
|
|
1529
|
+
},
|
|
1530
|
+
eliza_inference_asr_stream_finish: {
|
|
1531
|
+
args: [T.usize, T.ptr, T.usize, T.ptr, T.ptr, T.ptr],
|
|
1532
|
+
returns: T.i32,
|
|
1533
|
+
},
|
|
1534
|
+
eliza_inference_asr_stream_close: { args: [T.usize], returns: T.void },
|
|
1535
|
+
// Bun 1.3.x accepts raw pointer values passed back into C as
|
|
1536
|
+
// `usize`, while `ptr` is for JS-owned ArrayBuffer pointers.
|
|
1537
|
+
eliza_inference_free_string: { args: [T.usize], returns: T.void },
|
|
1538
|
+
};
|
|
1539
|
+
// Try the maximal additive symbol set first, then progressively drop
|
|
1540
|
+
// optional families. Each fallback flips a sentinel so `*Supported()` probes
|
|
1541
|
+
// report false instead of making an unavailable native call.
|
|
1542
|
+
// The v6 voice-classifier families (speaker encoder + diarizer) ship
|
|
1543
|
+
// together in the fused build, so they are bound and gated as one
|
|
1544
|
+
// `classifiers` block layered on top of the v5 wake-word family. The
|
|
1545
|
+
// cascade peels them in priority order: full v6 → v6-without-classifiers
|
|
1546
|
+
// (a real v5 build) → progressively smaller. Each rung flips a sentinel so
|
|
1547
|
+
// `*Supported()` reports false instead of calling an unbound symbol.
|
|
1548
|
+
const classifierDefs = { ...speakerDefs, ...diarizDefs };
|
|
1549
|
+
const attempts = [
|
|
1550
|
+
{
|
|
1551
|
+
// Full v13 surface (v12 + token-by-token mmproj vision describe).
|
|
1552
|
+
defs: {
|
|
1553
|
+
...coreDefs,
|
|
1554
|
+
...referenceEncodeDefs,
|
|
1555
|
+
...nativeVadDefs,
|
|
1556
|
+
...wakewordDefs,
|
|
1557
|
+
...classifierDefs,
|
|
1558
|
+
...llmStreamDefs,
|
|
1559
|
+
...llmCapabilityDefs,
|
|
1560
|
+
...textModalitiesDefs,
|
|
1561
|
+
...kokoroDefs,
|
|
1562
|
+
...eotDefs,
|
|
1563
|
+
...timedAsrDefs,
|
|
1564
|
+
...visionStreamDefs,
|
|
1565
|
+
},
|
|
1566
|
+
referenceEncode: true,
|
|
1567
|
+
nativeVad: true,
|
|
1568
|
+
wakeword: true,
|
|
1569
|
+
classifiers: true,
|
|
1570
|
+
llmStream: true,
|
|
1571
|
+
llmCapability: true,
|
|
1572
|
+
textModalities: true,
|
|
1573
|
+
kokoro: true,
|
|
1574
|
+
eot: true,
|
|
1575
|
+
timedAsr: true,
|
|
1576
|
+
visionStream: true,
|
|
1577
|
+
},
|
|
1578
|
+
{
|
|
1579
|
+
// Full v12 surface (v11 + the in-process ASR word-timestamp decoder);
|
|
1580
|
+
// a v12 build lacks the v13 streaming-vision symbols.
|
|
1581
|
+
defs: {
|
|
1582
|
+
...coreDefs,
|
|
1583
|
+
...referenceEncodeDefs,
|
|
1584
|
+
...nativeVadDefs,
|
|
1585
|
+
...wakewordDefs,
|
|
1586
|
+
...classifierDefs,
|
|
1587
|
+
...llmStreamDefs,
|
|
1588
|
+
...llmCapabilityDefs,
|
|
1589
|
+
...textModalitiesDefs,
|
|
1590
|
+
...kokoroDefs,
|
|
1591
|
+
...eotDefs,
|
|
1592
|
+
...timedAsrDefs,
|
|
1593
|
+
},
|
|
1594
|
+
referenceEncode: true,
|
|
1595
|
+
nativeVad: true,
|
|
1596
|
+
wakeword: true,
|
|
1597
|
+
classifiers: true,
|
|
1598
|
+
llmStream: true,
|
|
1599
|
+
llmCapability: true,
|
|
1600
|
+
textModalities: true,
|
|
1601
|
+
kokoro: true,
|
|
1602
|
+
eot: true,
|
|
1603
|
+
timedAsr: true,
|
|
1604
|
+
},
|
|
1605
|
+
{
|
|
1606
|
+
// Full v11 surface (v10 + the in-process end-of-turn scorer); a v11
|
|
1607
|
+
// build lacks the v12 timed-ASR symbols.
|
|
1608
|
+
defs: {
|
|
1609
|
+
...coreDefs,
|
|
1610
|
+
...referenceEncodeDefs,
|
|
1611
|
+
...nativeVadDefs,
|
|
1612
|
+
...wakewordDefs,
|
|
1613
|
+
...classifierDefs,
|
|
1614
|
+
...llmStreamDefs,
|
|
1615
|
+
...llmCapabilityDefs,
|
|
1616
|
+
...textModalitiesDefs,
|
|
1617
|
+
...kokoroDefs,
|
|
1618
|
+
...eotDefs,
|
|
1619
|
+
},
|
|
1620
|
+
referenceEncode: true,
|
|
1621
|
+
nativeVad: true,
|
|
1622
|
+
wakeword: true,
|
|
1623
|
+
classifiers: true,
|
|
1624
|
+
llmStream: true,
|
|
1625
|
+
llmCapability: true,
|
|
1626
|
+
textModalities: true,
|
|
1627
|
+
kokoro: true,
|
|
1628
|
+
eot: true,
|
|
1629
|
+
timedAsr: false,
|
|
1630
|
+
},
|
|
1631
|
+
{
|
|
1632
|
+
// Full v10 surface (v9 + the in-process Kokoro block).
|
|
1633
|
+
defs: {
|
|
1634
|
+
...coreDefs,
|
|
1635
|
+
...referenceEncodeDefs,
|
|
1636
|
+
...nativeVadDefs,
|
|
1637
|
+
...wakewordDefs,
|
|
1638
|
+
...classifierDefs,
|
|
1639
|
+
...llmStreamDefs,
|
|
1640
|
+
...llmCapabilityDefs,
|
|
1641
|
+
...textModalitiesDefs,
|
|
1642
|
+
...kokoroDefs,
|
|
1643
|
+
},
|
|
1644
|
+
referenceEncode: true,
|
|
1645
|
+
nativeVad: true,
|
|
1646
|
+
wakeword: true,
|
|
1647
|
+
classifiers: true,
|
|
1648
|
+
llmStream: true,
|
|
1649
|
+
llmCapability: true,
|
|
1650
|
+
textModalities: true,
|
|
1651
|
+
kokoro: true,
|
|
1652
|
+
},
|
|
1653
|
+
{
|
|
1654
|
+
// Full v9 surface (no v10 Kokoro block).
|
|
1655
|
+
defs: {
|
|
1656
|
+
...coreDefs,
|
|
1657
|
+
...referenceEncodeDefs,
|
|
1658
|
+
...nativeVadDefs,
|
|
1659
|
+
...wakewordDefs,
|
|
1660
|
+
...classifierDefs,
|
|
1661
|
+
...llmStreamDefs,
|
|
1662
|
+
...llmCapabilityDefs,
|
|
1663
|
+
...textModalitiesDefs,
|
|
1664
|
+
},
|
|
1665
|
+
referenceEncode: true,
|
|
1666
|
+
nativeVad: true,
|
|
1667
|
+
wakeword: true,
|
|
1668
|
+
classifiers: true,
|
|
1669
|
+
llmStream: true,
|
|
1670
|
+
llmCapability: true,
|
|
1671
|
+
textModalities: true,
|
|
1672
|
+
},
|
|
1673
|
+
{
|
|
1674
|
+
// Full v8 surface (no v9 text-modality block).
|
|
1675
|
+
defs: {
|
|
1676
|
+
...coreDefs,
|
|
1677
|
+
...referenceEncodeDefs,
|
|
1678
|
+
...nativeVadDefs,
|
|
1679
|
+
...wakewordDefs,
|
|
1680
|
+
...classifierDefs,
|
|
1681
|
+
...llmStreamDefs,
|
|
1682
|
+
...llmCapabilityDefs,
|
|
1683
|
+
},
|
|
1684
|
+
referenceEncode: true,
|
|
1685
|
+
nativeVad: true,
|
|
1686
|
+
wakeword: true,
|
|
1687
|
+
classifiers: true,
|
|
1688
|
+
llmStream: true,
|
|
1689
|
+
llmCapability: true,
|
|
1690
|
+
textModalities: false,
|
|
1691
|
+
},
|
|
1692
|
+
{
|
|
1693
|
+
defs: {
|
|
1694
|
+
...coreDefs,
|
|
1695
|
+
...nativeVadDefs,
|
|
1696
|
+
...wakewordDefs,
|
|
1697
|
+
...classifierDefs,
|
|
1698
|
+
...llmStreamDefs,
|
|
1699
|
+
...llmCapabilityDefs,
|
|
1700
|
+
},
|
|
1701
|
+
referenceEncode: false,
|
|
1702
|
+
nativeVad: true,
|
|
1703
|
+
wakeword: true,
|
|
1704
|
+
classifiers: true,
|
|
1705
|
+
llmStream: true,
|
|
1706
|
+
llmCapability: true,
|
|
1707
|
+
textModalities: false,
|
|
1708
|
+
},
|
|
1709
|
+
{
|
|
1710
|
+
defs: {
|
|
1711
|
+
...coreDefs,
|
|
1712
|
+
...referenceEncodeDefs,
|
|
1713
|
+
...nativeVadDefs,
|
|
1714
|
+
...wakewordDefs,
|
|
1715
|
+
...llmStreamDefs,
|
|
1716
|
+
},
|
|
1717
|
+
referenceEncode: true,
|
|
1718
|
+
nativeVad: true,
|
|
1719
|
+
wakeword: true,
|
|
1720
|
+
classifiers: false,
|
|
1721
|
+
llmStream: true,
|
|
1722
|
+
llmCapability: false,
|
|
1723
|
+
},
|
|
1724
|
+
{
|
|
1725
|
+
defs: {
|
|
1726
|
+
...coreDefs,
|
|
1727
|
+
...nativeVadDefs,
|
|
1728
|
+
...wakewordDefs,
|
|
1729
|
+
...llmStreamDefs,
|
|
1730
|
+
},
|
|
1731
|
+
referenceEncode: false,
|
|
1732
|
+
nativeVad: true,
|
|
1733
|
+
wakeword: true,
|
|
1734
|
+
classifiers: false,
|
|
1735
|
+
llmStream: true,
|
|
1736
|
+
llmCapability: false,
|
|
1737
|
+
},
|
|
1738
|
+
{
|
|
1739
|
+
defs: {
|
|
1740
|
+
...coreDefs,
|
|
1741
|
+
...referenceEncodeDefs,
|
|
1742
|
+
...nativeVadDefs,
|
|
1743
|
+
...llmStreamDefs,
|
|
1744
|
+
},
|
|
1745
|
+
referenceEncode: true,
|
|
1746
|
+
nativeVad: true,
|
|
1747
|
+
wakeword: false,
|
|
1748
|
+
classifiers: false,
|
|
1749
|
+
llmStream: true,
|
|
1750
|
+
llmCapability: false,
|
|
1751
|
+
},
|
|
1752
|
+
{
|
|
1753
|
+
defs: { ...coreDefs, ...nativeVadDefs, ...llmStreamDefs },
|
|
1754
|
+
referenceEncode: false,
|
|
1755
|
+
nativeVad: true,
|
|
1756
|
+
wakeword: false,
|
|
1757
|
+
classifiers: false,
|
|
1758
|
+
llmStream: true,
|
|
1759
|
+
llmCapability: false,
|
|
1760
|
+
},
|
|
1761
|
+
{
|
|
1762
|
+
defs: { ...coreDefs, ...referenceEncodeDefs, ...nativeVadDefs },
|
|
1763
|
+
referenceEncode: true,
|
|
1764
|
+
nativeVad: true,
|
|
1765
|
+
wakeword: false,
|
|
1766
|
+
classifiers: false,
|
|
1767
|
+
llmStream: false,
|
|
1768
|
+
llmCapability: false,
|
|
1769
|
+
},
|
|
1770
|
+
{
|
|
1771
|
+
defs: { ...coreDefs, ...nativeVadDefs },
|
|
1772
|
+
referenceEncode: false,
|
|
1773
|
+
nativeVad: true,
|
|
1774
|
+
wakeword: false,
|
|
1775
|
+
classifiers: false,
|
|
1776
|
+
llmStream: false,
|
|
1777
|
+
llmCapability: false,
|
|
1778
|
+
},
|
|
1779
|
+
{
|
|
1780
|
+
defs: { ...coreDefs, ...referenceEncodeDefs },
|
|
1781
|
+
referenceEncode: true,
|
|
1782
|
+
nativeVad: false,
|
|
1783
|
+
wakeword: false,
|
|
1784
|
+
classifiers: false,
|
|
1785
|
+
llmStream: false,
|
|
1786
|
+
llmCapability: false,
|
|
1787
|
+
},
|
|
1788
|
+
{
|
|
1789
|
+
defs: coreDefs,
|
|
1790
|
+
referenceEncode: false,
|
|
1791
|
+
nativeVad: false,
|
|
1792
|
+
wakeword: false,
|
|
1793
|
+
classifiers: false,
|
|
1794
|
+
llmStream: false,
|
|
1795
|
+
llmCapability: false,
|
|
1796
|
+
},
|
|
1797
|
+
];
|
|
1798
|
+
let lastOpenError: unknown = null;
|
|
1799
|
+
for (const attempt of attempts) {
|
|
1800
|
+
try {
|
|
1801
|
+
lib = ffi.dlopen(dylibPath, attempt.defs);
|
|
1802
|
+
referenceEncodeSymbolsAvailable = attempt.referenceEncode;
|
|
1803
|
+
nativeVadSymbolsAvailable = attempt.nativeVad;
|
|
1804
|
+
wakewordSymbolsAvailable = attempt.wakeword;
|
|
1805
|
+
speakerSymbolsAvailable = attempt.classifiers;
|
|
1806
|
+
diarizSymbolsAvailable = attempt.classifiers;
|
|
1807
|
+
llmStreamSymbolsAvailable = attempt.llmStream;
|
|
1808
|
+
llmCapabilitySymbolsAvailable = attempt.llmCapability ?? false;
|
|
1809
|
+
textModalitiesSymbolsAvailable =
|
|
1810
|
+
(attempt as { textModalities?: boolean }).textModalities ?? false;
|
|
1811
|
+
kokoroSymbolsAvailable =
|
|
1812
|
+
(attempt as { kokoro?: boolean }).kokoro ?? false;
|
|
1813
|
+
eotSymbolsAvailable = (attempt as { eot?: boolean }).eot ?? false;
|
|
1814
|
+
timedAsrSymbolsAvailable =
|
|
1815
|
+
(attempt as { timedAsr?: boolean }).timedAsr ?? false;
|
|
1816
|
+
visionStreamSymbolsAvailable =
|
|
1817
|
+
(attempt as { visionStream?: boolean }).visionStream ?? false;
|
|
1818
|
+
break;
|
|
1819
|
+
} catch (err) {
|
|
1820
|
+
lastOpenError = err;
|
|
1821
|
+
}
|
|
1822
|
+
}
|
|
1823
|
+
if (lib === null) {
|
|
1824
|
+
throw new VoiceLifecycleError(
|
|
1825
|
+
"kernel-missing",
|
|
1826
|
+
`[ffi-bindings] Failed to open libelizainference at ${dylibPath}: ${formatFfiError(lastOpenError)}`,
|
|
1827
|
+
);
|
|
1828
|
+
}
|
|
1829
|
+
const loadedLib = lib;
|
|
1830
|
+
|
|
1831
|
+
// ABI version check. Older ABIs are tolerated at reduced capability; e.g. v3 is accepted only
|
|
1832
|
+
// when the optional reference-encode symbols are absent so default TTS/ASR
|
|
1833
|
+
// can still run while sample-to-profile freezing stays explicitly disabled.
|
|
1834
|
+
const reported = readCString(
|
|
1835
|
+
loadedLib.symbols.eliza_inference_abi_version(),
|
|
1836
|
+
ffi,
|
|
1837
|
+
);
|
|
1838
|
+
// v8 added the streaming-LLM text-parity surface (same-file
|
|
1839
|
+
// MTP speculative decoding + KV-cache quant + per-load GPU layers, probed
|
|
1840
|
+
// via `eliza_inference_llm_{mtp,kv_quant}_supported()`). A v7 library has
|
|
1841
|
+
// the identical voice/ASR/VAD symbol surface but lacks those LLM
|
|
1842
|
+
// optimizations, so it is still accepted for voice — the new capability
|
|
1843
|
+
// probes report unsupported, and the fused TEXT path refuses to route
|
|
1844
|
+
// through it (the anti-regression guard). Older fused builds may still be
|
|
1845
|
+
// useful at degraded capability:
|
|
1846
|
+
// - v7: real Silero VAD; LLM-text optimizations absent (probed).
|
|
1847
|
+
// - v6: same symbols as v7; VAD may be a stub (probed at runtime).
|
|
1848
|
+
// - v5: no speaker/diarizer classifiers — JS reports them unsupported.
|
|
1849
|
+
// - v4: additionally no wake-word — JS reports wake-word unsupported.
|
|
1850
|
+
// - v3: additionally no reference-encode — accepted only when the
|
|
1851
|
+
// optional reference-encode symbols are absent from the binding.
|
|
1852
|
+
// v10 adds the in-process Kokoro block. A v9 library has the identical
|
|
1853
|
+
// voice/ASR/VAD/LLM/text surface but lacks the v10 Kokoro symbols
|
|
1854
|
+
// (`eliza_inference_kokoro_*`), so it is accepted only when those symbols
|
|
1855
|
+
// are absent — the `kokoroSupported()` probe then reports false and the
|
|
1856
|
+
// Kokoro FFI runtime refuses (no TCP fallback on mobile). A v8 library
|
|
1857
|
+
// additionally lacks the v9 text-modality symbols (embeddings, vision,
|
|
1858
|
+
// tokenizer), accepted only when those are absent too.
|
|
1859
|
+
const abiOk =
|
|
1860
|
+
reported === String(ELIZA_INFERENCE_ABI_VERSION) ||
|
|
1861
|
+
(reported === "12" && !visionStreamSymbolsAvailable) ||
|
|
1862
|
+
(reported === "11" && !timedAsrSymbolsAvailable) ||
|
|
1863
|
+
(reported === "10" && !eotSymbolsAvailable && !timedAsrSymbolsAvailable) ||
|
|
1864
|
+
(reported === "9" && !kokoroSymbolsAvailable && !eotSymbolsAvailable) ||
|
|
1865
|
+
(reported === "8" &&
|
|
1866
|
+
!kokoroSymbolsAvailable &&
|
|
1867
|
+
!textModalitiesSymbolsAvailable) ||
|
|
1868
|
+
reported === "7" ||
|
|
1869
|
+
reported === "6" ||
|
|
1870
|
+
(reported === "5" && !speakerSymbolsAvailable && !diarizSymbolsAvailable) ||
|
|
1871
|
+
(reported === "4" &&
|
|
1872
|
+
!wakewordSymbolsAvailable &&
|
|
1873
|
+
!speakerSymbolsAvailable &&
|
|
1874
|
+
!diarizSymbolsAvailable) ||
|
|
1875
|
+
(reported === "3" &&
|
|
1876
|
+
!wakewordSymbolsAvailable &&
|
|
1877
|
+
!speakerSymbolsAvailable &&
|
|
1878
|
+
!diarizSymbolsAvailable &&
|
|
1879
|
+
!referenceEncodeSymbolsAvailable);
|
|
1880
|
+
if (!abiOk) {
|
|
1881
|
+
loadedLib.close();
|
|
1882
|
+
throw new VoiceLifecycleError(
|
|
1883
|
+
"kernel-missing",
|
|
1884
|
+
`[ffi-bindings] ABI mismatch: binding expected v${ELIZA_INFERENCE_ABI_VERSION}, ` +
|
|
1885
|
+
`library at ${dylibPath} reports v${reported}. The fused build was produced ` +
|
|
1886
|
+
`against a different ffi.h — rebuild against the current header.`,
|
|
1887
|
+
);
|
|
1888
|
+
}
|
|
1889
|
+
|
|
1890
|
+
/**
 * Consume the diagnostic string the library stored in `*outErrPtr`.
 *
 * `outErrPtrBuf` is the one-element slot that was handed to a native call as
 * its `char **` error out-parameter. When the slot is still zero the call
 * left no diagnostic and null is returned. Otherwise the NUL-terminated
 * string is copied into a JS string and the library-allocated buffer is
 * released through `eliza_inference_free_string`.
 *
 * @param outErrPtrBuf Slot previously passed to the native call.
 * @returns The diagnostic message, or null when the slot holds no pointer.
 * @throws VoiceLifecycleError ("kernel-missing") when the pointer value does
 *   not fit in the JS safe-integer range.
 */
function takeError(outErrPtrBuf: BigUint64Array): string | null {
  const ptrValue = outErrPtrBuf[0];
  if (ptrValue === undefined || ptrValue === 0n) return null;
  try {
    const ptrNumber = Number(ptrValue);
    if (!Number.isSafeInteger(ptrNumber)) {
      throw new VoiceLifecycleError(
        "kernel-missing",
        `[ffi-bindings] C diagnostic pointer ${ptrValue.toString()} exceeds JS safe integer range`,
      );
    }
    // The message is materialised as a JS string before the `finally`
    // clause runs, so it never refers to freed native memory. No local is
    // named `cstr` here, to avoid shadowing the `cstr()` helper.
    return new ffi.CString(ptrNumber).toString();
  } finally {
    // Release the native buffer on every path, including the failure
    // paths above, so a bad pointer or a failed read does not leak it.
    // The free takes the raw bigint (`usize`), not the JS number.
    loadedLib.symbols.eliza_inference_free_string(ptrValue);
  }
}
|
|
1911
|
+
|
|
1912
|
+
/**
 * Allocate a zeroed one-element 64-bit slot for a native `char **` error
 * out-parameter. Returns both the typed array (to read back later with
 * `takeError`) and the pointer to pass into the native call.
 */
function makeOutErr(): { buf: BigUint64Array; ptr: unknown } {
  const slot = new BigUint64Array(1);
  const slotPtr = ffi.ptr(slot);
  return { buf: slot, ptr: slotPtr };
}
|
|
1916
|
+
|
|
1917
|
+
/**
 * Build a NUL-terminated UTF-8 copy of `value` for a `const char *`
 * argument.
 *
 * @param value String to encode, or null for an optional argument (the C ABI
 *   accepts NULL for arguments such as `speaker_preset_id`).
 * @returns `ptr` — a `T.ptr`-compatible pointer (null for null input);
 *   `bytes` — the UTF-8 length excluding the terminator; `buffer` — the
 *   backing buffer (null for null input).
 */
function cstr(value: string | null): {
  ptr: unknown;
  bytes: number;
  buffer: Buffer | null;
} {
  if (value === null) {
    return { ptr: null, bytes: 0, buffer: null };
  }
  const encoded = Buffer.from(value, "utf8");
  const payloadLength = encoded.byteLength;
  // Buffer.alloc zero-fills, so the extra trailing byte is the terminator.
  const terminated = Buffer.alloc(payloadLength + 1);
  encoded.copy(terminated);
  return {
    ptr: ffi.ptr(terminated),
    bytes: payloadLength,
    buffer: terminated,
  };
}
|
|
1934
|
+
|
|
1935
|
+
/**
 * Translate a native return code into a `VoiceLifecycleError` code.
 * Out-of-memory maps to "ram-pressure", an FFI fault maps to "mmap-fail",
 * and every other code (listed or not) maps to "kernel-missing".
 */
function failureCode(rc: number): VoiceLifecycleError["code"] {
  switch (rc) {
    case ELIZA_ERR_OOM:
      return "ram-pressure";
    case ELIZA_ERR_FFI_FAULT:
      return "mmap-fail";
    case ELIZA_ERR_NOT_IMPLEMENTED:
    case ELIZA_ERR_ABI_MISMATCH:
    case ELIZA_ERR_BUNDLE_INVALID:
    default:
      return "kernel-missing";
  }
}
|
|
1943
|
+
|
|
1944
|
+
/**
 * Report whether an FFI return value represents a NULL pointer: null,
 * undefined, the number 0, or the bigint 0n.
 */
function isNullPointer(value: unknown): boolean {
  if (value === null || value === undefined) {
    return true;
  }
  return value === 0 || value === 0n;
}
|
|
1947
|
+
|
|
1948
|
+
return {
|
|
1949
|
+
libraryPath: dylibPath,
|
|
1950
|
+
libraryAbiVersion: reported,
|
|
1951
|
+
|
|
1952
|
+
/**
 * Create an inference context for `bundleDir` via `eliza_inference_create`.
 *
 * @returns The native context handle.
 * @throws VoiceLifecycleError ("kernel-missing") when the library returns a
 *   NULL handle; the message is the library diagnostic when one was set.
 */
create(bundleDir: string): ElizaInferenceContextHandle {
  const errSlot = makeOutErr();
  const bundlePath = cstr(bundleDir);
  const ctxHandle = loadedLib.symbols.eliza_inference_create(
    bundlePath.ptr,
    errSlot.ptr,
  );
  if (!isNullPointer(ctxHandle)) {
    return ctxHandle as ElizaInferenceContextHandle;
  }
  const diagnostic = takeError(errSlot.buf);
  throw new VoiceLifecycleError(
    "kernel-missing",
    diagnostic ??
      "[ffi-bindings] eliza_inference_create returned NULL with no diagnostic",
  );
},
|
|
1967
|
+
|
|
1968
|
+
/** Hand `ctx` to `eliza_inference_destroy`; nothing is returned or checked. */
destroy(ctx: ElizaInferenceContextHandle): void {
  const { symbols } = loadedLib;
  symbols.eliza_inference_destroy(ctx);
},
|
|
1971
|
+
|
|
1972
|
+
/**
 * Call `eliza_inference_mmap_acquire` for the named `region`.
 *
 * @throws VoiceLifecycleError (code derived from the return code) when the
 *   call does not return `ELIZA_OK`.
 */
mmapAcquire(ctx, region) {
  const errSlot = makeOutErr();
  const regionName = cstr(region);
  const status = loadedLib.symbols.eliza_inference_mmap_acquire(
    ctx,
    regionName.ptr,
    errSlot.ptr,
  );
  if (status === ELIZA_OK) {
    return;
  }
  const diagnostic = takeError(errSlot.buf);
  throw new VoiceLifecycleError(
    failureCode(status),
    diagnostic ??
      `[ffi-bindings] eliza_inference_mmap_acquire(${region}) rc=${status}`,
  );
},
|
|
1987
|
+
|
|
1988
|
+
/**
 * Call `eliza_inference_mmap_evict` for the named `region`.
 *
 * @throws VoiceLifecycleError (code derived from the return code) when the
 *   call does not return `ELIZA_OK`.
 */
mmapEvict(ctx, region) {
  const errSlot = makeOutErr();
  const regionName = cstr(region);
  const status = loadedLib.symbols.eliza_inference_mmap_evict(
    ctx,
    regionName.ptr,
    errSlot.ptr,
  );
  if (status === ELIZA_OK) {
    return;
  }
  const diagnostic = takeError(errSlot.buf);
  throw new VoiceLifecycleError(
    failureCode(status),
    diagnostic ??
      `[ffi-bindings] eliza_inference_mmap_evict(${region}) rc=${status}`,
  );
},
|
|
2003
|
+
|
|
2004
|
+
/**
 * Run `eliza_inference_tts_synthesize`, writing into the caller's `out`
 * buffer.
 *
 * The text is passed as a NUL-terminated UTF-8 buffer plus its byte length;
 * `speakerPresetId` may be null, in which case a NULL pointer is passed.
 *
 * @returns The non-negative return code of the native call.
 * @throws VoiceLifecycleError when the native call returns a negative code.
 */
ttsSynthesize({ ctx, text, speakerPresetId, out }) {
  const errSlot = makeOutErr();
  const utf8Text = cstr(text);
  const preset = cstr(speakerPresetId);
  const status = loadedLib.symbols.eliza_inference_tts_synthesize(
    ctx,
    utf8Text.ptr,
    BigInt(utf8Text.bytes),
    preset.ptr,
    ffi.ptr(out),
    BigInt(out.length),
    errSlot.ptr,
  );
  if (status >= 0) {
    return status;
  }
  if (status < 0) {
    const diagnostic = takeError(errSlot.buf);
    throw new VoiceLifecycleError(
      failureCode(status),
      diagnostic ?? `[ffi-bindings] eliza_inference_tts_synthesize rc=${status}`,
    );
  }
  // Only reachable for a non-comparable code (NaN); pass it through.
  return status;
},
|
|
2025
|
+
|
|
2026
|
+
/**
 * Transcribe `pcm` through `eliza_inference_asr_transcribe` and return the
 * text as a JS string.
 *
 * The library writes UTF-8 into a zero-initialised scratch buffer of
 * `maxTextBytes` bytes (default 4096). The text is taken up to the first NUL
 * byte; when the buffer holds no NUL, the non-negative return code is used
 * as the length, clamped to the buffer capacity.
 *
 * @returns The decoded transcript.
 * @throws VoiceLifecycleError when the native call returns a negative code.
 */
asrTranscribe({ ctx, pcm, sampleRateHz, maxTextBytes }) {
  const err = makeOutErr();
  const cap = maxTextBytes ?? 4096;
  const outText = new Uint8Array(cap);
  const rc = loadedLib.symbols.eliza_inference_asr_transcribe(
    ctx,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    sampleRateHz,
    ffi.ptr(outText),
    BigInt(cap),
    err.ptr,
  );
  if (rc < 0) {
    const message =
      takeError(err.buf) ??
      `[ffi-bindings] eliza_inference_asr_transcribe rc=${rc}`;
    throw new VoiceLifecycleError(failureCode(rc), message);
  }
  const nul = outText.indexOf(0, 0);
  // Without a terminator, fall back to the reported length, but never read
  // past the scratch buffer: an oversized `rc` would otherwise make
  // Buffer.from() throw a RangeError.
  const len = nul >= 0 ? nul : Math.min(rc, cap);
  return Buffer.from(outText.buffer, outText.byteOffset, len).toString(
    "utf8",
  );
},
|
|
2051
|
+
|
|
2052
|
+
/**
 * True only when the timed-ASR symbols were bound, the
 * `eliza_inference_asr_timestamps_supported` probe is callable, and it
 * returns 1.
 */
timedAsrSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_asr_timestamps_supported;
  if (!timedAsrSymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2058
|
+
|
|
2059
|
+
/**
 * Transcribe `pcm` with per-word timestamps through
 * `eliza_inference_asr_transcribe_timed`.
 *
 * Scratch buffers are sized by `maxTextBytes` (default 4096) for the text
 * and `maxWords` (default 1024) for the start/end millisecond arrays. The
 * word-count slot is pre-loaded with the word capacity before the call and
 * read back afterwards. The text is taken up to the first NUL byte; when
 * the buffer holds no NUL, the non-negative return code is used as the
 * length, clamped to the buffer capacity.
 *
 * @returns `{ text, words }`, with `words` built by `recoverAsrWords`.
 * @throws VoiceLifecycleError ("kernel-missing") when the timed-ASR symbol
 *   is not bound, or a code derived from the return code when the native
 *   call returns a negative value.
 */
asrTranscribeTimed({ ctx, pcm, sampleRateHz, maxTextBytes, maxWords }) {
  const fn = loadedLib.symbols.eliza_inference_asr_transcribe_timed;
  if (!timedAsrSymbolsAvailable || typeof fn !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_asr_transcribe_timed is not exported by this build (pre-v12)",
    );
  }
  const err = makeOutErr();
  const cap = maxTextBytes ?? 4096;
  const wordCap = maxWords ?? 1024;
  const outText = new Uint8Array(cap);
  const startMs = new Int32Array(wordCap);
  const endMs = new Int32Array(wordCap);
  // In/out slot: seeded with the capacity, read back as the word count.
  const nWords = new BigUint64Array(1);
  nWords[0] = BigInt(wordCap);
  const rc = fn(
    ctx,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    sampleRateHz,
    ffi.ptr(outText),
    BigInt(cap),
    ffi.ptr(startMs),
    ffi.ptr(endMs),
    ffi.ptr(nWords),
    err.ptr,
  );
  if (rc < 0) {
    const message =
      takeError(err.buf) ??
      `[ffi-bindings] eliza_inference_asr_transcribe_timed rc=${rc}`;
    throw new VoiceLifecycleError(failureCode(rc), message);
  }
  const nul = outText.indexOf(0, 0);
  // Without a terminator, fall back to the reported length, but never read
  // past the scratch buffer: an oversized `rc` would otherwise make
  // Buffer.from() throw a RangeError.
  const len = nul >= 0 ? nul : Math.min(rc, cap);
  const text = Buffer.from(
    outText.buffer,
    outText.byteOffset,
    len,
  ).toString("utf8");
  const words = recoverAsrWords(text, Number(nWords[0]), startMs, endMs);
  return { text, words };
},
|
|
2103
|
+
|
|
2104
|
+
/* ---- Streaming TTS + verifier callback (ABI v2) ------------ */
|
|
2105
|
+
|
|
2106
|
+
/** True when `eliza_inference_tts_stream_supported()` returns 1. */
ttsStreamSupported(): boolean {
  const answer = loadedLib.symbols.eliza_inference_tts_stream_supported();
  return answer === 1;
},
|
|
2109
|
+
|
|
2110
|
+
/**
 * Stream synthesized audio through `eliza_inference_tts_synthesize_stream`,
 * delivering each chunk to `onChunk`.
 *
 * A `JSCallback` is created for the duration of the native call and closed
 * afterwards on every path. `onChunk` receives a JS-owned copy of the
 * samples plus an `isFinal` flag; returning exactly `true` makes the
 * callback return 1 to the library, anything else returns 0.
 *
 * @returns `{ cancelled: true }` when the native call returns
 *   `ELIZA_ERR_CANCELLED`, otherwise `{ cancelled: false }`.
 * @throws VoiceLifecycleError for any other negative return code.
 */
ttsSynthesizeStream({ ctx, text, speakerPresetId, onChunk }) {
  const errSlot = makeOutErr();
  const utf8Text = cstr(text);
  const preset = cstr(speakerPresetId);
  // (pcm: ptr, n_samples: usize, is_final: i32, user_data: ptr) -> i32
  const chunkCallback = new ffi.JSCallback(
    (pcmPtr: bigint, nSamples: bigint, isFinal: number) => {
      const sampleCount = Number(nSamples);
      const hasAudio = sampleCount > 0 && pcmPtr !== 0n;
      // The native buffer belongs to the library and is only valid during
      // this callback, so the samples are copied (4 bytes per float)
      // before being handed to the consumer.
      const pcm = hasAudio
        ? new Float32Array(
            ffi.toArrayBuffer(pcmPtr, 0, sampleCount * 4).slice(0),
          )
        : new Float32Array(0);
      const wantsCancel = onChunk({ pcm, isFinal: isFinal !== 0 });
      return wantsCancel === true ? 1 : 0;
    },
    {
      args: [T.ptr, T.usize, T.i32, T.ptr],
      returns: T.i32,
    },
  );
  try {
    const status = loadedLib.symbols.eliza_inference_tts_synthesize_stream(
      ctx,
      utf8Text.ptr,
      BigInt(utf8Text.bytes),
      preset.ptr,
      BigInt(chunkCallback.ptr),
      0n,
      errSlot.ptr,
    );
    if (status === ELIZA_ERR_CANCELLED) {
      return { cancelled: true };
    }
    if (status < 0) {
      const diagnostic = takeError(errSlot.buf);
      throw new VoiceLifecycleError(
        failureCode(status),
        diagnostic ??
          `[ffi-bindings] eliza_inference_tts_synthesize_stream rc=${status}`,
      );
    }
    return { cancelled: false };
  } finally {
    chunkCallback.close();
  }
},
|
|
2155
|
+
|
|
2156
|
+
/**
 * Call `eliza_inference_cancel_tts` for `ctx`.
 *
 * @throws VoiceLifecycleError when the call does not return `ELIZA_OK`.
 */
cancelTts(ctx) {
  const errSlot = makeOutErr();
  const status = loadedLib.symbols.eliza_inference_cancel_tts(
    ctx,
    errSlot.ptr,
  );
  if (status === ELIZA_OK) {
    return;
  }
  const diagnostic = takeError(errSlot.buf);
  throw new VoiceLifecycleError(
    failureCode(status),
    diagnostic ?? `[ffi-bindings] eliza_inference_cancel_tts rc=${status}`,
  );
},
|
|
2166
|
+
|
|
2167
|
+
/** True when `eliza_inference_encode_reference` is bound as a function. */
encodeReferenceSupported(): boolean {
  const encoder = loadedLib.symbols.eliza_inference_encode_reference;
  return typeof encoder === "function";
},
|
|
2172
|
+
|
|
2173
|
+
/**
 * Encode a reference PCM clip into tokens through
 * `eliza_inference_encode_reference`.
 *
 * The library reports `K` and `refT` through int out-slots and hands back a
 * library-owned int32 buffer of `K * refT` tokens. The tokens are copied
 * into a JS-owned `Int32Array`, and any non-NULL native buffer is released
 * with `eliza_inference_free_tokens` before this method returns or throws.
 *
 * @returns `{ K, refT, tokens }` where `tokens` holds `K * refT` entries.
 * @throws VoiceLifecycleError when the required symbols are not bound, when
 *   `sampleRateHz` is not 24000, when the native call does not return
 *   `ELIZA_OK`, or when the result is empty or unreadable.
 */
encodeReference({ ctx, pcm, sampleRateHz }) {
  if (
    typeof loadedLib.symbols.eliza_inference_encode_reference !==
      "function" ||
    typeof loadedLib.symbols.eliza_inference_free_tokens !== "function"
  ) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_encode_reference is not exported by this build",
    );
  }
  if (sampleRateHz !== 24000) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      `[ffi-bindings] encodeReference: sampleRateHz must be 24000 (got ${sampleRateHz})`,
    );
  }
  const err = makeOutErr();
  // out_K and out_ref_T are int*, out_tokens is int** — give the library
  // a slot to write into, then read back.
  const outK = new Int32Array(1);
  const outRefT = new Int32Array(1);
  const outTokensPtr = new BigUint64Array(1);
  const rc = loadedLib.symbols.eliza_inference_encode_reference(
    ctx,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    sampleRateHz,
    ffi.ptr(outK),
    ffi.ptr(outRefT),
    ffi.ptr(outTokensPtr),
    err.ptr,
  );
  if (rc !== ELIZA_OK) {
    const message =
      takeError(err.buf) ??
      `[ffi-bindings] eliza_inference_encode_reference rc=${rc}`;
    throw new VoiceLifecycleError(failureCode(rc), message);
  }
  const K = outK[0];
  const refT = outRefT[0];
  const tokensRaw = outTokensPtr[0];
  try {
    // Validation happens inside the guarded region so that a non-NULL
    // token buffer paired with a bogus K/refT is still released below.
    if (K <= 0 || refT <= 0 || tokensRaw === 0n) {
      throw new VoiceLifecycleError(
        "kernel-missing",
        `[ffi-bindings] encodeReference returned empty result (K=${K}, refT=${refT})`,
      );
    }
    const tokenCount = K * refT;
    // Copy out of the library's buffer so it can be freed before
    // returning. Each int32 is 4 bytes.
    const tokenBytes = tokenCount * 4;
    const tokensPtr =
      typeof tokensRaw === "bigint" ? Number(tokensRaw) : tokensRaw;
    const nativeView = ffi.toArrayBuffer(tokensPtr, 0, tokenBytes);
    const bytes = new Uint8Array(nativeView);
    if (bytes.byteLength < tokenBytes) {
      throw new VoiceLifecycleError(
        "kernel-missing",
        `[ffi-bindings] encodeReference returned an unreadable token buffer (K=${K}, refT=${refT}, got=${bytes.byteLength}, expected=${tokenBytes}, ctor=${nativeView.constructor.name})`,
      );
    }
    // slice() yields a fresh JS-owned backing store of exactly tokenBytes.
    const copied = bytes.slice(0, tokenBytes);
    const tokens = new Int32Array(copied.buffer);
    return { K, refT, tokens };
  } finally {
    // Nothing to release when the library left the slot NULL.
    if (tokensRaw !== 0n) {
      loadedLib.symbols.eliza_inference_free_tokens(tokensRaw);
    }
  }
},
|
|
2243
|
+
|
|
2244
|
+
/**
 * Register (or, with `cbFn === null`, clear) the verifier callback for
 * `ctx` through `eliza_inference_set_verifier_callback`.
 *
 * When registering, a `JSCallback` wraps `cbFn`; each native invocation
 * decodes the event pointer with `readVerifierEvent` and forwards the
 * result to `cbFn`.
 *
 * @returns A handle whose `close()` clears the native registration and then
 *   closes the `JSCallback`. For the clear case `close()` is a no-op.
 * @throws VoiceLifecycleError when the native call does not return
 *   `ELIZA_OK`; on a failed registration the `JSCallback` is closed first.
 */
setVerifierCallback(ctx, cbFn) {
  const errSlot = makeOutErr();

  if (cbFn === null) {
    const clearStatus = loadedLib.symbols.eliza_inference_set_verifier_callback(
      ctx,
      0n,
      0n,
      errSlot.ptr,
    );
    if (clearStatus === ELIZA_OK) {
      return { close: () => {} };
    }
    const clearDiagnostic = takeError(errSlot.buf);
    throw new VoiceLifecycleError(
      failureCode(clearStatus),
      clearDiagnostic ??
        `[ffi-bindings] eliza_inference_set_verifier_callback(clear) rc=${clearStatus}`,
    );
  }

  // (ev: ptr to EliVerifierEvent, user_data: ptr) -> void
  const verifierCallback = new ffi.JSCallback(
    (evPtr: bigint) => {
      cbFn(readVerifierEvent(evPtr, ffi));
    },
    { args: [T.ptr, T.ptr], returns: T.void },
  );
  const status = loadedLib.symbols.eliza_inference_set_verifier_callback(
    ctx,
    BigInt(verifierCallback.ptr),
    0n,
    errSlot.ptr,
  );
  if (status !== ELIZA_OK) {
    verifierCallback.close();
    const diagnostic = takeError(errSlot.buf);
    throw new VoiceLifecycleError(
      failureCode(status),
      diagnostic ??
        `[ffi-bindings] eliza_inference_set_verifier_callback rc=${status}`,
    );
  }

  const release = (): void => {
    // Unregister on the native side before closing the JSCallback, so the
    // library is never left holding a closed callback.
    const clearSlot = makeOutErr();
    loadedLib.symbols.eliza_inference_set_verifier_callback(
      ctx,
      0n,
      0n,
      clearSlot.ptr,
    );
    // Consume any diagnostic so its native buffer is released; the text
    // itself is discarded.
    takeError(clearSlot.buf);
    verifierCallback.close();
  };
  return { close: release };
},
|
|
2298
|
+
|
|
2299
|
+
/* ---- Native VAD (ABI v3) ----------------------------------- */
|
|
2300
|
+
|
|
2301
|
+
/**
 * True only when the native VAD symbols were bound, the
 * `eliza_inference_vad_supported` probe is callable, and it returns 1.
 */
vadSupported(): boolean {
  if (
    nativeVadSymbolsAvailable &&
    typeof loadedLib.symbols.eliza_inference_vad_supported === "function"
  ) {
    return loadedLib.symbols.eliza_inference_vad_supported() === 1;
  }
  return false;
},
|
|
2310
|
+
|
|
2311
|
+
/**
 * Open a native VAD session on `ctx` via `eliza_inference_vad_open`.
 *
 * @returns The native VAD handle.
 * @throws VoiceLifecycleError ("kernel-missing") when the symbol is not
 *   bound or the library returns a NULL handle.
 */
vadOpen({ ctx, sampleRateHz }) {
  const openVad = loadedLib.symbols.eliza_inference_vad_open;
  if (!nativeVadSymbolsAvailable || typeof openVad !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_vad_open is not exported by this libelizainference build",
    );
  }
  const errSlot = makeOutErr();
  const vadHandle = openVad(ctx, sampleRateHz, errSlot.ptr);
  if (!isNullPointer(vadHandle)) {
    return vadHandle as NativeVadHandle;
  }
  const diagnostic = takeError(errSlot.buf);
  throw new VoiceLifecycleError(
    "kernel-missing",
    diagnostic ??
      "[ffi-bindings] eliza_inference_vad_open returned NULL with no diagnostic",
  );
},
|
|
2329
|
+
|
|
2330
|
+
/**
 * Feed `pcm` to the native VAD via `eliza_inference_vad_process`.
 *
 * @returns The value the library wrote into the single-float output slot
 *   (0 if the slot reads back as undefined).
 * @throws VoiceLifecycleError ("kernel-missing") when the symbol is not
 *   bound, or a code derived from the return code when the call does not
 *   return `ELIZA_OK`.
 */
vadProcess({ vad, pcm }) {
  // Named `runVad` rather than `process` to avoid shadowing the Node global.
  const runVad = loadedLib.symbols.eliza_inference_vad_process;
  if (!nativeVadSymbolsAvailable || typeof runVad !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_vad_process is not exported by this libelizainference build",
    );
  }
  const errSlot = makeOutErr();
  const probabilitySlot = new Float32Array(1);
  const status = runVad(
    vad,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    ffi.ptr(probabilitySlot),
    errSlot.ptr,
  );
  if (status === ELIZA_OK) {
    return probabilitySlot[0] ?? 0;
  }
  const diagnostic = takeError(errSlot.buf);
  throw new VoiceLifecycleError(
    failureCode(status),
    diagnostic ?? `[ffi-bindings] eliza_inference_vad_process rc=${status}`,
  );
},
|
|
2355
|
+
|
|
2356
|
+
/**
 * Call `eliza_inference_vad_reset` on the given VAD handle.
 *
 * @throws VoiceLifecycleError ("kernel-missing") when the symbol is not
 *   bound, or a code derived from the return code when the call does not
 *   return `ELIZA_OK`.
 */
vadReset(vad) {
  const resetVad = loadedLib.symbols.eliza_inference_vad_reset;
  if (!nativeVadSymbolsAvailable || typeof resetVad !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_vad_reset is not exported by this libelizainference build",
    );
  }
  const errSlot = makeOutErr();
  const status = resetVad(vad, errSlot.ptr);
  if (status === ELIZA_OK) {
    return;
  }
  const diagnostic = takeError(errSlot.buf);
  throw new VoiceLifecycleError(
    failureCode(status),
    diagnostic ?? `[ffi-bindings] eliza_inference_vad_reset rc=${status}`,
  );
},
|
|
2373
|
+
|
|
2374
|
+
/** Close a VAD handle; does nothing when the close symbol is nullish. */
vadClose(vad) {
  const closeFn = loadedLib.symbols.eliza_inference_vad_close;
  closeFn?.(vad);
},
|
|
2377
|
+
|
|
2378
|
+
/* ---- Native wake-word (ABI v5) ----------------------------- */
|
|
2379
|
+
|
|
2380
|
+
/**
 * Report whether the wake-word runtime is usable: the symbol family must be
 * bound and the library's own probe must return 1.
 */
wakewordSupported(): boolean {
  if (!wakewordSymbolsAvailable) {
    return false;
  }
  const probe = loadedLib.symbols.eliza_inference_wakeword_supported;
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2390
|
+
|
|
2391
|
+
/**
 * Open a native wake-word handle for `ctx` at `sampleRateHz`, selecting the
 * head named `headName` (passed to the library as a C string).
 *
 * @returns The non-NULL handle returned by the library.
 * @throws VoiceLifecycleError "kernel-missing" when the wake-word symbols
 *   are not bound or the library returns a NULL handle.
 */
wakewordOpen({ ctx, sampleRateHz, headName }) {
  const openFn = loadedLib.symbols.eliza_inference_wakeword_open;
  if (!(wakewordSymbolsAvailable && typeof openFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_wakeword_open is not exported by this libelizainference build (wake-word GGUF runtime not present)",
    );
  }
  const outErr = makeOutErr();
  const headCStr = cstr(headName);
  const handle = openFn(ctx, sampleRateHz, headCStr.ptr, outErr.ptr);
  if (!isNullPointer(handle)) {
    return handle as NativeWakeWordHandle;
  }
  const detail =
    takeError(outErr.buf) ??
    "[ffi-bindings] eliza_inference_wakeword_open returned NULL with no diagnostic";
  throw new VoiceLifecycleError("kernel-missing", detail);
},
|
|
2410
|
+
|
|
2411
|
+
/**
 * Score one PCM buffer against an open wake-word handle and return the value
 * the library wrote into its single-float output slot (0 if the slot reads
 * as undefined).
 *
 * @throws VoiceLifecycleError "kernel-missing" when the wake-word symbols
 *   are not bound; otherwise the code `failureCode` derives from a non-OK
 *   return.
 */
wakewordScore({ wake, pcm }) {
  const scoreFn = loadedLib.symbols.eliza_inference_wakeword_score;
  if (!(wakewordSymbolsAvailable && typeof scoreFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_wakeword_score is not exported by this libelizainference build",
    );
  }
  const outErr = makeOutErr();
  const probabilitySlot = new Float32Array(1);
  const status = scoreFn(
    wake,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    ffi.ptr(probabilitySlot),
    outErr.ptr,
  );
  if (status === ELIZA_OK) {
    return probabilitySlot[0] ?? 0;
  }
  const detail =
    takeError(outErr.buf) ??
    `[ffi-bindings] eliza_inference_wakeword_score rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), detail);
},
|
|
2436
|
+
|
|
2437
|
+
/**
 * Invoke `eliza_inference_wakeword_reset` on an open wake-word handle.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the wake-word symbols
 *   are not bound; otherwise the code `failureCode` derives from a non-OK
 *   return.
 */
wakewordReset(wake) {
  const resetFn = loadedLib.symbols.eliza_inference_wakeword_reset;
  if (!(wakewordSymbolsAvailable && typeof resetFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_wakeword_reset is not exported by this libelizainference build",
    );
  }
  const outErr = makeOutErr();
  const status = resetFn(wake, outErr.ptr);
  if (status === ELIZA_OK) {
    return;
  }
  const detail =
    takeError(outErr.buf) ??
    `[ffi-bindings] eliza_inference_wakeword_reset rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), detail);
},
|
|
2454
|
+
|
|
2455
|
+
/** Close a wake-word handle; does nothing when the close symbol is nullish. */
wakewordClose(wake) {
  const closeFn = loadedLib.symbols.eliza_inference_wakeword_close;
  closeFn?.(wake);
},
|
|
2458
|
+
|
|
2459
|
+
/* ---- Native speaker encoder (ABI v6) ----------------------- */
|
|
2460
|
+
|
|
2461
|
+
/**
 * Report whether the speaker encoder is usable: the symbol family must be
 * bound and the library's own probe must return 1.
 */
speakerSupported(): boolean {
  if (!speakerSymbolsAvailable) {
    return false;
  }
  const probe = loadedLib.symbols.eliza_inference_speaker_supported;
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2471
|
+
|
|
2472
|
+
/**
 * Open a native speaker-encoder handle for `ctx`, passing `ggufPath` to the
 * library as a C string.
 *
 * @returns The non-NULL handle returned by the library.
 * @throws VoiceLifecycleError "kernel-missing" when the speaker symbols are
 *   not bound or the library returns a NULL handle.
 */
speakerOpen({ ctx, ggufPath }) {
  const openFn = loadedLib.symbols.eliza_inference_speaker_open;
  if (!(speakerSymbolsAvailable && typeof openFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_speaker_open is not exported by this libelizainference build",
    );
  }
  const outErr = makeOutErr();
  const pathCStr = cstr(ggufPath);
  const handle = openFn(ctx, pathCStr.ptr, outErr.ptr);
  if (!isNullPointer(handle)) {
    return handle as NativeSpeakerHandle;
  }
  const detail =
    takeError(outErr.buf) ??
    "[ffi-bindings] eliza_inference_speaker_open returned NULL with no diagnostic";
  throw new VoiceLifecycleError("kernel-missing", detail);
},
|
|
2491
|
+
|
|
2492
|
+
/**
 * Compute a speaker embedding for one PCM buffer.
 *
 * The library writes into a freshly allocated `Float32Array` of
 * `SPEAKER_EMBEDDING_DIM` elements, which is returned as-is on success.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the speaker symbols are
 *   not bound; otherwise the code `failureCode` derives from a non-OK return.
 */
speakerEmbed({ speaker, pcm }) {
  const embedFn = loadedLib.symbols.eliza_inference_speaker_embed;
  if (!(speakerSymbolsAvailable && typeof embedFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_speaker_embed is not exported by this libelizainference build",
    );
  }
  const outErr = makeOutErr();
  const embedding = new Float32Array(SPEAKER_EMBEDDING_DIM);
  const status = embedFn(
    speaker,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    ffi.ptr(embedding),
    outErr.ptr,
  );
  if (status === ELIZA_OK) {
    return embedding;
  }
  const detail =
    takeError(outErr.buf) ??
    `[ffi-bindings] eliza_inference_speaker_embed rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), detail);
},
|
|
2517
|
+
|
|
2518
|
+
/** Close a speaker-encoder handle; does nothing when the close symbol is nullish. */
speakerClose(speaker) {
  const closeFn = loadedLib.symbols.eliza_inference_speaker_close;
  closeFn?.(speaker);
},
|
|
2521
|
+
|
|
2522
|
+
/* ---- Native diarizer (ABI v6) ------------------------------ */
|
|
2523
|
+
|
|
2524
|
+
/**
 * Report whether the diarizer is usable: the symbol family must be bound and
 * the library's own probe must return 1.
 */
diarizSupported(): boolean {
  if (!diarizSymbolsAvailable) {
    return false;
  }
  const probe = loadedLib.symbols.eliza_inference_diariz_supported;
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2533
|
+
|
|
2534
|
+
/**
 * Open a native diarizer handle for `ctx`, passing `ggufPath` to the library
 * as a C string.
 *
 * @returns The non-NULL handle returned by the library.
 * @throws VoiceLifecycleError "kernel-missing" when the diarizer symbols are
 *   not bound or the library returns a NULL handle.
 */
diarizOpen({ ctx, ggufPath }) {
  const openFn = loadedLib.symbols.eliza_inference_diariz_open;
  if (!(diarizSymbolsAvailable && typeof openFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_diariz_open is not exported by this libelizainference build",
    );
  }
  const outErr = makeOutErr();
  const pathCStr = cstr(ggufPath);
  const handle = openFn(ctx, pathCStr.ptr, outErr.ptr);
  if (!isNullPointer(handle)) {
    return handle as NativeDiarizHandle;
  }
  const detail =
    takeError(outErr.buf) ??
    "[ffi-bindings] eliza_inference_diariz_open returned NULL with no diagnostic";
  throw new VoiceLifecycleError("kernel-missing", detail);
},
|
|
2553
|
+
|
|
2554
|
+
/**
 * Run the diarizer over one PCM buffer and return its int8 labels.
 *
 * @returns A copy of the first `n` labels, where `n` is the count the
 *   library wrote back, capped at the label buffer's capacity.
 * @throws VoiceLifecycleError "kernel-missing" when the diarizer symbols are
 *   not bound; otherwise the code `failureCode` derives from a non-OK return.
 */
diarizSegment({ diariz, pcm }) {
  const segmentFn = loadedLib.symbols.eliza_inference_diariz_segment;
  if (!(diarizSymbolsAvailable && typeof segmentFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_diariz_segment is not exported by this libelizainference build",
    );
  }
  const outErr = makeOutErr();
  // The library emits `frames_per_window` int8 labels (293 for pyannote-3).
  // `labelCount` is in/out: it carries the buffer capacity in and is
  // overwritten with the number of labels actually produced.
  const labels = new Int8Array(DIARIZ_LABELS_CAPACITY);
  const labelCount = new BigUint64Array([BigInt(labels.length)]);
  const status = segmentFn(
    diariz,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    ffi.ptr(labels),
    ffi.ptr(labelCount),
    outErr.ptr,
  );
  if (status !== ELIZA_OK) {
    const detail =
      takeError(outErr.buf) ??
      `[ffi-bindings] eliza_inference_diariz_segment rc=${status}`;
    throw new VoiceLifecycleError(failureCode(status), detail);
  }
  const produced = Number(labelCount[0] ?? 0n);
  const usable = Math.min(produced, labels.length);
  return labels.slice(0, usable);
},
|
|
2585
|
+
|
|
2586
|
+
/** Close a diarizer handle; does nothing when the close symbol is nullish. */
diarizClose(diariz) {
  const closeFn = loadedLib.symbols.eliza_inference_diariz_close;
  closeFn?.(diariz);
},
|
|
2589
|
+
|
|
2590
|
+
/* ---- Streaming ASR (ABI v2) -------------------------------- */
|
|
2591
|
+
|
|
2592
|
+
/** True when the library's streaming-ASR probe returns 1. */
asrStreamSupported(): boolean {
  const verdict = loadedLib.symbols.eliza_inference_asr_stream_supported();
  return verdict === 1;
},
|
|
2595
|
+
|
|
2596
|
+
/**
 * Open a streaming-ASR session for `ctx` at `sampleRateHz`.
 *
 * @returns The non-NULL handle returned by the library.
 * @throws VoiceLifecycleError "kernel-missing" when the library returns a
 *   NULL handle.
 */
asrStreamOpen({ ctx, sampleRateHz }) {
  const outErr = makeOutErr();
  const handle = loadedLib.symbols.eliza_inference_asr_stream_open(
    ctx,
    sampleRateHz,
    outErr.ptr,
  );
  if (!isNullPointer(handle)) {
    return handle as bigint;
  }
  const detail =
    takeError(outErr.buf) ??
    "[ffi-bindings] eliza_inference_asr_stream_open returned NULL with no diagnostic";
  throw new VoiceLifecycleError("kernel-missing", detail);
},
|
|
2611
|
+
|
|
2612
|
+
/**
 * Push one PCM buffer into an open streaming-ASR session.
 *
 * Only negative return values are treated as failures.
 *
 * @throws VoiceLifecycleError with the code `failureCode` derives from a
 *   negative return.
 */
asrStreamFeed({ stream, pcm }) {
  const outErr = makeOutErr();
  const status = loadedLib.symbols.eliza_inference_asr_stream_feed(
    stream,
    ffi.ptr(pcm),
    BigInt(pcm.length),
    outErr.ptr,
  );
  if (status >= 0) {
    return;
  }
  const detail =
    takeError(outErr.buf) ??
    `[ffi-bindings] eliza_inference_asr_stream_feed rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), detail);
},
|
|
2627
|
+
|
|
2628
|
+
/** Read the current partial result by delegating to `readAsrStreamResult` with the "partial" symbol. */
asrStreamPartial(args) {
  const partialFn = loadedLib.symbols.eliza_inference_asr_stream_partial;
  return readAsrStreamResult("partial", partialFn, args);
},
|
|
2635
|
+
|
|
2636
|
+
/** Read the final result by delegating to `readAsrStreamResult` with the "finish" symbol. */
asrStreamFinish(args) {
  const finishFn = loadedLib.symbols.eliza_inference_asr_stream_finish;
  return readAsrStreamResult("finish", finishFn, args);
},
|
|
2643
|
+
|
|
2644
|
+
/** Close a streaming-ASR session handle. */
asrStreamClose(stream) {
  const { symbols } = loadedLib;
  symbols.eliza_inference_asr_stream_close(stream);
},
|
|
2647
|
+
|
|
2648
|
+
/* ---- Streaming LLM (additive on top of v3) ----------------- */
|
|
2649
|
+
|
|
2650
|
+
/**
 * Report whether the streaming-LLM entry point is bound.
 *
 * Symbols are bound at dlopen; when the fallback path stripped them out the
 * runtime never advertises support.
 */
llmStreamSupported(): boolean {
  if (!llmStreamSymbolsAvailable) {
    return false;
  }
  const openFn = loadedLib.symbols.eliza_inference_llm_stream_open;
  return typeof openFn === "function";
},
|
|
2658
|
+
|
|
2659
|
+
/**
 * ABI v8 capability probe for MTP.
 *
 * When the probe (or the whole probe family) is unbound, as on a v7 library,
 * this reports unsupported so the fused text path refuses to route MTP
 * through it.
 */
llmMtpSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_llm_mtp_supported;
  if (!llmCapabilitySymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2670
|
+
|
|
2671
|
+
/**
 * Report whether the library's KV-quantization probe is bound and returns 1.
 */
llmKvQuantSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_llm_kv_quant_supported;
  if (!llmCapabilitySymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2679
|
+
|
|
2680
|
+
/**
 * Open a native LLM generation stream for `ctx`.
 *
 * `config` is serialized into an 88-byte little-endian buffer (offset table
 * below) and a pointer to that buffer is handed to
 * `eliza_inference_llm_stream_open`.
 *
 * @returns The non-NULL handle returned by the library.
 * @throws VoiceLifecycleError "kernel-missing" when the streaming symbols
 *   are not bound or the library returns a NULL handle.
 */
llmStreamOpen({ ctx, config }) {
  const openFn = loadedLib.symbols.eliza_inference_llm_stream_open;
  if (!(llmStreamSymbolsAvailable && typeof openFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_llm_stream_open is not exported by this build",
    );
  }
  const outErr = makeOutErr();

  // Byte layout of `eliza_llm_stream_config_t` from `eliza-inference-ffi.h`
  // (8-byte aligned, ABI v9):
  //   off  0 : i32 max_tokens
  //   off  4 : f32 temperature
  //   off  8 : f32 top_p
  //   off 12 : i32 top_k
  //   off 16 : f32 repeat_penalty
  //   off 20 : i32 slot_id
  //   off 24 : ptr prompt_cache_key
  //   off 32 : i32 draft_min
  //   off 36 : i32 draft_max
  //   off 40 : ptr mtp_drafter_path
  //   off 48 : ptr gbnf_grammar
  //   off 56 : i32 disable_thinking
  //   off 60 : i32 n_gpu_layers   (ABI v8 — fills old tail pad)
  //   off 64 : ptr cache_type_k   (ABI v8)
  //   off 72 : ptr cache_type_v   (ABI v8)
  //   off 80 : i32 context_size   (ABI v9)
  //   sizeof = 88
  const packed = Buffer.alloc(88);

  // Sampling scalars.
  packed.writeInt32LE(config.maxTokens, 0);
  packed.writeFloatLE(config.temperature, 4);
  packed.writeFloatLE(config.topP, 8);
  packed.writeInt32LE(config.topK, 12);
  packed.writeFloatLE(config.repeatPenalty, 16);
  packed.writeInt32LE(config.slotId, 20);

  // String fields. Empty or missing grammar / cache types are passed to
  // `cstr` as null.
  const cacheKeyCStr = cstr(config.promptCacheKey);
  const drafterCStr = cstr(config.draftModelPath);
  const grammarCStr = cstr(
    !config.gbnfGrammar || config.gbnfGrammar.length === 0
      ? null
      : config.gbnfGrammar,
  );
  const cacheTypeKCStr = cstr(
    !config.cacheTypeK || config.cacheTypeK.length === 0
      ? null
      : config.cacheTypeK,
  );
  const cacheTypeVCStr = cstr(
    !config.cacheTypeV || config.cacheTypeV.length === 0
      ? null
      : config.cacheTypeV,
  );

  packed.writeBigUInt64LE(toPtrBigInt(cacheKeyCStr.ptr), 24);
  packed.writeInt32LE(config.draftMin, 32);
  packed.writeInt32LE(config.draftMax, 36);
  packed.writeBigUInt64LE(toPtrBigInt(drafterCStr.ptr), 40);
  packed.writeBigUInt64LE(toPtrBigInt(grammarCStr.ptr), 48);
  packed.writeInt32LE(config.disableThinking ? 1 : 0, 56);
  // -1 = runtime default (all layers); 0 = CPU. `undefined` -> -1.
  const gpuLayers = config.gpuLayers !== undefined ? config.gpuLayers : -1;
  packed.writeInt32LE(gpuLayers, 60);
  packed.writeBigUInt64LE(toPtrBigInt(cacheTypeKCStr.ptr), 64);
  packed.writeBigUInt64LE(toPtrBigInt(cacheTypeVCStr.ptr), 72);
  packed.writeInt32LE(config.contextSize ?? 0, 80);

  const handle = openFn(ctx, ffi.ptr(packed), outErr.ptr);
  if (!isNullPointer(handle)) {
    return handle as LlmStreamHandle;
  }
  const detail =
    takeError(outErr.buf) ??
    "[ffi-bindings] eliza_inference_llm_stream_open returned NULL with no diagnostic";
  throw new VoiceLifecycleError("kernel-missing", detail);
},
|
|
2756
|
+
|
|
2757
|
+
/**
 * Pass `tokens` (pointer plus element count) to
 * `eliza_inference_llm_stream_prefill` for an open stream.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the streaming symbols
 *   are not bound; otherwise the code `failureCode` derives from a non-OK
 *   return.
 */
llmStreamPrefill({ stream, tokens }) {
  const prefillFn = loadedLib.symbols.eliza_inference_llm_stream_prefill;
  if (!(llmStreamSymbolsAvailable && typeof prefillFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_llm_stream_prefill is not exported by this build",
    );
  }
  const outErr = makeOutErr();
  const status = prefillFn(
    stream,
    ffi.ptr(tokens),
    BigInt(tokens.length),
    outErr.ptr,
  );
  if (status === ELIZA_OK) {
    return;
  }
  const detail =
    takeError(outErr.buf) ??
    `[ffi-bindings] eliza_inference_llm_stream_prefill rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), detail);
},
|
|
2779
|
+
|
|
2780
|
+
/**
 * Run one generation step on an open LLM stream.
 *
 * Output buffers are sized by `maxTokensPerStep` (default 32 tokens) and
 * `maxTextBytes` (default 1024 bytes). The text is decoded as UTF-8 up to the
 * first NUL byte, or the whole buffer when no NUL is present.
 *
 * @returns The tokens and text produced this step, `done` (true when the
 *   library returned 1), and the drafter drafted/accepted counters.
 * @throws VoiceLifecycleError "kernel-missing" when the streaming symbols
 *   are not bound; otherwise the code `failureCode` derives from a negative
 *   return.
 */
llmStreamNext({ stream, maxTokensPerStep, maxTextBytes }) {
  const nextFn = loadedLib.symbols.eliza_inference_llm_stream_next;
  if (!(llmStreamSymbolsAvailable && typeof nextFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_llm_stream_next is not exported by this build",
    );
  }
  const outErr = makeOutErr();
  const tokenCapacity = maxTokensPerStep ?? 32;
  const textCapacity = maxTextBytes ?? 1024;
  const tokenBuf = new Int32Array(tokenCapacity);
  const tokenCountSlot = new BigUint64Array(1);
  const textBuf = new Uint8Array(textCapacity);
  const draftedSlot = new Int32Array(1);
  const acceptedSlot = new Int32Array(1);
  const status = nextFn(
    stream,
    ffi.ptr(tokenBuf),
    BigInt(tokenCapacity),
    ffi.ptr(tokenCountSlot),
    ffi.ptr(textBuf),
    BigInt(textCapacity),
    ffi.ptr(draftedSlot),
    ffi.ptr(acceptedSlot),
    outErr.ptr,
  );
  if (status < 0) {
    const detail =
      takeError(outErr.buf) ??
      `[ffi-bindings] eliza_inference_llm_stream_next rc=${status}`;
    throw new VoiceLifecycleError(failureCode(status), detail);
  }

  // Never read past the buffer, whatever count was written back.
  const reported = Number(tokenCountSlot[0] ?? 0n);
  const tokens = Array.from(
    tokenBuf.subarray(0, Math.min(reported, tokenCapacity)),
  );

  const terminator = textBuf.indexOf(0, 0);
  const textLength = terminator < 0 ? textCapacity : terminator;
  const text = Buffer.from(
    textBuf.buffer,
    textBuf.byteOffset,
    textLength,
  ).toString("utf8");

  return {
    tokens,
    text,
    done: status === 1,
    drafterDrafted: draftedSlot[0] ?? 0,
    drafterAccepted: acceptedSlot[0] ?? 0,
  };
},
|
|
2830
|
+
|
|
2831
|
+
/**
 * Best-effort cancel of an in-flight LLM stream.
 *
 * When the cancel symbol is not bound this silently does nothing: the runtime
 * cannot interrupt mid-step, the next `_next` call still finishes normally,
 * and the caller drops the result.
 */
llmStreamCancel(stream) {
  const cancelFn = loadedLib.symbols.eliza_inference_llm_stream_cancel;
  if (llmStreamSymbolsAvailable && typeof cancelFn === "function") {
    cancelFn(stream);
  }
},
|
|
2841
|
+
|
|
2842
|
+
/**
 * Call `eliza_inference_llm_stream_save_slot` for `stream`, passing
 * `filename` as a C string.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the streaming symbols
 *   are not bound; otherwise the code `failureCode` derives from a non-OK
 *   return.
 */
llmStreamSaveSlot({ stream, filename }) {
  const saveFn = loadedLib.symbols.eliza_inference_llm_stream_save_slot;
  if (!(llmStreamSymbolsAvailable && typeof saveFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_llm_stream_save_slot is not exported by this build",
    );
  }
  const outErr = makeOutErr();
  const filenameCStr = cstr(filename);
  const status = saveFn(stream, filenameCStr.ptr, outErr.ptr);
  if (status === ELIZA_OK) {
    return;
  }
  const detail =
    takeError(outErr.buf) ??
    `[ffi-bindings] eliza_inference_llm_stream_save_slot rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), detail);
},
|
|
2860
|
+
|
|
2861
|
+
/**
 * Call `eliza_inference_llm_stream_restore_slot` for `stream`, passing
 * `filename` as a C string.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the streaming symbols
 *   are not bound; otherwise the code `failureCode` derives from a non-OK
 *   return.
 */
llmStreamRestoreSlot({ stream, filename }) {
  const restoreFn = loadedLib.symbols.eliza_inference_llm_stream_restore_slot;
  if (!(llmStreamSymbolsAvailable && typeof restoreFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_llm_stream_restore_slot is not exported by this build",
    );
  }
  const outErr = makeOutErr();
  const filenameCStr = cstr(filename);
  const status = restoreFn(stream, filenameCStr.ptr, outErr.ptr);
  if (status === ELIZA_OK) {
    return;
  }
  const detail =
    takeError(outErr.buf) ??
    `[ffi-bindings] eliza_inference_llm_stream_restore_slot rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), detail);
},
|
|
2879
|
+
|
|
2880
|
+
/** Close an LLM stream handle; does nothing when the close symbol is nullish. */
llmStreamClose(stream) {
  const closeFn = loadedLib.symbols.eliza_inference_llm_stream_close;
  closeFn?.(stream);
},
|
|
2883
|
+
|
|
2884
|
+
/* ---- Text embeddings (ABI v9) ------------------------------ */
|
|
2885
|
+
|
|
2886
|
+
/**
 * Report whether text embedding is usable: the text-modality symbols must be
 * bound and the library's embed probe must return 1.
 */
embedSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_embed_supported;
  if (!textModalitiesSymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2894
|
+
|
|
2895
|
+
/**
 * Embed `text` and return the resulting vector.
 *
 * `pooling` defaults to `ELIZA_POOLING_MEAN`. The library writes into a
 * 4096-float scratch buffer and reports the real dimension through an out
 * parameter; the returned array is a copy trimmed to that dimension.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the embed symbol is not
 *   bound or the reported dimension is outside (0, 4096]; otherwise the code
 *   `failureCode` derives from a non-OK return.
 */
embed({ ctx, text, pooling }) {
  const embedFn = loadedLib.symbols.eliza_inference_embed;
  if (!(textModalitiesSymbolsAvailable && typeof embedFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_embed is not exported by this build",
    );
  }
  const outErr = makeOutErr();
  const textCStr = cstr(text);
  // The C side caps its write at n_embd. 1024 is the largest
  // dedicated-embedding dim shipped; 4096 leaves room for any
  // decoder-as-embedder n_embd. The true length comes back via *out_dim.
  const capacity = 4096;
  const scratch = new Float32Array(capacity);
  const dimSlot = new Int32Array(1);
  const status = embedFn(
    ctx,
    textCStr.ptr,
    BigInt(textCStr.bytes),
    pooling ?? ELIZA_POOLING_MEAN,
    ffi.ptr(scratch),
    BigInt(capacity),
    ffi.ptr(dimSlot),
    outErr.ptr,
  );
  if (status !== ELIZA_OK) {
    const detail =
      takeError(outErr.buf) ?? `[ffi-bindings] eliza_inference_embed rc=${status}`;
    throw new VoiceLifecycleError(failureCode(status), detail);
  }
  const dim = dimSlot[0] ?? 0;
  const dimInRange = dim > 0 && dim <= capacity;
  if (!dimInRange) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      `[ffi-bindings] eliza_inference_embed returned out-of-range n_embd=${dim}`,
    );
  }
  return scratch.slice(0, dim);
},
|
|
2936
|
+
|
|
2937
|
+
/* ---- mmproj vision describe (ABI v9) ----------------------- */
|
|
2938
|
+
|
|
2939
|
+
/**
 * Report whether image description is usable: the text-modality symbols must
 * be bound and the library's vision probe must return 1.
 */
visionSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_vision_supported;
  if (!textModalitiesSymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2947
|
+
|
|
2948
|
+
/**
 * Describe an image in one blocking call and return the text as a string.
 *
 * `mmprojPath` and the optional `prompt` are passed as C strings (a missing
 * prompt is passed to `cstr` as null). The output buffer holds
 * `maxTextBytes` bytes (default 4096). The result is decoded as UTF-8 up to
 * the first NUL byte; when the buffer contains no NUL, the non-negative
 * return value is used as the length, clamped to the buffer capacity so an
 * over-large value cannot make `Buffer.from` throw a RangeError.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the describe symbol is
 *   not bound; otherwise the code `failureCode` derives from a negative
 *   return.
 */
describeImage({ ctx, imageBytes, mmprojPath, prompt, maxTextBytes }) {
  const describe = loadedLib.symbols.eliza_inference_describe_image;
  if (!textModalitiesSymbolsAvailable || typeof describe !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_describe_image is not exported by this build",
    );
  }
  const err = makeOutErr();
  const cap = maxTextBytes ?? 4096;
  const outText = new Uint8Array(cap);
  const mmprojArg = cstr(mmprojPath);
  const promptArg = cstr(prompt ?? null);
  const rc = describe(
    ctx,
    ffi.ptr(imageBytes),
    BigInt(imageBytes.length),
    mmprojArg.ptr,
    promptArg.ptr,
    ffi.ptr(outText),
    BigInt(cap),
    err.ptr,
  );
  if (rc < 0) {
    const message =
      takeError(err.buf) ??
      `[ffi-bindings] eliza_inference_describe_image rc=${rc}`;
    throw new VoiceLifecycleError(failureCode(rc), message);
  }
  const nul = outText.indexOf(0, 0);
  // No terminator means the buffer is full; never trust rc beyond `cap`.
  const len = nul >= 0 ? nul : Math.min(rc, cap);
  return Buffer.from(outText.buffer, outText.byteOffset, len).toString(
    "utf8",
  );
},
|
|
2983
|
+
|
|
2984
|
+
/* ---- Streaming mmproj vision describe (ABI v13) ------------ */
|
|
2985
|
+
|
|
2986
|
+
/**
 * Report whether streaming image description is usable: the vision-stream
 * symbols must be bound and the library's probe must return 1.
 */
visionStreamSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_vision_stream_supported;
  if (!visionStreamSymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
2994
|
+
|
|
2995
|
+
/**
 * Open a streaming image-description session.
 *
 * `mmprojPath` and the optional `prompt` are passed as C strings (a missing
 * prompt is passed to `cstr` as null). The result is typed as an
 * `LlmStreamHandle`.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the vision-stream
 *   symbols are not bound or the library returns a NULL handle.
 */
describeImageStreamOpen({ ctx, imageBytes, mmprojPath, prompt }) {
  const openFn = loadedLib.symbols.eliza_inference_describe_image_stream_open;
  if (!(visionStreamSymbolsAvailable && typeof openFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_describe_image_stream_open is not exported by this build",
    );
  }
  const outErr = makeOutErr();
  const mmprojCStr = cstr(mmprojPath);
  const promptCStr = cstr(prompt ?? null);
  const handle = openFn(
    ctx,
    ffi.ptr(imageBytes),
    BigInt(imageBytes.length),
    mmprojCStr.ptr,
    promptCStr.ptr,
    outErr.ptr,
  );
  if (!isNullPointer(handle)) {
    return handle as LlmStreamHandle;
  }
  const detail =
    takeError(outErr.buf) ??
    "[ffi-bindings] eliza_inference_describe_image_stream_open returned NULL with no diagnostic";
  throw new VoiceLifecycleError("kernel-missing", detail);
},
|
|
3022
|
+
|
|
3023
|
+
/* ---- Tokenizer (ABI v9) ------------------------------------ */
|
|
3024
|
+
|
|
3025
|
+
/**
 * Report whether the tokenizer is usable: the text-modality symbols must be
 * bound and the library's tokenize probe must return 1.
 */
tokenizeSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_tokenize_supported;
  if (!textModalitiesSymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
3033
|
+
|
|
3034
|
+
/**
 * Tokenize `text` and return the token ids as a fresh `Int32Array`.
 *
 * `addSpecial` is sent as 1 unless it is exactly `false`; `parseSpecial` is
 * sent as 1 only when it is exactly `true`. The library allocates the token
 * buffer itself and reports its address and length through out parameters.
 * The ids are copied into JS-owned memory and the native buffer is then
 * released with `eliza_inference_free_tokens`.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the tokenize or
 *   free-tokens symbol is not bound, or when the library reports a non-zero
 *   token count together with a NULL buffer; otherwise the code
 *   `failureCode` derives from a non-OK return.
 */
tokenize({ ctx, text, addSpecial, parseSpecial }) {
  const tokenize = loadedLib.symbols.eliza_inference_tokenize;
  const freeTokens = loadedLib.symbols.eliza_inference_free_tokens;
  if (
    !textModalitiesSymbolsAvailable ||
    typeof tokenize !== "function" ||
    typeof freeTokens !== "function"
  ) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_tokenize is not exported by this build",
    );
  }
  const err = makeOutErr();
  const textArg = cstr(text);
  // out_tokens is int** — give the library a slot to write the malloc'ed
  // pointer into, plus a size_t out for the count.
  const outTokensPtr = new BigUint64Array(1);
  const outN = new BigUint64Array(1);
  const rc = tokenize(
    ctx,
    textArg.ptr,
    BigInt(textArg.bytes),
    addSpecial === false ? 0 : 1,
    parseSpecial === true ? 1 : 0,
    ffi.ptr(outTokensPtr),
    ffi.ptr(outN),
    err.ptr,
  );
  if (rc !== ELIZA_OK) {
    const message =
      takeError(err.buf) ??
      `[ffi-bindings] eliza_inference_tokenize rc=${rc}`;
    throw new VoiceLifecycleError(failureCode(rc), message);
  }
  const n = Number(outN[0] ?? 0n);
  const tokensRaw = outTokensPtr[0] ?? 0n;
  if (n === 0) {
    // Empty token sequence — the library still returns a non-NULL
    // 1-byte buffer to free.
    if (tokensRaw !== 0n) freeTokens(tokensRaw);
    return new Int32Array(0);
  }
  if (tokensRaw === 0n) {
    // A positive count with no buffer would otherwise be read from address 0.
    throw new VoiceLifecycleError(
      "kernel-missing",
      `[ffi-bindings] eliza_inference_tokenize reported ${n} tokens but returned a NULL buffer`,
    );
  }
  try {
    const tokenBytes = n * 4;
    // Values read from a BigUint64Array are always bigint; the FFI view
    // helper is given the address as a number.
    const view = ffi.toArrayBuffer(Number(tokensRaw), 0, tokenBytes);
    // Copy out of the library's malloc'ed buffer before freeing.
    return new Int32Array(new Uint8Array(view).slice(0, tokenBytes).buffer);
  } finally {
    freeTokens(tokensRaw);
  }
},
|
|
3088
|
+
|
|
3089
|
+
/**
 * Convert token ids back to text and return it as a string.
 *
 * `removeSpecial` and `unparseSpecial` are each sent as 1 only when exactly
 * `true`. The output buffer holds `maxTextBytes` bytes (default 4096). The
 * result is decoded as UTF-8 up to the first NUL byte; when the buffer
 * contains no NUL, the non-negative return value is used as the length,
 * clamped to the buffer capacity so an over-large value cannot make
 * `Buffer.from` throw a RangeError.
 *
 * @throws VoiceLifecycleError "kernel-missing" when the detokenize symbol is
 *   not bound; otherwise the code `failureCode` derives from a negative
 *   return.
 */
detokenize({ ctx, tokens, removeSpecial, unparseSpecial, maxTextBytes }) {
  const detokenize = loadedLib.symbols.eliza_inference_detokenize;
  if (!textModalitiesSymbolsAvailable || typeof detokenize !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_detokenize is not exported by this build",
    );
  }
  const err = makeOutErr();
  const cap = maxTextBytes ?? 4096;
  const outText = new Uint8Array(cap);
  const rc = detokenize(
    ctx,
    ffi.ptr(tokens),
    BigInt(tokens.length),
    removeSpecial === true ? 1 : 0,
    unparseSpecial === true ? 1 : 0,
    ffi.ptr(outText),
    BigInt(cap),
    err.ptr,
  );
  if (rc < 0) {
    const message =
      takeError(err.buf) ??
      `[ffi-bindings] eliza_inference_detokenize rc=${rc}`;
    throw new VoiceLifecycleError(failureCode(rc), message);
  }
  const nul = outText.indexOf(0, 0);
  // No terminator means the buffer is full; never trust rc beyond `cap`.
  const len = nul >= 0 ? nul : Math.min(rc, cap);
  return Buffer.from(outText.buffer, outText.byteOffset, len).toString(
    "utf8",
  );
},
|
|
3122
|
+
|
|
3123
|
+
/* ---- End-of-turn scoring (ABI v11) ------------------------- */
|
|
3124
|
+
|
|
3125
|
+
/**
 * Report whether end-of-turn scoring is usable: the EOT symbols must be bound
 * and the library's probe must return 1.
 */
eotSupported(): boolean {
  const probe = loadedLib.symbols.eliza_inference_llm_eot_supported;
  if (!eotSymbolsAvailable) {
    return false;
  }
  if (typeof probe !== "function") {
    return false;
  }
  return probe() === 1;
},
|
|
3131
|
+
|
|
3132
|
+
/**
 * Call `eliza_inference_llm_eot_score` with `tokens` and `targetTokenId` and
 * return the three values it writes to its out parameters.
 *
 * @returns `targetProb`, `topToken` and `topProb` as written by the library;
 *   a slot that reads as undefined falls back to 0, -1 and 0 respectively.
 * @throws VoiceLifecycleError "kernel-missing" when the EOT symbol is not
 *   bound or `tokens` is empty; otherwise the code `failureCode` derives
 *   from a non-OK return.
 */
eotScore({ ctx, tokens, targetTokenId }) {
  const scoreFn = loadedLib.symbols.eliza_inference_llm_eot_score;
  if (!(eotSymbolsAvailable && typeof scoreFn === "function")) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_llm_eot_score is not exported by this build (pre-v11)",
    );
  }
  // Reject an empty sequence up front rather than calling into the library.
  if (tokens.length === 0) {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_llm_eot_score requires a non-empty token sequence",
    );
  }
  const outErr = makeOutErr();
  const targetProbSlot = new Float32Array(1);
  const topTokenSlot = new Int32Array(1);
  const topProbSlot = new Float32Array(1);
  const status = scoreFn(
    ctx,
    ffi.ptr(tokens),
    BigInt(tokens.length),
    targetTokenId,
    ffi.ptr(targetProbSlot),
    ffi.ptr(topTokenSlot),
    ffi.ptr(topProbSlot),
    outErr.ptr,
  );
  if (status !== ELIZA_OK) {
    const detail =
      takeError(outErr.buf) ??
      `[ffi-bindings] eliza_inference_llm_eot_score rc=${status}`;
    throw new VoiceLifecycleError(failureCode(status), detail);
  }
  const targetProb = targetProbSlot[0] ?? 0;
  const topToken = topTokenSlot[0] ?? -1;
  const topProb = topProbSlot[0] ?? 0;
  return { targetProb, topToken, topProb };
},
|
|
3172
|
+
|
|
3173
|
+
/* ---- Kokoro TTS (ABI v10) ---------------------------------- */
|
|
3174
|
+
|
|
3175
|
+
/**
 * Report whether Kokoro TTS is usable: the Kokoro symbols must be available,
 * the probe symbol must be callable, and the probe must return 1.
 */
kokoroSupported(): boolean {
  const supportProbe = loadedLib.symbols.eliza_inference_kokoro_supported;
  if (!kokoroSymbolsAvailable || typeof supportProbe !== "function") {
    return false;
  }
  return supportProbe() === 1;
},
|
|
3181
|
+
|
|
3182
|
+
/**
 * Load a Kokoro model onto `ctx` via `eliza_inference_kokoro_load`.
 *
 * Both paths are marshalled with `cstr`; `styleDim` defaults to 256 when
 * omitted.
 *
 * @throws VoiceLifecycleError with code `"kernel-missing"` when the symbol is
 *   unavailable, or with the code derived from the return code when the
 *   native call does not return `ELIZA_OK`.
 */
kokoroLoad({ ctx, ggufPath, voiceBinPath, styleDim }) {
  const nativeLoad = loadedLib.symbols.eliza_inference_kokoro_load;
  if (!kokoroSymbolsAvailable || typeof nativeLoad !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_kokoro_load is not exported by this build (pre-v10; Eliza-1 Kokoro engine not linked)",
    );
  }

  const errOut = makeOutErr();
  const modelPath = cstr(ggufPath);
  const voicePath = cstr(voiceBinPath);
  const status = nativeLoad(
    ctx,
    modelPath.ptr,
    voicePath.ptr,
    styleDim ?? 256,
    errOut.ptr,
  );
  if (status === ELIZA_OK) {
    return;
  }

  const reason =
    takeError(errOut.buf) ??
    `[ffi-bindings] eliza_inference_kokoro_load rc=${status}`;
  throw new VoiceLifecycleError(failureCode(status), reason);
},
|
|
3201
|
+
|
|
3202
|
+
/**
 * Synthesize `text` into PCM samples via `eliza_inference_kokoro_synthesize`.
 *
 * The native call writes into a `Float32Array` of `maxSamples` elements;
 * `speed` defaults to 1.0 when omitted.
 *
 * @returns A copy of the first `min(rc, maxSamples)` samples, where `rc` is
 *   the non-negative return code of the native call.
 * @throws VoiceLifecycleError with code `"kernel-missing"` when the symbol is
 *   unavailable, or with the code derived from a negative return code.
 */
kokoroSynthesize({ ctx, text, speed, maxSamples }) {
  const nativeSynth = loadedLib.symbols.eliza_inference_kokoro_synthesize;
  if (!kokoroSymbolsAvailable || typeof nativeSynth !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_kokoro_synthesize is not exported by this build",
    );
  }

  const errOut = makeOutErr();
  const utf8Text = cstr(text);
  const pcm = new Float32Array(maxSamples);
  const written = nativeSynth(
    ctx,
    utf8Text.ptr,
    BigInt(utf8Text.bytes),
    speed ?? 1.0,
    ffi.ptr(pcm),
    BigInt(maxSamples),
    errOut.ptr,
  );
  if (written < 0) {
    const reason =
      takeError(errOut.buf) ??
      `[ffi-bindings] eliza_inference_kokoro_synthesize rc=${written}`;
    throw new VoiceLifecycleError(failureCode(written), reason);
  }

  // Never hand back more than the buffer can hold.
  const sampleCount = Math.min(written, maxSamples);
  return pcm.slice(0, sampleCount);
},
|
|
3230
|
+
|
|
3231
|
+
/**
 * Query `eliza_inference_kokoro_sample_rate` for `ctx`.
 *
 * @returns The value returned by the native call when it is not negative.
 * @throws VoiceLifecycleError with code `"kernel-missing"` when the symbol is
 *   unavailable, or with the code derived from a negative return code.
 */
kokoroSampleRate(ctx): number {
  const nativeRate = loadedLib.symbols.eliza_inference_kokoro_sample_rate;
  if (!kokoroSymbolsAvailable || typeof nativeRate !== "function") {
    throw new VoiceLifecycleError(
      "kernel-missing",
      "[ffi-bindings] eliza_inference_kokoro_sample_rate is not exported by this build",
    );
  }

  const sampleRate = nativeRate(ctx);
  if (!(sampleRate < 0)) {
    return sampleRate;
  }
  throw new VoiceLifecycleError(
    failureCode(sampleRate),
    `[ffi-bindings] eliza_inference_kokoro_sample_rate rc=${sampleRate} (no Kokoro model loaded on this ctx)`,
  );
},
|
|
3248
|
+
|
|
3249
|
+
/** Close the loaded native library by delegating to its own `close()`. */
close(): void {
  loadedLib.close();
},
|
|
3252
|
+
};
|
|
3253
|
+
|
|
3254
|
+
/**
 * Normalise a Bun-FFI pointer value (typed `unknown` by the lazy types)
 * into the bigint stored in the `const char *` slots of the marshalled
 * config struct. `llmStreamOpen` uses this to inline cstr pointers into
 * the config buffer.
 *
 * - `null` / `undefined` (a NULL pointer) become `0n`.
 * - A bigint is returned unchanged.
 * - Anything else, including plain numbers and Bun's pointer objects, is
 *   passed through `BigInt(...)`.
 */
function toPtrBigInt(value: unknown): bigint {
  if (typeof value === "bigint") {
    return value;
  }
  if (value == null) {
    return 0n;
  }
  // Numbers and Bun's internal pointer object both coerce through BigInt().
  return BigInt(value as number);
}
|
|
3267
|
+
|
|
3268
|
+
/**
 * Shared body for `asr_stream_partial` / `asr_stream_finish` — both have
 * the same 6-arg shape (`stream, out_text, max_text_bytes, out_tokens,
 * io_n_tokens, out_error`).
 *
 * Text is written into a zero-initialised buffer of `maxTextBytes` bytes
 * (4096 when omitted) and decoded up to the first NUL byte; when the buffer
 * holds no NUL the return code is used as the byte length, clamped to the
 * buffer capacity. Token ids are read only when the caller asks for them
 * (`maxTokens > 0`); otherwise the out_tokens / io_n_tokens pointers are
 * NULL and the result has no `tokens` field.
 *
 * @param label Suffix used in the fallback error message
 *   (`eliza_inference_asr_stream_${label}`).
 * @param fn The native function to invoke.
 * @param args Stream handle plus optional text / token capacities.
 * @returns The decoded text and, when requested, at most `maxTokens` ids.
 * @throws VoiceLifecycleError with the code derived from a negative return
 *   code.
 */
function readAsrStreamResult(
  label: string,
  fn: (
    stream: bigint,
    outText: unknown,
    maxTextBytes: bigint | number,
    outTokens: unknown,
    ioNTokens: unknown,
    outErr: unknown,
  ) => number,
  args: { stream: bigint; maxTextBytes?: number; maxTokens?: number },
): { partial: string; tokens?: number[] } {
  const err = makeOutErr();
  const textCap = args.maxTextBytes ?? 4096;
  const outText = new Uint8Array(textCap);
  const requestedTokens = args.maxTokens ?? 0;
  const wantTokens = requestedTokens > 0;
  const tokenCap = wantTokens ? requestedTokens : 0;
  const outTokens = wantTokens ? new Int32Array(tokenCap) : null;
  // io_n_tokens is in/out: capacity going in, count coming back.
  const ioNTokens = wantTokens
    ? new BigUint64Array([BigInt(tokenCap)])
    : null;
  const rc = fn(
    args.stream,
    ffi.ptr(outText),
    BigInt(textCap),
    outTokens ? ffi.ptr(outTokens) : null,
    ioNTokens ? ffi.ptr(ioNTokens) : null,
    err.ptr,
  );
  if (rc < 0) {
    const message =
      takeError(err.buf) ??
      `[ffi-bindings] eliza_inference_asr_stream_${label} rc=${rc}`;
    throw new VoiceLifecycleError(failureCode(rc), message);
  }
  const nul = outText.indexOf(0);
  // No NUL means the buffer is completely filled; clamp rc the same way the
  // token count below is clamped, or Buffer.from would throw a RangeError.
  const len = nul >= 0 ? nul : Math.min(rc, textCap);
  const partial = Buffer.from(
    outText.buffer,
    outText.byteOffset,
    len,
  ).toString("utf8");
  if (outTokens && ioNTokens) {
    const n = Number(ioNTokens[0] ?? 0n);
    const tokens = Array.from(outTokens.subarray(0, Math.min(n, tokenCap)));
    return { partial, tokens };
  }
  return { partial };
}
|
|
3324
|
+
}
|
|
3325
|
+
|
|
3326
|
+
/**
 * Turn an arbitrary thrown value into a printable message: the `message` of
 * an `Error` instance, otherwise the value's `String(...)` conversion.
 */
function formatFfiError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
|
|
3332
|
+
|
|
3333
|
+
/**
 * Decode an `EliVerifierEvent` (see `ffi.h`) from a C struct pointer.
 * Layout on 64-bit (8-byte aligned, default packing):
 *   off  0 : const int* accepted_token_ids  (8)
 *   off  8 : size_t     n_accepted          (8)
 *   off 16 : int        rejected_from       (4)
 *   off 20 : int        rejected_to         (4)
 *   off 24 : const int* corrected_token_ids (8)
 *   off 32 : size_t     n_corrected         (8)
 *
 * Both token-id arrays are converted with `readInt32Array`.
 */
function readVerifierEvent(
  evPtr: bigint,
  ffi: BunFfiModule,
): NativeVerifierEvent {
  const reader = ffi.read;

  // Fields are read in struct order.
  const acceptedIdsPtr = reader.ptr(evPtr, 0);
  const acceptedCount = Number(reader.u64(evPtr, 8));
  const rejectedFrom = reader.i32(evPtr, 16);
  const rejectedTo = reader.i32(evPtr, 20);
  const correctedIdsPtr = reader.ptr(evPtr, 24);
  const correctedCount = Number(reader.u64(evPtr, 32));

  const acceptedTokenIds = readInt32Array(acceptedIdsPtr, acceptedCount, ffi);
  const correctedTokenIds = readInt32Array(
    correctedIdsPtr,
    correctedCount,
    ffi,
  );
  return { acceptedTokenIds, rejectedFrom, rejectedTo, correctedTokenIds };
}
|
|
3360
|
+
|
|
3361
|
+
/**
 * Copy `count` 32-bit integers starting at `ptr` into a plain JS array.
 *
 * Returns `[]` for a NULL pointer or a count that is not a positive number.
 * The pointer is declared as `bigint`, but FFI pointer values may reach this
 * function as plain numbers at runtime (`toPtrBigInt` in this file handles
 * the same ambiguity), so NULL is detected with a falsy check that covers
 * `0n`, `0`, `null` and `undefined` instead of only `=== 0n`.
 *
 * @param ptr Address of the first element.
 * @param count Number of 32-bit elements to read.
 * @param ffi FFI module providing `toArrayBuffer`.
 * @returns A fresh array holding the copied values.
 */
function readInt32Array(
  ptr: bigint,
  count: number,
  ffi: BunFfiModule,
): number[] {
  // `!(count > 0)` also rejects NaN, which `count <= 0` would let through.
  if (!ptr || !(count > 0)) return [];
  // Copy out — the array is the library's, valid only for the callback.
  const copied = ffi.toArrayBuffer(ptr, 0, count * 4).slice(0);
  return Array.from(new Int32Array(copied));
}
|
|
3371
|
+
|
|
3372
|
+
/**
 * Decode a `T.cstring` return value into a JS string so callers never have
 * to branch on the representation (Bun returns either a lazy string-like
 * object with `toString()` or a JS string depending on version).
 *
 * - strings are returned as-is;
 * - `null` / `undefined` become `""`;
 * - objects exposing `toString` are converted through it;
 * - numbers and bigints are wrapped in `ffi.CString` and converted;
 * - anything else goes through `String(...)`.
 */
function readCString(value: unknown, ffi: BunFfiModule): string {
  switch (typeof value) {
    case "string":
      return value;
    case "undefined":
      return "";
    case "number":
    case "bigint":
      // Treated as a raw pointer to a C string.
      return new ffi.CString(value).toString();
    case "object":
      if (value === null) {
        return "";
      }
      if ("toString" in value) {
        return (value as { toString(): string }).toString();
      }
      return String(value);
    default:
      return String(value);
  }
}
|