@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process streaming-LLM runner.
|
|
3
|
+
*
|
|
4
|
+
* Drives the FFI streaming-LLM ABI declared in `ffi-streaming-llm.h`. The
|
|
5
|
+
* token-by-token loop hands `onTextChunk` accepted chunks and surfaces
|
|
6
|
+
* verifier events from native MTP.
|
|
7
|
+
*
|
|
8
|
+
* This file deliberately does not own the FFI context or the binding
|
|
9
|
+
* itself. It takes a narrow `LlmStreamingBinding` (see
|
|
10
|
+
* `services/llm-streaming-binding.ts`) + an opaque `LlmCtxHandle` as
|
|
11
|
+
* constructor arguments — that way it can be driven by libelizainference
|
|
12
|
+
* (via `wrapElizaInferenceFfi`) or any desktop libllama shim adapter without
|
|
13
|
+
* dragging in TTS/ASR surfaces. A single context can host concurrent generation
|
|
14
|
+
* sessions (one per pinned slot); the runner serializes each slot with
|
|
15
|
+
* `slotInFlight`.
|
|
16
|
+
*
|
|
17
|
+
* Single-flight: a lock map keyed by slot id; slot id `-1` is not locked. Two concurrent generates
|
|
18
|
+
* against the same pinned slot would interleave KV cache state, so the
|
|
19
|
+
* runner serializes them at the JS layer.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { performance } from "node:perf_hooks";
|
|
23
|
+
|
|
24
|
+
import type {
|
|
25
|
+
LlmCtxHandle,
|
|
26
|
+
LlmStreamingBinding,
|
|
27
|
+
} from "./llm-streaming-binding";
|
|
28
|
+
import type { LlmStreamHandle, LlmStreamStep } from "./voice/ffi-bindings";
|
|
29
|
+
import type { TextToken, VerifierStreamEvent } from "./voice/types";
|
|
30
|
+
|
|
31
|
+
/**
 * Arguments for one generation driven by `FfiStreamingRunner`
 * (`generateWithUsage` / `generateStream`).
 */
export interface FfiStreamingGenerateArgs {
  /** Pre-tokenized prompt — the runner does not detokenize. */
  promptTokens: Int32Array;
  /**
   * Pinned slot id; -1 disables pinning (any free slot). The runner treats
   * any negative value as unpinned and skips its per-slot lock for it.
   */
  slotId: number;
  /** Optional prompt cache key used to derive a slot when `slotId === -1`. */
  cacheKey?: string;
  /** Forwarded as `maxTokens` in the `llmStreamOpen` session config. */
  maxTokens: number;
  /** Forwarded as `temperature` in the `llmStreamOpen` session config. */
  temperature: number;
  // NOTE(review): topP / topK / repeatPenalty are presumably sampling
  // parameters forwarded into the native session config like `temperature`
  // — confirm in `runGenerateInner`.
  topP: number;
  topK: number;
  repeatPenalty: number;
  // NOTE(review): draftMin / draftMax presumably bound the MTP draft length
  // per step — confirm against the native session config.
  draftMin: number;
  draftMax: number;
  /** Reserved for separate draft-model speculation; null for Eliza-1 MTP. */
  draftModelPath: string | null;
  /**
   * Per-load GPU offload (ABI v8). Forwarded into the native session config
   * on `llmStreamOpen`. The fused libelizainference path loads the text model
   * once per ctx, so the FIRST session's value wins; later sessions reuse the
   * resident model. `undefined` selects the runtime default (all layers).
   * The desktop libllama path already applies gpuLayers at `loadModel()`, so
   * it ignores this field — it is load-time config, threaded here only so the
   * fused runner can mirror the libllama load decision.
   */
  gpuLayers?: number;
  /**
   * KV-cache K/V quant type names (ABI v8), e.g. "qjl1_256" / "q4_polar".
   * Same load-time semantics as `gpuLayers`: forwarded into the fused
   * session config so the first `llmStreamOpen` applies the quantized cache.
   */
  cacheTypeK?: string | null;
  cacheTypeV?: string | null;
  /**
   * Runtime context window in tokens (ABI v9). Forwarded into the fused
   * session config on `llmStreamOpen`; `undefined` keeps the native
   * ELIZA_LLM_N_CTX/default fallback.
   */
  contextSize?: number;
  /**
   * GBNF grammar source forcing the structured-reply envelope. Passed to
   * the native session's `llmStreamOpen` config so sampling is
   * grammar-constrained. `null` disables the constraint (free generation).
   */
  gbnfGrammar?: string | null;
  /** Cancellation signal — fires `llmStreamCancel` on the active session. */
  signal?: AbortSignal;
  /**
   * Per-step token cap for the native decode loop. Lower values make the
   * local UI stream in finer-grained jumps (smoother token-by-token render)
   * at the cost of more JS↔FFI round-trips per reply; higher values batch
   * more tokens per step. When omitted, falls back to
   * `resolveMaxTokensPerStep()` (env `ELIZA_LOCAL_STREAM_TOKENS_PER_STEP`,
   * else `DEFAULT_MAX_TOKENS_PER_STEP`). Clamped to
   * `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
   */
  maxTokensPerStep?: number;
  /** Per-chunk text callback. */
  onTextChunk?: (chunk: string) => void | Promise<void>;
  /** Speculative accept/reject events from MTP verification. */
  onVerifierEvent?: (event: VerifierStreamEvent) => void | Promise<void>;
}
|
|
93
|
+
|
|
94
|
+
/** Aggregate outcome of one `FfiStreamingRunner.generateWithUsage` call. */
export interface FfiStreamingGenerateResult {
  /** Concatenation of the `text` of every step, in arrival order. */
  text: string;
  /** The `slotId` the caller passed in, echoed back unchanged. */
  slotId: number;
  /**
   * Milliseconds from the start of the run to the first step carrying
   * non-empty text; `null` when no step produced any text.
   */
  firstTokenMs: number | null;
  /** Sum of `drafterDrafted` over all steps. */
  drafted: number;
  /** Sum of `drafterAccepted` over all steps. */
  accepted: number;
}
|
|
101
|
+
|
|
102
|
+
/** Default per-step caps. Match upstream llama-server's `n_predict` chunk size. */
const DEFAULT_MAX_TOKENS_PER_STEP = 32;
const DEFAULT_MAX_TEXT_BYTES = 1024;
/**
 * Bounds for the per-step token cap. A floor of 1 means true
 * token-by-token stepping; the ceiling rejects pathological values that
 * would emit the whole reply in a single step and so defeat streaming.
 */
const MIN_MAX_TOKENS_PER_STEP = 1;
const MAX_MAX_TOKENS_PER_STEP = 512;

/**
 * Force a caller-supplied per-step cap into the supported range.
 *
 * Non-finite input (NaN, ±Infinity) yields `DEFAULT_MAX_TOKENS_PER_STEP`.
 * Finite input is truncated toward zero and then pinned to
 * `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
 */
function clampMaxTokensPerStep(value: number): number {
  if (Number.isFinite(value)) {
    const whole = Math.trunc(value);
    if (whole < MIN_MAX_TOKENS_PER_STEP) {
      return MIN_MAX_TOKENS_PER_STEP;
    }
    if (whole > MAX_MAX_TOKENS_PER_STEP) {
      return MAX_MAX_TOKENS_PER_STEP;
    }
    return whole;
  }
  return DEFAULT_MAX_TOKENS_PER_STEP;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Pick the per-step token cap for the native decode loop.
 *
 * Reads the `ELIZA_LOCAL_STREAM_TOKENS_PER_STEP` env var (e.g. `8` for
 * smoother local streaming, traded against extra JS↔FFI round-trips and the
 * shared voice phrase-chunker). An unset, blank, or unparsable value yields
 * `DEFAULT_MAX_TOKENS_PER_STEP` (32); anything else is parsed as a base-10
 * integer and clamped to `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
 */
export function resolveMaxTokensPerStep(): number {
  const override = process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP?.trim();
  if (override === undefined || override === "") {
    return DEFAULT_MAX_TOKENS_PER_STEP;
  }
  const requested = Number.parseInt(override, 10);
  return Number.isFinite(requested)
    ? clampMaxTokensPerStep(requested)
    : DEFAULT_MAX_TOKENS_PER_STEP;
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Backend used by the mobile and desktop FFI routes.
|
|
139
|
+
*/
|
|
140
|
+
export class FfiStreamingRunner {
|
|
141
|
+
private readonly slotInFlight = new Map<number, Promise<void>>();
|
|
142
|
+
|
|
143
|
+
/**
 * Constructor takes the narrow `LlmStreamingBinding` (see
 * `services/llm-streaming-binding.ts`) so both libelizainference (via
 * `wrapElizaInferenceFfi`) and desktop libllama adapters can
 * satisfy it. The runner never touches TTS/ASR/mmap surfaces.
 *
 * @param ffi Binding whose `llmStream*` entry points this runner calls.
 * @param ctx Opaque context handle passed as `ctx` to `llmStreamOpen`.
 */
constructor(
  private readonly ffi: LlmStreamingBinding,
  private readonly ctx: LlmCtxHandle,
) {}
|
|
153
|
+
|
|
154
|
+
/**
 * Run one generation to completion and resolve with its aggregate result.
 * Mirrors `MtpLlamaServer.generateWithUsage()`: same single-flight rule,
 * same callback shape, same result block minus the metrics scrape (FFI
 * does not have a `/metrics` endpoint).
 *
 * Requests with a negative `slotId` run immediately. Requests for a pinned
 * slot are chained behind whatever is already registered for that slot in
 * `slotInFlight`, and start regardless of how the predecessor settled.
 *
 * @param args Generation arguments; `args.slotId` selects the lock.
 * @returns The result produced by `runGenerate`.
 */
async generateWithUsage(
  args: FfiStreamingGenerateArgs,
): Promise<FfiStreamingGenerateResult> {
  const { slotId } = args;
  if (slotId < 0) {
    // Unpinned: no per-slot ordering to enforce.
    return this.runGenerate(args);
  }

  const ignore = () => {};
  const predecessor = this.slotInFlight.get(slotId) ?? Promise.resolve();
  const generation = predecessor
    .catch(ignore)
    .then(() => this.runGenerate(args));

  // The map holds a never-rejecting marker so later waiters are not poisoned
  // by this generation's failure; the caller still sees it via `generation`.
  const marker = generation.then(ignore, ignore);
  this.slotInFlight.set(slotId, marker);

  try {
    return await generation;
  } finally {
    // Only clear the entry if nobody queued behind us in the meantime.
    if (this.slotInFlight.get(slotId) === marker) {
      this.slotInFlight.delete(slotId);
    }
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * Async-iterable variant. Yields each accepted-token batch as it lands
 * so callers that want token-grained control (e.g. the voice scheduler
 * driving phrase-chunking off accept/reject events) don't have to
 * register a callback.
 *
 * The single-flight rule applies: for a pinned slot (non-negative
 * `args.slotId`) the native run starts only after the generation currently
 * registered for that slot in `slotInFlight` has settled, and this stream
 * registers itself there so later callers queue behind it. Unpinned
 * requests start immediately.
 *
 * Iteration ends after the step flagged `done`, or when the run finishes.
 * If the run fails, already-queued steps are yielded first and the error is
 * then thrown (non-`Error` values are wrapped in `Error`). Leaving the loop
 * early still waits for the underlying run to finish before the iterator
 * completes; pass `args.signal` to cut a run short.
 *
 * @param args Generation arguments, forwarded to `runGenerateInner`.
 * @returns An async iterable of `LlmStreamStep` values in arrival order.
 */
async *generateStream(
  args: FfiStreamingGenerateArgs,
): AsyncIterable<LlmStreamStep> {
  // Queue accumulates steps the inner callback produces; the iterator
  // drains it. A plain array + resolver is enough for a single consumer.
  const queue: LlmStreamStep[] = [];
  let resume: (() => void) | null = null;
  let finished = false;
  let failure: Error | null = null;

  const wakeConsumer = () => {
    const wake = resume;
    resume = null;
    if (wake) wake();
  };

  const onStep = (step: LlmStreamStep) => {
    queue.push(step);
    wakeConsumer();
  };

  const slotId = args.slotId;
  // Same pinned/unpinned split as `generateWithUsage` (`slotId < 0` is unpinned).
  const pinned = !(slotId < 0);
  const prior = pinned ? this.slotInFlight.get(slotId) : undefined;
  let tail: Promise<void> | undefined;

  // `work` never rejects: failures are captured in `failure`, which lets it
  // double as the slot's lock tail without poisoning later waiters.
  const work = (async () => {
    try {
      if (prior !== undefined) {
        // Wait for the current holder of the slot, whatever its outcome.
        await prior.catch(() => {});
      }
      await this.runGenerateInner(args, onStep);
    } catch (err) {
      failure = err instanceof Error ? err : new Error(String(err));
    } finally {
      // Release the slot here rather than in the consumer's `finally`, so an
      // abandoned iterator does not leave a stale entry behind.
      if (tail !== undefined && this.slotInFlight.get(slotId) === tail) {
        this.slotInFlight.delete(slotId);
      }
      finished = true;
      wakeConsumer();
    }
  })();

  if (pinned) {
    tail = work;
    this.slotInFlight.set(slotId, tail);
  }

  try {
    while (true) {
      if (queue.length > 0) {
        const next = queue.shift();
        if (next === undefined) continue;
        yield next;
        if (next.done) return;
        continue;
      }
      if (failure) throw failure;
      if (finished) return;
      await new Promise<void>((resolve) => {
        resume = resolve;
      });
    }
  } finally {
    await work;
  }
}
|
|
242
|
+
|
|
243
|
+
/**
 * Persist a streaming slot's KV state to `filename` via the binding's
 * `llmStreamSaveSlot`. Intended for use between turns: a mid-stream call is
 * racy and the FFI side is allowed to refuse it. Exposed so the
 * conversation registry can persist across mobile backgrounding the same
 * way `MtpLlamaServer.persistSlot` does.
 *
 * @param stream Handle of the session whose slot is saved.
 * @param filename Destination file for the saved state.
 * @throws Error when the binding does not provide `llmStreamSaveSlot`.
 */
saveSlot(stream: LlmStreamHandle, filename: string): void {
  const { ffi } = this;
  if (ffi.llmStreamSaveSlot !== undefined) {
    ffi.llmStreamSaveSlot({ stream, filename });
    return;
  }
  throw new Error(
    "[ffi-streaming-runner] llmStreamSaveSlot is not exported by this build",
  );
}
|
|
257
|
+
|
|
258
|
+
/**
 * Load a previously-saved slot KV file into a fresh session via the
 * binding's `llmStreamRestoreSlot`.
 *
 * @param stream Handle of the session that receives the restored state.
 * @param filename File previously written by a slot save.
 * @throws Error when the binding does not provide `llmStreamRestoreSlot`.
 */
restoreSlot(stream: LlmStreamHandle, filename: string): void {
  const { ffi } = this;
  if (ffi.llmStreamRestoreSlot !== undefined) {
    ffi.llmStreamRestoreSlot({ stream, filename });
    return;
  }
  throw new Error(
    "[ffi-streaming-runner] llmStreamRestoreSlot is not exported by this build",
  );
}
|
|
267
|
+
|
|
268
|
+
/* ----- internals -------------------------------------------------- */
|
|
269
|
+
|
|
270
|
+
/**
 * Run one generation to completion and return the aggregated result.
 *
 * Delegates the actual streaming to `runGenerateInner` and folds every step
 * into: the concatenated text, the summed drafter drafted/accepted counters,
 * and the latency (ms since this method started) of the first step that
 * carried non-empty text. `firstTokenMs` stays `null` when no step produced
 * text.
 */
private async runGenerate(
  args: FfiStreamingGenerateArgs,
): Promise<FfiStreamingGenerateResult> {
  const pieces: string[] = [];
  const drafter = { drafted: 0, accepted: 0 };
  let firstTokenMs: number | null = null;
  const t0 = performance.now();

  const collect = (step: LlmStreamStep): void => {
    // Only the first step with visible text stamps the first-token latency.
    if (step.text.length > 0 && firstTokenMs === null) {
      firstTokenMs = performance.now() - t0;
    }
    pieces.push(step.text);
    drafter.drafted += step.drafterDrafted;
    drafter.accepted += step.drafterAccepted;
  };

  await this.runGenerateInner(args, collect);

  return {
    text: pieces.join(""),
    slotId: args.slotId,
    firstTokenMs,
    drafted: drafter.drafted,
    accepted: drafter.accepted,
  };
}
|
|
296
|
+
|
|
297
|
+
/**
 * Shared inner loop. Opens the session, runs the prefill + next pump,
 * forwards each step through `onStep` plus the optional caller
 * callbacks, and wires abort + cancel.
 *
 * Everything that happens after `llmStreamOpen` succeeds runs inside a
 * single `try/finally`, so the native stream is closed on every exit path,
 * including a pre-aborted signal and a cancel call that itself throws.
 *
 * @param args Generation arguments: sampling config, prompt tokens, optional
 *   abort signal and optional `onTextChunk` / `onVerifierEvent` callbacks.
 * @param onStep Invoked synchronously with every step returned by
 *   `llmStreamNext`, before the optional caller callbacks.
 * @throws Error when the required streaming symbols are missing from the
 *   loaded library, or when the signal is aborted before or during the run.
 */
private async runGenerateInner(
  args: FfiStreamingGenerateArgs,
  onStep: (step: LlmStreamStep) => void,
): Promise<void> {
  if (
    this.ffi.llmStreamOpen === undefined ||
    this.ffi.llmStreamPrefill === undefined ||
    this.ffi.llmStreamNext === undefined ||
    this.ffi.llmStreamClose === undefined
  ) {
    throw new Error(
      "[ffi-streaming-runner] libelizainference is missing streaming-LLM symbols. " +
        "Rebuild against the current eliza-inference-ffi.h.",
    );
  }

  const stream = this.ffi.llmStreamOpen({
    ctx: this.ctx,
    config: {
      maxTokens: args.maxTokens,
      temperature: args.temperature,
      topP: args.topP,
      topK: args.topK,
      repeatPenalty: args.repeatPenalty,
      slotId: args.slotId,
      promptCacheKey: args.cacheKey ?? null,
      draftMin: args.draftMin,
      draftMax: args.draftMax,
      draftModelPath: args.draftModelPath,
      gbnfGrammar: args.gbnfGrammar ?? null,
      gpuLayers: args.gpuLayers,
      cacheTypeK: args.cacheTypeK,
      cacheTypeV: args.cacheTypeV,
      contextSize: args.contextSize,
    },
  });

  let abortListener: (() => void) | null = null;

  try {
    if (args.signal) {
      if (args.signal.aborted) {
        // Cancel first, then let the `finally` below close the stream. If
        // the cancel call throws, the stream is still released.
        this.ffi.llmStreamCancel?.(stream);
        throw new Error("[ffi-streaming-runner] aborted before start");
      }
      abortListener = () => {
        this.ffi.llmStreamCancel?.(stream);
      };
      args.signal.addEventListener("abort", abortListener, { once: true });
    }

    this.ffi.llmStreamPrefill({ stream, tokens: args.promptTokens });
    if (args.maxTokens <= 0) {
      // Prefill-only request: nothing to decode.
      return;
    }

    const maxTokensPerStep =
      args.maxTokensPerStep !== undefined
        ? clampMaxTokensPerStep(args.maxTokensPerStep)
        : resolveMaxTokensPerStep();

    let tokenIndex = 0;
    while (true) {
      // Re-check between steps: the abort may have fired while a caller
      // callback was being awaited.
      if (args.signal?.aborted) {
        this.ffi.llmStreamCancel?.(stream);
        throw new Error("[ffi-streaming-runner] aborted");
      }
      const step = this.ffi.llmStreamNext({
        stream,
        maxTokensPerStep,
        maxTextBytes: DEFAULT_MAX_TEXT_BYTES,
      });
      onStep(step);

      if (args.onTextChunk && step.text.length > 0) {
        await args.onTextChunk(step.text);
      }
      if (args.onVerifierEvent) {
        // The step's text is attached to the first token only; the remaining
        // tokens of the batch carry an empty string.
        const tokens: TextToken[] = step.tokens.map((id, i) => ({
          index: tokenIndex + i,
          text: i === 0 ? step.text : "",
          id,
        }));
        // The FFI ABI commits accepted tokens per step (the drafter
        // accept/reject decomposition is delivered through the
        // separate `setVerifierCallback` channel — see ffi.h §v2).
        // Surface the batched accept here so HTTP-path callers see a
        // matching event shape.
        if (tokens.length > 0) {
          await args.onVerifierEvent({
            kind: "accept",
            tokens,
          });
        }
      }
      tokenIndex += step.tokens.length;
      if (step.done) break;
    }
  } finally {
    // Detach the listener before closing so a late abort cannot cancel a
    // stream handle that has already been closed.
    if (abortListener && args.signal) {
      args.signal.removeEventListener("abort", abortListener);
    }
    this.ffi.llmStreamClose(stream);
  }
}
|
|
407
|
+
}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { fileURLToPath } from "node:url";
|
|
2
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
3
|
+
import type { BackendPlan } from "./backend";
|
|
4
|
+
import {
|
|
5
|
+
type FfiBackendRuntime,
|
|
6
|
+
type FfiBackendSession,
|
|
7
|
+
FfiStreamingBackend,
|
|
8
|
+
} from "./ffi-streaming-backend";
|
|
9
|
+
|
|
10
|
+
// Spy created through vi.hoisted so that both the (hoisted) mock factory and
// the test body can reference it; the test asserts the fused lib's native free
// was attempted exactly once. Every call throws.
const ffiCloseMock = vi.hoisted(() => {
  const throwingClose = (): never => {
    throw new Error("ov_free segfault surrogate");
  };
  return vi.fn(throwingClose);
});
|
|
17
|
+
|
|
18
|
+
// Stub out the fused-lib FFI loader so that importing the runtime never pulls
// in bun:ffi or dlopens a native library. `loadElizaInferenceFfi` is the only
// value the desktop fused runtime imports from the bindings module; the fake
// exposes the v9 surface the runtime touches during acquire()/release().
vi.mock("./voice/ffi-bindings", () => {
  // A fresh fake is built on every loader call; only `close` is the shared,
  // always-throwing spy.
  const buildFakeFfi = () => ({
    create: () => 1n,
    destroy: vi.fn(),
    close: ffiCloseMock,
    tokenizeSupported: () => true,
    tokenize: () => new Int32Array(),
    llmStreamSupported: () => true,
    llmStreamOpen: () => 0n,
    llmStreamPrefill: () => 0,
    llmStreamNext: () => 0,
    llmStreamCancel: () => 0,
    llmStreamClose: () => undefined,
  });

  return {
    loadElizaInferenceFfi: vi.fn(buildFakeFfi),
  };
});
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Tests for #14: unload() must await the native release BEFORE nulling the
|
|
40
|
+
* session refs, otherwise a throwing release leaves the backend wedged —
|
|
41
|
+
* session === null while the runtime still holds a live session, so the next
|
|
42
|
+
* load() skips unload(), calls acquire(), and acquire()'s live-session guard
|
|
43
|
+
* throws forever.
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
// Deliberately partial plan: only `modelPath` is populated, the rest of the
// BackendPlan shape is bypassed with a double cast.
const PLAN = { modelPath: "/fake/model.gguf" } as unknown as BackendPlan;
|
|
49
|
+
|
|
50
|
+
/**
 * Build a placeholder session: empty stand-ins for the native handles, a
 * tokenizer that always yields an empty Int32Array, and no draft/mmproj/MTP
 * configuration.
 */
function fakeSession(): FfiBackendSession {
  const emptyTokens = () => new Int32Array();
  const session: FfiBackendSession = {
    binding: {} as never,
    ctx: {} as never,
    runner: {} as never,
    tokenize: emptyTokens,
    mtp: null,
    draftModelPath: null,
    mmprojPath: null,
  };
  return session;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Small in-memory runtime reproducing the acquire/release live-session guard
 * of the real runtime: acquire() rejects while a session is live (exactly
 * like DesktopFusedFfiBackendRuntime), and release() can be switched to throw
 * in order to simulate a native bun:ffi free rejecting.
 */
class GuardedRuntime implements FfiBackendRuntime {
  private active = false;
  /** When true, release() throws and leaves the session marked live. */
  releaseShouldThrow = false;
  /** Number of release() invocations, including the ones that threw. */
  releaseCalls = 0;

  supported(): boolean {
    return true;
  }

  async acquire(): Promise<FfiBackendSession> {
    if (!this.active) {
      this.active = true;
      return fakeSession();
    }
    throw new Error("acquire() called with a live session; release() first");
  }

  async release(): Promise<void> {
    this.releaseCalls += 1;
    if (!this.releaseShouldThrow) {
      this.active = false;
      return;
    }
    // Models the throw-before-clear case: a real release that throws mid-free
    // leaves `active` set (the runtime's own finally is what clears it), so
    // the session is still considered live here.
    throw new Error("native free rejected");
  }
}
|
|
96
|
+
|
|
97
|
+
describe("FfiStreamingBackend.unload() ordering (#14)", () => {
  it("nulls session refs even when release() throws", async () => {
    const guarded = new GuardedRuntime();
    const sut = new FfiStreamingBackend(guarded);
    await sut.load(PLAN);
    expect(sut.hasLoadedModel()).toBe(true);

    // Arm the failure only after a successful load.
    guarded.releaseShouldThrow = true;
    await expect(sut.unload()).rejects.toThrow("native free rejected");

    // Despite the rejection, unload() is expected to have dropped its own
    // refs so the backend does not report a phantom loaded model.
    expect(sut.hasLoadedModel()).toBe(false);
    expect(sut.currentModelPath()).toBeNull();
  });

  it("awaits release before nulling refs (release observed first)", async () => {
    const events: string[] = [];
    const release = vi.fn(async () => {
      events.push("release");
    });
    const runtime: FfiBackendRuntime = {
      supported: () => true,
      acquire: async () => fakeSession(),
      release,
    };

    const sut = new FfiStreamingBackend(runtime);
    await sut.load(PLAN);
    await sut.unload();

    // hasLoadedModel reads session, which is nulled only after release.
    const stateAfterUnload = sut.hasLoadedModel() ? "still-loaded" : "cleared";
    events.push(stateAfterUnload);
    expect(events).toEqual(["release", "cleared"]);
  });
});
|
|
130
|
+
|
|
131
|
+
describe("DesktopFusedFfiBackendRuntime.release() ordering (#14)", () => {
  // Value of ELIZA_INFERENCE_LIBRARY before each test, so the suite leaves the
  // process environment exactly as it found it.
  let previousLibraryPath: string | undefined;

  beforeEach(() => {
    previousLibraryPath = process.env.ELIZA_INFERENCE_LIBRARY;
    // resolveFusedLibraryPath() returns the first existing candidate; point it
    // at a real file so acquire() resolves a lib path (the FFI loader itself is
    // mocked, so the path's contents are irrelevant).
    process.env.ELIZA_INFERENCE_LIBRARY = fileURLToPath(import.meta.url);
    ffiCloseMock.mockClear();
  });

  afterEach(() => {
    // Assigning `undefined` to a process.env key stores the string
    // "undefined" in Node, which would leave the variable set for every later
    // test. Remove the key instead, or put back whatever was there before.
    if (previousLibraryPath === undefined) {
      Reflect.deleteProperty(process.env, "ELIZA_INFERENCE_LIBRARY");
    } else {
      process.env.ELIZA_INFERENCE_LIBRARY = previousLibraryPath;
    }
  });

  it("clears the active session even when the fused close() throws", async () => {
    const { DesktopFusedFfiBackendRuntime } = await import(
      "./desktop-fused-ffi-backend-runtime"
    );
    const runtime = new DesktopFusedFfiBackendRuntime();
    await runtime.acquire(PLAN);

    // close() throws, but release() must still clear `active` via its finally.
    await expect(runtime.release()).rejects.toThrow(
      "ov_free segfault surrogate",
    );
    expect(ffiCloseMock).toHaveBeenCalledTimes(1);

    // The runtime is not hidden-wedged on the old live-session guard, but it
    // is explicitly poisoned so a new native model is not allocated over a
    // failed cleanup state.
    await expect(runtime.acquire(PLAN)).rejects.toThrow(/restart required/i);
    // Heavy path (dynamic import + FFI acquire/release/acquire): fast in
    // isolation but CPU-starved under the full 2122-test parallel suite, where
    // it brushed the old 20s ceiling (20012ms). Headroom; a true hang still
    // fails well within this bound.
  }, 45_000);
});
|