@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -119,7 +119,7 @@ export interface CreateVisionCapabilityRegistrationOptions {
|
|
|
119
119
|
*/
|
|
120
120
|
arbiterCache?: VisionEmbeddingCacheLike;
|
|
121
121
|
loader: VisionDescribeBackendLoader;
|
|
122
|
-
/** Default model family for the cache key. Defaults to `
|
|
122
|
+
/** Default model family for the cache key. Defaults to `gemma-vl`. */
|
|
123
123
|
modelFamily?: string;
|
|
124
124
|
estimatedMb?: number;
|
|
125
125
|
}
|
|
@@ -138,7 +138,7 @@ export function createVisionCapabilityRegistration(
|
|
|
138
138
|
VisionDescribeResult
|
|
139
139
|
> {
|
|
140
140
|
const capability: ArbiterCapability = "vision-describe";
|
|
141
|
-
const family = opts.modelFamily ?? "
|
|
141
|
+
const family = opts.modelFamily ?? "gemma-vl";
|
|
142
142
|
const cache = opts.arbiterCache;
|
|
143
143
|
const loader = opts.loader;
|
|
144
144
|
return {
|
|
@@ -157,7 +157,7 @@ function buildVisionPrompt(userPrompt?: string): string {
|
|
|
157
157
|
// the encoded image tokens. The `N` must match the `image_data[*].id`
|
|
158
158
|
// we send in the body; we use 12 because llama-server's stock
|
|
159
159
|
// example uses small integer ids — any positive integer works.
|
|
160
|
-
return
|
|
160
|
+
return `<start_of_turn>user\n[img-12]\n${ask}<end_of_turn>\n<start_of_turn>model\n`;
|
|
161
161
|
}
|
|
162
162
|
|
|
163
163
|
function shape(
|
|
@@ -46,8 +46,8 @@ export type VisionImageInput =
|
|
|
46
46
|
/**
|
|
47
47
|
* Caller request to `describeImage`. The `modelFamily` distinguishes
|
|
48
48
|
* projected-token cache entries from different VL families that share
|
|
49
|
-
* the same hash space —
|
|
50
|
-
* Florence-2 tokens. Default is `
|
|
49
|
+
* the same hash space — Gemma-VL tokens are not interchangeable with
|
|
50
|
+
* Florence-2 tokens. Default is `gemma-vl` (the WS2 deliverable);
|
|
51
51
|
* each additional family registers under its own identifier.
|
|
52
52
|
*/
|
|
53
53
|
export interface VisionDescribeRequest {
|
|
@@ -56,7 +56,7 @@ export interface VisionDescribeRequest {
|
|
|
56
56
|
/**
|
|
57
57
|
* The model family identifier. Used to namespace the projector cache
|
|
58
58
|
* so swapping the backend's model family invalidates cached tokens.
|
|
59
|
-
* Defaults to `"
|
|
59
|
+
* Defaults to `"gemma-vl"` when omitted.
|
|
60
60
|
*/
|
|
61
61
|
modelFamily?: string;
|
|
62
62
|
/** Max output tokens; defaults to 256 (description-length budget). */
|
|
@@ -64,6 +64,15 @@ export interface VisionDescribeRequest {
|
|
|
64
64
|
/** 0..1, default 0.2 (descriptions should be deterministic-ish). */
|
|
65
65
|
temperature?: number;
|
|
66
66
|
signal?: AbortSignal;
|
|
67
|
+
/**
|
|
68
|
+
* Per-token callback. When set and the backend exposes streaming vision
|
|
69
|
+
* (the fused ABI-v13 path), the description is decoded token-by-token and
|
|
70
|
+
* each piece is delivered here as it generates — the same pipe as chat text.
|
|
71
|
+
* Backends without streaming describe ignore it and return the final result.
|
|
72
|
+
*/
|
|
73
|
+
onTextChunk?: (chunk: string) => void | Promise<void>;
|
|
74
|
+
/** Per-step token cap for streaming describe (smaller = finer-grained UI). */
|
|
75
|
+
maxTokensPerStep?: number;
|
|
67
76
|
}
|
|
68
77
|
|
|
69
78
|
/** Backend response — same shape that ImageDescriptionResult expects. */
|
|
@@ -145,7 +154,7 @@ export interface VisionDescribeBackendOptions {
|
|
|
145
154
|
|
|
146
155
|
/**
|
|
147
156
|
* Capability handler load function. The arbiter calls it with a model
|
|
148
|
-
* key (e.g. `"
|
|
157
|
+
* key (e.g. `"gemma-vl-4b"`); the implementation resolves to a real
|
|
149
158
|
* `(modelPath, mmprojPath)` pair from the catalog + installed registry
|
|
150
159
|
* and returns a live backend.
|
|
151
160
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Content-hashed cache for projected vision-language tokens (WS1 deliverable).
|
|
3
3
|
*
|
|
4
|
-
* Vision models in the Eliza-1 stack (
|
|
4
|
+
* Vision models in the Eliza-1 stack (Gemma-VL, Florence-2, Apothic-VL) all
|
|
5
5
|
* go through the same expensive projector step: raw pixel
|
|
6
6
|
* bytes → patch embeddings → projector → tokens that the text decoder
|
|
7
7
|
* actually consumes. When the user pastes the same screenshot three times
|
|
@@ -9,13 +9,35 @@ format, no shared corpus, divergent metric definitions, and a headful surface
|
|
|
9
9
|
that only covered a single-speaker, single-turn round-trip. The Voice Workbench
|
|
10
10
|
unifies them onto **one scenario format, one metric module, and one report**.
|
|
11
11
|
|
|
12
|
+
> **Capability assessment + evidence map** (what is CI-proven vs hardware/
|
|
13
|
+
> credential-gated, mapped to every #8785 AC and the product-owner questions):
|
|
14
|
+
> [research/VOICE_8785_ASSESSMENT.md](./research/VOICE_8785_ASSESSMENT.md).
|
|
15
|
+
> Research evidence base (pause lengths, VAD, AEC, diarization, owner verification,
|
|
16
|
+
> model landscape, latency math): [research/VOICE_PIPELINE_RESEARCH_2026.md](./research/VOICE_PIPELINE_RESEARCH_2026.md).
|
|
17
|
+
|
|
12
18
|
## Status
|
|
13
19
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
20
|
+
The schema, corpus generator (incl. acoustic degradation), metric module,
|
|
21
|
+
headless runner, report, scenario-runner `voice` turn kind, headful scenario
|
|
22
|
+
player, and the `voice:workbench` CLI are all **implemented and unit-tested**.
|
|
23
|
+
The real **acoustic-model** lane (`--real`) is implemented as a provisioned
|
|
24
|
+
hardware lane: it generates distinct human voices with ElevenLabs, synthesizes
|
|
25
|
+
agent echoes with the fused local TTS, and scores the corpus through fused ASR,
|
|
26
|
+
WeSpeaker, pyannote, and the shipped respond/self-voice gate. Missing real
|
|
27
|
+
artifacts are a hard failure in `--real`, not all-skipped evidence.
|
|
28
|
+
|
|
29
|
+
### Execution lanes (`voice:workbench`)
|
|
30
|
+
|
|
31
|
+
| Lane | Services | Proves | CI |
|
|
32
|
+
| --- | --- | --- | --- |
|
|
33
|
+
| `--mock` (default) | `groundTruthMockServices` echoes ground truth | runner → scorers → report wiring | ✅ always |
|
|
34
|
+
| `--logic` | `realDecisionLogicServices` runs the SHIPPED EOT + respond/echo/bystander/wake-word gate + name extraction + owner inference | the **decision logic** (catches a regression the moment it lands) | ✅ always (no models) |
|
|
35
|
+
| `--real` | ElevenLabs human speech + fused local TTS/ASR + WeSpeaker + pyannote | real WER/DER/EOT/respond/self-voice/owner-security measurements | ✅ provisioned nightly/hardware lane |
|
|
36
|
+
|
|
37
|
+
The `--logic` lane is the key anti-hollow guarantee: it does NOT echo the corpus,
|
|
38
|
+
it runs the same gate the UI client ships (`@elizaos/shared/voice/respond-gate`),
|
|
39
|
+
so the workbench genuinely suppresses a bystander, rejects the agent's echoed
|
|
40
|
+
reply, and holds on a mid-utterance pause — asserted by tests, not assumed.
|
|
19
41
|
|
|
20
42
|
### Implemented (this directory, unit-tested, no native artifacts)
|
|
21
43
|
|
|
@@ -25,9 +47,26 @@ synthesized corpus) and are listed under *Remaining* below.
|
|
|
25
47
|
| **Metric module (single source of truth)** | `e2e-harness.ts` | All voice scoring lives here. WER is delegated to `@elizaos/shared/voice-wer` (one definition for headless + headful). Added scorers: `scoreEotDecision` (latency p50/p95 + false-trigger/false-suppression rate), `scoreRespondDecision` (FP/FN split), `scoreDiarization` (DER + confusions/misses), `scoreEntityExtraction` (precision/recall/F1), `scoreVoiceEntityMatch` (recognized-voice→entity accuracy). |
|
|
26
48
|
| **Benchmark report** | `voice-workbench-report.ts` | `buildVoiceWorkbenchReport` rolls a matrix of per-scenario scorer results into one gating report (per-metric mean/worst + percentiles, per-scenario verdict). `formatVoiceWorkbenchMarkdown` renders it; `regressionsAgainstBaseline` flags metrics that worsened past a tolerance. |
|
|
27
49
|
| **WER consolidation** | `@elizaos/shared/voice-wer` | The previously-duplicated `wordErrorRate` (`e2e-harness.ts` **and** `voice-selftest-harness.ts`, with subtly different normalization) is now defined once — Unicode-aware, contraction-preserving — and imported by both. |
|
|
50
|
+
| **Acoustic robustness corpus** | `corpus-augment.ts` | Seeded, deterministic degradation DSP: additive room noise (white/pink at a target SNR), Freeverb reverb, far-field attenuation, telephone/low-quality line (band-limit + µ-law), and competing background talkers. Wired into the corpus generator via a per-turn / per-scenario `environment` so a clean scenario and a noisy one share one schema. |
|
|
51
|
+
| **Real-decision-logic adapter** | `workbench-logic-services.ts` | Runs the SHIPPED EOT + respond/echo/bystander/wake-word gate + name extraction over the corpus (no models). The `--logic` lane. |
|
|
52
|
+
| **Real acoustic adapter** | `workbench-real-services.ts` | The `--real` lane: ElevenLabs-generated human speech, fused local agent TTS, fused ASR, WeSpeaker speaker centroids, pyannote speech/overlap labels, live `selfVoiceSimilarity`, owner inference, and the same respond gate. |
|
|
53
|
+
| **Respond/echo gate (single source)** | `@elizaos/shared/voice/respond-gate` | `shouldRespondToVoiceTurn` + `buildVoiceTurnSignal`, promoted out of the UI so the client and the workbench share one definition. The UI re-exports it. |
|
|
54
|
+
| **Owner inference** | `@elizaos/shared/voice/owner-inference` | `resolveOwnerCandidate` — proposes the owner from who speaks most/most-confidently, only when sufficient AND unambiguous, else UNDECIDED. The logic an owner-detection provider/evaluator runs when no owner is enrolled. |
|
|
55
|
+
| **Echo + owner scorers** | `e2e-harness.ts` | `scoreEchoRejection` (agent-echo turns correctly suppressed) and `scoreOwnerSecurity` (owner-vs-intruder accuracy + impostor-accept rate). |
|
|
28
56
|
|
|
29
57
|
Tests: `voice-workbench.test.ts`, `voice-workbench-report.test.ts`,
|
|
30
|
-
`e2e-harness.test.ts
|
|
58
|
+
`e2e-harness.test.ts`, `corpus-augment.test.ts`,
|
|
59
|
+
`workbench-logic-services.test.ts`, `corpus-generator.test.ts`, and (in shared)
|
|
60
|
+
`voice/owner-inference.test.ts`.
|
|
61
|
+
|
|
62
|
+
### Scenario classes
|
|
63
|
+
|
|
64
|
+
`multi-voice`, `pauses`, `respond-no-respond`, `multi-speaker`, `diarization`,
|
|
65
|
+
`entity-extraction`, `voice-recognition`, `eot`, `transcription-mode`,
|
|
66
|
+
`multi-agent-room`, `long-form-monologue`, **`robustness`** (noise / reverb /
|
|
67
|
+
far-field / low-quality), **`echo-rejection`** (agent self-voice), **`owner-security`**
|
|
68
|
+
(owner vs intruder), **`overlapping-speech`** (interrupting talkers). The 12
|
|
69
|
+
built-in scenarios in `workbench-scenarios.ts` span every class.
|
|
31
70
|
|
|
32
71
|
### Honesty contract
|
|
33
72
|
|
|
@@ -63,26 +102,32 @@ The workbench is the convergence point for these previously-disjoint harnesses:
|
|
|
63
102
|
| `e2e-harness.ts:wordErrorRate` + `voice-selftest-harness.ts:wordErrorRate` | **Done** — one `@elizaos/shared/voice-wer`. |
|
|
64
103
|
| Pure scoring lib (`e2e-harness.ts`) | **Promoted** to the single metric module (EOT/diarization/respond/entity scorers added). |
|
|
65
104
|
| `packages/app-core/scripts/voice-duet.mjs` (`voice:duet`), `voice-e2e-hardware.ts`, `voice-vad-smoke.ts`, `voice-attribution-smoke.ts`, `lib/duet-bridge.mjs` | Feed measurements into the shared scorers + report (absorption planned). |
|
|
66
|
-
| `packages/benchmarks/voice/three-voice-scenario.mjs
|
|
105
|
+
| `packages/benchmarks/voice/three-voice-scenario.mjs` | Corpus-generation precedent the `VoiceScenario` corpus generator extends (planned). |
|
|
67
106
|
| `packages/benchmarks/voicebench/` (TS latency p95/p99) | The report layer mirrors its p95/p99 shape; remains a research bench linked from the workbench. |
|
|
68
107
|
| Per-spec inline `tinyWav()` fixtures (`packages/app/test/ui-smoke/voice-*.spec.ts`) | Replaced by the versioned corpus (planned). |
|
|
69
108
|
|
|
70
|
-
##
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
- **
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
- **Headful
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
-
|
|
87
|
-
|
|
88
|
-
|
|
109
|
+
## External / Device Follow-Ups
|
|
110
|
+
|
|
111
|
+
Full details and rationale are in
|
|
112
|
+
[research/VOICE_8785_ASSESSMENT.md §5](./research/VOICE_8785_ASSESSMENT.md).
|
|
113
|
+
|
|
114
|
+
- **Live cloud STT/TTS round-trip** — ElevenLabs via `/api/v1/voice/*`; needs an
|
|
115
|
+
authenticated Cloud session (the test account returns HTTP 402 — a billing
|
|
116
|
+
state, not a code bug).
|
|
117
|
+
- **PCM-level AEC** — still a product/runtime feature beyond the workbench
|
|
118
|
+
scorer: it needs a time-aligned playback reference and cancellation path in
|
|
119
|
+
the live audio transport, then the workbench can score the resulting echo
|
|
120
|
+
corpus.
|
|
121
|
+
- **Headful real-backend + recorded A/V** — the 10 `voice-workbench-*.spec.ts`
|
|
122
|
+
run with mocked backends; a real-backend headful lane with audio+video capture
|
|
123
|
+
needs a provisioned local backend on the CI host.
|
|
124
|
+
- **iOS device** — blocked on Apple ID provisioning; simulator local-inference is
|
|
125
|
+
Metal-limited.
|
|
126
|
+
|
|
127
|
+
## Open follow-up: PCM-level acoustic echo cancellation
|
|
128
|
+
|
|
129
|
+
Self-echo is caught at the transcript level only (word overlap). The recommended
|
|
130
|
+
next step is an `agentSpeaking` flag + ~1.5 s post-TTS cooldown (cheap, robust),
|
|
131
|
+
then WebRTC AEC3 with a time-aligned reference, then speaker-embedding self-voice
|
|
132
|
+
rejection. The `scoreEchoRejection` scorer is ready to gate it. See
|
|
133
|
+
[research/VOICE_8785_ASSESSMENT.md §6](./research/VOICE_8785_ASSESSMENT.md).
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": 1,
|
|
3
|
+
"overall": "pass",
|
|
4
|
+
"scenariosTotal": 14,
|
|
5
|
+
"scenariosRan": 14,
|
|
6
|
+
"scenariosSkipped": 0,
|
|
7
|
+
"scenarios": [
|
|
8
|
+
{
|
|
9
|
+
"scenarioId": "multi-voice-greeting",
|
|
10
|
+
"classes": ["multi-voice", "diarization"],
|
|
11
|
+
"status": "ran",
|
|
12
|
+
"verdict": "pass",
|
|
13
|
+
"caseCount": 8,
|
|
14
|
+
"failedCaseKinds": []
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"scenarioId": "respond-vs-bystander",
|
|
18
|
+
"classes": ["respond-no-respond", "multi-speaker"],
|
|
19
|
+
"status": "ran",
|
|
20
|
+
"verdict": "pass",
|
|
21
|
+
"caseCount": 9,
|
|
22
|
+
"failedCaseKinds": []
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"scenarioId": "pauses-midutterance",
|
|
26
|
+
"classes": ["pauses", "eot"],
|
|
27
|
+
"status": "ran",
|
|
28
|
+
"verdict": "pass",
|
|
29
|
+
"caseCount": 7,
|
|
30
|
+
"failedCaseKinds": []
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"scenarioId": "entity-from-speech",
|
|
34
|
+
"classes": ["entity-extraction", "voice-recognition"],
|
|
35
|
+
"status": "ran",
|
|
36
|
+
"verdict": "pass",
|
|
37
|
+
"caseCount": 7,
|
|
38
|
+
"failedCaseKinds": []
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"scenarioId": "transcription-mode-dictation",
|
|
42
|
+
"classes": ["transcription-mode", "long-form-monologue"],
|
|
43
|
+
"status": "ran",
|
|
44
|
+
"verdict": "pass",
|
|
45
|
+
"caseCount": 5,
|
|
46
|
+
"failedCaseKinds": []
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"scenarioId": "multi-agent-room-address",
|
|
50
|
+
"classes": ["multi-agent-room", "respond-no-respond"],
|
|
51
|
+
"status": "ran",
|
|
52
|
+
"verdict": "pass",
|
|
53
|
+
"caseCount": 8,
|
|
54
|
+
"failedCaseKinds": []
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"scenarioId": "noisy-room-commands",
|
|
58
|
+
"classes": ["robustness", "respond-no-respond"],
|
|
59
|
+
"status": "ran",
|
|
60
|
+
"verdict": "pass",
|
|
61
|
+
"caseCount": 8,
|
|
62
|
+
"failedCaseKinds": []
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"scenarioId": "far-field-reverb",
|
|
66
|
+
"classes": ["robustness", "respond-no-respond"],
|
|
67
|
+
"status": "ran",
|
|
68
|
+
"verdict": "pass",
|
|
69
|
+
"caseCount": 6,
|
|
70
|
+
"failedCaseKinds": []
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"scenarioId": "background-talkers",
|
|
74
|
+
"classes": ["robustness", "overlapping-speech", "multi-speaker"],
|
|
75
|
+
"status": "ran",
|
|
76
|
+
"verdict": "pass",
|
|
77
|
+
"caseCount": 6,
|
|
78
|
+
"failedCaseKinds": []
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"scenarioId": "echo-self-trigger",
|
|
82
|
+
"classes": ["echo-rejection", "respond-no-respond"],
|
|
83
|
+
"status": "ran",
|
|
84
|
+
"verdict": "pass",
|
|
85
|
+
"caseCount": 10,
|
|
86
|
+
"failedCaseKinds": []
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"scenarioId": "multi-speaker-name-capture",
|
|
90
|
+
"classes": [
|
|
91
|
+
"diarization",
|
|
92
|
+
"entity-extraction",
|
|
93
|
+
"multi-speaker",
|
|
94
|
+
"voice-recognition"
|
|
95
|
+
],
|
|
96
|
+
"status": "ran",
|
|
97
|
+
"verdict": "pass",
|
|
98
|
+
"caseCount": 13,
|
|
99
|
+
"failedCaseKinds": []
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"scenarioId": "echo-mistranscribed",
|
|
103
|
+
"classes": ["echo-rejection"],
|
|
104
|
+
"status": "ran",
|
|
105
|
+
"verdict": "pass",
|
|
106
|
+
"caseCount": 8,
|
|
107
|
+
"failedCaseKinds": []
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"scenarioId": "owner-enrollment-inference",
|
|
111
|
+
"classes": ["owner-security", "voice-recognition"],
|
|
112
|
+
"status": "ran",
|
|
113
|
+
"verdict": "pass",
|
|
114
|
+
"caseCount": 15,
|
|
115
|
+
"failedCaseKinds": []
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
"scenarioId": "owner-vs-intruder",
|
|
119
|
+
"classes": ["owner-security", "respond-no-respond", "multi-speaker"],
|
|
120
|
+
"status": "ran",
|
|
121
|
+
"verdict": "pass",
|
|
122
|
+
"caseCount": 10,
|
|
123
|
+
"failedCaseKinds": []
|
|
124
|
+
}
|
|
125
|
+
],
|
|
126
|
+
"metrics": {
|
|
127
|
+
"wer": {
|
|
128
|
+
"count": 32,
|
|
129
|
+
"mean": 0,
|
|
130
|
+
"worst": 0
|
|
131
|
+
},
|
|
132
|
+
"eotFalseTriggerRate": {
|
|
133
|
+
"count": 14,
|
|
134
|
+
"mean": 0,
|
|
135
|
+
"worst": 0
|
|
136
|
+
},
|
|
137
|
+
"eotLatencyP50Ms": null,
|
|
138
|
+
"eotLatencyP95Ms": null,
|
|
139
|
+
"der": {
|
|
140
|
+
"count": 14,
|
|
141
|
+
"mean": 0,
|
|
142
|
+
"worst": 0
|
|
143
|
+
},
|
|
144
|
+
"respondAccuracy": {
|
|
145
|
+
"count": 14,
|
|
146
|
+
"mean": 1,
|
|
147
|
+
"worst": 1
|
|
148
|
+
},
|
|
149
|
+
"entityF1": {
|
|
150
|
+
"count": 2,
|
|
151
|
+
"mean": 1,
|
|
152
|
+
"worst": 1
|
|
153
|
+
},
|
|
154
|
+
"voiceEntityMatchRate": {
|
|
155
|
+
"count": 14,
|
|
156
|
+
"mean": 1,
|
|
157
|
+
"worst": 1
|
|
158
|
+
},
|
|
159
|
+
"firstAudioMs": {
|
|
160
|
+
"count": 26,
|
|
161
|
+
"mean": 250,
|
|
162
|
+
"worst": 250
|
|
163
|
+
},
|
|
164
|
+
"echoRejectionRate": {
|
|
165
|
+
"count": 2,
|
|
166
|
+
"mean": 1,
|
|
167
|
+
"worst": 1
|
|
168
|
+
},
|
|
169
|
+
"ownerAccuracy": {
|
|
170
|
+
"count": 2,
|
|
171
|
+
"mean": 1,
|
|
172
|
+
"worst": 1
|
|
173
|
+
},
|
|
174
|
+
"impostorAcceptRate": {
|
|
175
|
+
"count": 2,
|
|
176
|
+
"mean": 0,
|
|
177
|
+
"worst": 0
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
@@ -21,6 +21,21 @@ export interface SpeechFixtureOptions {
|
|
|
21
21
|
tailSilenceSec?: number;
|
|
22
22
|
/** Deterministic seed for the f0 jitter. */
|
|
23
23
|
seed?: number;
|
|
24
|
+
/**
|
|
25
|
+
* Per-speaker voice colour. Two speakers with different timbres have
|
|
26
|
+
* measurably different spectral envelopes, so an acoustic diarizer can tell
|
|
27
|
+
* them apart from the audio alone. Omit for the default (shared) voice — the
|
|
28
|
+
* VAD/wake-word smoke fixtures don't care who is speaking.
|
|
29
|
+
*/
|
|
30
|
+
timbre?: SpeakerTimbre;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/** A speaker's voice colour: fundamental frequency + vocal-tract formants. */
|
|
34
|
+
export interface SpeakerTimbre {
|
|
35
|
+
/** Base fundamental frequency (Hz) — speaker pitch. */
|
|
36
|
+
f0Hz: number;
|
|
37
|
+
/** Three `[centerHz, bandwidthHz]` formants — the vocal-tract resonances. */
|
|
38
|
+
formants: ReadonlyArray<readonly [number, number]>;
|
|
24
39
|
}
|
|
25
40
|
|
|
26
41
|
export interface SpeechFixture {
|
|
@@ -85,6 +100,57 @@ const DEFAULT_FORMANTS: ReadonlyArray<readonly [number, number]> = [
|
|
|
85
100
|
[2600, 120],
|
|
86
101
|
];
|
|
87
102
|
|
|
103
|
+
/** The shared (speaker-agnostic) voice used when no `timbre` is supplied. */
|
|
104
|
+
export const DEFAULT_SPEAKER_TIMBRE: SpeakerTimbre = {
|
|
105
|
+
f0Hz: 110,
|
|
106
|
+
formants: DEFAULT_FORMANTS,
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Deterministic, distinct voice colour for participant `index` of `count`
|
|
111
|
+
* speakers in a scenario. The speakers are spread EVENLY across a wide
|
|
112
|
+
* vocal-tract-length (formant-scaling) and pitch range, so every pair in a
|
|
113
|
+
* scenario is acoustically far apart — a blind acoustic diarizer can split them
|
|
114
|
+
* from the audio alone — while each participant always gets the same timbre, so the
|
|
115
|
+
* same speaker clusters together. Spreading by position (not by a label hash)
|
|
116
|
+
* guarantees the separation; two labels could otherwise hash to near-identical
|
|
117
|
+
* voices and merge.
|
|
118
|
+
*/
|
|
119
|
+
export function speakerTimbreForIndex(
|
|
120
|
+
index: number,
|
|
121
|
+
count: number,
|
|
122
|
+
): SpeakerTimbre {
|
|
123
|
+
const frac = count <= 1 ? 0.5 : index / (count - 1); // 0..1
|
|
124
|
+
// Vocal-tract scaling 0.72..1.32 (shorter tract → higher formants).
|
|
125
|
+
const formantScale = 0.72 + frac * 0.6;
|
|
126
|
+
// Alternate the second formant up/down so even adjacent slots differ in
|
|
127
|
+
// formant PATTERN (F2 is the most speaker-discriminative resonance), not just
|
|
128
|
+
// a global shift.
|
|
129
|
+
const f2Bias = index % 2 === 0 ? 1.06 : 0.94;
|
|
130
|
+
const formants = DEFAULT_FORMANTS.map(([fc, bw], i) => {
|
|
131
|
+
const bias = i === 1 ? f2Bias : 1;
|
|
132
|
+
return [fc * formantScale * bias, bw] as const;
|
|
133
|
+
});
|
|
134
|
+
// Pitch 98..202 Hz.
|
|
135
|
+
const f0Hz = 98 + frac * 104;
|
|
136
|
+
return { f0Hz, formants };
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* The agent's own synthetic TTS voice — a fixed timbre, deliberately placed
|
|
141
|
+
* outside the speaker-seed range so it is acoustically distinct from every
|
|
142
|
+
* scenario participant. The corpus synthesizes agent-echo turns with this voice,
|
|
143
|
+
* and the acoustic self-voice gate enrolls it as the agent's imprint.
|
|
144
|
+
*/
|
|
145
|
+
export const AGENT_VOICE_TIMBRE: SpeakerTimbre = {
|
|
146
|
+
f0Hz: 250,
|
|
147
|
+
formants: [
|
|
148
|
+
[1100, 90],
|
|
149
|
+
[2400, 110],
|
|
150
|
+
[3800, 150],
|
|
151
|
+
],
|
|
152
|
+
};
|
|
153
|
+
|
|
88
154
|
/** Build a `silence + synthesized speech + silence` PCM buffer. */
|
|
89
155
|
export function makeSpeechWithSilenceFixture(
|
|
90
156
|
opts: SpeechFixtureOptions = {},
|
|
@@ -100,12 +166,16 @@ export function makeSpeechWithSilenceFixture(
|
|
|
100
166
|
const speechEndSample = Math.floor((leadSec + speechSec) * sampleRate);
|
|
101
167
|
|
|
102
168
|
const rng = mulberry32(opts.seed ?? 0xe11a);
|
|
103
|
-
const
|
|
169
|
+
const timbre = opts.timbre ?? DEFAULT_SPEAKER_TIMBRE;
|
|
170
|
+
const bank = new FormantBank(sampleRate, timbre.formants);
|
|
104
171
|
let phase = 0;
|
|
105
172
|
for (let i = speechStartSample; i < speechEndSample; i++) {
|
|
106
173
|
const tInSpeech = (i - speechStartSample) / sampleRate;
|
|
174
|
+
// Syllable-rate vibrato proportional to the speaker's base pitch (the
|
|
175
|
+
// original shared voice swung 30 Hz around 110 Hz ≈ ±27%).
|
|
107
176
|
const f0 =
|
|
108
|
-
|
|
177
|
+
timbre.f0Hz * (1 + 0.27 * Math.sin(2 * Math.PI * 5 * tInSpeech)) +
|
|
178
|
+
(rng() - 0.5) * 4;
|
|
109
179
|
phase += f0 / sampleRate;
|
|
110
180
|
let excitation = 0;
|
|
111
181
|
if (phase >= 1) {
|
|
@@ -19,11 +19,11 @@ import {
|
|
|
19
19
|
type Eliza1EotScorerOptions,
|
|
20
20
|
} from "../eot-classifier";
|
|
21
21
|
|
|
22
|
-
const
|
|
22
|
+
const END_OF_TURN_ID = 199;
|
|
23
23
|
|
|
24
24
|
/**
|
|
25
25
|
* Minimal fake llama model the scorer can drive. The `score()` parameter
|
|
26
|
-
* is the probability we want the fake model to return for
|
|
26
|
+
* is the probability we want the fake model to return for `<end_of_turn>`
|
|
27
27
|
* on the next token. Token IDs are derived from char codes so two calls
|
|
28
28
|
* with different prompts produce different token sequences.
|
|
29
29
|
*/
|
|
@@ -36,7 +36,7 @@ interface FakeModelHandle {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
function buildFakeModel(opts: {
|
|
39
|
-
|
|
39
|
+
endOfTurnProbability: () => number;
|
|
40
40
|
disposeSpy?: () => void;
|
|
41
41
|
}): FakeModelHandle {
|
|
42
42
|
const tokenizeCalls: string[] = [];
|
|
@@ -55,11 +55,11 @@ function buildFakeModel(opts: {
|
|
|
55
55
|
i === input.length - 1
|
|
56
56
|
? {
|
|
57
57
|
next: {
|
|
58
|
-
token:
|
|
59
|
-
confidence: opts.
|
|
58
|
+
token: END_OF_TURN_ID,
|
|
59
|
+
confidence: opts.endOfTurnProbability(),
|
|
60
60
|
probabilities: new Map<number, number>([
|
|
61
|
-
[
|
|
62
|
-
[42, 1 - opts.
|
|
61
|
+
[END_OF_TURN_ID, opts.endOfTurnProbability()],
|
|
62
|
+
[42, 1 - opts.endOfTurnProbability()],
|
|
63
63
|
]),
|
|
64
64
|
},
|
|
65
65
|
}
|
|
@@ -79,8 +79,8 @@ function buildFakeModel(opts: {
|
|
|
79
79
|
const model: LlamaModelLike = {
|
|
80
80
|
tokenize(text: string, specialTokens?: boolean) {
|
|
81
81
|
tokenizeCalls.push(text);
|
|
82
|
-
if (text === "
|
|
83
|
-
return specialTokens ? [
|
|
82
|
+
if (text === "<end_of_turn>")
|
|
83
|
+
return specialTokens ? [END_OF_TURN_ID] : [101, 102];
|
|
84
84
|
return Array.from(text).map((c) => c.charCodeAt(0));
|
|
85
85
|
},
|
|
86
86
|
async createContext(args) {
|
|
@@ -99,33 +99,33 @@ function buildFakeModel(opts: {
|
|
|
99
99
|
}
|
|
100
100
|
|
|
101
101
|
describe("formatEotPrompt", () => {
|
|
102
|
-
it("renders a single-user
|
|
102
|
+
it("renders a single-user Gemma turn with the end_of_turn stripped", () => {
|
|
103
103
|
const prompt = formatEotPrompt("hello world");
|
|
104
|
-
expect(prompt).toBe("
|
|
105
|
-
expect(prompt).not.toContain("
|
|
104
|
+
expect(prompt).toBe("<start_of_turn>user\nhello world");
|
|
105
|
+
expect(prompt).not.toContain("<end_of_turn>");
|
|
106
106
|
});
|
|
107
107
|
|
|
108
108
|
it("trims whitespace so leading/trailing space does not affect scoring", () => {
|
|
109
|
-
expect(formatEotPrompt(" hi ")).toBe("
|
|
109
|
+
expect(formatEotPrompt(" hi ")).toBe("<start_of_turn>user\nhi");
|
|
110
110
|
});
|
|
111
111
|
});
|
|
112
112
|
|
|
113
113
|
describe("Eliza1EotScorer", () => {
|
|
114
|
-
it("returns P(
|
|
115
|
-
const fake = buildFakeModel({
|
|
114
|
+
it("returns P(<end_of_turn>) reported by the model on the last token", async () => {
|
|
115
|
+
const fake = buildFakeModel({ endOfTurnProbability: () => 0.83 });
|
|
116
116
|
const scorer = new Eliza1EotScorer({ model: fake.model });
|
|
117
117
|
const result = await scorer.score("hello world.");
|
|
118
118
|
expect(result.probability).toBeCloseTo(0.83, 5);
|
|
119
119
|
expect(result.promptTokens).toBeGreaterThan(0);
|
|
120
|
-
//
|
|
121
|
-
expect(fake.tokenizeCalls[0]).toBe("
|
|
120
|
+
// `<end_of_turn>` resolution happens once during initialization.
|
|
121
|
+
expect(fake.tokenizeCalls[0]).toBe("<end_of_turn>");
|
|
122
122
|
});
|
|
123
123
|
|
|
124
124
|
it("falls back to 0.5 when the probabilities map is missing", async () => {
|
|
125
125
|
const fake = {
|
|
126
126
|
model: {
|
|
127
127
|
tokenize(text: string) {
|
|
128
|
-
return text === "
|
|
128
|
+
return text === "<end_of_turn>" ? [END_OF_TURN_ID] : [1, 2, 3];
|
|
129
129
|
},
|
|
130
130
|
async createContext() {
|
|
131
131
|
return {
|
|
@@ -148,14 +148,14 @@ describe("Eliza1EotScorer", () => {
|
|
|
148
148
|
});
|
|
149
149
|
|
|
150
150
|
it("uses the model score for empty transcript input", async () => {
|
|
151
|
-
const fake = buildFakeModel({
|
|
151
|
+
const fake = buildFakeModel({ endOfTurnProbability: () => 0.9 });
|
|
152
152
|
const scorer = new Eliza1EotScorer({ model: fake.model });
|
|
153
153
|
const result = await scorer.score(" ");
|
|
154
154
|
expect(result.probability).toBe(0.9);
|
|
155
155
|
});
|
|
156
156
|
|
|
157
157
|
it("attaches a LoRA adapter to the context when loraPath is set", async () => {
|
|
158
|
-
const fake = buildFakeModel({
|
|
158
|
+
const fake = buildFakeModel({ endOfTurnProbability: () => 0.5 });
|
|
159
159
|
const scorer = new Eliza1EotScorer({
|
|
160
160
|
model: fake.model,
|
|
161
161
|
loraPath: "/tmp/fake-eot.gguf",
|
|
@@ -173,7 +173,7 @@ describe("Eliza1EotScorer", () => {
|
|
|
173
173
|
});
|
|
174
174
|
|
|
175
175
|
it("truncates the prompt to maxHistoryTokens", async () => {
|
|
176
|
-
const fake = buildFakeModel({
|
|
176
|
+
const fake = buildFakeModel({ endOfTurnProbability: () => 0.5 });
|
|
177
177
|
const scorer = new Eliza1EotScorer({
|
|
178
178
|
model: fake.model,
|
|
179
179
|
maxHistoryTokens: 5,
|
|
@@ -184,12 +184,12 @@ describe("Eliza1EotScorer", () => {
|
|
|
184
184
|
expect(fake.controlledEvaluateCalls[0]).toHaveLength(5);
|
|
185
185
|
});
|
|
186
186
|
|
|
187
|
-
it("throws a descriptive error when the tokenizer does not resolve
|
|
187
|
+
it("throws a descriptive error when the tokenizer does not resolve end_of_turn to a single id", async () => {
|
|
188
188
|
const fake: LlamaModelLike = {
|
|
189
189
|
tokenize(text: string) {
|
|
190
|
-
// Simulate a non-
|
|
190
|
+
// Simulate a non-Gemma model where <end_of_turn> tokenizes to plain
|
|
191
191
|
// text (multiple ids).
|
|
192
|
-
if (text === "
|
|
192
|
+
if (text === "<end_of_turn>") return [10, 11, 12];
|
|
193
193
|
return [1, 2, 3];
|
|
194
194
|
},
|
|
195
195
|
async createContext() {
|
|
@@ -197,13 +197,13 @@ describe("Eliza1EotScorer", () => {
|
|
|
197
197
|
},
|
|
198
198
|
};
|
|
199
199
|
const scorer = new Eliza1EotScorer({ model: fake });
|
|
200
|
-
await expect(scorer.score("x")).rejects.toThrow(
|
|
200
|
+
await expect(scorer.score("x")).rejects.toThrow(/<end_of_turn>/);
|
|
201
201
|
});
|
|
202
202
|
|
|
203
203
|
it("disposes the context on dispose()", async () => {
|
|
204
204
|
const disposeSpy = vi.fn();
|
|
205
205
|
const fake = buildFakeModel({
|
|
206
|
-
|
|
206
|
+
endOfTurnProbability: () => 0.5,
|
|
207
207
|
disposeSpy,
|
|
208
208
|
});
|
|
209
209
|
const scorer = new Eliza1EotScorer({ model: fake.model });
|
|
@@ -217,7 +217,7 @@ describe("Eliza1EotScorer", () => {
|
|
|
217
217
|
let maxInflight = 0;
|
|
218
218
|
const fake: LlamaModelLike = {
|
|
219
219
|
tokenize(text: string) {
|
|
220
|
-
if (text === "
|
|
220
|
+
if (text === "<end_of_turn>") return [END_OF_TURN_ID];
|
|
221
221
|
return [1, 2, 3];
|
|
222
222
|
},
|
|
223
223
|
async createContext() {
|
|
@@ -234,7 +234,7 @@ describe("Eliza1EotScorer", () => {
|
|
|
234
234
|
? {
|
|
235
235
|
next: {
|
|
236
236
|
probabilities: new Map<number, number>([
|
|
237
|
-
[
|
|
237
|
+
[END_OF_TURN_ID, 0.6],
|
|
238
238
|
]),
|
|
239
239
|
},
|
|
240
240
|
}
|
|
@@ -258,7 +258,7 @@ describe("Eliza1EotScorer", () => {
|
|
|
258
258
|
|
|
259
259
|
describe("Eliza1EotClassifier", () => {
|
|
260
260
|
function buildOpts(probability: number): Eliza1EotScorerOptions {
|
|
261
|
-
const fake = buildFakeModel({
|
|
261
|
+
const fake = buildFakeModel({ endOfTurnProbability: () => probability });
|
|
262
262
|
return { model: fake.model };
|
|
263
263
|
}
|
|
264
264
|
|