@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -83,10 +83,10 @@ export interface FfiBackendSession {
|
|
|
83
83
|
} | null;
|
|
84
84
|
/**
|
|
85
85
|
* Absolute path to a *separate* MTP drafter GGUF resolved during load.
|
|
86
|
-
* `null` means
|
|
87
|
-
* text GGUF and the native runner activates `--spec-type
|
|
88
|
-
* with no `-md`. Speculative decoding is governed by `mtp`,
|
|
89
|
-
* presence of this path.
|
|
86
|
+
* `null` means embedded-draft-head MTP: the draft head is embedded in
|
|
87
|
+
* the main text GGUF and the native runner activates `--spec-type
|
|
88
|
+
* draft-mtp` with no `-md`. Speculative decoding is governed by `mtp`,
|
|
89
|
+
* not by the presence of this path.
|
|
90
90
|
*/
|
|
91
91
|
readonly draftModelPath: string | null;
|
|
92
92
|
/**
|
|
@@ -97,13 +97,14 @@ export interface FfiBackendSession {
|
|
|
97
97
|
readonly mmprojPath: string | null;
|
|
98
98
|
/**
|
|
99
99
|
* Per-load runtime config the fused libelizainference path applies at its
|
|
100
|
-
* first `llmStreamOpen` (gpuLayers + KV-cache quant types). The desktop
|
|
100
|
+
* first `llmStreamOpen` (context size, gpuLayers + KV-cache quant types). The desktop
|
|
101
101
|
* libllama runtime applies these at `loadModel()` instead and leaves this
|
|
102
102
|
* `null` — the backend forwards them into the runner's per-call config only
|
|
103
103
|
* when present, so the fused path mirrors the libllama load decision without
|
|
104
104
|
* the libllama path double-applying them.
|
|
105
105
|
*/
|
|
106
106
|
readonly loadConfig?: {
|
|
107
|
+
contextSize?: number;
|
|
107
108
|
gpuLayers?: number;
|
|
108
109
|
cacheTypeK?: string | null;
|
|
109
110
|
cacheTypeV?: string | null;
|
|
@@ -195,7 +196,9 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
|
|
|
195
196
|
gpuLayers: loadConfig?.gpuLayers,
|
|
196
197
|
cacheTypeK: loadConfig?.cacheTypeK,
|
|
197
198
|
cacheTypeV: loadConfig?.cacheTypeV,
|
|
199
|
+
contextSize: loadConfig?.contextSize,
|
|
198
200
|
signal: args.signal,
|
|
201
|
+
maxTokensPerStep: args.maxTokensPerStep,
|
|
199
202
|
onTextChunk: args.onTextChunk,
|
|
200
203
|
onVerifierEvent: args.onVerifierEvent,
|
|
201
204
|
});
|
|
@@ -280,14 +283,39 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
|
|
|
280
283
|
gpuLayers: loadConfig?.gpuLayers,
|
|
281
284
|
cacheTypeK: loadConfig?.cacheTypeK,
|
|
282
285
|
cacheTypeV: loadConfig?.cacheTypeV,
|
|
286
|
+
contextSize: loadConfig?.contextSize,
|
|
283
287
|
});
|
|
284
288
|
return true;
|
|
285
289
|
}
|
|
286
290
|
|
|
291
|
+
currentRuntimeLoadConfig() {
|
|
292
|
+
if (!this.session) return null;
|
|
293
|
+
const loadConfig = this.session.loadConfig;
|
|
294
|
+
return {
|
|
295
|
+
modelId: null,
|
|
296
|
+
modelPath: this.loadedPath,
|
|
297
|
+
contextSize: loadConfig?.contextSize ?? null,
|
|
298
|
+
cacheTypeK: loadConfig?.cacheTypeK ?? null,
|
|
299
|
+
cacheTypeV: loadConfig?.cacheTypeV ?? null,
|
|
300
|
+
gpuLayers:
|
|
301
|
+
typeof loadConfig?.gpuLayers === "number" ? loadConfig.gpuLayers : null,
|
|
302
|
+
parallel: this.parallelSlots(),
|
|
303
|
+
binaryPath: null,
|
|
304
|
+
backend: this.id,
|
|
305
|
+
mtp: this.session.mtp
|
|
306
|
+
? {
|
|
307
|
+
specType: "draft-mtp" as const,
|
|
308
|
+
draftMin: this.session.mtp.draftMin,
|
|
309
|
+
draftMax: this.session.mtp.draftMax,
|
|
310
|
+
}
|
|
311
|
+
: null,
|
|
312
|
+
};
|
|
313
|
+
}
|
|
314
|
+
|
|
287
315
|
/**
|
|
288
316
|
* True when Eliza-1 native MTP is active for the loaded target model.
|
|
289
|
-
* Covers both shapes:
|
|
290
|
-
* GGUF, `draftModelPath` null) and separate-drafter MTP.
|
|
317
|
+
* Covers both shapes: embedded-draft-head MTP (draft head embedded in
|
|
318
|
+
* the text GGUF, `draftModelPath` null) and separate-drafter MTP.
|
|
291
319
|
*/
|
|
292
320
|
mtpEnabled(): boolean {
|
|
293
321
|
return Boolean(this.session?.mtp);
|
|
@@ -327,6 +355,10 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
|
|
|
327
355
|
maxTokens?: number;
|
|
328
356
|
temperature?: number;
|
|
329
357
|
signal?: AbortSignal;
|
|
358
|
+
/** Per-token callback — when set + the runtime supports streaming vision,
|
|
359
|
+
* the description is decoded token-by-token through the same pipe as chat. */
|
|
360
|
+
onTextChunk?: (chunk: string) => void | Promise<void>;
|
|
361
|
+
maxTokensPerStep?: number;
|
|
330
362
|
}): Promise<{ text: string; projectorMs?: number; decodeMs?: number }> {
|
|
331
363
|
if (!this.session) {
|
|
332
364
|
throw new Error(
|
|
@@ -342,7 +374,7 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
|
|
|
342
374
|
// The runtime adapter has visionSupported() + describeImage(args).
|
|
343
375
|
// We re-shape `bytes` → `imageBytes` and merge in the resolved
|
|
344
376
|
// mmprojPath; the rest of args pass through unchanged.
|
|
345
|
-
const runtime = this.runtime as
|
|
377
|
+
const runtime = this.runtime as FfiBackendRuntime & {
|
|
346
378
|
describeImage?: (args: {
|
|
347
379
|
imageBytes: Uint8Array;
|
|
348
380
|
mmprojPath: string;
|
|
@@ -350,6 +382,8 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
|
|
|
350
382
|
maxTokens?: number;
|
|
351
383
|
temperature?: number;
|
|
352
384
|
signal?: AbortSignal;
|
|
385
|
+
onTextChunk?: (chunk: string) => void | Promise<void>;
|
|
386
|
+
maxTokensPerStep?: number;
|
|
353
387
|
}) => Promise<{ text: string; projectorMs?: number; decodeMs?: number }>;
|
|
354
388
|
};
|
|
355
389
|
if (!runtime.describeImage) {
|
|
@@ -364,6 +398,8 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
|
|
|
364
398
|
maxTokens: args.maxTokens,
|
|
365
399
|
temperature: args.temperature,
|
|
366
400
|
signal: args.signal,
|
|
401
|
+
onTextChunk: args.onTextChunk,
|
|
402
|
+
maxTokensPerStep: args.maxTokensPerStep,
|
|
367
403
|
});
|
|
368
404
|
}
|
|
369
405
|
|
|
@@ -1,10 +1,63 @@
|
|
|
1
|
-
import { describe, expect, it, vi } from "vitest";
|
|
2
|
-
import {
|
|
1
|
+
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
FfiStreamingRunner,
|
|
4
|
+
resolveMaxTokensPerStep,
|
|
5
|
+
} from "./ffi-streaming-runner";
|
|
3
6
|
import type {
|
|
4
7
|
LlmCtxHandle,
|
|
5
8
|
LlmStreamingBinding,
|
|
6
9
|
} from "./llm-streaming-binding";
|
|
7
|
-
import type { LlmStreamHandle } from "./voice/ffi-bindings";
|
|
10
|
+
import type { LlmStreamHandle, LlmStreamStep } from "./voice/ffi-bindings";
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Build a binding whose `llmStreamNext` emits `steps.length` steps (the last
|
|
14
|
+
* with `done: true`), and that records the `maxTokensPerStep` passed on every
|
|
15
|
+
* call so tests can assert the resolved per-step cap.
|
|
16
|
+
*/
|
|
17
|
+
function makeStepBinding(steps: string[]): {
|
|
18
|
+
binding: LlmStreamingBinding;
|
|
19
|
+
stepCaps: number[];
|
|
20
|
+
} {
|
|
21
|
+
const stream = 7n as LlmStreamHandle;
|
|
22
|
+
const stepCaps: number[] = [];
|
|
23
|
+
let i = 0;
|
|
24
|
+
const llmStreamNext = vi.fn(
|
|
25
|
+
(args: { maxTokensPerStep?: number }): LlmStreamStep => {
|
|
26
|
+
stepCaps.push(args.maxTokensPerStep ?? -1);
|
|
27
|
+
const text = steps[i] ?? "";
|
|
28
|
+
const done = i >= steps.length - 1;
|
|
29
|
+
i += 1;
|
|
30
|
+
return {
|
|
31
|
+
tokens: [i],
|
|
32
|
+
text,
|
|
33
|
+
done,
|
|
34
|
+
drafterDrafted: 0,
|
|
35
|
+
drafterAccepted: 0,
|
|
36
|
+
};
|
|
37
|
+
},
|
|
38
|
+
);
|
|
39
|
+
const binding: LlmStreamingBinding = {
|
|
40
|
+
llmStreamSupported: () => true,
|
|
41
|
+
llmStreamOpen: vi.fn().mockReturnValue(stream),
|
|
42
|
+
llmStreamPrefill: vi.fn(),
|
|
43
|
+
llmStreamNext,
|
|
44
|
+
llmStreamCancel: vi.fn(),
|
|
45
|
+
llmStreamClose: vi.fn(),
|
|
46
|
+
};
|
|
47
|
+
return { binding, stepCaps };
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const BASE_ARGS = {
|
|
51
|
+
slotId: 0,
|
|
52
|
+
maxTokens: 64,
|
|
53
|
+
temperature: 0,
|
|
54
|
+
topP: 1,
|
|
55
|
+
topK: 0,
|
|
56
|
+
repeatPenalty: 1,
|
|
57
|
+
draftMin: 0,
|
|
58
|
+
draftMax: 0,
|
|
59
|
+
draftModelPath: null,
|
|
60
|
+
} as const;
|
|
8
61
|
|
|
9
62
|
describe("FfiStreamingRunner prewarm", () => {
|
|
10
63
|
it("treats maxTokens: 0 as prefill-only and never calls next-token generation", async () => {
|
|
@@ -38,10 +91,17 @@ describe("FfiStreamingRunner prewarm", () => {
|
|
|
38
91
|
draftMin: 0,
|
|
39
92
|
draftMax: 0,
|
|
40
93
|
draftModelPath: null,
|
|
94
|
+
contextSize: 32_768,
|
|
41
95
|
onTextChunk,
|
|
42
96
|
});
|
|
43
97
|
|
|
44
98
|
expect(binding.llmStreamOpen).toHaveBeenCalledTimes(1);
|
|
99
|
+
expect(binding.llmStreamOpen).toHaveBeenCalledWith({
|
|
100
|
+
ctx: 1n,
|
|
101
|
+
config: expect.objectContaining({
|
|
102
|
+
contextSize: 32_768,
|
|
103
|
+
}),
|
|
104
|
+
});
|
|
45
105
|
expect(binding.llmStreamPrefill).toHaveBeenCalledWith({
|
|
46
106
|
stream,
|
|
47
107
|
tokens: promptTokens,
|
|
@@ -58,3 +118,103 @@ describe("FfiStreamingRunner prewarm", () => {
|
|
|
58
118
|
});
|
|
59
119
|
});
|
|
60
120
|
});
|
|
121
|
+
|
|
122
|
+
describe("FfiStreamingRunner per-step granularity (#9174)", () => {
|
|
123
|
+
const ORIGINAL = process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
|
|
124
|
+
afterEach(() => {
|
|
125
|
+
if (ORIGINAL === undefined) {
|
|
126
|
+
delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
|
|
127
|
+
} else {
|
|
128
|
+
process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = ORIGINAL;
|
|
129
|
+
}
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
it("defaults to a 32-token per-step cap when no override is set", async () => {
|
|
133
|
+
delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
|
|
134
|
+
const { binding, stepCaps } = makeStepBinding(["Hi ", "there"]);
|
|
135
|
+
const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
|
|
136
|
+
await runner.generateWithUsage({
|
|
137
|
+
...BASE_ARGS,
|
|
138
|
+
promptTokens: new Int32Array([1]),
|
|
139
|
+
});
|
|
140
|
+
expect(stepCaps).toEqual([32, 32]);
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
it("forwards a per-call maxTokensPerStep to every llmStreamNext call", async () => {
|
|
144
|
+
delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
|
|
145
|
+
const { binding, stepCaps } = makeStepBinding(["a", "b", "c"]);
|
|
146
|
+
const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
|
|
147
|
+
await runner.generateWithUsage({
|
|
148
|
+
...BASE_ARGS,
|
|
149
|
+
promptTokens: new Int32Array([1]),
|
|
150
|
+
maxTokensPerStep: 4,
|
|
151
|
+
});
|
|
152
|
+
expect(stepCaps).toEqual([4, 4, 4]);
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
it("honors the ELIZA_LOCAL_STREAM_TOKENS_PER_STEP env override", async () => {
|
|
156
|
+
process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "8";
|
|
157
|
+
const { binding, stepCaps } = makeStepBinding(["x", "y"]);
|
|
158
|
+
const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
|
|
159
|
+
await runner.generateWithUsage({
|
|
160
|
+
...BASE_ARGS,
|
|
161
|
+
promptTokens: new Int32Array([1]),
|
|
162
|
+
});
|
|
163
|
+
expect(stepCaps).toEqual([8, 8]);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
it("lets a per-call override win over the env var", async () => {
|
|
167
|
+
process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "8";
|
|
168
|
+
const { binding, stepCaps } = makeStepBinding(["x"]);
|
|
169
|
+
const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
|
|
170
|
+
await runner.generateWithUsage({
|
|
171
|
+
...BASE_ARGS,
|
|
172
|
+
promptTokens: new Int32Array([1]),
|
|
173
|
+
maxTokensPerStep: 1,
|
|
174
|
+
});
|
|
175
|
+
expect(stepCaps).toEqual([1]);
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
// Clamping: per-call caps outside the supported window are pulled back in.
it("clamps out-of-range per-call overrides into the supported window", async () => {
  delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;

  // [requested cap, cap expected at the binding], run in this order.
  // 0 floors to the minimum of 1, never disables generation.
  const scenarios: ReadonlyArray<readonly [number, number]> = [
    [0, 1],
    [100_000, 512],
  ];

  for (const [requested, expected] of scenarios) {
    // A fresh binding and runner per scenario so recorded caps do not mix.
    const { binding, stepCaps } = makeStepBinding(["x"]);
    const streamingRunner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
    await streamingRunner.generateWithUsage({
      ...BASE_ARGS,
      promptTokens: new Int32Array([1]),
      maxTokensPerStep: requested,
    });
    expect(stepCaps).toEqual([expected]);
  }
});
|
|
199
|
+
|
|
200
|
+
// Unit tests for the env-driven resolver: default, parse + clamp, and
// fallback on a value that is not a number.
describe("resolveMaxTokensPerStep", () => {
  const ENV_KEY = "ELIZA_LOCAL_STREAM_TOKENS_PER_STEP";

  it("returns 32 when unset", () => {
    delete process.env[ENV_KEY];
    expect(resolveMaxTokensPerStep()).toBe(32);
  });

  it("parses and clamps a valid env value", () => {
    // [raw env value, resolved cap]: in range, above the ceiling, below the floor.
    const expectations: ReadonlyArray<readonly [string, number]> = [
      ["16", 16],
      ["9999", 512],
      ["0", 1],
    ];
    for (const [rawValue, resolved] of expectations) {
      process.env[ENV_KEY] = rawValue;
      expect(resolveMaxTokensPerStep()).toBe(resolved);
    }
  });

  it("falls back to 32 on a non-numeric env value", () => {
    process.env[ENV_KEY] = "smooth";
    expect(resolveMaxTokensPerStep()).toBe(32);
  });
});
|
|
220
|
+
});
|
|
@@ -61,6 +61,12 @@ export interface FfiStreamingGenerateArgs {
|
|
|
61
61
|
*/
|
|
62
62
|
cacheTypeK?: string | null;
|
|
63
63
|
cacheTypeV?: string | null;
|
|
64
|
+
/**
|
|
65
|
+
* Runtime context window in tokens (ABI v9). Forwarded into the fused
|
|
66
|
+
* session config on `llmStreamOpen`; `undefined` keeps the native
|
|
67
|
+
* ELIZA_LLM_N_CTX/default fallback.
|
|
68
|
+
*/
|
|
69
|
+
contextSize?: number;
|
|
64
70
|
/**
|
|
65
71
|
* GBNF grammar source forcing the structured-reply envelope. Passed to
|
|
66
72
|
* the native session's `llmStreamOpen` config so sampling is
|
|
@@ -69,6 +75,16 @@ export interface FfiStreamingGenerateArgs {
|
|
|
69
75
|
gbnfGrammar?: string | null;
|
|
70
76
|
/** Cancellation signal — fires `llmStreamCancel` on the active session. */
|
|
71
77
|
signal?: AbortSignal;
|
|
78
|
+
/**
|
|
79
|
+
* Per-step token cap for the native decode loop. Lower values make the
|
|
80
|
+
* local UI stream in finer-grained jumps (smoother token-by-token render)
|
|
81
|
+
* at the cost of more JS↔FFI round-trips per reply; higher values batch
|
|
82
|
+
* more tokens per step. When omitted, falls back to
|
|
83
|
+
* `resolveMaxTokensPerStep()` (env `ELIZA_LOCAL_STREAM_TOKENS_PER_STEP`,
|
|
84
|
+
* else `DEFAULT_MAX_TOKENS_PER_STEP`). Clamped to
|
|
85
|
+
* `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
|
|
86
|
+
*/
|
|
87
|
+
maxTokensPerStep?: number;
|
|
72
88
|
/** Per-chunk text callback. */
|
|
73
89
|
onTextChunk?: (chunk: string) => void | Promise<void>;
|
|
74
90
|
/** Speculative accept/reject events from MTP verification. */
|
|
@@ -86,6 +102,37 @@ export interface FfiStreamingGenerateResult {
|
|
|
86
102
|
/** Default per-step caps. Match upstream llama-server's `n_predict` chunk size. */
|
|
87
103
|
const DEFAULT_MAX_TOKENS_PER_STEP = 32;
|
|
88
104
|
const DEFAULT_MAX_TEXT_BYTES = 1024;
|
|
105
|
+
/**
|
|
106
|
+
* Sane bounds for the per-step token cap. The floor is 1 (true
|
|
107
|
+
* token-by-token); the ceiling guards against pathological values that would
|
|
108
|
+
* defeat streaming by emitting the whole reply in one step.
|
|
109
|
+
*/
|
|
110
|
+
const MIN_MAX_TOKENS_PER_STEP = 1;
|
|
111
|
+
const MAX_MAX_TOKENS_PER_STEP = 512;
|
|
112
|
+
|
|
113
|
+
/**
 * Coerce a caller-supplied per-step cap into the supported range.
 *
 * @param value Requested cap; may be fractional, negative, or non-finite.
 * @returns `DEFAULT_MAX_TOKENS_PER_STEP` when `value` is not finite;
 *   otherwise `value` truncated toward zero and bounded to
 *   `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
 */
function clampMaxTokensPerStep(value: number): number {
  // NaN / ±Infinity carry no usable magnitude, so use the default.
  if (!Number.isFinite(value)) {
    return DEFAULT_MAX_TOKENS_PER_STEP;
  }

  // Drop any fractional part before bounding.
  const wholeTokens = Math.trunc(value);
  if (wholeTokens < MIN_MAX_TOKENS_PER_STEP) {
    return MIN_MAX_TOKENS_PER_STEP;
  }
  if (wholeTokens > MAX_MAX_TOKENS_PER_STEP) {
    return MAX_MAX_TOKENS_PER_STEP;
  }
  return wholeTokens;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Resolve the per-step token cap for the native decode loop. Override via the
 * `ELIZA_LOCAL_STREAM_TOKENS_PER_STEP` env var (e.g. set to `8` for smoother
 * local streaming, weighed against the extra JS↔FFI round-trips and the shared
 * voice phrase-chunker).
 *
 * The env value is trimmed and must be a complete base-10 integer (optional
 * sign, digits only). Anything else, including values with trailing garbage
 * such as `"8abc"` or exponent notation such as `"1e3"`, counts as invalid.
 *
 * @returns `DEFAULT_MAX_TOKENS_PER_STEP` (32) when the variable is unset,
 *   blank, or invalid; otherwise the parsed value clamped to
 *   `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
 */
export function resolveMaxTokensPerStep(): number {
  const raw = process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP?.trim();
  if (!raw) return DEFAULT_MAX_TOKENS_PER_STEP;

  // `parseInt` alone accepts any numeric prefix ("8abc" → 8, "1e3" → 1),
  // which would silently apply a cap the operator never asked for. Validate
  // the whole string first so malformed values take the documented fallback.
  if (!/^[+-]?\d+$/.test(raw)) return DEFAULT_MAX_TOKENS_PER_STEP;

  // The clamp also maps a non-finite result (absurdly long digit strings)
  // back to the default.
  return clampMaxTokensPerStep(Number.parseInt(raw, 10));
}
|
|
89
136
|
|
|
90
137
|
/**
|
|
91
138
|
* Backend used by the mobile and desktop FFI routes.
|
|
@@ -285,6 +332,7 @@ export class FfiStreamingRunner {
|
|
|
285
332
|
gpuLayers: args.gpuLayers,
|
|
286
333
|
cacheTypeK: args.cacheTypeK,
|
|
287
334
|
cacheTypeV: args.cacheTypeV,
|
|
335
|
+
contextSize: args.contextSize,
|
|
288
336
|
},
|
|
289
337
|
});
|
|
290
338
|
|
|
@@ -307,6 +355,11 @@ export class FfiStreamingRunner {
|
|
|
307
355
|
return;
|
|
308
356
|
}
|
|
309
357
|
|
|
358
|
+
const maxTokensPerStep =
|
|
359
|
+
args.maxTokensPerStep !== undefined
|
|
360
|
+
? clampMaxTokensPerStep(args.maxTokensPerStep)
|
|
361
|
+
: resolveMaxTokensPerStep();
|
|
362
|
+
|
|
310
363
|
let tokenIndex = 0;
|
|
311
364
|
while (true) {
|
|
312
365
|
if (args.signal?.aborted) {
|
|
@@ -315,7 +368,7 @@ export class FfiStreamingRunner {
|
|
|
315
368
|
}
|
|
316
369
|
const step = this.ffi.llmStreamNext({
|
|
317
370
|
stream,
|
|
318
|
-
maxTokensPerStep
|
|
371
|
+
maxTokensPerStep,
|
|
319
372
|
maxTextBytes: DEFAULT_MAX_TEXT_BYTES,
|
|
320
373
|
});
|
|
321
374
|
onStep(step);
|
|
@@ -158,5 +158,9 @@ describe("DesktopFusedFfiBackendRuntime.release() ordering (#14)", () => {
|
|
|
158
158
|
// is explicitly poisoned so a new native model is not allocated over a
|
|
159
159
|
// failed cleanup state.
|
|
160
160
|
await expect(runtime.acquire(PLAN)).rejects.toThrow(/restart required/i);
|
|
161
|
-
|
|
161
|
+
// Heavy path (dynamic import + FFI acquire/release/acquire): fast in
|
|
162
|
+
// isolation but CPU-starved under the full 2122-test parallel suite, where
|
|
163
|
+
// it brushed the old 20s ceiling (20012ms). Headroom; a true hang still
|
|
164
|
+
// fails well within this bound.
|
|
165
|
+
}, 45_000);
|
|
162
166
|
});
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* #8808 acceptance criterion 4 — fused eliza-1 no-regression.
|
|
3
|
+
*
|
|
4
|
+
* The local stack is Eliza-1 only (#8808 cutover), so serving is deterministic.
|
|
5
|
+
* This test pins the invariants:
|
|
6
|
+
* - `decideBackend` / `BackendDispatcher` route a `runtimeClass:"fused-eliza1"`
|
|
7
|
+
* model to the fused `llama-cpp` runtime,
|
|
8
|
+
* - the fused path retains its full-pipeline binding: the `BackendPlan` that
|
|
9
|
+
* reaches the fused backend still carries the catalog entry and the
|
|
10
|
+
* bundle-root override that `DesktopFusedFfiBackendRuntime.acquire()` reads
|
|
11
|
+
* to anchor the fused context.
|
|
12
|
+
*
|
|
13
|
+
* It complements `backend-runtime-class.test.ts` (which proves the binary
|
|
14
|
+
* routing) by asserting that the FULL fused load contract is forwarded intact.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, expect, it } from "vitest";
|
|
18
|
+
|
|
19
|
+
import {
|
|
20
|
+
BackendDispatcher,
|
|
21
|
+
type BackendPlan,
|
|
22
|
+
decideBackend,
|
|
23
|
+
type GenerateArgs,
|
|
24
|
+
type GenerateResult,
|
|
25
|
+
type LocalInferenceBackend,
|
|
26
|
+
} from "./backend";
|
|
27
|
+
import { findCatalogModel } from "./catalog";
|
|
28
|
+
import type { CatalogModel } from "./types";
|
|
29
|
+
|
|
30
|
+
const FUSED_TIER = findCatalogModel("eliza-1-4b") as CatalogModel;
|
|
31
|
+
|
|
32
|
+
/**
 * Build an in-memory `LocalInferenceBackend` test double.
 *
 * Every plan passed to `load` is appended to the exposed `loaded` array so a
 * test can inspect exactly what the dispatcher forwarded. The double always
 * reports itself as available and answers every `generate` call with `"ok"`.
 *
 * @param id Backend id the double reports.
 * @returns The backend double plus its `loaded` plan log.
 */
function makeBackend(id: LocalInferenceBackend["id"]): LocalInferenceBackend & {
  loaded: BackendPlan[];
} {
  const recordedPlans: BackendPlan[] = [];

  return {
    id,
    loaded: recordedPlans,
    async available() {
      return true;
    },
    async load(plan: BackendPlan) {
      recordedPlans.push(plan);
    },
    async unload() {},
    async generate(_args: GenerateArgs): Promise<GenerateResult> {
      return "ok";
    },
    hasLoadedModel() {
      return recordedPlans.length > 0;
    },
    currentModelPath() {
      // Path of the most recently loaded plan, or null before any load.
      const latest = recordedPlans.at(-1);
      return latest?.modelPath ?? null;
    },
  };
}
|
|
49
|
+
|
|
50
|
+
// Pins backend routing for a fused Eliza-1 tier and checks that the plan the
// dispatcher hands to the backend still carries the catalog entry and overrides.
describe("fused eliza-1 no-regression (C4)", () => {
  const MODEL_ID = "eliza-1-4b";
  const BUNDLE_ROOT = "/models/eliza-1-4b";
  const TEXT_MODEL_PATH = `${BUNDLE_ROOT}/text/eliza-1-4b-128k.gguf`;
  const DRAFT_MODEL_PATH = `${BUNDLE_ROOT}/text/eliza-1-4b-mtp.gguf`;

  /** Wire a dispatcher around a fresh recording `llama-cpp` backend double. */
  function wireDispatcher() {
    const fused = makeBackend("llama-cpp");
    const dispatcher = new BackendDispatcher(
      fused,
      () => true,
      () => null,
    );
    return { fused, dispatcher };
  }

  it("the catalog tier under test really is a fused-eliza1 tier", () => {
    expect(FUSED_TIER).toBeTruthy();
    expect(FUSED_TIER.runtimeClass).toBe("fused-eliza1");
    expect(FUSED_TIER.runtime?.mtp).toBeUndefined();
  });

  it("decideBackend routes a fused Eliza-1 tier to the fused llama-cpp runtime", () => {
    const { backend } = decideBackend({
      override: "auto",
      catalog: FUSED_TIER,
      llamaCppAvailable: true,
    });
    expect(backend).toBe("llama-cpp");
  });

  it("decideBackend routes everything to llama-cpp — the stack is Eliza-1 only", () => {
    // Post-#8808 cutover: there is no generic-gguf backend; every model
    // (even an unknown catalog entry) routes to the fused llama-cpp runtime.
    const { backend } = decideBackend({
      override: "auto",
      catalog: undefined,
      llamaCppAvailable: true,
    });
    expect(backend).toBe("llama-cpp");
  });

  it("dispatcher forwards the fused full-pipeline binding (catalog + bundleRoot) to the fused backend", async () => {
    const { fused, dispatcher } = wireDispatcher();

    const plan: BackendPlan = {
      modelPath: TEXT_MODEL_PATH,
      modelId: MODEL_ID,
      catalog: FUSED_TIER,
      runtimeClass: "fused-eliza1",
      overrides: {
        bundleRoot: BUNDLE_ROOT,
        draftModelPath: DRAFT_MODEL_PATH,
        gpuLayers: "max",
        cacheTypeK: "tbq4_0",
        cacheTypeV: "tbq3_0",
      },
    };

    await dispatcher.load(plan);

    // Routed to the fused runtime.
    expect(fused.loaded).toHaveLength(1);
    expect(dispatcher.activeBackendId()).toBe("llama-cpp");

    // The full-pipeline binding survives dispatch: the fused backend receives
    // the same catalog entry plus the bundle-root and explicit drafter
    // overrides that anchor the fused context and preserve fork KV-cache
    // kernel settings.
    const [forwarded] = fused.loaded;
    expect(forwarded.catalog).toBe(FUSED_TIER);
    expect(forwarded.catalog?.runtime?.mtp).toBeUndefined();
    expect(forwarded.overrides?.bundleRoot).toBe(BUNDLE_ROOT);
    expect(forwarded.overrides?.draftModelPath).toBe(DRAFT_MODEL_PATH);
    expect(forwarded.overrides?.cacheTypeK).toBe("tbq4_0");
    expect(forwarded.overrides?.cacheTypeV).toBe("tbq3_0");
  });

  it("env-override=llama-cpp keeps a fused tier on the fused path", async () => {
    // Save the ambient value so the override never leaks out of this test.
    const savedBackendEnv = process.env.ELIZA_INFERENCE_BACKEND;
    process.env.ELIZA_INFERENCE_BACKEND = "llama-cpp";
    try {
      const { dispatcher } = wireDispatcher();
      const decision = dispatcher.decide({
        modelPath: TEXT_MODEL_PATH,
        modelId: MODEL_ID,
        catalog: FUSED_TIER,
        runtimeClass: "fused-eliza1",
      });
      expect(decision.backend).toBe("llama-cpp");
      expect(decision.reason).toBe("env-override");
    } finally {
      if (savedBackendEnv === undefined) {
        delete process.env.ELIZA_INFERENCE_BACKEND;
      } else {
        process.env.ELIZA_INFERENCE_BACKEND = savedBackendEnv;
      }
    }
  });
});
|
|
@@ -213,8 +213,13 @@ describe("probeHardware GPU detection", () => {
|
|
|
213
213
|
expect(probe.gpu).not.toBeNull();
|
|
214
214
|
expect(probe.gpu?.backend).toBe("cuda");
|
|
215
215
|
expect(probe.gpu?.totalVramGb).toBeGreaterThanOrEqual(23);
|
|
216
|
-
// A 24 GB discrete GPU must not be mis-tiered as a CPU box.
|
|
217
|
-
|
|
216
|
+
// A 24 GB discrete GPU must not be mis-tiered as a CPU box (POOR). MAX/GOOD
|
|
217
|
+
// additionally require >=16/>=8 GB *free* RAM at session start, which this test
|
|
218
|
+
// only mocks the GPU for; under parallel-suite memory pressure real free RAM
|
|
219
|
+
// can dip below those gates, legitimately tiering a 24 GB-GPU box to OKAY
|
|
220
|
+
// (still GPU-backed, not a CPU box). Assert "off CPU" (not POOR) -- the stated
|
|
221
|
+
// intent -- which is deterministic regardless of host free RAM.
|
|
222
|
+
expect(["MAX", "GOOD", "OKAY"]).toContain(classifyDeviceTier(probe).tier);
|
|
218
223
|
});
|
|
219
224
|
|
|
220
225
|
it("reports gpu:null when nvidia-smi is absent on a non-Apple host", async () => {
|
package/src/services/hardware.ts
CHANGED
|
@@ -13,8 +13,10 @@
|
|
|
13
13
|
import { execFileSync } from "node:child_process";
|
|
14
14
|
import fs from "node:fs";
|
|
15
15
|
import os from "node:os";
|
|
16
|
+
import path from "node:path";
|
|
16
17
|
import { detectGpu } from "./gpu-detect";
|
|
17
18
|
import type { Eliza1Backend, Eliza1DeviceCaps } from "./manifest";
|
|
19
|
+
import { elizaModelsDir } from "./paths";
|
|
18
20
|
import type {
|
|
19
21
|
CpuFeatureProbe,
|
|
20
22
|
HardwareProbe,
|
|
@@ -29,6 +31,30 @@ function bytesToGb(bytes: number): number {
|
|
|
29
31
|
return Math.round((bytes / BYTES_PER_GB) * 10) / 10;
|
|
30
32
|
}
|
|
31
33
|
|
|
34
|
+
/**
 * Report free disk space, in GB, for the volume holding the models directory.
 *
 * Starting at `elizaModelsDir()`, climbs (at most 12 levels) to the nearest
 * ancestor that exists, so a models directory that has not been created yet
 * is measured against its parent volume. The figure is `bavail * bsize` from
 * `statfs`, converted with `bytesToGb`.
 *
 * @returns Free space in GB, or `undefined` when the volume cannot be stat'd
 *   or the computed byte count is negative or not finite. Never throws.
 */
async function probeFreeDiskGb(): Promise<number | undefined> {
  try {
    let candidate = elizaModelsDir();
    let hops = 0;
    while (hops < 12 && !fs.existsSync(candidate)) {
      const oneUp = path.dirname(candidate);
      // dirname() returning its input means the filesystem root was reached.
      if (oneUp === candidate) break;
      candidate = oneUp;
      hops += 1;
    }

    const { bavail, bsize } = await fs.promises.statfs(candidate);
    const freeBytes = bavail * bsize;
    return Number.isFinite(freeBytes) && freeBytes >= 0
      ? bytesToGb(freeBytes)
      : undefined;
  } catch {
    // Best-effort probe: any failure is reported as "unknown", not an error.
    return undefined;
  }
}
|
|
57
|
+
|
|
32
58
|
/**
|
|
33
59
|
* Pick a default bucket based on total available memory and architecture.
|
|
34
60
|
*
|
|
@@ -331,9 +357,11 @@ export async function probeHardware(): Promise<HardwareProbe> {
|
|
|
331
357
|
|
|
332
358
|
const gpu = detectProbeGpu(appleSilicon, totalRamBytes, freeRamBytes);
|
|
333
359
|
const totalRamGb = bytesToGb(totalRamBytes);
|
|
360
|
+
const freeDiskGb = await probeFreeDiskGb();
|
|
334
361
|
return {
|
|
335
362
|
totalRamGb,
|
|
336
363
|
freeRamGb: bytesToGb(freeRamBytes),
|
|
364
|
+
...(freeDiskGb !== undefined ? { freeDiskGb } : {}),
|
|
337
365
|
gpu,
|
|
338
366
|
cpuCores,
|
|
339
367
|
cpuFeatures,
|