@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference capability detection.
|
|
3
|
+
*
|
|
4
|
+
* Centralises "what does this device's local-inference stack expose"
|
|
5
|
+
* into one struct the runtime can read at startup. The shape mirrors
|
|
6
|
+
* the per-platform binding probes (Android + iOS + desktop FFI) so the
|
|
7
|
+
* runtime doesn't have to import each platform's adapter just to
|
|
8
|
+
* surface the bits.
|
|
9
|
+
*
|
|
10
|
+
* Consumed by:
|
|
11
|
+
* - the AOSP local-inference bootstrap, to choose the in-process FFI
|
|
12
|
+
* streaming path,
|
|
13
|
+
* - the desktop voice lifecycle service, to decide whether to wire the
|
|
14
|
+
* FFI streaming runner factory,
|
|
15
|
+
* - UI surfaces (model picker, voice toggle) that hide options the
|
|
16
|
+
* loaded build cannot honour.
|
|
17
|
+
*
|
|
18
|
+
* Naming:
|
|
19
|
+
* - `streamingLlm` — `eliza_inference_llm_stream_*` symbols are
|
|
20
|
+
* resolved and the build reports `_supported() === 1`.
|
|
21
|
+
* - `mtpSupported` — native MTP speculative decoding can actually run.
|
|
22
|
+
* - `omnivoiceStreaming` — `eliza_inference_tts_synthesize_stream` is
|
|
23
|
+
* present and supported.
|
|
24
|
+
* - `mmprojSupported` — the build carries the multi-modal projector
|
|
25
|
+
* and the device has the headroom to keep it resident.
|
|
26
|
+
* - `thermalState` — best-effort current thermal snapshot from the
|
|
27
|
+
* platform (`ProcessInfo.thermalState` on iOS,
|
|
28
|
+
* `PowerManager.getCurrentThermalStatus` on Android).
|
|
29
|
+
*
|
|
30
|
+
* All fields are read-only snapshots; the runtime re-probes on resume.
|
|
31
|
+
*/
|
|
32
|
+
export type ThermalState = "nominal" | "fair" | "serious" | "critical";
|
|
33
|
+
export interface InferenceCapabilities {
|
|
34
|
+
streamingLlm: boolean;
|
|
35
|
+
mtpSupported: boolean;
|
|
36
|
+
omnivoiceStreaming: boolean;
|
|
37
|
+
mmprojSupported: boolean;
|
|
38
|
+
thermalState: ThermalState;
|
|
39
|
+
/** Platform tag for diagnostics + routing. */
|
|
40
|
+
platform: "android" | "ios" | "desktop" | "unknown";
|
|
41
|
+
}
|
|
42
|
+
/** Minimal probe surface — what the caller hands in. */
|
|
43
|
+
export interface CapabilityProbes {
|
|
44
|
+
/** True only when `eliza_inference_llm_stream_supported()` returns 1. */
|
|
45
|
+
llmStreamSupported(): boolean;
|
|
46
|
+
/** True only when `eliza_inference_tts_stream_supported()` returns 1. */
|
|
47
|
+
ttsStreamSupported(): boolean;
|
|
48
|
+
/** True only when the native MTP path is available for the loaded model. */
|
|
49
|
+
mtpResident(): boolean;
|
|
50
|
+
/** True only when the mmproj weights are present in the bundle. */
|
|
51
|
+
mmprojResident(): boolean;
|
|
52
|
+
/** Current thermal snapshot. May return `nominal` on platforms without a thermal API. */
|
|
53
|
+
thermalState(): ThermalState;
|
|
54
|
+
/** Platform tag. */
|
|
55
|
+
platform(): "android" | "ios" | "desktop" | "unknown";
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Build a capability struct from a set of probes.
|
|
59
|
+
*
|
|
60
|
+
* Policy decisions encoded here:
|
|
61
|
+
* - Native MTP only fires when `llmStreamSupported` and the thermal state is
|
|
62
|
+
* at most `fair`.
|
|
63
|
+
* - mmproj is gated entirely on the bundle carrying it. Devices
|
|
64
|
+
* short on RAM can still load the chat model — they just lose the
|
|
65
|
+
* vision path; the picker UI uses this bit to grey out vision
|
|
66
|
+
* uploads.
|
|
67
|
+
* - omnivoice streaming is gated entirely on the FFI build: the JS
|
|
68
|
+
* side has no fallback path for streaming TTS, only for batch.
|
|
69
|
+
*/
|
|
70
|
+
export declare function probeCapabilities(probes: CapabilityProbes): InferenceCapabilities;
|
|
71
|
+
/**
|
|
72
|
+
* Defaults probe: every flag off, platform `unknown`, thermal `nominal`.
|
|
73
|
+
* Used by the runtime when no FFI binding could be loaded (cloud-only
|
|
74
|
+
* fallback path). Surfaces as a single struct the UI can render
|
|
75
|
+
* without branching on "no probe registered".
|
|
76
|
+
*/
|
|
77
|
+
export declare function defaultsForNoBinding(): InferenceCapabilities;
|
|
78
|
+
/**
|
|
79
|
+
* A live, *sampled* device-resource snapshot from the native probe
|
|
80
|
+
* (iOS `getResourceSnapshot`, Android `ResourceProbe.getResourceSnapshot`) — as
|
|
81
|
+
* opposed to the one-shot `InferenceCapabilities` probe. The Mobile Resource
|
|
82
|
+
* Workbench (issue #8800) samples these on an interval across a sustained
|
|
83
|
+
* workload to build a thermal/RSS/battery timeline. Every numeric field is
|
|
84
|
+
* `null` when the platform could not measure it — never a fabricated zero.
|
|
85
|
+
*/
|
|
86
|
+
export interface ResourceSnapshot {
|
|
87
|
+
/** Current thermal state; `"unknown"` on platforms without a thermal API. */
|
|
88
|
+
thermalState: ThermalState | "unknown";
|
|
89
|
+
/** Whether the OS low-power / battery-saver mode is engaged, or null. */
|
|
90
|
+
lowPowerMode: boolean | null;
|
|
91
|
+
/** Process resident set size in MB, or null. */
|
|
92
|
+
residentMemoryMb: number | null;
|
|
93
|
+
/** Device-wide available RAM in MB, or null. */
|
|
94
|
+
availableRamMb: number | null;
|
|
95
|
+
/** Cumulative process CPU time in ms, or null. */
|
|
96
|
+
cpuTimeMs: number | null;
|
|
97
|
+
/** Battery level 0..100, or null. */
|
|
98
|
+
batteryLevelPct: number | null;
|
|
99
|
+
/** Sample timestamp in ms (epoch). */
|
|
100
|
+
capturedAtMs: number;
|
|
101
|
+
}
|
|
102
|
+
export interface ThermalThrottleDecision {
|
|
103
|
+
/**
|
|
104
|
+
* Whether speculative decoding (MTP) should be disabled for the next step.
|
|
105
|
+
* MTP burns extra compute for a latency win; under heat that trade flips.
|
|
106
|
+
*/
|
|
107
|
+
throttleSpeculativeDecode: boolean;
|
|
108
|
+
/**
|
|
109
|
+
* Whether to proactively shed load (shrink batch / context, pause warmups)
|
|
110
|
+
* because the device is at the top of the thermal range.
|
|
111
|
+
*/
|
|
112
|
+
reduceLoad: boolean;
|
|
113
|
+
reason: string;
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Decide whether to throttle on-device inference for the current thermal /
|
|
117
|
+
* power state. Pure and synchronous so the streaming path can call it per token
|
|
118
|
+
* (the `ProcessInfo.thermalState` throttle hook the iOS streaming bridge still
|
|
119
|
+
* lists as a TODO) and the workbench can assert the policy without a device.
|
|
120
|
+
*
|
|
121
|
+
* - `serious` / `critical` thermal → stop speculative decoding (matches the
|
|
122
|
+
* existing one-shot MTP gate in `probeCapabilities`).
|
|
123
|
+
* - `critical` thermal → additionally shed load.
|
|
124
|
+
* - low-power mode → stop speculative decoding (honour the user's power intent).
|
|
125
|
+
* - `unknown` thermal with no low-power signal → do not throttle (don't
|
|
126
|
+
* penalise a device that simply lacks a thermal API).
|
|
127
|
+
*/
|
|
128
|
+
export declare function thermalThrottleDecision(input: {
|
|
129
|
+
thermalState: ThermalState | "unknown";
|
|
130
|
+
lowPowerMode?: boolean | null;
|
|
131
|
+
}): ThermalThrottleDecision;
|
|
132
|
+
//# sourceMappingURL=inference-capabilities.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inference-capabilities.d.ts","sourceRoot":"","sources":["../../src/services/inference-capabilities.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,MAAM,GAAG,SAAS,GAAG,UAAU,CAAC;AAEvE,MAAM,WAAW,qBAAqB;IACrC,YAAY,EAAE,OAAO,CAAC;IACtB,YAAY,EAAE,OAAO,CAAC;IACtB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,eAAe,EAAE,OAAO,CAAC;IACzB,YAAY,EAAE,YAAY,CAAC;IAC3B,8CAA8C;IAC9C,QAAQ,EAAE,SAAS,GAAG,KAAK,GAAG,SAAS,GAAG,SAAS,CAAC;CACpD;AAED,wDAAwD;AACxD,MAAM,WAAW,gBAAgB;IAChC,yEAAyE;IACzE,kBAAkB,IAAI,OAAO,CAAC;IAC9B,yEAAyE;IACzE,kBAAkB,IAAI,OAAO,CAAC;IAC9B,4EAA4E;IAC5E,WAAW,IAAI,OAAO,CAAC;IACvB,mEAAmE;IACnE,cAAc,IAAI,OAAO,CAAC;IAC1B,0FAA0F;IAC1F,YAAY,IAAI,YAAY,CAAC;IAC7B,oBAAoB;IACpB,QAAQ,IAAI,SAAS,GAAG,KAAK,GAAG,SAAS,GAAG,SAAS,CAAC;CACtD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,iBAAiB,CAChC,MAAM,EAAE,gBAAgB,GACtB,qBAAqB,CAqBvB;AAED;;;;;GAKG;AACH,wBAAgB,oBAAoB,IAAI,qBAAqB,CAS5D;AAMD;;;;;;;GAOG;AACH,MAAM,WAAW,gBAAgB;IAChC,6EAA6E;IAC7E,YAAY,EAAE,YAAY,GAAG,SAAS,CAAC;IACvC,yEAAyE;IACzE,YAAY,EAAE,OAAO,GAAG,IAAI,CAAC;IAC7B,gDAAgD;IAChD,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,gDAAgD;IAChD,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,kDAAkD;IAClD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,qCAAqC;IACrC,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sCAAsC;IACtC,YAAY,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,uBAAuB;IACvC;;;OAGG;IACH,yBAAyB,EAAE,OAAO,CAAC;IACnC;;;OAGG;IACH,UAAU,EAAE,OAAO,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CACf;AASD;;;;;;;;;;;;GAYG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE;IAC9C,YAAY,EAAE,YAAY,GAAG,SAAS,CAAC;IACvC,YAAY,CAAC,EAAE,OAAO,GAAG,IAAI,CAAC;CAC9B,GAAG,uBAAuB,CAsB1B"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight structured-metrics sink for local-inference telemetry.
|
|
3
|
+
*
|
|
4
|
+
* This module is intentionally small: it is a logger-backed histogram with a
|
|
5
|
+
* stable, call-site-friendly `record(name, value, tags?)` API so individual
|
|
6
|
+
* backends (FFI runtime, voice scheduler, …) can emit point-in-time
|
|
7
|
+
* observations without importing a heavy metrics framework.
|
|
8
|
+
*
|
|
9
|
+
* Design constraints
|
|
10
|
+
* ------------------
|
|
11
|
+
* - No external dependencies beyond `@elizaos/core` logger (already a dep).
|
|
12
|
+
* - `record()` is synchronous and must never throw into the call site.
|
|
13
|
+
* - Each named metric keeps a bounded ring of recent samples (default 256) for
|
|
14
|
+
* in-process p50/p95 queries via `summary(name)`. This mirrors the
|
|
15
|
+
* `BoundedHistogram` pattern already used by `latency-trace.ts`.
|
|
16
|
+
* - The module exports a process-wide singleton (`inferenceTelemetry`) that
|
|
17
|
+
* all backends share, plus the class for test injection.
|
|
18
|
+
*
|
|
19
|
+
* Metric names used today
|
|
20
|
+
* -----------------------
|
|
21
|
+
* inference.ttfa_ms — time from fetch() to first HTTP chunk (L5)
|
|
22
|
+
* inference.first_token_ms — time from fetch() to first decoded token (L5)
|
|
23
|
+
* tts.chunk_size_ms — duration of one PCM chunk in ms (T2)
|
|
24
|
+
* tts.chunk_size_bytes — byte size of one PCM chunk (T2)
|
|
25
|
+
*/
|
|
26
|
+
interface HistogramSummary {
|
|
27
|
+
count: number;
|
|
28
|
+
p50: number | null;
|
|
29
|
+
p95: number | null;
|
|
30
|
+
mean: number | null;
|
|
31
|
+
}
|
|
32
|
+
export type TelemetryTags = Record<string, string | number | boolean>;
|
|
33
|
+
export declare class InferenceTelemetry {
|
|
34
|
+
private readonly rings;
|
|
35
|
+
private readonly capacity;
|
|
36
|
+
constructor(capacity?: number);
|
|
37
|
+
/**
|
|
38
|
+
* Record a scalar observation. Never throws — telemetry must be
|
|
39
|
+
* instrumentation, never a fault path.
|
|
40
|
+
*
|
|
41
|
+
* @param name Dot-separated metric name, e.g. `"inference.ttfa_ms"`.
|
|
42
|
+
* @param value Numeric value (non-finite values are silently dropped).
|
|
43
|
+
* @param tags Optional key/value labels emitted in the log line.
|
|
44
|
+
*/
|
|
45
|
+
record(name: string, value: number, tags?: TelemetryTags): void;
|
|
46
|
+
/**
|
|
47
|
+
* Summary statistics for a named metric over the retained ring of samples.
|
|
48
|
+
* Returns `{ count: 0, ... }` when the metric has never been recorded.
|
|
49
|
+
*/
|
|
50
|
+
summary(name: string): HistogramSummary;
|
|
51
|
+
/** Names of all metrics that have received at least one sample. */
|
|
52
|
+
metricNames(): string[];
|
|
53
|
+
/** Reset all retained samples. Useful in tests. */
|
|
54
|
+
reset(): void;
|
|
55
|
+
}
|
|
56
|
+
/** Process-wide singleton used by FFI runtime and voice scheduler. */
|
|
57
|
+
export declare const inferenceTelemetry: InferenceTelemetry;
|
|
58
|
+
export {};
|
|
59
|
+
//# sourceMappingURL=inference-telemetry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inference-telemetry.d.ts","sourceRoot":"","sources":["../../src/services/inference-telemetry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAQH,UAAU,gBAAgB;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;IACnB,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;IACnB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;CACpB;AAwCD,MAAM,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,CAAC;AAEtE,qBAAa,kBAAkB;IAC9B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAkC;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;gBAEtB,QAAQ,SAAM;IAI1B;;;;;;;OAOG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,aAAa,GAAG,IAAI;IAoB/D;;;OAGG;IACH,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,gBAAgB;IAWvC,mEAAmE;IACnE,WAAW,IAAI,MAAM,EAAE;IAIvB,mDAAmD;IACnD,KAAK,IAAI,IAAI;CAGb;AAED,sEAAsE;AACtE,eAAO,MAAM,kBAAkB,oBAA2B,CAAC"}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* iOS streaming-LLM adapter.
|
|
3
|
+
*
|
|
4
|
+
* Mirror of the AOSP plugin's `aosp-llama-streaming.ts` for the iOS
|
|
5
|
+
* platform. Bridges the JS-side streaming surface (the same shape the
|
|
6
|
+
* `FfiStreamingRunner` expects) over the `LlamaCpp.xcframework` Swift
|
|
7
|
+
* bridge that ships with the iOS shell.
|
|
8
|
+
*
|
|
9
|
+
* Status (2026-05-12): the XCFramework currently ships without streaming-LLM
|
|
10
|
+
* support. The Swift implementation against
|
|
11
|
+
* `libelizainference.dylib` (built by `build-llama-cpp-mtp.mjs` with
|
|
12
|
+
* the `darwin-arm64-metal-fused` target) is the gating item; until then
|
|
13
|
+
* `loadIosStreamingLlmBinding` returns `null` and the runtime falls
|
|
14
|
+
* back to the cloud route.
|
|
15
|
+
*
|
|
16
|
+
* Why XCFramework + Swift bridge rather than `bun:ffi` directly:
|
|
17
|
+
* - bun:ffi runs only inside the Bun runtime. On iOS, Eliza ships the
|
|
18
|
+
* ElizaBunEngine.xcframework which embeds a Bun runtime — but the
|
|
19
|
+
* llama symbols cannot be resolved at `bun:ffi.dlopen` time on a
|
|
20
|
+
* codesigned device build (the dynamic linker forbids loading
|
|
21
|
+
* arbitrary `.dylib` from the app bundle on a real device).
|
|
22
|
+
* - The Swift bridge wraps the symbols at compile time inside the
|
|
23
|
+
* XCFramework so the codesignature covers them. Capacitor's
|
|
24
|
+
* `llama-cpp-capacitor` plugin already uses this pattern for the
|
|
25
|
+
* non-streaming surface.
|
|
26
|
+
*
|
|
27
|
+
* The Swift glue gating this binding needs to:
|
|
28
|
+
* 1. Re-export the streaming-LLM symbols from `ffi-streaming-llm.h`
|
|
29
|
+
* under the Swift module name `LlamaCpp.Streaming.*`.
|
|
30
|
+
* 2. Expose an Objective-C-bridgeable wrapper class
|
|
31
|
+
* (`LlamaStreamingSession`) so Capacitor's plugin can register
|
|
32
|
+
* methods on it.
|
|
33
|
+
* 3. Wire `ProcessInfo.thermalState` into a thermal-throttle hook so
|
|
34
|
+
* iOS can bail out of speculative decoding under sustained heat.
|
|
35
|
+
*
|
|
36
|
+
* Until that lands, this file:
|
|
37
|
+
* - declares the JS contract,
|
|
38
|
+
* - probes for `(window as any).LlamaStreaming` (the Capacitor plugin
|
|
39
|
+
* entry point Swift will expose),
|
|
40
|
+
* - logs + returns null when the bridge isn't there.
|
|
41
|
+
*/
|
|
42
|
+
export type IosLlmStreamHandle = bigint;
|
|
43
|
+
export type IosInferenceContextHandle = bigint;
|
|
44
|
+
export interface IosLlmStreamConfig {
|
|
45
|
+
maxTokens: number;
|
|
46
|
+
temperature: number;
|
|
47
|
+
topP: number;
|
|
48
|
+
topK: number;
|
|
49
|
+
repeatPenalty: number;
|
|
50
|
+
slotId: number;
|
|
51
|
+
promptCacheKey: string | null;
|
|
52
|
+
draftMin: number;
|
|
53
|
+
draftMax: number;
|
|
54
|
+
mtpDrafterPath: string | null;
|
|
55
|
+
disableThinking: boolean;
|
|
56
|
+
}
|
|
57
|
+
export interface IosLlmStreamStep {
|
|
58
|
+
tokens: number[];
|
|
59
|
+
text: string;
|
|
60
|
+
done: boolean;
|
|
61
|
+
drafterDrafted: number;
|
|
62
|
+
drafterAccepted: number;
|
|
63
|
+
}
|
|
64
|
+
export interface IosStreamingLlmBinding {
|
|
65
|
+
llmStreamSupported(): boolean;
|
|
66
|
+
llmStreamOpen(args: {
|
|
67
|
+
ctx: IosInferenceContextHandle;
|
|
68
|
+
config: IosLlmStreamConfig;
|
|
69
|
+
}): IosLlmStreamHandle;
|
|
70
|
+
llmStreamPrefill(args: {
|
|
71
|
+
stream: IosLlmStreamHandle;
|
|
72
|
+
tokens: Int32Array;
|
|
73
|
+
}): void;
|
|
74
|
+
llmStreamNext(args: {
|
|
75
|
+
stream: IosLlmStreamHandle;
|
|
76
|
+
maxTokensPerStep?: number;
|
|
77
|
+
maxTextBytes?: number;
|
|
78
|
+
}): IosLlmStreamStep;
|
|
79
|
+
llmStreamCancel(stream: IosLlmStreamHandle): void;
|
|
80
|
+
llmStreamSaveSlot(args: {
|
|
81
|
+
stream: IosLlmStreamHandle;
|
|
82
|
+
filename: string;
|
|
83
|
+
}): void;
|
|
84
|
+
llmStreamRestoreSlot(args: {
|
|
85
|
+
stream: IosLlmStreamHandle;
|
|
86
|
+
filename: string;
|
|
87
|
+
}): void;
|
|
88
|
+
llmStreamClose(stream: IosLlmStreamHandle): void;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Try to load the iOS streaming-LLM binding. Returns null when:
|
|
92
|
+
* - we are not on iOS,
|
|
93
|
+
* - the Capacitor `LlamaStreaming` plugin isn't registered,
|
|
94
|
+
* - the bridge reports `isAvailable() === false` (e.g. the XCFramework
|
|
95
|
+
* was built without streaming-LLM support).
|
|
96
|
+
*
|
|
97
|
+
* Returning null is NOT a failure — the runtime then falls back to the
|
|
98
|
+
* cloud route. Throws only when the bridge is present but
|
|
99
|
+
* mis-configured (probe-and-catch is reserved for the boot path).
|
|
100
|
+
*/
|
|
101
|
+
export declare function loadIosStreamingLlmBinding(): Promise<IosStreamingLlmBinding | null>;
|
|
102
|
+
export interface IosInferenceCapabilities {
|
|
103
|
+
/** True only when the iOS Swift bridge is present AND reports streaming-LLM. */
|
|
104
|
+
streamingLlm: boolean;
|
|
105
|
+
/** Always false on iOS until the drafter weights ship in the bundle. */
|
|
106
|
+
mtpSupported: boolean;
|
|
107
|
+
/** Whether the XCFramework reports omnivoice streaming. */
|
|
108
|
+
omnivoiceStreaming: boolean;
|
|
109
|
+
/** Phone-tier iOS devices rarely have headroom for mmproj. */
|
|
110
|
+
mmprojSupported: boolean;
|
|
111
|
+
/**
|
|
112
|
+
* `ProcessInfo.thermalState` snapshot at probe time. Surfaced so the
|
|
113
|
+
* runtime can refuse to start speculative decoding when the device is
|
|
114
|
+
* already in `serious` / `critical`. Always `nominal` on the
|
|
115
|
+
* web/sim fallback path.
|
|
116
|
+
*/
|
|
117
|
+
thermalState: "nominal" | "fair" | "serious" | "critical";
|
|
118
|
+
}
|
|
119
|
+
//# sourceMappingURL=ios-llama-streaming.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ios-llama-streaming.d.ts","sourceRoot":"","sources":["../../src/services/ios-llama-streaming.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AASH,MAAM,MAAM,kBAAkB,GAAG,MAAM,CAAC;AACxC,MAAM,MAAM,yBAAyB,GAAG,MAAM,CAAC;AAE/C,MAAM,WAAW,kBAAkB;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,eAAe,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,OAAO,CAAC;IACd,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,sBAAsB;IACtC,kBAAkB,IAAI,OAAO,CAAC;IAC9B,aAAa,CAAC,IAAI,EAAE;QACnB,GAAG,EAAE,yBAAyB,CAAC;QAC/B,MAAM,EAAE,kBAAkB,CAAC;KAC3B,GAAG,kBAAkB,CAAC;IACvB,gBAAgB,CAAC,IAAI,EAAE;QACtB,MAAM,EAAE,kBAAkB,CAAC;QAC3B,MAAM,EAAE,UAAU,CAAC;KACnB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,IAAI,EAAE;QACnB,MAAM,EAAE,kBAAkB,CAAC;QAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,YAAY,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,gBAAgB,CAAC;IACrB,eAAe,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAAC;IAClD,iBAAiB,CAAC,IAAI,EAAE;QACvB,MAAM,EAAE,kBAAkB,CAAC;QAC3B,QAAQ,EAAE,MAAM,CAAC;KACjB,GAAG,IAAI,CAAC;IACT,oBAAoB,CAAC,IAAI,EAAE;QAC1B,MAAM,EAAE,kBAAkB,CAAC;QAC3B,QAAQ,EAAE,MAAM,CAAC;KACjB,GAAG,IAAI,CAAC;IACT,cAAc,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAAC;CACjD;AA8DD;;;;;;;;;;GAUG;AACH,wBAAsB,0BAA0B,IAAI,OAAO,CAAC,sBAAsB,GAAG,IAAI,CAAC,CAuBzF;AAoCD,MAAM,WAAW,wBAAwB;IACxC,gFAAgF;IAChF,YAAY,EAAE,OAAO,CAAC;IACtB,wEAAwE;IACxE,YAAY,EAAE,OAAO,CAAC;IACtB,2DAA2D;IAC3D,kBAAkB,EAAE,OAAO,CAAC;IAC5B,8DAA8D;IAC9D,eAAe,EAAE,OAAO,CAAC;IACzB;;;;;OAKG;IACH,YAAY,EAAE,SAAS,GAAG,MAAM,GAAG,SAAS,GAAG,UAAU,CAAC;CAC1D"}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CPU-offloaded KV-cache spill policy.
|
|
3
|
+
*
|
|
4
|
+
* packages/inference/AGENTS.md §3 item 7 mandates that for context > 64k on a
|
|
5
|
+
* device whose RAM cannot hold the full KV cache, the runtime MUST implement
|
|
6
|
+
* *spill* — keep the hot KV pages resident, page the cold ones out to CPU RAM
|
|
7
|
+
* (or, when even that is insufficient, to disk) — rather than refusing the
|
|
8
|
+
* request. AGENTS.md §3 "Failure handling" is equally explicit that the spill
|
|
9
|
+
* is gated by a real latency budget: a device where paging the cold KV back in
|
|
10
|
+
* would miss the voice first-audio-latency target must HARD-FAIL with a
|
|
11
|
+
* structured error, not silently serve a slow session.
|
|
12
|
+
*
|
|
13
|
+
* This module is the policy core. It is pure arithmetic — no llama-server
|
|
14
|
+
* process management, no native binding. `ffi-streaming-backend.ts` consults
|
|
15
|
+
* `planKvSpill()` at activation time:
|
|
16
|
+
* - `mode: "resident"` → no spill needed; load normally.
|
|
17
|
+
* - `mode: "spill"` → pass the resulting `residentPages` /
|
|
18
|
+
* `spillBytes` / tier ("cpu" | "disk") down to the
|
|
19
|
+
* backend as a `--kv-spill` hint.
|
|
20
|
+
* - no viable plan → `planKvSpill()` throws `KvSpillUnsupportedError` so the engine
|
|
21
|
+
* surfaces a structured 4xx to the UI.
|
|
22
|
+
*
|
|
23
|
+
* Model parameters (page size, per-page bandwidth, voice latency budget) are
|
|
24
|
+
* documented constants below — the only "measured" inputs are the device's
|
|
25
|
+
* memory bandwidth class and the KV geometry of the loaded bundle. We do not
|
|
26
|
+
* pretend to micro-benchmark the disk here; the bandwidth tiers are coarse
|
|
27
|
+
* and conservative, and the gate fails *closed*.
|
|
28
|
+
*/
|
|
29
|
+
import type { RamBudget } from "./types";
|
|
30
|
+
/** Context length below which spill never applies (AGENTS.md §3 item 7). */
|
|
31
|
+
export declare const KV_SPILL_MIN_CONTEXT = 65536;
|
|
32
|
+
/**
|
|
33
|
+
* KV-cache page granularity, in tokens. The runtime evicts/restores KV in
|
|
34
|
+
* page units, not per-token, so spill accounting is page-aligned. 256 tokens
|
|
35
|
+
* is the buun-llama-cpp fork's default `--kv-page-size` for the spillable
|
|
36
|
+
* cache; keep this in sync if that default changes.
|
|
37
|
+
*/
|
|
38
|
+
export declare const KV_PAGE_TOKENS = 256;
|
|
39
|
+
/**
|
|
40
|
+
* First-audio-latency budget for voice mode, in milliseconds. The streaming
|
|
41
|
+
* contract (AGENTS.md §4) wants the phrase chunker handing the first chunk to
|
|
42
|
+
* TTS inside a scheduler tick; a cold KV restore at decode time eats directly
|
|
43
|
+
* into this budget. If the worst-case restore for the spilled pages exceeds
|
|
44
|
+
* this, spill is not viable for a voice-enabled bundle and we hard-fail.
|
|
45
|
+
*
|
|
46
|
+
* Text-only bundles get the looser `KV_SPILL_TEXT_LATENCY_BUDGET_MS`.
|
|
47
|
+
*/
|
|
48
|
+
export declare const KV_SPILL_VOICE_LATENCY_BUDGET_MS = 200;
|
|
49
|
+
export declare const KV_SPILL_TEXT_LATENCY_BUDGET_MS = 1500;
|
|
50
|
+
/**
|
|
51
|
+
* Effective KV transfer bandwidth back into the attention kernel, by storage
|
|
52
|
+
* tier and host class, in bytes per millisecond (1e6 bytes/ms ≈ 1 GB/s). Conservative — the
|
|
53
|
+
* gate fails closed, so under-estimating bandwidth only makes us refuse more
|
|
54
|
+
* aggressively, never serve something too slow.
|
|
55
|
+
*
|
|
56
|
+
* - `cpu`/`apple` : Apple Silicon shared memory — "spilling to CPU" is
|
|
57
|
+
* mostly an accounting move (same physical RAM, different
|
|
58
|
+
* residency bookkeeping); effective restore bandwidth is
|
|
59
|
+
* high.
|
|
60
|
+
* - `cpu`/`pcie` : discrete-GPU x86 — cold KV pages live in host RAM and
|
|
61
|
+
* ride the PCIe bus back to VRAM. PCIe 4.0 x16 ≈ 25 GB/s
|
|
62
|
+
* after framing; we budget 12.
|
|
63
|
+
* - `disk`/`nvme` : NVMe SSD — sequential read ≈ 3 GB/s; we budget 1.5.
|
|
64
|
+
* - `disk`/`sata` : SATA SSD / spinning rust fallback — ≈ 0.4 GB/s; we
|
|
65
|
+
* budget 0.25. (Mostly here so the math is defined; in
|
|
66
|
+
* practice this tier fails the gate immediately.)
|
|
67
|
+
*/
|
|
68
|
+
declare const KV_RESTORE_BANDWIDTH_BYTES_PER_MS: {
|
|
69
|
+
readonly "cpu-apple": 40000000;
|
|
70
|
+
readonly "cpu-pcie": 12000000;
|
|
71
|
+
readonly "disk-nvme": 1500000;
|
|
72
|
+
readonly "disk-sata": 250000;
|
|
73
|
+
};
|
|
74
|
+
export type KvRestoreClass = keyof typeof KV_RESTORE_BANDWIDTH_BYTES_PER_MS;
|
|
75
|
+
/**
|
|
76
|
+
* Per-token KV-cache footprint of a loaded bundle, summed across all
|
|
77
|
+
* full-attention layers, for the *quantized* cache it actually ships with
|
|
78
|
+
* (QJL K + PolarQuant/TurboQuant V — see packages/training/AGENTS.md §3).
|
|
79
|
+
* Callers derive this from the bundle's manifest / catalog runtime block;
|
|
80
|
+
* `estimateQuantizedKvBytesPerToken()` is the fallback when only the param
|
|
81
|
+
* count is known.
|
|
82
|
+
*/
|
|
83
|
+
export interface KvGeometry {
|
|
84
|
+
/** Bytes of compressed KV the cache grows by, per token of context (prompt or generated). */
|
|
85
|
+
bytesPerToken: number;
|
|
86
|
+
/** True when the loaded bundle has voice enabled (tighter latency gate). */
|
|
87
|
+
voiceEnabled: boolean;
|
|
88
|
+
}
|
|
89
|
+
export declare function estimateQuantizedKvBytesPerToken(params: string): number;
|
|
90
|
+
/**
|
|
91
|
+
* Where the spilled pages land. `"cpu"` = host RAM (still RAM, just not
|
|
92
|
+
* counted against the resident budget); `"disk"` = the local-inference cache
|
|
93
|
+
* directory on persistent storage.
|
|
94
|
+
*/
|
|
95
|
+
export type KvSpillTier = "cpu" | "disk";
|
|
96
|
+
export interface KvSpillPlanResident {
|
|
97
|
+
mode: "resident";
|
|
98
|
+
/** The whole KV cache fits in the resident budget; nothing spills. */
|
|
99
|
+
totalKvBytes: number;
|
|
100
|
+
residentBytes: number;
|
|
101
|
+
}
|
|
102
|
+
export interface KvSpillPlanSpill {
|
|
103
|
+
mode: "spill";
|
|
104
|
+
tier: KvSpillTier;
|
|
105
|
+
/** Pages kept resident (the hot tail of the context). */
|
|
106
|
+
residentPages: number;
|
|
107
|
+
/** Pages paged out to `tier`. */
|
|
108
|
+
spillPages: number;
|
|
109
|
+
/** Bytes of KV held resident. */
|
|
110
|
+
residentBytes: number;
|
|
111
|
+
/** Bytes of KV spilled to `tier`. */
|
|
112
|
+
spillBytes: number;
|
|
113
|
+
/** Total compressed KV footprint at full context. */
|
|
114
|
+
totalKvBytes: number;
|
|
115
|
+
/** Worst-case latency to restore one cold page, in ms. */
|
|
116
|
+
worstCaseRestoreMs: number;
|
|
117
|
+
/** The latency budget this plan was checked against, in ms. */
|
|
118
|
+
latencyBudgetMs: number;
|
|
119
|
+
}
|
|
120
|
+
export type KvSpillPlan = KvSpillPlanResident | KvSpillPlanSpill;
|
|
121
|
+
/**
|
|
122
|
+
* Structured error thrown when spill cannot meet the latency budget. The
|
|
123
|
+
* engine catches this and surfaces it to the UI as a 4xx with `code` and
|
|
124
|
+
* `details` intact — there is NO silent-slow fallback (AGENTS.md §3).
|
|
125
|
+
*/
|
|
126
|
+
export declare class KvSpillUnsupportedError extends Error {
|
|
127
|
+
readonly code = "kv-spill-unsupported";
|
|
128
|
+
readonly details: {
|
|
129
|
+
requestedContext: number;
|
|
130
|
+
totalKvBytes: number;
|
|
131
|
+
residentBytes: number;
|
|
132
|
+
spillBytes: number;
|
|
133
|
+
worstCaseRestoreMs: number;
|
|
134
|
+
latencyBudgetMs: number;
|
|
135
|
+
restoreClass: KvRestoreClass;
|
|
136
|
+
voiceEnabled: boolean;
|
|
137
|
+
};
|
|
138
|
+
constructor(details: KvSpillUnsupportedError["details"]);
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Inputs to `planKvSpill`. `residentKvBudgetBytes` is the slice of the RAM
|
|
142
|
+
* budget the runtime is willing to hand to the *resident* KV cache after
|
|
143
|
+
* weights + activations + the TTS/ASR working sets are accounted for; callers
|
|
144
|
+
* derive it from `RamBudget` via `residentKvBudgetFromRamBudget()`.
|
|
145
|
+
*/
|
|
146
|
+
export interface KvSpillInput {
|
|
147
|
+
requestedContext: number;
|
|
148
|
+
geometry: KvGeometry;
|
|
149
|
+
residentKvBudgetBytes: number;
|
|
150
|
+
restoreClass: KvRestoreClass;
|
|
151
|
+
/**
|
|
152
|
+
* True when the host can spill to CPU RAM (host RAM available beyond the
|
|
153
|
+
* resident budget). When false the spill tier degrades to `"disk"`.
|
|
154
|
+
*/
|
|
155
|
+
cpuSpillAvailable: boolean;
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Slice the resident-KV budget out of a model's `RamBudget`. The recommended
|
|
159
|
+
* budget covers weights + activations + voice working sets + KV; we reserve a
|
|
160
|
+
* fixed fraction for KV. This mirrors what `recommendation.ts` already assumes
|
|
161
|
+
* implicitly when it sizes tiers — kept as one constant so the spill policy
|
|
162
|
+
* and the recommender agree.
|
|
163
|
+
*/
|
|
164
|
+
export declare const RESIDENT_KV_BUDGET_FRACTION = 0.25;
|
|
165
|
+
export declare function residentKvBudgetFromRamBudget(budget: RamBudget): number;
|
|
166
|
+
/**
|
|
167
|
+
* Decide the KV-cache placement for a requested context.
|
|
168
|
+
*
|
|
169
|
+
* Returns `{ mode: "resident" }` when the whole compressed KV fits the
|
|
170
|
+
* resident budget; `{ mode: "spill", ... }` when it fits with paging and the
|
|
171
|
+
* cold-page restore stays inside the latency budget; throws
|
|
172
|
+
* `KvSpillUnsupportedError` when spill would miss the budget.
|
|
173
|
+
*
|
|
174
|
+
* Below `KV_SPILL_MIN_CONTEXT` this is always `{ mode: "resident" }` — there
|
|
175
|
+
* is no spill at short context, by contract.
|
|
176
|
+
*/
|
|
177
|
+
export declare function planKvSpill(input: KvSpillInput): KvSpillPlan;
|
|
178
|
+
/**
|
|
179
|
+
* Map a `HardwareProbe`-shaped descriptor to the KV restore bandwidth class.
|
|
180
|
+
* Apple Silicon → unified-memory class; discrete-GPU x86 → PCIe class;
|
|
181
|
+
* CPU-only → NVMe class (no GPU to page back to, so "restore" is a host-RAM
|
|
182
|
+
* memcpy, conservatively budgeted at the same order as a fast SSD).
|
|
183
|
+
*/
|
|
184
|
+
export declare function restoreClassForHardware(input: {
|
|
185
|
+
appleSilicon: boolean;
|
|
186
|
+
hasDiscreteGpu: boolean;
|
|
187
|
+
}): KvRestoreClass;
|
|
188
|
+
export {};
|
|
189
|
+
//# sourceMappingURL=kv-spill.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kv-spill.d.ts","sourceRoot":"","sources":["../../src/services/kv-spill.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEzC,4EAA4E;AAC5E,eAAO,MAAM,oBAAoB,QAAQ,CAAC;AAE1C;;;;;GAKG;AACH,eAAO,MAAM,cAAc,MAAM,CAAC;AAElC;;;;;;;;GAQG;AACH,eAAO,MAAM,gCAAgC,MAAM,CAAC;AACpD,eAAO,MAAM,+BAA+B,OAAO,CAAC;AAEpD;;;;;;;;;;;;;;;;;GAiBG;AACH,QAAA,MAAM,iCAAiC;;;;;CAK7B,CAAC;AAEX,MAAM,MAAM,cAAc,GAAG,MAAM,OAAO,iCAAiC,CAAC;AAE5E;;;;;;;GAOG;AACH,MAAM,WAAW,UAAU;IAC1B,sEAAsE;IACtE,aAAa,EAAE,MAAM,CAAC;IACtB,4EAA4E;IAC5E,YAAY,EAAE,OAAO,CAAC;CACtB;AAoBD,wBAAgB,gCAAgC,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAOvE;AAED;;;;GAIG;AACH,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,MAAM,CAAC;AAEzC,MAAM,WAAW,mBAAmB;IACnC,IAAI,EAAE,UAAU,CAAC;IACjB,sEAAsE;IACtE,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAChC,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,WAAW,CAAC;IAClB,yDAAyD;IACzD,aAAa,EAAE,MAAM,CAAC;IACtB,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,iCAAiC;IACjC,aAAa,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,YAAY,EAAE,MAAM,CAAC;IACrB,0DAA0D;IAC1D,kBAAkB,EAAE,MAAM,CAAC;IAC3B,+DAA+D;IAC/D,eAAe,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,WAAW,GAAG,mBAAmB,GAAG,gBAAgB,CAAC;AAEjE;;;;GAIG;AACH,qBAAa,uBAAwB,SAAQ,KAAK;IACjD,QAAQ,CAAC,IAAI,0BAA0B;IACvC,QAAQ,CAAC,OAAO,EAAE;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,EAAE,MAAM,CAAC;QACrB,aAAa,EAAE,MAAM,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,kBAAkB,EAAE,MAAM,CAAC;QAC3B,eAAe,EAAE,MAAM,CAAC;QACxB,YAAY,EAAE,cAAc,CAAC;QAC7B,YAAY,EAAE,OAAO,CAAC;KACtB,CAAC;gBAEU,OAAO,EAAE,uBAAuB,CAAC,SAAS,CAAC;CAgBvD;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,UAAU,CAAC;IACrB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,YAAY,EAAE,cAAc,CAAC;IAC7B;;;OAGG;IACH,iBAAiB,EAAE,OAAO,CAAC;CAC3B;AAED;;;;;;GAMG;AACH,eAAO,MAAM,2BAA2B,OAAO,CAAC;AAEhD,wBAAgB,6BAA6B,CAAC,MAAM,EAAE,SAAS,GAAG,MAAM,CAIvE;AAMD;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,YAAY,GAAG,WAAW,CAmG5D;AAED;;;;;GAKG;AACH,wBAAgB,
uBAAuB,CAAC,KAAK,EAAE;IAC9C,YAAY,EAAE,OAAO,CAAC;IACtB,cAAc,EAAE,OAAO,CAAC;CACxB,GAAG,cAAc,CAIjB"}
|