@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conversation registry for the local-inference path.
|
|
3
|
+
*
|
|
4
|
+
* Today's slot allocation is purely a hash function: `deriveSlotId` maps a
|
|
5
|
+
* `promptCacheKey` (or any stable string) to `slot_id` in `[0, parallel)`.
|
|
6
|
+
* That works for one-shot calls but breaks for long agentic loops:
|
|
7
|
+
*
|
|
8
|
+
* - Two distinct conversations whose cache keys hash to the same slot
|
|
9
|
+
* evict each other's KV every turn (slot thrashing).
|
|
10
|
+
* - The current high-water mark of concurrent conversations is invisible,
|
|
11
|
+
* so `--parallel N` cannot be tuned to fit.
|
|
12
|
+
* - There is no notion of an explicit "I am still using this slot" lease,
|
|
13
|
+
* so eviction is purely best-effort.
|
|
14
|
+
*
|
|
15
|
+
* This registry keeps a per-conversation reservation. `openConversation`
|
|
16
|
+
* picks the lowest-loaded slot and pins the conversation to it; subsequent
|
|
17
|
+
* `generateInConversation` calls always land on the same slot. When the
|
|
18
|
+
* pool is full, slot reuse falls back to the same-as-before hash policy
|
|
19
|
+
* (two leases on the same slot still serialise correctly via the dispatcher's
|
|
20
|
+
* generation queue).
|
|
21
|
+
*
|
|
22
|
+
* The registry tracks the high-water mark of concurrently-open conversations
|
|
23
|
+
* so the engine can warn, or later restart llama-server with a higher
|
|
24
|
+
* --parallel, when the load outgrows the configured slot count.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { createHash } from "node:crypto";
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Opaque handle returned by `openConversation`. Callers MUST treat this as
|
|
31
|
+
* opaque — the registry owns the slot id and lifetime.
|
|
32
|
+
*/
|
|
33
|
+
export interface ConversationHandle {
  /** Identifier of the conversation this handle was opened for. */
  readonly conversationId: string;
  /** Identifier of the model the conversation is bound to. */
  readonly modelId: string;
  /**
   * Slot the conversation is pinned to, in `[0, parallel)`. Documented as
   * `-1` when slot pinning is disabled (parallel <= 0). Both backends use it
   * as the cache key:
   *   - llama-server: forwarded as `slot_id` in the request payload.
   *   - node-llama-cpp: combined with the conversation id to derive the
   *     session-pool key so identical conversations share a session.
   *
   * NOTE(review): `ConversationRegistry.open` in this file always assigns a
   * slot >= 0 (a non-positive `parallel` is treated as 1); confirm which
   * producer, if any, emits `-1`.
   */
  readonly slotId: number;
  /** Wall-clock time (ms) at which the handle was opened. */
  readonly openedAtMs: number;
  /** Wall-clock time (ms) of the most recent touch (open or generate). */
  lastUsedMs: number;
  /** Idle TTL (ms); once exceeded, the registry MAY auto-close on the next sweep. */
  readonly ttlMs: number;
  /** Set once `closeConversation` has been called; further use is rejected. */
  closed: boolean;
}
|
|
53
|
+
|
|
54
|
+
export interface OpenConversationArgs {
  /** Conversation to open (or re-use, when already open for `modelId`). */
  conversationId: string;
  /** Model the conversation runs against. */
  modelId: string;
  /** Number of slots on the running server (`--parallel N`). Defaults to 1. */
  parallel?: number;
  /**
   * Idle TTL in milliseconds. A handle untouched for longer than this may be
   * auto-closed by `evictIdle`. Defaults to 60 minutes — long enough for an
   * LLM call to finish even on a slow drafter, short enough to recover from
   * forgotten close calls within the long-cache window.
   */
  ttlMs?: number;
}
|
|
67
|
+
|
|
68
|
+
/** Default idle TTL for a conversation handle: one hour, in milliseconds. */
const DEFAULT_HANDLE_TTL_MS = 1000 * 60 * 60;
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* In-memory registry of open conversation handles. A single instance is
|
|
72
|
+
* shared by the engine; each backend reads from it on every generate to
|
|
73
|
+
* decide which slot to pin to.
|
|
74
|
+
*/
|
|
75
|
+
export class ConversationRegistry {
|
|
76
|
+
private readonly handles = new Map<string, ConversationHandle>();
|
|
77
|
+
/** Per-slot reference count; lowest-loaded slot wins on next open. */
|
|
78
|
+
private readonly slotLoad = new Map<number, number>();
|
|
79
|
+
/** Largest concurrent open count seen; the engine reads this for parallel auto-tune. */
|
|
80
|
+
private highWaterMark = 0;
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Lookup / open a conversation handle. Idempotent for the same
|
|
84
|
+
* conversation id + model id; callers can call this on every turn
|
|
85
|
+
* without leaking handles. When the call is reusing an existing handle,
|
|
86
|
+
* `lastUsedMs` is bumped for LRU-style eviction tracking.
|
|
87
|
+
*/
|
|
88
|
+
open(args: OpenConversationArgs): ConversationHandle {
|
|
89
|
+
if (!args.conversationId) {
|
|
90
|
+
throw new Error("[conversation-registry] conversationId is required");
|
|
91
|
+
}
|
|
92
|
+
if (!args.modelId) {
|
|
93
|
+
throw new Error("[conversation-registry] modelId is required");
|
|
94
|
+
}
|
|
95
|
+
const compositeKey = this.compositeKey(args.conversationId, args.modelId);
|
|
96
|
+
const existing = this.handles.get(compositeKey);
|
|
97
|
+
if (existing && !existing.closed) {
|
|
98
|
+
existing.lastUsedMs = Date.now();
|
|
99
|
+
return existing;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
const parallel =
|
|
103
|
+
typeof args.parallel === "number" && args.parallel > 0
|
|
104
|
+
? Math.floor(args.parallel)
|
|
105
|
+
: 1;
|
|
106
|
+
const slotId = this.pickLowestLoadedSlot(parallel, args.conversationId);
|
|
107
|
+
const now = Date.now();
|
|
108
|
+
const handle: ConversationHandle = {
|
|
109
|
+
conversationId: args.conversationId,
|
|
110
|
+
modelId: args.modelId,
|
|
111
|
+
slotId,
|
|
112
|
+
openedAtMs: now,
|
|
113
|
+
lastUsedMs: now,
|
|
114
|
+
ttlMs: args.ttlMs ?? DEFAULT_HANDLE_TTL_MS,
|
|
115
|
+
closed: false,
|
|
116
|
+
};
|
|
117
|
+
this.handles.set(compositeKey, handle);
|
|
118
|
+
this.slotLoad.set(slotId, (this.slotLoad.get(slotId) ?? 0) + 1);
|
|
119
|
+
if (this.handles.size > this.highWaterMark) {
|
|
120
|
+
this.highWaterMark = this.handles.size;
|
|
121
|
+
}
|
|
122
|
+
return handle;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Lookup an open handle by conversation+model. Returns null when the
|
|
127
|
+
* conversation has not been opened or has already been closed. Bumps
|
|
128
|
+
* `lastUsedMs` so an LRU sweep treats reads as activity.
|
|
129
|
+
*/
|
|
130
|
+
get(conversationId: string, modelId: string): ConversationHandle | null {
|
|
131
|
+
const handle = this.handles.get(this.compositeKey(conversationId, modelId));
|
|
132
|
+
if (!handle || handle.closed) return null;
|
|
133
|
+
handle.lastUsedMs = Date.now();
|
|
134
|
+
return handle;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Close + drop a handle. Idempotent — closing an unknown / already-closed
|
|
139
|
+
* handle has no additional effect, so callers can call this from cleanup paths
|
|
140
|
+
* unconditionally.
|
|
141
|
+
*/
|
|
142
|
+
close(conversationId: string, modelId: string): void {
|
|
143
|
+
const compositeKey = this.compositeKey(conversationId, modelId);
|
|
144
|
+
const handle = this.handles.get(compositeKey);
|
|
145
|
+
if (!handle) return;
|
|
146
|
+
handle.closed = true;
|
|
147
|
+
this.handles.delete(compositeKey);
|
|
148
|
+
const remaining = (this.slotLoad.get(handle.slotId) ?? 0) - 1;
|
|
149
|
+
if (remaining <= 0) {
|
|
150
|
+
this.slotLoad.delete(handle.slotId);
|
|
151
|
+
} else {
|
|
152
|
+
this.slotLoad.set(handle.slotId, remaining);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Sweep handles whose `lastUsedMs` is older than their TTL. Returns the
|
|
158
|
+
* conversation ids dropped so callers can persist final KV state to
|
|
159
|
+
* disk, etc. Safe to call on a timer.
|
|
160
|
+
*/
|
|
161
|
+
evictIdle(now: number = Date.now()): string[] {
|
|
162
|
+
const dropped: string[] = [];
|
|
163
|
+
for (const [compositeKey, handle] of this.handles) {
|
|
164
|
+
if (now - handle.lastUsedMs > handle.ttlMs) {
|
|
165
|
+
handle.closed = true;
|
|
166
|
+
this.handles.delete(compositeKey);
|
|
167
|
+
const remaining = (this.slotLoad.get(handle.slotId) ?? 0) - 1;
|
|
168
|
+
if (remaining <= 0) {
|
|
169
|
+
this.slotLoad.delete(handle.slotId);
|
|
170
|
+
} else {
|
|
171
|
+
this.slotLoad.set(handle.slotId, remaining);
|
|
172
|
+
}
|
|
173
|
+
dropped.push(handle.conversationId);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
return dropped;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Snapshot every currently-open handle. Used by the shutdown path to
|
|
181
|
+
* emit a save-state request per slot.
|
|
182
|
+
*/
|
|
183
|
+
snapshot(): readonly ConversationHandle[] {
|
|
184
|
+
return [...this.handles.values()];
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/** Largest concurrent open count seen since the registry was created. */
|
|
188
|
+
highWater(): number {
|
|
189
|
+
return this.highWaterMark;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/** Number of currently-open handles. */
|
|
193
|
+
size(): number {
|
|
194
|
+
return this.handles.size;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* Recommended `--parallel` slot count given the observed high-water mark
|
|
199
|
+
* of concurrently-open conversations plus a small headroom (max(2, 25%)).
|
|
200
|
+
* The engine's auto-tune (J4) compares this against the running server's
|
|
201
|
+
* slot count: when this is larger AND there's RAM headroom, it restarts
|
|
202
|
+
* llama-server with the higher value so new conversations get their own
|
|
203
|
+
* KV slots instead of thrashing.
|
|
204
|
+
*
|
|
205
|
+
* `running` is the currently-configured slot count; when the high-water
|
|
206
|
+
* mark hasn't outgrown it, this returns `running` (no resize needed) so
|
|
207
|
+
* callers can compare against equality without a second branch.
|
|
208
|
+
*/
|
|
209
|
+
recommendedParallel(running: number): number {
|
|
210
|
+
const headroom = Math.max(2, Math.ceil(this.highWaterMark * 0.25));
|
|
211
|
+
const desired = Math.max(1, this.highWaterMark + headroom);
|
|
212
|
+
return Math.max(running, desired);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* Drop every handle and reset the high-water mark + slot-load bookkeeping.
|
|
217
|
+
* Test-only — the module singleton leaks state across files when the suite
|
|
218
|
+
* runs together; call this in `beforeEach` to isolate. Not part of the
|
|
219
|
+
* runtime contract.
|
|
220
|
+
*/
|
|
221
|
+
__resetForTests(): void {
|
|
222
|
+
for (const handle of this.handles.values()) handle.closed = true;
|
|
223
|
+
this.handles.clear();
|
|
224
|
+
this.slotLoad.clear();
|
|
225
|
+
this.highWaterMark = 0;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* Pick the slot with the fewest in-flight handles. Ties are broken by a
|
|
230
|
+
* deterministic hash of the conversation id, which avoids consistently
|
|
231
|
+
* loading slot 0 when N concurrent opens race.
|
|
232
|
+
*/
|
|
233
|
+
private pickLowestLoadedSlot(
|
|
234
|
+
parallel: number,
|
|
235
|
+
conversationId: string,
|
|
236
|
+
): number {
|
|
237
|
+
if (parallel <= 1) return 0;
|
|
238
|
+
let bestSlot = 0;
|
|
239
|
+
let bestLoad = Number.POSITIVE_INFINITY;
|
|
240
|
+
for (let slot = 0; slot < parallel; slot += 1) {
|
|
241
|
+
const load = this.slotLoad.get(slot) ?? 0;
|
|
242
|
+
if (load < bestLoad) {
|
|
243
|
+
bestLoad = load;
|
|
244
|
+
bestSlot = slot;
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
if (bestLoad === 0) return bestSlot;
|
|
248
|
+
// All slots are loaded equally — use the conversation hash for a
|
|
249
|
+
// deterministic tie-break. Same conversation, same slot when reopened.
|
|
250
|
+
const digest = createHash("sha256").update(conversationId).digest();
|
|
251
|
+
return digest.readUInt32BE(0) % parallel;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
private compositeKey(conversationId: string, modelId: string): string {
  // Key layout is "<modelId>::<conversationId>" — the model id comes first.
  return "".concat(modelId, "::", conversationId);
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
/**
 * Module-singleton registry: a single `ConversationRegistry` instance created
 * when this module is first evaluated and shared by every importer.
 *
 * The engine reads this on every generate; the conversation lifecycle API
 * (`openConversation`, `closeConversation`) mutates it. Because the state is
 * shared, tests should call `__resetForTests()` between cases to avoid
 * leaking handles across files.
 */
|
|
264
|
+
export const conversationRegistry = new ConversationRegistry();
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Desktop production `FfiBackendRuntime` over the FUSED `libelizainference` —
|
|
3
|
+
* the SOLE desktop text runtime now that libllama has been retired.
|
|
4
|
+
*
|
|
5
|
+
* Desktop text generation runs through the fused library: the same
|
|
6
|
+
* `eliza_inference_llm_stream_*` ABI (v9) the voice subsystem already loads,
|
|
7
|
+
* so text + voice share one native lib, one GGML pin, and one resident text
|
|
8
|
+
* model.
|
|
9
|
+
*
|
|
10
|
+
* - The fused lib's `eliza_inference_llm_stream_open` loads the bundle's text
|
|
11
|
+
* GGUF (`<bundleRoot>/text/*.gguf`) and applies MTP speculative
|
|
12
|
+
* decoding + KV-cache quant + per-load GPU layers natively (ABI v9). The
|
|
13
|
+
* path is gated on the capability probes
|
|
14
|
+
* (`llmStreamSupported && llmMtpSupported && llmKvQuantSupported`).
|
|
15
|
+
* - A fused lib that lacks MTP / KV-quant / native tokenize is REFUSED by
|
|
16
|
+
* `supported()` → the engine raises LocalInferenceUnavailable. There is no
|
|
17
|
+
* libllama fallback and never an unoptimized fused loop.
|
|
18
|
+
*
|
|
19
|
+
* Tokenization runs over the fused handle's resident text vocab via ABI-v9
|
|
20
|
+
* `eliza_inference_tokenize`: the fused `create()` + first `llmStreamOpen`
|
|
21
|
+
* already made the text vocab resident, so no second model is loaded.
|
|
22
|
+
* `tokenizeSupported()` gates this; a pre-v9 lib without the symbol is refused.
|
|
23
|
+
*
|
|
24
|
+
* Lifecycle: one fused context per loaded model; `acquire()` builds it,
|
|
25
|
+
* `release()` tears it down. A throwing native free poisons the runtime so no
|
|
26
|
+
* new allocation happens over leaked resources.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import fs from "node:fs";
|
|
30
|
+
import path from "node:path";
|
|
31
|
+
|
|
32
|
+
import { resolveStateDir } from "@elizaos/core";
|
|
33
|
+
|
|
34
|
+
import type { BackendPlan } from "./backend";
|
|
35
|
+
import type {
|
|
36
|
+
FfiBackendRuntime,
|
|
37
|
+
FfiBackendSession,
|
|
38
|
+
} from "./ffi-streaming-backend";
|
|
39
|
+
import { FfiStreamingRunner } from "./ffi-streaming-runner";
|
|
40
|
+
import { wrapElizaInferenceFfi } from "./llm-streaming-binding";
|
|
41
|
+
import type { ElizaInferenceContextHandle } from "./voice/ffi-bindings";
|
|
42
|
+
import {
|
|
43
|
+
type ElizaInferenceFfi,
|
|
44
|
+
loadElizaInferenceFfi,
|
|
45
|
+
} from "./voice/ffi-bindings";
|
|
46
|
+
|
|
47
|
+
/**
 * Throw if `signal` has already been aborted; otherwise do nothing.
 *
 * The signal's own `reason` is rethrown when it is an `Error`; any other
 * reason is replaced by a `DOMException` named `AbortError`.
 */
function throwIfAborted(signal: AbortSignal | undefined): void {
  if (signal?.aborted) {
    const { reason } = signal;
    if (reason instanceof Error) throw reason;
    throw new DOMException("Aborted", "AbortError");
  }
}
|
|
53
|
+
|
|
54
|
+
/**
 * Filenames the fused library may have on the current platform, in the order
 * they should be tried. Per the original note, this mirrors
 * `samantha-preset-regenerator.ts::libraryFilenames` so the runtime and the
 * voice regenerator resolve the same artifact.
 *
 * @returns A fresh array: one `.dylib` name on macOS, two `.dll` spellings on
 *   Windows, and the `.so` name everywhere else.
 */
function fusedLibraryFilenames(): string[] {
  switch (process.platform) {
    case "darwin":
      return ["libelizainference.dylib"];
    case "win32":
      return ["elizainference.dll", "libelizainference.dll"];
    default:
      return ["libelizainference.so"];
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Locate the fused `libelizainference` on disk.
 *
 * Lookup order:
 *   1. `ELIZA_INFERENCE_LIBRARY` — an explicit path, used as-is when the file
 *      exists.
 *   2. `<bundleRoot>/lib/<name>` — the bundle-local lib (skipped when
 *      `bundleRoot` is null).
 *   3. `dirname(ELIZA_INFERENCE_LIBRARY)/<name>` — the directory of the
 *      explicit path, searched when the variable is set but the exact file is
 *      missing.
 *   4. `ELIZA_INFERENCE_LIB_DIR/<name>` — an explicit lib directory.
 *   5. `<stateDir>/local-inference/lib/<name>` — the default staging dir
 *      written by `scripts/stage-desktop-fused-lib.mjs`, so a staged desktop
 *      build is found with no env wiring.
 * `<name>` ranges over the platform filenames from `fusedLibraryFilenames()`.
 *
 * @param bundleRoot Bundle root directory, or null when no bundle is known.
 * @param env Environment to read the overrides from (defaults to
 *   `process.env`).
 * @returns The first existing candidate, or null when none exists — in which
 *   case `supported()` reports unavailable and the engine raises
 *   LocalInferenceUnavailable.
 */
export function resolveFusedLibraryPath(
  bundleRoot: string | null,
  env: NodeJS.ProcessEnv = process.env,
): string | null {
  const explicitPath = env.ELIZA_INFERENCE_LIBRARY?.trim();
  if (explicitPath && fs.existsSync(explicitPath)) {
    return explicitPath;
  }

  // Collect the candidate directories in precedence order.
  const searchDirs: string[] = [];
  if (bundleRoot) {
    searchDirs.push(path.join(bundleRoot, "lib"));
  }
  if (explicitPath) {
    searchDirs.push(path.dirname(explicitPath));
  }
  const explicitDir = env.ELIZA_INFERENCE_LIB_DIR?.trim();
  if (explicitDir) {
    searchDirs.push(explicitDir);
  }
  searchDirs.push(path.join(resolveStateDir(env), "local-inference", "lib"));

  // Expand to concrete file paths and return the first one present on disk.
  const names = fusedLibraryFilenames();
  const candidates = searchDirs.flatMap((dir) =>
    names.map((name) => path.join(dir, name)),
  );
  return candidates.find((candidate) => fs.existsSync(candidate)) ?? null;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Work out the bundle root — the directory the fused `create()` anchors at —
 * for a BackendPlan.
 *
 * An explicit, non-empty `overrides.bundleRoot` (set by Eliza-1 bundles) wins.
 * Otherwise the GGUF is expected at `<bundleRoot>/text/<file>.gguf`, so the
 * root is two directory levels above `plan.modelPath`.
 */
function bundleRootForPlan(plan: BackendPlan): string {
  const explicitRoot = plan.overrides?.bundleRoot;
  if (explicitRoot) {
    return explicitRoot;
  }
  const textDir = path.dirname(plan.modelPath);
  return path.dirname(textDir);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Native resources held by `DesktopFusedFfiBackendRuntime` while a session is
 * live: stored by `acquire()` and freed/cleared by `release()`.
 */
interface ActiveFusedSession {
|
|
111
|
+
/** Fused-library binding returned by `loadElizaInferenceFfi`; closed on release. */
ffi: ElizaInferenceFfi;
|
|
112
|
+
/** Context handle returned by `ffi.create(bundleRoot)`; destroyed on release. */
ctx: ElizaInferenceContextHandle;
|
|
113
|
+
/** The session object that `acquire()` handed back to its caller. */
session: FfiBackendSession;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/**
 * `FfiBackendRuntime` backed by the fused `libelizainference`.
 *
 * Holds at most one live session (`acquire()` / `release()`). A native free
 * that throws "poisons" the instance: later `acquire()` calls are refused so
 * no new context is allocated over leaked resources.
 */
export class DesktopFusedFfiBackendRuntime implements FfiBackendRuntime {
  /** Resources of the live session, or null when nothing is acquired. */
  private active: ActiveFusedSession | null = null;
  /** First native-cleanup failure; once set, `acquire()` refuses to run. */
  private poisonedError: Error | null = null;
  /** Cached `supported()` result so the engine gate and the dispatcher agree. */
  private supportedCache: boolean | null = null;

  /**
   * Viable only when:
   *   - bun:ffi resolves on the current runtime,
   *   - the fused dylib is present AND reports ABI-v9 capability: the
   *     streaming-LLM surface, MTP, KV-cache quant, AND native
   *     tokenization (`eliza_inference_tokenize`).
   * A pre-v9 fused lib reports the probes as unsupported → refused, and the
   * engine raises LocalInferenceUnavailable. libllama has been retired; there
   * is no fallback runtime and no tokenizer sidecar.
   *
   * The result is computed once and cached; see `resetSupportedCache()`.
   */
  supported(): boolean {
    if (this.supportedCache !== null) return this.supportedCache;
    this.supportedCache = this.computeSupported();
    return this.supportedCache;
  }

  /** Clear the cached `supported()` result (tests / lib swaps). */
  resetSupportedCache(): void {
    this.supportedCache = null;
  }

  /**
   * Uncached capability probe behind `supported()`.
   *
   * Note: the library is resolved WITHOUT a bundle root, so a lib that exists
   * only under `<bundleRoot>/lib` is not seen by this probe.
   */
  private computeSupported(): boolean {
    try {
      require.resolve("bun:ffi");
    } catch {
      return false;
    }
    const libPath = resolveFusedLibraryPath(null);
    if (!libPath) return false;
    // Load the lib and probe the LLM capability flags. This dlopen is cheap
    // (no model load); we close it immediately after probing.
    let ffi: ElizaInferenceFfi | null = null;
    try {
      ffi = loadElizaInferenceFfi(libPath);
      const llmOk =
        typeof ffi.llmStreamSupported === "function" &&
        ffi.llmStreamSupported() === true &&
        typeof ffi.llmMtpSupported === "function" &&
        ffi.llmMtpSupported() === true &&
        typeof ffi.llmKvQuantSupported === "function" &&
        ffi.llmKvQuantSupported() === true;
      if (!llmOk) return false;
      // Native tokenization over the fused handle's resident text vocab
      // (ABI v9) is required: libllama has been retired, so there is no
      // tokenizer sidecar. A pre-v9 fused lib without `eliza_inference_tokenize`
      // is refused → the engine raises LocalInferenceUnavailable.
      const fusedTokenize =
        typeof ffi.tokenizeSupported === "function" &&
        ffi.tokenizeSupported() === true;
      if (!fusedTokenize) return false;
      return true;
    } catch {
      // dlopen / ABI-mismatch / non-Bun runtime → not viable.
      return false;
    } finally {
      ffi?.close();
    }
  }

  /**
   * Load the fused library for `plan`, create its native context and build
   * the session the backend drives.
   *
   * @param plan Backend plan; supplies the model path and optional overrides.
   * @returns The live session. It stays bound to this runtime until
   *   `release()` is called.
   * @throws If the runtime is poisoned, a session is already live, the fused
   *   library cannot be found, or native setup fails. On a setup failure the
   *   native context and library handle are torn down before rethrowing.
   */
  async acquire(plan: BackendPlan): Promise<FfiBackendSession> {
    if (this.poisonedError) {
      throw new Error(
        `[desktop-fused-ffi-runtime] native cleanup previously failed; restart required before acquiring a new session: ${this.poisonedError.message}`,
      );
    }
    if (this.active) {
      throw new Error(
        "[desktop-fused-ffi-runtime] acquire() called with a live session; release() first",
      );
    }
    const bundleRoot = bundleRootForPlan(plan);
    const libPath = resolveFusedLibraryPath(bundleRoot);
    if (!libPath) {
      throw new Error(
        `[desktop-fused-ffi-runtime] fused libelizainference not found for bundle ${bundleRoot}. ` +
          "Dispatcher should not have routed here; check supported().",
      );
    }

    // 1. Fused lib + bundle context for the generation path. `create()`
    //    anchors at the bundle root; the first `llmStreamOpen` loads
    //    `<bundleRoot>/text/*.gguf` and applies gpuLayers + KV-cache quant
    //    from the session config (threaded via loadConfig below).
    const ffi = loadElizaInferenceFfi(libPath);
    let ctx: ElizaInferenceContextHandle;
    try {
      ctx = ffi.create(bundleRoot);
    } catch (err) {
      ffi.close();
      throw err;
    }

    // 2. Everything after `create()` owns native resources that `release()`
    //    cannot reach until `active` is set, so any failure here must tear
    //    them down instead of leaking the context and the library handle.
    let session: FfiBackendSession;
    try {
      session = this.buildSession(ffi, ctx, plan);
    } catch (err) {
      try {
        ffi.destroy(ctx);
        ffi.close();
      } catch (cleanupErr) {
        // Same policy as `release()`: a failed native free poisons the runtime.
        this.poisonedError =
          cleanupErr instanceof Error
            ? cleanupErr
            : new Error(String(cleanupErr));
      }
      throw err;
    }
    this.active = { ffi, ctx, session };
    return session;
  }

  /**
   * Assemble the `FfiBackendSession` for an already-created native context.
   * Throws (without freeing anything — the caller owns teardown) when the
   * library lacks native tokenization or a collaborator fails to construct.
   */
  private buildSession(
    ffi: ElizaInferenceFfi,
    ctx: ElizaInferenceContextHandle,
    plan: BackendPlan,
  ): FfiBackendSession {
    // Tokenization over the fused handle's resident text vocab via ABI-v9
    // `eliza_inference_tokenize` — no second model load. `supported()`
    // already refused a pre-v9 lib, so the symbol is present here; this
    // guard turns any surprise absence into a loud failure (the session is
    // torn down) rather than a silent tokenizer gap. libllama is retired.
    const fusedTokenizeFn = ffi.tokenize;
    if (
      typeof ffi.tokenizeSupported !== "function" ||
      ffi.tokenizeSupported() !== true ||
      typeof fusedTokenizeFn !== "function"
    ) {
      throw new Error(
        "[desktop-fused-ffi-runtime] fused lib lacks eliza_inference_tokenize (pre-v9). " +
          "libllama has been retired; rebuild the fused lib with the v9 tokenizer ABI.",
      );
    }
    // Invoke with `ffi` as the receiver so the call keeps working even if the
    // binding implements `tokenize` as a `this`-dependent method.
    const tokenizeFn = (prompt: string): Int32Array =>
      fusedTokenizeFn.call(ffi, { ctx, text: prompt });

    const binding = wrapElizaInferenceFfi(ffi);
    const runner = new FfiStreamingRunner(binding, ctx);
    const overrides = plan.overrides;
    return {
      binding,
      ctx,
      runner,
      tokenize: tokenizeFn,
      mtp: plan.catalog?.runtime?.mtp ?? null,
      draftModelPath: overrides?.draftModelPath ?? null,
      mmprojPath: overrides?.mmprojPath ?? null,
      // The fused path applies these at its first `llmStreamOpen`:
      // context size, gpuLayers, and KV-cache quant types from the
      // session config.
      loadConfig: {
        contextSize:
          typeof overrides?.contextSize === "number"
            ? overrides.contextSize
            : undefined,
        gpuLayers:
          typeof overrides?.gpuLayers === "number"
            ? overrides.gpuLayers
            : undefined,
        cacheTypeK: overrides?.cacheTypeK ?? null,
        cacheTypeV: overrides?.cacheTypeV ?? null,
      },
    };
  }

  parallelSlots(): number {
    // The fused runtime holds one resident text context per loaded model;
    // multi-slot parallelism is not exposed by the fused ABI.
    return 1;
  }

  /**
   * Whether the LIVE session can describe images through the fused
   * `eliza_inference_describe_image`. Mirrors the FfiStreamingBackend gate:
   * true only when a session is bound and the fused lib exposes vision.
   */
  visionSupported(): boolean {
    if (!this.active) return false;
    return (
      typeof this.active.ffi.visionSupported === "function" &&
      this.active.ffi.visionSupported() === true &&
      typeof this.active.ffi.describeImage === "function"
    );
  }

  /**
   * Whether the LIVE session can STREAM a vision describe token-by-token
   * through `eliza_inference_describe_image_stream_open` + the existing
   * `llmStreamNext` loop (ABI v13). A <=v12 lib reports false and the handler
   * uses the buffered one-shot `describeImage` path.
   */
  visionStreamSupported(): boolean {
    if (!this.active) return false;
    const { ffi } = this.active;
    return (
      typeof ffi.visionStreamSupported === "function" &&
      ffi.visionStreamSupported() === true &&
      typeof ffi.describeImageStreamOpen === "function" &&
      typeof ffi.llmStreamNext === "function" &&
      typeof ffi.llmStreamClose === "function"
    );
  }

  /**
   * Vision describe through the fused mmproj path. Reuses the mtmd machinery
   * linked for ASR over the bundle's text model + the passed mmproj projector.
   * The `FfiStreamingBackend` forwards `describeImage`/`visionSupported` to this
   * runtime by duck-typing.
   *
   * When `onTextChunk` is supplied AND the fused lib exposes ABI-v13 streaming
   * vision, the description is decoded token-by-token: `describeImageStreamOpen`
   * primes a stream with the image+prompt KV and the EXISTING `llmStreamNext`
   * loop pulls tokens — the same machinery that streams chat text, so vision
   * flows into the dashboard through one pipe. Otherwise it falls back to the
   * buffered one-shot `eliza_inference_describe_image`.
   *
   * Streaming is capped at `maxTokens` (default 256) on the JS side; each step
   * requests at most the remaining budget, so the cap is not overshot by a
   * final oversized step. `maxTokens` is not applied on the buffered path and
   * `temperature` is currently not forwarded on either path.
   *
   * @returns The description text plus the wall-clock decode time in ms.
   * @throws If no session is live, the lib lacks vision, or `signal` is
   *   aborted (before the native call, or between streaming steps).
   */
  async describeImage(args: {
    imageBytes: Uint8Array;
    mmprojPath: string;
    prompt?: string;
    maxTokens?: number;
    temperature?: number;
    signal?: AbortSignal;
    onTextChunk?: (chunk: string) => void | Promise<void>;
    maxTokensPerStep?: number;
  }): Promise<{ text: string; projectorMs?: number; decodeMs?: number }> {
    if (!this.active) {
      throw new Error(
        "[desktop-fused-ffi-runtime] describeImage before acquire — no session",
      );
    }
    const { ffi, ctx } = this.active;
    if (
      typeof ffi.visionSupported !== "function" ||
      ffi.visionSupported() !== true ||
      typeof ffi.describeImage !== "function"
    ) {
      throw new Error(
        "[desktop-fused-ffi-runtime] describeImage: fused lib was built without " +
          "vision (eliza_inference_vision_supported() == 0). Rebuild the fused " +
          "lib with -DELIZA_ENABLE_VISION=ON (verify-fused-symbols requires it).",
      );
    }

    // Honour a signal that was aborted before any native work starts. This
    // covers both the streaming and the buffered path.
    throwIfAborted(args.signal);

    // Token-by-token streaming path (ABI v13): open a vision stream and drive
    // the shared `llmStreamNext` loop, surfacing each decoded piece through
    // `onTextChunk` so the description renders as it generates.
    const onTextChunk = args.onTextChunk;
    if (
      typeof onTextChunk === "function" &&
      this.visionStreamSupported() &&
      typeof ffi.describeImageStreamOpen === "function" &&
      typeof ffi.llmStreamNext === "function" &&
      typeof ffi.llmStreamClose === "function"
    ) {
      const startedAt = Date.now();
      const stream = ffi.describeImageStreamOpen({
        ctx,
        imageBytes: args.imageBytes,
        mmprojPath: args.mmprojPath,
        prompt: args.prompt,
      });
      let full = "";
      let generated = 0;
      // JS-side token budget: the native ELIZA_VISION_MAX_TOKENS env does not
      // reliably reach the loaded DLL's getenv across runtimes, so cap here.
      const tokenBudget =
        typeof args.maxTokens === "number" && args.maxTokens > 0
          ? args.maxTokens
          : 256;
      // Fine-grained by default so the description renders token-by-token
      // in the dashboard rather than in coarse ~32-token jumps (matches
      // the tuned chat default). Callers may override per request.
      const perStepLimit = args.maxTokensPerStep ?? 8;
      try {
        for (;;) {
          if (args.signal?.aborted) {
            ffi.llmStreamCancel?.(stream);
            throwIfAborted(args.signal);
          }
          // `generated < tokenBudget` holds here, so the remaining budget is
          // positive; rounding up keeps the request at one token or more.
          const remaining = Math.ceil(tokenBudget - generated);
          const step = ffi.llmStreamNext({
            stream,
            maxTokensPerStep: Math.min(perStepLimit, remaining),
          });
          if (step.text.length > 0) {
            full += step.text;
            await onTextChunk(step.text);
          }
          generated += step.tokens.length;
          if (step.done || generated >= tokenBudget) break;
        }
      } finally {
        // Always release the native stream, including on abort or when
        // `onTextChunk` throws.
        ffi.llmStreamClose(stream);
      }
      return { text: full, decodeMs: Date.now() - startedAt };
    }

    // Buffered one-shot fallback.
    const startedAt = Date.now();
    const text = ffi.describeImage({
      ctx,
      imageBytes: args.imageBytes,
      mmprojPath: args.mmprojPath,
      prompt: args.prompt,
    });
    return { text, decodeMs: Date.now() - startedAt };
  }

  /**
   * Tear down the live session, if any. A no-op when nothing is acquired.
   *
   * @throws Rethrows a failing native free after recording it in
   *   `poisonedError`, which makes every later `acquire()` refuse to run.
   */
  async release(): Promise<void> {
    if (!this.active) return;
    const { ffi, ctx } = this.active;
    // Free the native handles. A throwing free poisons the runtime so a new
    // model cannot be allocated over leaked resources. Clear `active` in the
    // finally so a throwing free can't wedge the live-session guard.
    try {
      ffi.destroy(ctx);
      ffi.close();
    } catch (err) {
      this.poisonedError = err instanceof Error ? err : new Error(String(err));
      throw err;
    } finally {
      this.active = null;
    }
  }
}
|
|
424
|
+
|
|
425
|
+
/**
 * Process singleton: one `DesktopFusedFfiBackendRuntime` instance created when
 * this module is first evaluated. The engine wires this as the sole
 * `FfiBackendRuntime` for the dispatcher's `"llama-cpp"` slot. The ABI-v9
 * capability probes in `supported()` gate whether the fused lib serves text
 * at all.
 *
 * Because the instance is shared, its state is process-wide: the cached
 * `supported()` result, the single live session, and the poisoned flag set by
 * a failed native free.
 */
|
|
430
|
+
export const desktopFusedFfiBackendRuntime =
|
|
431
|
+
new DesktopFusedFfiBackendRuntime();
|