@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public facade for the local-inference service.
|
|
3
|
+
*
|
|
4
|
+
* Single entry point used by the API routes, settings UI, and orchestration
|
|
5
|
+
* code. Holds singleton instances of the downloader
|
|
6
|
+
* and active-model coordinator so subscribers receive the same event
|
|
7
|
+
* stream across the process.
|
|
8
|
+
*/
|
|
9
|
+
import { type AgentRuntime } from "@elizaos/core";
|
|
10
|
+
import { type LocalInferenceLoadOverrides } from "./active-model";
|
|
11
|
+
import { MemoryArbiter } from "./memory-arbiter";
|
|
12
|
+
import { type RecommendedModelSelection } from "./recommendation";
|
|
13
|
+
import type { ActiveModelState, AgentModelSlot, CatalogModel, DownloadEvent, DownloadJob, HardwareProbe, LocalInferenceReadiness, ModelAssignments, ModelHubSnapshot, TextGenerationSlot } from "./types";
|
|
14
|
+
import { type VerifyResult } from "./verify";
|
|
15
|
+
export declare class LocalInferenceService {
|
|
16
|
+
private readonly downloader;
|
|
17
|
+
private readonly activeModel;
|
|
18
|
+
private bundledBootstrap;
|
|
19
|
+
/**
|
|
20
|
+
* Memory Arbiter (WS1). Lazily created on first access so the heavy
|
|
21
|
+
* pressure-source machinery doesn't run for processes that never load
|
|
22
|
+
* a local model (CI, dev shells, etc.). Once created, the arbiter is
|
|
23
|
+
* also published via `setMemoryArbiter` so cross-plugin consumers
|
|
24
|
+
* (plugin-vision, plugin-image-gen) can use `getMemoryArbiter()`.
|
|
25
|
+
*/
|
|
26
|
+
private memoryArbiter;
|
|
27
|
+
/**
|
|
28
|
+
* Mobile pressure bridge — populated by the Capacitor host (iOS / Android
|
|
29
|
+
* onTrimMemory) so a native pressure callback can reach the arbiter.
|
|
30
|
+
* Stays null on desktop until WS2/WS8 wire the native side.
|
|
31
|
+
*/
|
|
32
|
+
private mobilePressureBridge;
|
|
33
|
+
private imageGenCapabilityRegistered;
|
|
34
|
+
getCatalog(): CatalogModel[];
|
|
35
|
+
/**
|
|
36
|
+
* Register any bundled GGUF files staged by the AOSP build (or any
|
|
37
|
+
* other install path that drops a `manifest.json` next to the model
|
|
38
|
+
* files) into the registry. Runs at most once per process; the
|
|
39
|
+
* promise is cached so concurrent first callers wait on the same
|
|
40
|
+
* work.
|
|
41
|
+
*/
|
|
42
|
+
private bootstrapBundled;
|
|
43
|
+
getInstalled(): Promise<import("@elizaos/shared").InstalledModel[]>;
|
|
44
|
+
getHardware(): Promise<HardwareProbe>;
|
|
45
|
+
getDownloads(): DownloadJob[];
|
|
46
|
+
getActive(): ActiveModelState;
|
|
47
|
+
getAssignments(): Promise<ModelAssignments>;
|
|
48
|
+
setSlotAssignment(slot: AgentModelSlot, modelId: string | null): Promise<ModelAssignments>;
|
|
49
|
+
snapshot(): Promise<ModelHubSnapshot>;
|
|
50
|
+
getTextReadiness(): Promise<LocalInferenceReadiness>;
|
|
51
|
+
getRecommendedModel(slot: TextGenerationSlot, hardware?: HardwareProbe): Promise<RecommendedModelSelection>;
|
|
52
|
+
getRecommendedModels(hardware?: HardwareProbe): Promise<Record<TextGenerationSlot, RecommendedModelSelection>>;
|
|
53
|
+
/**
|
|
54
|
+
* Kernel capability probing is now owned by the native FFI runtime. Null
|
|
55
|
+
* means "no static CAPABILITIES.json probe"; the dispatcher still enforces
|
|
56
|
+
* runtime-required kernels at load time.
|
|
57
|
+
*/
|
|
58
|
+
private installedBinaryKernels;
|
|
59
|
+
startDownload(modelId: string): Promise<DownloadJob>;
|
|
60
|
+
startSmallerFallbackDownload(currentModelId: string, slot?: TextGenerationSlot, hardware?: HardwareProbe): Promise<{
|
|
61
|
+
model: CatalogModel;
|
|
62
|
+
job: DownloadJob;
|
|
63
|
+
} | null>;
|
|
64
|
+
searchHuggingFace(query: string, limit?: number): Promise<CatalogModel[]>;
|
|
65
|
+
searchModelHub(query: string, hub: "huggingface" | "modelscope", limit?: number): Promise<CatalogModel[]>;
|
|
66
|
+
/**
|
|
67
|
+
* Verify an installed model's file integrity. When the model was a
|
|
68
|
+
* Eliza-download and there was no stored sha256 yet (legacy entry), the
|
|
69
|
+
* computed hash is persisted so subsequent verifies have a baseline.
|
|
70
|
+
*/
|
|
71
|
+
verifyModel(id: string): Promise<VerifyResult>;
|
|
72
|
+
cancelDownload(modelId: string): boolean;
|
|
73
|
+
subscribeDownloads(listener: (event: DownloadEvent) => void): () => void;
|
|
74
|
+
subscribeActive(listener: (state: ActiveModelState) => void): () => void;
|
|
75
|
+
setActive(runtime: AgentRuntime | null, modelId: string, overrides?: LocalInferenceLoadOverrides): Promise<ActiveModelState>;
|
|
76
|
+
prewarmActiveVoice(modelId: string): Promise<boolean>;
|
|
77
|
+
/**
|
|
78
|
+
* Warm the Stage-1 stable prefix after an explicit model activation.
|
|
79
|
+
*
|
|
80
|
+
* `ensureLocalInferenceHandler` also attempts this at runtime boot, but
|
|
81
|
+
* desktop activation often happens later through `/api/local-inference/active`;
|
|
82
|
+
* at boot there may be no resident model, so that early warmup correctly
|
|
83
|
+
* stays inactive. Running it here closes that gap without blocking activation.
|
|
84
|
+
*/
|
|
85
|
+
prewarmSystemPrefix(runtime: AgentRuntime): Promise<boolean>;
|
|
86
|
+
clearActive(runtime: AgentRuntime | null): Promise<ActiveModelState>;
|
|
87
|
+
/**
|
|
88
|
+
* Diagnostic snapshot of the local prefix-cache state. Returns:
|
|
89
|
+
* - `engine`: in-process session-pool size and live cache keys.
|
|
90
|
+
* Used by the API layer to render a "local cache" debug panel.
|
|
91
|
+
*/
|
|
92
|
+
getLocalCacheStats(): Promise<{
|
|
93
|
+
engine: {
|
|
94
|
+
size: number;
|
|
95
|
+
maxSize: number;
|
|
96
|
+
keys: string[];
|
|
97
|
+
} | null;
|
|
98
|
+
}>;
|
|
99
|
+
/**
|
|
100
|
+
* Memory Arbiter (WS1). Returns the process-wide arbiter, creating it on
|
|
101
|
+
* first call. The arbiter is constructed against the engine's existing
|
|
102
|
+
* `SharedResourceRegistry` so eviction policy is consistent across the
|
|
103
|
+
* voice/text paths and the cross-plugin handles.
|
|
104
|
+
*
|
|
105
|
+
* The pressure source is a composite of:
|
|
106
|
+
* - `nodeOsPressureSource()` — desktop polling at 5 s.
|
|
107
|
+
* - A `capacitorPressureSource()` bridge — populated by the Capacitor
|
|
108
|
+
* host on iOS/Android. The native side calls `dispatchMobilePressure`
|
|
109
|
+
* when the OS hands it a memory-warning callback.
|
|
110
|
+
*/
|
|
111
|
+
getMemoryArbiter(): MemoryArbiter;
|
|
112
|
+
private registerImageGenCapability;
|
|
113
|
+
private loadImageGenBackend;
|
|
114
|
+
private resolveImageGenLoadArgs;
|
|
115
|
+
private imageGenFileExists;
|
|
116
|
+
/**
|
|
117
|
+
* Capacitor bridge entrypoint. The mobile host (iOS / Android) calls
|
|
118
|
+
* this from the native pressure callback. Safe to call before the
|
|
119
|
+
* arbiter has been created — we create it on demand.
|
|
120
|
+
*/
|
|
121
|
+
dispatchMobilePressure(level: "nominal" | "low" | "critical", freeMb?: number): void;
|
|
122
|
+
uninstall(modelId: string): Promise<{
|
|
123
|
+
removed: boolean;
|
|
124
|
+
reason?: "external" | "not-found";
|
|
125
|
+
}>;
|
|
126
|
+
}
|
|
127
|
+
export declare const localInferenceService: LocalInferenceService;
|
|
128
|
+
//# sourceMappingURL=service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../../src/services/service.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAKH,OAAO,EACN,KAAK,YAAY,EAIjB,MAAM,eAAe,CAAC;AACvB,OAAO,EAEN,KAAK,2BAA2B,EAChC,MAAM,gBAAgB,CAAC;AAoBxB,OAAO,EACN,aAAa,EAGb,MAAM,kBAAkB,CAAC;AAS1B,OAAO,EAEN,KAAK,yBAAyB,EAG9B,MAAM,kBAAkB,CAAC;AAW1B,OAAO,KAAK,EACX,gBAAgB,EAChB,cAAc,EACd,YAAY,EACZ,aAAa,EACb,WAAW,EACX,aAAa,EACb,uBAAuB,EACvB,gBAAgB,EAChB,gBAAgB,EAChB,kBAAkB,EAClB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,KAAK,YAAY,EAAwB,MAAM,UAAU,CAAC;AA0DnE,qBAAa,qBAAqB;IAKjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAExB;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAgC;IAC5D,OAAO,CAAC,gBAAgB,CAA8B;IACtD;;;;;;OAMG;IACH,OAAO,CAAC,aAAa,CAA8B;IACnD;;;;OAIG;IACH,OAAO,CAAC,oBAAoB,CAEZ;IAChB,OAAO,CAAC,4BAA4B,CAAS;IAE7C,UAAU;IAIV;;;;;;OAMG;IACH,OAAO,CAAC,gBAAgB;IASlB,YAAY;IAKZ,WAAW,IAAI,OAAO,CAAC,aAAa,CAAC;IAI3C,YAAY,IAAI,WAAW,EAAE;IAI7B,SAAS,IAAI,gBAAgB;IAIvB,cAAc,IAAI,OAAO,CAAC,gBAAgB,CAAC;IAI3C,iBAAiB,CACtB,IAAI,EAAE,cAAc,EACpB,OAAO,EAAE,MAAM,GAAG,IAAI,GACpB,OAAO,CAAC,gBAAgB,CAAC;IAKtB,QAAQ,IAAI,OAAO,CAAC,gBAAgB,CAAC;IAyBrC,gBAAgB,IAAI,OAAO,CAAC,uBAAuB,CAAC;IAcpD,mBAAmB,CACxB,IAAI,EAAE,kBAAkB,EACxB,QAAQ,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,yBAAyB,CAAC;IAS/B,oBAAoB,CACzB,QAAQ,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,MAAM,CAAC,kBAAkB,EAAE,yBAAyB,CAAC,CAAC;IAQjE;;;;OAIG;IACH,OAAO,CAAC,sBAAsB;IAIxB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAIpD,4BAA4B,CACjC,cAAc,EAAE,MAAM,EACtB,IAAI,GAAE,kBAAiC,EACvC,QAAQ,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC;QAAE,KAAK,EAAE,YAAY,CAAC;QAAC,GAAG,EAAE,WAAW,CAAA;KAAE,GAAG,IAAI,CAAC;IActD,iBAAiB,CACtB,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GACZ,OAAO,CAAC,YAAY,EAAE,CAAC;IAMpB,cAAc,CACnB,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,aAAa,GAAG,YAAY,EACjC,KAAK,CAAC,EAAE,MAAM,GACZ,OAAO,CAAC,YAAY,EAAE,CAAC;IAO1B;;;;OAIG;IACG,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAoCpD,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO;IAIxC,kBAAkB,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,GAAG,MAAM,IAAI;IAIxE,eAAe,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,I
AAI,GAAG,MAAM,IAAI;IAIlE,SAAS,CACd,OAAO,EAAE,YAAY,GAAG,IAAI,EAC5B,OAAO,EAAE,MAAM,EACf,SAAS,CAAC,EAAE,2BAA2B,GACrC,OAAO,CAAC,gBAAgB,CAAC;IAyBtB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAI3D;;;;;;;OAOG;IACG,mBAAmB,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC;IAwB5D,WAAW,CAAC,OAAO,EAAE,YAAY,GAAG,IAAI,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAI1E;;;;OAIG;IACG,kBAAkB,IAAI,OAAO,CAAC;QACnC,MAAM,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,EAAE,CAAA;SAAE,GAAG,IAAI,CAAC;KACjE,CAAC;IAMF;;;;;;;;;;;OAWG;IACH,gBAAgB,IAAI,aAAa;IAiGjC,OAAO,CAAC,0BAA0B;YAiBpB,mBAAmB;YAyDnB,uBAAuB;IAmDrC,OAAO,CAAC,kBAAkB;IAI1B;;;;OAIG;IACH,sBAAsB,CACrB,KAAK,EAAE,SAAS,GAAG,KAAK,GAAG,UAAU,EACrC,MAAM,CAAC,EAAE,MAAM,GACb,IAAI;IAKD,SAAS,CACd,OAAO,EAAE,MAAM,GACb,OAAO,CAAC;QAAE,OAAO,EAAE,OAAO,CAAC;QAAC,MAAM,CAAC,EAAE,UAAU,GAAG,WAAW,CAAA;KAAE,CAAC;CAQnE;AAwBD,eAAO,MAAM,qBAAqB,uBAA8B,CAAC"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session pool for the in-process node-llama-cpp engine.
|
|
3
|
+
*
|
|
4
|
+
* node-llama-cpp's `LlamaChatSession` keeps an internal KV cache. The
|
|
5
|
+
* stock engine code resets that cache between every turn, which is
|
|
6
|
+
* correct for stateless per-call generation but actively defeats prefix
|
|
7
|
+
* reuse for callers that pass a `promptCacheKey` (the runtime's
|
|
8
|
+
* cloud-style cache hint).
|
|
9
|
+
*
|
|
10
|
+
* This pool keeps one session per `promptCacheKey`, LRU-evicted, so
|
|
11
|
+
* sequential calls with the same key reuse the on-GPU/on-CPU KV cache.
|
|
12
|
+
* Calls without a cache key share the synthetic `_default` slot, which
|
|
13
|
+
* preserves the previous "stateless per-call" semantics by resetting
|
|
14
|
+
* before each turn.
|
|
15
|
+
*
|
|
16
|
+
* The pool owns nothing at module scope; the engine constructs a pool
|
|
17
|
+
* tied to the loaded model and disposes it on unload.
|
|
18
|
+
*/
|
|
19
|
+
export interface PoolSession {
|
|
20
|
+
/** Reset accumulated chat history. Called for the default slot only. */
|
|
21
|
+
resetChatHistory?(): void | Promise<void>;
|
|
22
|
+
/** Dispose underlying KV state. Called on eviction + on pool close. */
|
|
23
|
+
dispose?(): void | Promise<void>;
|
|
24
|
+
}
|
|
25
|
+
export type SessionFactory<TSession extends PoolSession> = (key: string) => Promise<TSession>;
|
|
26
|
+
/**
|
|
27
|
+
* Synthetic key used for callers that didn't supply a `promptCacheKey`.
|
|
28
|
+
* These callers want the old "history-free" behaviour, so the engine
|
|
29
|
+
* resets chat history each turn for this slot only.
|
|
30
|
+
*/
|
|
31
|
+
export declare const DEFAULT_SESSION_KEY = "_default";
|
|
32
|
+
export declare class SessionPool<TSession extends PoolSession> {
|
|
33
|
+
private readonly maxSize;
|
|
34
|
+
private readonly factory;
|
|
35
|
+
/**
|
|
36
|
+
* Insertion order = LRU order. We re-key on each access so the most
|
|
37
|
+
* recently used entry is always last in iteration order.
|
|
38
|
+
*/
|
|
39
|
+
private readonly entries;
|
|
40
|
+
constructor(args: {
|
|
41
|
+
maxSize: number;
|
|
42
|
+
factory: SessionFactory<TSession>;
|
|
43
|
+
});
|
|
44
|
+
/**
|
|
45
|
+
* Get-or-create the session for `key`. Promotes the entry to MRU.
|
|
46
|
+
* On eviction, the oldest entry's `dispose()` is awaited before the
|
|
47
|
+
* new entry is returned so the caller never holds two live sessions
|
|
48
|
+
* over the same KV memory.
|
|
49
|
+
*/
|
|
50
|
+
acquire(key: string): Promise<TSession>;
|
|
51
|
+
/** Number of live sessions, for diagnostics. */
|
|
52
|
+
size(): number;
|
|
53
|
+
/** Snapshot of live keys ordered LRU → MRU. */
|
|
54
|
+
keys(): string[];
|
|
55
|
+
/**
|
|
56
|
+
* Drop a single session by key. Used when the caller knows the prefix
|
|
57
|
+
* has gone stale (e.g. system prompt changed) and the cached KV is no
|
|
58
|
+
* longer valid.
|
|
59
|
+
*/
|
|
60
|
+
drop(key: string): Promise<void>;
|
|
61
|
+
/**
|
|
62
|
+
* Tear down every cached session. Called by the engine on model
|
|
63
|
+
* unload. After `close()` the pool is empty but reusable.
|
|
64
|
+
*/
|
|
65
|
+
close(): Promise<void>;
|
|
66
|
+
private disposeQuietly;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Resolve the pool size from env, with a sane default. Bounded to the range 1..64.
|
|
70
|
+
*/
|
|
71
|
+
export declare function resolveDefaultPoolSize(envValue?: string | null): number;
|
|
72
|
+
//# sourceMappingURL=session-pool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"session-pool.d.ts","sourceRoot":"","sources":["../../src/services/session-pool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,MAAM,WAAW,WAAW;IAC3B,wEAAwE;IACxE,gBAAgB,CAAC,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1C,uEAAuE;IACvE,OAAO,CAAC,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACjC;AAED,MAAM,MAAM,cAAc,CAAC,QAAQ,SAAS,WAAW,IAAI,CAC1D,GAAG,EAAE,MAAM,KACP,OAAO,CAAC,QAAQ,CAAC,CAAC;AASvB;;;;GAIG;AACH,eAAO,MAAM,mBAAmB,aAAa,CAAC;AAE9C,qBAAa,WAAW,CAAC,QAAQ,SAAS,WAAW;IACpD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA2B;IACnD;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAsC;gBAElD,IAAI,EAAE;QACjB,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,EAAE,cAAc,CAAC,QAAQ,CAAC,CAAC;KAClC;IAUD;;;;;OAKG;IACG,OAAO,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC;IA0B7C,gDAAgD;IAChD,IAAI,IAAI,MAAM;IAId,+CAA+C;IAC/C,IAAI,IAAI,MAAM,EAAE;IAIhB;;;;OAIG;IACG,IAAI,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAOtC;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;YAQd,cAAc;CAS5B;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,CAMvE"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { JSONSchema, ResponseSkeleton } from "@elizaos/core";
|
|
2
|
+
export type StructuredOutputRepairStatus = "unchanged" | "repaired" | "ambiguous" | "invalid";
|
|
3
|
+
export interface StructuredOutputRepairOptions {
|
|
4
|
+
skeleton?: ResponseSkeleton;
|
|
5
|
+
jsonSchema?: JSONSchema;
|
|
6
|
+
jsonSchemasByKey?: Readonly<Record<string, JSONSchema | undefined>>;
|
|
7
|
+
}
|
|
8
|
+
export interface StructuredOutputRepairResult {
|
|
9
|
+
text: string;
|
|
10
|
+
status: StructuredOutputRepairStatus;
|
|
11
|
+
reason?: string;
|
|
12
|
+
}
|
|
13
|
+
export declare function repairStructuredOutput(text: string, options: StructuredOutputRepairOptions): StructuredOutputRepairResult;
|
|
14
|
+
export declare class StructuredOutputRepairStream {
|
|
15
|
+
private readonly options;
|
|
16
|
+
private text;
|
|
17
|
+
private syntheticTail;
|
|
18
|
+
constructor(options: StructuredOutputRepairOptions);
|
|
19
|
+
push(chunk: string): string;
|
|
20
|
+
flush(): string;
|
|
21
|
+
currentText(): string;
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=deterministic-repair.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"deterministic-repair.d.ts","sourceRoot":"","sources":["../../../src/services/structured-output/deterministic-repair.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACX,UAAU,EACV,gBAAgB,EAEhB,MAAM,eAAe,CAAC;AAEvB,MAAM,MAAM,4BAA4B,GACrC,WAAW,GACX,UAAU,GACV,WAAW,GACX,SAAS,CAAC;AAEb,MAAM,WAAW,6BAA6B;IAC7C,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,gBAAgB,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,SAAS,CAAC,CAAC,CAAC;CACpE;AAED,MAAM,WAAW,4BAA4B;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,4BAA4B,CAAC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAuWD,wBAAgB,sBAAsB,CACrC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,6BAA6B,GACpC,4BAA4B,CAa9B;AASD,qBAAa,4BAA4B;IAI5B,OAAO,CAAC,QAAQ,CAAC,OAAO;IAHpC,OAAO,CAAC,IAAI,CAAM;IAClB,OAAO,CAAC,aAAa,CAAM;gBAEE,OAAO,EAAE,6BAA6B;IAEnE,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAkB3B,KAAK,IAAI,MAAM;IASf,WAAW,IAAI,MAAM;CAGrB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/services/structured-output/index.ts"],"names":[],"mappings":"AAGA,cAAc,yBAAyB,CAAC"}
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured-output / forced-span / prefill plumbing for the local-inference
|
|
3
|
+
* engine path.
|
|
4
|
+
*
|
|
5
|
+
* The canonical contract lives in `@elizaos/core` `GenerateTextParams`
|
|
6
|
+
* (`prefill`, `responseSkeleton`, `grammar`, `streamStructured`) and is
|
|
7
|
+
* threaded through `useModel` → router. This module is the
|
|
8
|
+
* local-inference-layer mirror of the relevant subset plus the GBNF
|
|
9
|
+
* compilation that turns a `ResponseSkeleton` into a *lazy* grammar so the
|
|
10
|
+
* model only ever samples the free positions of the response envelope
|
|
11
|
+
* (single-value enums collapse to literals — no tokens spent on the scaffold).
|
|
12
|
+
*
|
|
13
|
+
* Nothing here is local-model-specific in shape; cloud adapters never read
|
|
14
|
+
* these fields. There is no fallback path — adapters that can't honour
|
|
15
|
+
* `grammar` / `prefill` / `responseSkeleton` ignore them, full stop.
|
|
16
|
+
*/
|
|
17
|
+
import type { JSONSchema, ResponseSkeleton, ResponseSkeletonSpan, SpanSamplerPlan } from "@elizaos/core";
|
|
18
|
+
export { repairStructuredOutput, type StructuredOutputRepairOptions, type StructuredOutputRepairResult, type StructuredOutputRepairStatus, StructuredOutputRepairStream, } from "./structured-output/deterministic-repair";
|
|
19
|
+
export type { ResponseSkeleton, ResponseSkeletonSpan, SpanSamplerPlan };
|
|
20
|
+
/**
|
|
21
|
+
* GBNF grammar fragment ready for a llama-server request body. `lazy` grammars
|
|
22
|
+
* only kick in once a trigger word/sequence appears in the stream
|
|
23
|
+
* (llama.cpp's `grammar_lazy` + `grammar_triggers`) — that lets the model
|
|
24
|
+
* free-run the prose `replyText` and only constrain the structured scaffold
|
|
25
|
+
* once the envelope boundary is reached.
|
|
26
|
+
*/
|
|
27
|
+
export interface GbnfGrammar {
|
|
28
|
+
/** GBNF source. */
|
|
29
|
+
source: string;
|
|
30
|
+
/** When true, the server applies the grammar lazily (`grammar_lazy: true`). */
|
|
31
|
+
lazy?: boolean;
|
|
32
|
+
/** Trigger words that activate a lazy grammar (`grammar_triggers`). */
|
|
33
|
+
triggers?: ReadonlyArray<string>;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Local-inference mirror of the structured-output extensions on
|
|
37
|
+
* `GenerateTextParams`. Threaded `useModel` → router → local handler →
|
|
38
|
+
* engine → FFI runtime.
|
|
39
|
+
*/
|
|
40
|
+
export interface StructuredGenerateParams {
|
|
41
|
+
/**
|
|
42
|
+
* Assistant-turn prefill — a partial assistant message the model should
|
|
43
|
+
* *continue* rather than start fresh. On llama-server this is sent as a
|
|
44
|
+
* trailing assistant message with `continue_final_message` / the
|
|
45
|
+
* `assistant` chat-template prefix; the capacitor-llama path seeds the
|
|
46
|
+
* prompt text and re-prepends the prefill to the result.
|
|
47
|
+
*/
|
|
48
|
+
prefill?: string;
|
|
49
|
+
/**
|
|
50
|
+
* Forced response skeleton. When set the engine compiles it to a lazy GBNF
|
|
51
|
+
* (single-value enums → literals) so the model only samples the free
|
|
52
|
+
* positions of the envelope.
|
|
53
|
+
*/
|
|
54
|
+
responseSkeleton?: ResponseSkeleton;
|
|
55
|
+
/** Optional whole-response JSON schema from `GenerateTextParams`. */
|
|
56
|
+
responseSchema?: JSONSchema;
|
|
57
|
+
/**
|
|
58
|
+
* Explicit GBNF grammar string. When both `grammar` and `responseSkeleton`
|
|
59
|
+
* are present, the explicit `grammar` wins.
|
|
60
|
+
*/
|
|
61
|
+
grammar?: string;
|
|
62
|
+
/**
|
|
63
|
+
* When true, the engine streams per-token chunks back via `onTextChunk`
|
|
64
|
+
* (and structured-field events) instead of returning the whole string in
|
|
65
|
+
* one shot.
|
|
66
|
+
*/
|
|
67
|
+
streamStructured?: boolean;
|
|
68
|
+
/**
|
|
69
|
+
* The eliza harness schema for this call — the compact descriptor bundling
|
|
70
|
+
* the response skeleton, a pre-built grammar (optional), the derived
|
|
71
|
+
* deterministic-token {@link ElizaPrefillPlan}, and the short/long name maps.
|
|
72
|
+
* When present, guided structured decode is *on* for this call: the engine
|
|
73
|
+
* sends the grammar AND the prefill plan, and seeds the leading literal run
|
|
74
|
+
* as an assistant-turn prefill. Absent → guided decode is off (the engine
|
|
75
|
+
* may still honour a bare `grammar` / `responseSkeleton`, but never emits a
|
|
76
|
+
* prefill plan). This is the off-by-default switch for the deterministic
|
|
77
|
+
* short-circuit.
|
|
78
|
+
*/
|
|
79
|
+
elizaSchema?: ElizaHarnessSchema;
|
|
80
|
+
/**
|
|
81
|
+
* Per-span sampler overrides for the {@link responseSkeleton}. When set,
|
|
82
|
+
* the engine emits `eliza_span_samplers` on the llama-server request body so
|
|
83
|
+
* the fork-side server swaps to argmax (`llama_sampler_init_greedy()`) at
|
|
84
|
+
* the indicated enum / number / boolean positions. Stock llama-server
|
|
85
|
+
* ignores the field — the grammar still constrains the same tokens, we
|
|
86
|
+
* just lose the argmax determinism guarantee on that path.
|
|
87
|
+
*
|
|
88
|
+
* Producer: `@elizaos/core` `buildSpanSamplerPlan(skeleton)`.
|
|
89
|
+
*/
|
|
90
|
+
spanSamplerPlan?: SpanSamplerPlan;
|
|
91
|
+
/**
|
|
92
|
+
* Per-request chat-template thinking control for reasoning-capable local
|
|
93
|
+
* models. `off` maps to `chat_template_kwargs.enable_thinking=false` for
|
|
94
|
+
* response-handler/direct-reply calls that must emit user-visible text, while
|
|
95
|
+
* planner/action calls can omit this and use the catalog/server default.
|
|
96
|
+
*/
|
|
97
|
+
thinking?: "auto" | "on" | "off";
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Collapse a skeleton: `enum` spans with exactly one value (or zero values)
|
|
101
|
+
* become `literal` spans (C4). Adjacent literals stay separate spans — the
|
|
102
|
+
* compiler merges them in the root rule.
|
|
103
|
+
*/
|
|
104
|
+
export declare function collapseSkeleton(skeleton: ResponseSkeleton): ResponseSkeleton;
|
|
105
|
+
/**
|
|
106
|
+
* Compile a `ResponseSkeleton` to a *lazy* GBNF grammar. The grammar's `root`
|
|
107
|
+
* rule is the concatenation of every span:
|
|
108
|
+
* - `literal` spans → GBNF string literals (the JSON key/glue scaffold),
|
|
109
|
+
* - `enum` spans (≥2 values) → an alternation of quoted-string literals,
|
|
110
|
+
* - `free-string` spans → a quoted JSON string rule,
|
|
111
|
+
* - `free-json` spans → the recursive JSON-value rule.
|
|
112
|
+
*
|
|
113
|
+
* The grammar runs *lazily* when the skeleton opens with a literal (the
|
|
114
|
+
* trigger word) — generation free-runs until that literal is seen, then the
|
|
115
|
+
* grammar pins the rest of the envelope. That keeps the prose prefix
|
|
116
|
+
* unconstrained while forcing the JSON scaffold.
|
|
117
|
+
*
|
|
118
|
+
* Returns `null` when the skeleton has no free spans (nothing for the model to
|
|
119
|
+
* sample — the caller should just emit the literal text and skip generation).
|
|
120
|
+
*/
|
|
121
|
+
export declare function compileSkeletonToGbnf(skeletonInput: ResponseSkeleton): GbnfGrammar | null;
|
|
122
|
+
/**
|
|
123
|
+
* Resolve the GBNF grammar to apply for a generation call. Precedence: an
|
|
124
|
+
* explicit `grammar` string on the params, then a compiled `responseSkeleton`.
|
|
125
|
+
* Returns null when neither is set.
|
|
126
|
+
*/
|
|
127
|
+
export declare function resolveGrammarForParams(params: StructuredGenerateParams | undefined): GbnfGrammar | null;
|
|
128
|
+
/**
|
|
129
|
+
* Build the OpenAI-/llama-server-compatible request-body fragment for a
|
|
130
|
+
* grammar. Returns `grammar` + (when lazy) `grammar_lazy` / `grammar_triggers`.
|
|
131
|
+
* Recent llama.cpp accepts these on both `/v1/chat/completions` and
|
|
132
|
+
* `/completion`.
|
|
133
|
+
*/
|
|
134
|
+
export declare function grammarRequestFields(grammar: GbnfGrammar): Record<string, unknown>;
|
|
135
|
+
/**
|
|
136
|
+
* Split a skeleton's leading literal run off as an assistant-turn prefill
|
|
137
|
+
* candidate, returning that prefix plus the remaining spans. Used by the
|
|
138
|
+
* multi-call infill fallback (emit prefix as a prefill, generate the first
|
|
139
|
+
* free span, then loop).
|
|
140
|
+
*/
|
|
141
|
+
export declare function splitSkeletonAtFirstFree(skeleton: ResponseSkeleton): {
|
|
142
|
+
prefixLiteral: string;
|
|
143
|
+
rest: ResponseSkeletonSpan[];
|
|
144
|
+
};
|
|
145
|
+
/**
|
|
146
|
+
* One deterministically-forced byte run in an {@link ElizaPrefillPlan}. The
|
|
147
|
+
* runs alternate with the free (sampled) spans, so a run is unambiguously
|
|
148
|
+
* anchored by *position* in that alternation rather than by an absolute byte
|
|
149
|
+
* offset (the sampled spans have unknown length at plan time):
|
|
150
|
+
*
|
|
151
|
+
* run[0] free[0] run[1] free[1] … run[n] (n = number of free spans)
|
|
152
|
+
*
|
|
153
|
+
* `afterFreeSpan` is `-1` for the leading run (before any free span — the
|
|
154
|
+
* assistant-turn prefill), then `0, 1, 2, …` for the run that follows free
|
|
155
|
+
* span 0, 1, 2, … . The server resumes sampling after writing each run; once
|
|
156
|
+
* the matching free span is sampled it writes the next run's token ids without
|
|
157
|
+
* a forward pass and advances the decoder to the next free span.
|
|
158
|
+
*/
|
|
159
|
+
export interface PrefillRun {
|
|
160
|
+
/**
|
|
161
|
+
* Index of the free span this run *follows*. `-1` = the leading run (the
|
|
162
|
+
* prefill); `k >= 0` = the run after free span `k`. The last run (`afterFreeSpan = n - 1` for `n` free spans) is the
|
|
163
|
+
* tail scaffold (closing braces) after the final free span.
|
|
164
|
+
*/
|
|
165
|
+
afterFreeSpan: number;
|
|
166
|
+
/** The deterministically-forced bytes. */
|
|
167
|
+
text: string;
|
|
168
|
+
/**
|
|
169
|
+
* Optional pre-tokenized token IDs for this run. When provided at compile time
|
|
170
|
+
* via a tokenizer callback, the FFI runtime can use these directly without
|
|
171
|
+
* re-tokenizing, improving latency.
|
|
172
|
+
*/
|
|
173
|
+
tokenIds?: number[];
|
|
174
|
+
}
|
|
175
|
+
/**
|
|
176
|
+
* Compact descriptor of the deterministic structure of a constrained decode:
|
|
177
|
+
* the ordered runs of bytes that are fixed (so the server can prefill their
|
|
178
|
+
* token ids and skip the forward passes) interleaved with the count of free
|
|
179
|
+
* positions, plus the leading literal run that should be seeded as an
|
|
180
|
+
* assistant-turn prefill (`prefix`). Sent on the request as `eliza_prefill_plan`.
|
|
181
|
+
*
|
|
182
|
+
* Purely a speedup hint — a server that ignores it produces the identical
|
|
183
|
+
* output because the lazy GBNF already forces the same bytes.
|
|
184
|
+
*/
|
|
185
|
+
export interface ElizaPrefillPlan {
|
|
186
|
+
/**
|
|
187
|
+
* The leading deterministic run — emitted as an assistant-turn prefill so
|
|
188
|
+
* the model never samples it. Empty when the skeleton opens with a free span.
|
|
189
|
+
*/
|
|
190
|
+
prefix: string;
|
|
191
|
+
/**
|
|
192
|
+
* Deterministic byte runs alternating with the free spans (see
|
|
193
|
+
* {@link PrefillRun}), in output order, including the prefix run when
|
|
194
|
+
* non-empty.
|
|
195
|
+
*/
|
|
196
|
+
runs: PrefillRun[];
|
|
197
|
+
/** Number of free (sampled) spans in the skeleton. `runs.length` is `freeCount + 1` when the skeleton opens with a literal run, or `freeCount` when it opens with a free span (no leading run). */
|
|
198
|
+
freeCount: number;
|
|
199
|
+
/**
|
|
200
|
+
* Opaque cache key (mirrors the skeleton's `id`) so the server can cache the
|
|
201
|
+
* tokenised form of the runs across turns when the structure is unchanged.
|
|
202
|
+
*/
|
|
203
|
+
id?: string;
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Compute the {@link ElizaPrefillPlan} for a response skeleton: walk the spans,
|
|
207
|
+
* accumulating consecutive `literal` spans (and single-value enums collapsed to
|
|
208
|
+
* literals) into deterministic byte runs and counting the free spans. Adjacent
|
|
209
|
+
* literals merge into one run. Returns `null` when the skeleton has no
|
|
210
|
+
* deterministic runs at all (nothing to prefill).
|
|
211
|
+
*
|
|
212
|
+
* Invariant the consumer relies on: concatenating the runs interleaved with the
|
|
213
|
+
* (eventually-sampled) free-span values, in order, reproduces a byte-identical
|
|
214
|
+
* JSON document to what the lazy GBNF from {@link compileSkeletonToGbnf} would
|
|
215
|
+
* have produced. The tests assert this.
|
|
216
|
+
*/
|
|
217
|
+
export declare function compilePrefillPlan(skeletonInput: ResponseSkeleton, tokenize?: (text: string) => number[]): ElizaPrefillPlan | null;
|
|
218
|
+
/**
|
|
219
|
+
* Build the request-body fragment carrying the prefill plan. The server reads
|
|
220
|
+
* `eliza_prefill_plan` (a tolerant extension — old binaries ignore it and the
|
|
221
|
+
* grammar still forces the same bytes). Returns `{}` when there is no plan.
|
|
222
|
+
*/
|
|
223
|
+
export declare function prefillPlanRequestFields(plan: ElizaPrefillPlan | null): Record<string, unknown>;
|
|
224
|
+
/**
|
|
225
|
+
* Build the request-body fragment carrying per-span sampler overrides. The
|
|
226
|
+
* fork-side llama-server reads `eliza_span_samplers` (a tolerant extension —
|
|
227
|
+
* old binaries ignore it; the grammar still constrains the same tokens, we
|
|
228
|
+
* just lose the per-span argmax determinism guarantee on the legacy path).
|
|
229
|
+
*
|
|
230
|
+
* Wire schema (snake_case for OpenAI body conventions):
|
|
231
|
+
* {
|
|
232
|
+
* overrides: [
|
|
233
|
+
* { span_index: number, temperature: number, top_k?: number, top_p?: number }
|
|
234
|
+
* ],
|
|
235
|
+
* strict?: boolean
|
|
236
|
+
* }
|
|
237
|
+
*
|
|
238
|
+
* Returns `{}` when there is no plan or no overrides — keep the wire surface
|
|
239
|
+
* narrow so a stock server never has to skip past empty fork extensions.
|
|
240
|
+
*/
|
|
241
|
+
export declare function spanSamplerPlanRequestFields(plan: SpanSamplerPlan | undefined | null): Record<string, unknown>;
|
|
242
|
+
/**
|
|
243
|
+
* The compact, engine-facing descriptor for a structured output the agent loop
|
|
244
|
+
* wants forced. It is the bundle of (a) a {@link ResponseSkeleton} (which
|
|
245
|
+
* compiles to a lazy GBNF for the constrained-decode path), (b) the derived
|
|
246
|
+
* {@link ElizaPrefillPlan} (the deterministic-token short-circuit), and (c) the
|
|
247
|
+
* short-name ↔ long-name maps so the on-wire/decoded form uses canonical short
|
|
248
|
+
* action ids / enum values and the runtime expands them for the caller.
|
|
249
|
+
*
|
|
250
|
+
* Producers: `@elizaos/core` `buildPlannerActionGrammar` / `buildResponseGrammar`
|
|
251
|
+
* wrapped by {@link elizaHarnessSchemaFromSkeleton}. Consumer: the local engine
|
|
252
|
+
* (`ffi-streaming-backend.ts` / `engine.ts`).
|
|
253
|
+
*/
|
|
254
|
+
export interface ElizaHarnessSchema {
|
|
255
|
+
/** Structure-forcing description; compiles to a lazy GBNF. */
|
|
256
|
+
skeleton: ResponseSkeleton;
|
|
257
|
+
/** Pre-built GBNF (wins over compiling the skeleton), when the producer made one. */
|
|
258
|
+
grammar?: string;
|
|
259
|
+
/** Deterministic-token short-circuit derived from the skeleton. */
|
|
260
|
+
prefillPlan: ElizaPrefillPlan | null;
|
|
261
|
+
/**
|
|
262
|
+
* Canonical short id → human-facing long name (display label), for any
|
|
263
|
+
* closed enum the descriptor pins (action ids, known enum values). The wire
|
|
264
|
+
* form is the short id; callers that want the long name look it up here.
|
|
265
|
+
* Empty when nothing needs expanding.
|
|
266
|
+
*/
|
|
267
|
+
longNames: Record<string, string>;
|
|
268
|
+
/** Cache key (the skeleton's id). */
|
|
269
|
+
id?: string;
|
|
270
|
+
}
|
|
271
|
+
/**
|
|
272
|
+
* Wrap a {@link ResponseSkeleton} (+ optional pre-built grammar + name map)
|
|
273
|
+
* into an {@link ElizaHarnessSchema}, computing the prefill plan. This is the
|
|
274
|
+
* single place the prefill plan is derived so producers don't each reimplement
|
|
275
|
+
* it.
|
|
276
|
+
*/
|
|
277
|
+
export declare function elizaHarnessSchemaFromSkeleton(input: {
|
|
278
|
+
skeleton: ResponseSkeleton;
|
|
279
|
+
grammar?: string;
|
|
280
|
+
longNames?: Record<string, string>;
|
|
281
|
+
tokenize?: (text: string) => number[];
|
|
282
|
+
}): ElizaHarnessSchema;
|
|
283
|
+
/**
|
|
284
|
+
* Expand a canonical short id decoded out of a constrained generation back to
|
|
285
|
+
* its human-facing long name (display label), using the descriptor's
|
|
286
|
+
* {@link ElizaHarnessSchema.longNames} map (sourced from the action catalog).
|
|
287
|
+
* Identity when there is no mapping — the canonical action ids
|
|
288
|
+
* (`normalizeActionName` results, e.g. `SEND_MESSAGE`) are already the on-wire
|
|
289
|
+
* form, so this is only meaningful when a producer registered a separate
|
|
290
|
+
* display label.
|
|
291
|
+
*/
|
|
292
|
+
export declare function expandShortName(schema: ElizaHarnessSchema | undefined, shortId: string): string;
|
|
293
|
+
/**
|
|
294
|
+
* Invert {@link expandShortName}: given a (possibly long) name the caller
|
|
295
|
+
* supplied, return the canonical short id the wire form expects. Identity when
|
|
296
|
+
* the name is already a known short id or no mapping matches.
|
|
297
|
+
*/
|
|
298
|
+
export declare function canonicalizeShortName(schema: ElizaHarnessSchema | undefined, name: string): string;
|
|
299
|
+
/**
|
|
300
|
+
* Resolve the GBNF + prefill plan + assistant-turn prefill to apply for a
|
|
301
|
+
* generation call given the structured params. Precedence for the grammar:
|
|
302
|
+
* an explicit `grammar` string, then a harness schema's `grammar`, then
|
|
303
|
+
* compiling the harness schema's / params' `responseSkeleton`. The prefill plan
|
|
304
|
+
* is only present when a harness schema is supplied (off by default).
|
|
305
|
+
*/
|
|
306
|
+
export declare function resolveGuidedDecodeForParams(params: StructuredGenerateParams | undefined): {
|
|
307
|
+
grammar: GbnfGrammar | null;
|
|
308
|
+
prefillPlan: ElizaPrefillPlan | null;
|
|
309
|
+
prefill: string | null;
|
|
310
|
+
};
|
|
311
|
+
//# sourceMappingURL=structured-output.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"structured-output.d.ts","sourceRoot":"","sources":["../../src/services/structured-output.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EACX,UAAU,EACV,gBAAgB,EAChB,oBAAoB,EACpB,eAAe,EACf,MAAM,eAAe,CAAC;AAEvB,OAAO,EACN,sBAAsB,EACtB,KAAK,6BAA6B,EAClC,KAAK,4BAA4B,EACjC,KAAK,4BAA4B,EACjC,4BAA4B,GAC5B,MAAM,0CAA0C,CAAC;AAClD,YAAY,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,eAAe,EAAE,CAAC;AAExE;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC3B,mBAAmB;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,+EAA+E;IAC/E,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,uEAAuE;IACvE,QAAQ,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;CACjC;AAED;;;;GAIG;AACH,MAAM,WAAW,wBAAwB;IACxC;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC,qEAAqE;IACrE,cAAc,CAAC,EAAE,UAAU,CAAC;IAC5B;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B;;;;;;;;;;OAUG;IACH,WAAW,CAAC,EAAE,kBAAkB,CAAC;IACjC;;;;;;;;;OASG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,KAAK,CAAC;CACjC;AAiCD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,gBAAgB,GAAG,gBAAgB,CAe7E;AAoBD;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,qBAAqB,CACpC,aAAa,EAAE,gBAAgB,GAC7B,WAAW,GAAG,IAAI,CA+EpB;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CACtC,MAAM,EAAE,wBAAwB,GAAG,SAAS,GAC1C,WAAW,GAAG,IAAI,CASpB;AA+BD;;;;;GAKG;AACH,wBAAgB,oBAAoB,CACnC,OAAO,EAAE,WAAW,GAClB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAYzB;AAED;;;;;GAKG;AACH,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,gBAAgB,GAAG;IACrE,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,oBAAoB,EAAE,CAAC;CAC7B,CAWA;AAsBD;;;;;;;;;;;;;GAaG;AACH,MAAM,WAAW,UAAU;IAC1B;;;;OAIG;IACH,aAAa,EAAE,MAAM,CAAC;IACtB,0CAA0C;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;OAGG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,IAAI,EAAE,UAAU,EAAE,CAAC;IACnB,4IAA4I;IAC5I,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,EAAE,CAAC,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CACjC,aAAa,EAAE,gBAAgB,EAC/B,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,
KAAK,MAAM,EAAE,GACnC,gBAAgB,GAAG,IAAI,CA0CzB;AAED;;;;GAIG;AACH,wBAAgB,wBAAwB,CACvC,IAAI,EAAE,gBAAgB,GAAG,IAAI,GAC3B,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAmBzB;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,4BAA4B,CAC3C,IAAI,EAAE,eAAe,GAAG,SAAS,GAAG,IAAI,GACtC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAczB;AAMD;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,kBAAkB;IAClC,8DAA8D;IAC9D,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,qFAAqF;IACrF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mEAAmE;IACnE,WAAW,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACrC;;;;;OAKG;IACH,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClC,qCAAqC;IACrC,EAAE,CAAC,EAAE,MAAM,CAAC;CACZ;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAAC,KAAK,EAAE;IACrD,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CACtC,GAAG,kBAAkB,CAQrB;AAED;;;;;;;;GAQG;AACH,wBAAgB,eAAe,CAC9B,MAAM,EAAE,kBAAkB,GAAG,SAAS,EACtC,OAAO,EAAE,MAAM,GACb,MAAM,CAGR;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CACpC,MAAM,EAAE,kBAAkB,GAAG,SAAS,EACtC,IAAI,EAAE,MAAM,GACV,MAAM,CAOR;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC3C,MAAM,EAAE,wBAAwB,GAAG,SAAS,GAC1C;IACF,OAAO,EAAE,WAAW,GAAG,IAAI,CAAC;IAC5B,WAAW,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACrC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,CA8BA"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System memory reader — the single source of "how much RAM can we actually
|
|
3
|
+
* allocate right now" for the local-inference memory arbiter and pressure
|
|
4
|
+
* sources.
|
|
5
|
+
*
|
|
6
|
+
* Node's `os.freemem()` returns the kernel's `MemFree` on Linux, which counts
|
|
7
|
+
* only never-touched pages and EXCLUDES reclaimable page cache + slab. On
|
|
8
|
+
* Android — where the app process runs under a large page cache — `MemFree`
|
|
9
|
+
* undercounts allocatable memory by gigabytes, so a `MemFree`-driven arbiter
|
|
10
|
+
* evicts models it didn't need to and refuses loads that would have fit.
|
|
11
|
+
*
|
|
12
|
+
* `/proc/meminfo`'s `MemAvailable` is the kernel's own estimate of how much
|
|
13
|
+
* memory is available for starting new applications without swapping (free +
|
|
14
|
+
* reclaimable cache/slab, minus the low watermark). That is exactly the number
|
|
15
|
+
* the arbiter wants. Read it on Linux/Android; fall back to `os.freemem()` /
|
|
16
|
+
* `os.totalmem()` everywhere else (macOS, Windows) or if `/proc/meminfo` is
|
|
17
|
+
* unreadable or pre-3.14 (no `MemAvailable`).
|
|
18
|
+
*/
|
|
19
|
+
export interface SystemMemory {
|
|
20
|
+
freeBytes: number;
|
|
21
|
+
totalBytes: number;
|
|
22
|
+
}
|
|
23
|
+
/** Injectable for tests: returns the raw `/proc/meminfo` text, or null. */
|
|
24
|
+
export type MeminfoReader = () => string | null;
|
|
25
|
+
/**
|
|
26
|
+
* Read available + total system memory in bytes. Prefers `/proc/meminfo`
|
|
27
|
+
* `MemAvailable`/`MemTotal` on Linux; falls back to `os.freemem()` / `os.totalmem()`.
|
|
28
|
+
*
|
|
29
|
+
* @param read injectable meminfo reader (tests). Defaults to reading
|
|
30
|
+
* `/proc/meminfo` on Linux and returning null elsewhere.
|
|
31
|
+
*/
|
|
32
|
+
export declare function readSystemMemory(read?: MeminfoReader): SystemMemory;
|
|
33
|
+
//# sourceMappingURL=system-memory.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"system-memory.d.ts","sourceRoot":"","sources":["../../src/services/system-memory.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAKH,MAAM,WAAW,YAAY;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,2EAA2E;AAC3E,MAAM,MAAM,aAAa,GAAG,MAAM,MAAM,GAAG,IAAI,CAAC;AAmBhD;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC/B,IAAI,GAAE,aAAoC,GACxC,YAAY,CAUd"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local inference type re-exports.
|
|
3
|
+
*
|
|
4
|
+
* The canonical definitions live in `@elizaos/shared/local-inference`.
|
|
5
|
+
* This shim preserves the historical import path
|
|
6
|
+
* `../services/local-inference/types` for server-side code.
|
|
7
|
+
*/
|
|
8
|
+
export { type ActiveModelState, AGENT_MODEL_SLOTS, type AgentModelSlot, type CatalogModel, type CatalogQuantizationId, type CatalogQuantizationMatrix, type CatalogQuantizationVariant, type CpuFeatureProbe, classifyCatalogModelRuntimeClass, classifyInstalledModelRuntimeClass, type DownloadEvent, type DownloadJob, type DownloadState, type GpuProfile, type GpuProfileId, type HardwareFitLevel, type HardwareProbe, type InstalledModel, type KvCacheType, type LocalInferenceDownloadStatus, type LocalInferenceReadiness, type LocalInferenceSlotReadiness, type LocalRuntimeAcceleration, type LocalRuntimeBackend, type LocalRuntimeKernel, type LocalRuntimeOptimizations, type MobileHardwareProbe, type ModelAssignments, type ModelBucket, type ModelCategory, type ModelHubSnapshot, type OpenVinoDeviceKind, type OpenVinoHardwareProbe, type RuntimeClass, TEXT_GENERATION_SLOTS, type TextGenerationSlot, type TokenizerFamily, withRuntimeClass, } from "@elizaos/shared";
|
|
9
|
+
/** RAM requirements for a model bundle. */
|
|
10
|
+
export interface RamBudget {
|
|
11
|
+
/** Minimum RAM the bundle will boot under, in megabytes. */
|
|
12
|
+
minMb: number;
|
|
13
|
+
/** RAM the bundle expects for nominal workloads, in megabytes. */
|
|
14
|
+
recommendedMb: number;
|
|
15
|
+
/** Where the numbers came from. `manifest` only when both came from
|
|
16
|
+
* a validated `eliza-1.manifest.json` next to the installed bundle. */
|
|
17
|
+
source: "manifest" | "catalog";
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/services/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACN,KAAK,gBAAgB,EACrB,iBAAiB,EACjB,KAAK,cAAc,EACnB,KAAK,YAAY,EACjB,KAAK,qBAAqB,EAC1B,KAAK,yBAAyB,EAC9B,KAAK,0BAA0B,EAC/B,KAAK,eAAe,EACpB,gCAAgC,EAChC,kCAAkC,EAClC,KAAK,aAAa,EAClB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,WAAW,EAChB,KAAK,4BAA4B,EACjC,KAAK,uBAAuB,EAC5B,KAAK,2BAA2B,EAChC,KAAK,wBAAwB,EAC7B,KAAK,mBAAmB,EACxB,KAAK,kBAAkB,EACvB,KAAK,yBAAyB,EAC9B,KAAK,mBAAmB,EACxB,KAAK,gBAAgB,EACrB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,gBAAgB,EACrB,KAAK,kBAAkB,EACvB,KAAK,qBAAqB,EAC1B,KAAK,YAAY,EACjB,qBAAqB,EACrB,KAAK,kBAAkB,EACvB,KAAK,eAAe,EACpB,gBAAgB,GAChB,MAAM,iBAAiB,CAAC;AAEzB,2CAA2C;AAC3C,MAAM,WAAW,SAAS;IACzB,4DAA4D;IAC5D,KAAK,EAAE,MAAM,CAAC;IACd,kEAAkE;IAClE,aAAa,EAAE,MAAM,CAAC;IACtB;4EACwE;IACxE,MAAM,EAAE,UAAU,GAAG,SAAS,CAAC;CAC/B"}
|