@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TypeScript-side ABI surface for the in-process FFI streaming LLM path.
|
|
3
|
+
*
|
|
4
|
+
* This file mirrors the C header at
|
|
5
|
+
* `packages/inference/llama.cpp/omnivoice/src/ffi-streaming.h` — the
|
|
6
|
+
* function names are the same so that bun:ffi symbol resolution uses the
|
|
7
|
+
* exact C exports without any aliasing.
|
|
8
|
+
*
|
|
9
|
+
* Rationale for a separate ABI module
|
|
10
|
+
* ────────────────────────────────────
|
|
11
|
+
* `ffi-streaming-runner.ts` depends on the `ElizaInferenceFfi` handle from
|
|
12
|
+
* `voice/ffi-bindings.ts`, which in turn is tied to the omnivoice-fused
|
|
13
|
+
* build of `libelizainference`. That handle carries TTS, ASR, embedding,
|
|
14
|
+
* and streaming-LLM symbols together. The ABI declared here is the
|
|
15
|
+
* *streaming-LLM-only* slice that the mobile bootstrap needs to reason
|
|
16
|
+
* about independently — it does not assume the full fused binary is
|
|
17
|
+
* loaded. Callers that already have an `ElizaInferenceFfi` can implement
|
|
18
|
+
* `FfiLlmStreamingAbi` as a thin wrapper; callers that only have the
|
|
19
|
+
* llama.cpp-only `libelizainference.so` (e.g. the Android AOSP bootstrap
|
|
20
|
+
* before omnivoice ships) can implement it directly.
|
|
21
|
+
*
|
|
22
|
+
* MTP phasing
|
|
23
|
+
* ──────────────
|
|
24
|
+
* Phase 1 — target model only. The `FfiLlmStreamingAbi` alone is
|
|
25
|
+
* sufficient: open a single-model streaming session, prefill, generate,
|
|
26
|
+
* cancel, close. No drafter weights required.
|
|
27
|
+
*
|
|
28
|
+
* Phase 2 — speculative decoding. When `MobileInferenceCapabilities.
|
|
29
|
+
* mtpSupported` is `true`, swap to `FfiMtpStreamingAbi` which opens
|
|
30
|
+
* a paired drafter + verifier session and runs the speculative decode loop
|
|
31
|
+
* on-device. The two ABI surfaces share the same `FfiLlmHandle` brand so
|
|
32
|
+
* the dispatcher (`runtime-dispatcher.ts`) sees a uniform handle type.
|
|
33
|
+
*
|
|
34
|
+
* iOS XCFramework gap
|
|
35
|
+
* ───────────────────
|
|
36
|
+
* The ABI is defined here, the C header is frozen, but the iOS
|
|
37
|
+
* XCFramework that re-exports these symbols through the Swift bridge has
|
|
38
|
+
* not shipped yet. `loadIosStreamingLlmBinding()` in
|
|
39
|
+
* `ios-llama-streaming.ts` returns `null` until the XCFramework build
|
|
40
|
+
* lands. See `docs/inference/ffi-streaming.md` §iOS XCFramework gap for
|
|
41
|
+
* the current status.
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Core handle types
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Opaque handle to an open streaming-LLM session. The underlying C value
|
|
50
|
+
* is a pointer to a heap-allocated session struct; we brand it at the TS
|
|
51
|
+
* layer to prevent accidental mixing with other handle types.
|
|
52
|
+
*
|
|
53
|
+
* Concrete implementations will typically alias this to `bigint` (the
|
|
54
|
+
* bun:ffi representation of a C pointer) — but callers should treat it as
|
|
55
|
+
* opaque.
|
|
56
|
+
*/
|
|
57
|
+
export interface FfiLlmHandle {
|
|
58
|
+
readonly _brand: "FfiLlmHandle";
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Token callback fired from the generation background thread once per
|
|
63
|
+
* decoded token (in MTP mode: once per verifier-accepted token).
|
|
64
|
+
*
|
|
65
|
+
* `isDone` is `true` on the *last* invocation for a given generate call.
|
|
66
|
+
* After `isDone` the handle remains open but must not be passed to
|
|
67
|
+
* `generate` again until the caller re-prefills.
|
|
68
|
+
*
|
|
69
|
+
* The callback executes synchronously on the background thread the C
|
|
70
|
+
* library uses for decoding — callers must not call any FFI method
|
|
71
|
+
* back from inside the callback (the lock is not re-entrant).
|
|
72
|
+
*/
|
|
73
|
+
export type TokenCallback = (
|
|
74
|
+
tokenId: number,
|
|
75
|
+
tokenText: string,
|
|
76
|
+
isDone: boolean,
|
|
77
|
+
) => void;
|
|
78
|
+
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Single-model streaming ABI
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* C ABI surface for the in-process streaming LLM path.
|
|
85
|
+
*
|
|
86
|
+
* Function names match the C exports in `ffi-streaming.h` exactly; bun:ffi
|
|
87
|
+
* resolves them by string match against the shared library symbol table.
|
|
88
|
+
*
|
|
89
|
+
* All methods are synchronous from the JS perspective (bun:ffi calls are
|
|
90
|
+
* synchronous unless declared `nonblocking`). `generate` is the one
|
|
91
|
+
* exception: it returns immediately after scheduling the background decode
|
|
92
|
+
* loop and delivers results via `tokenCallback`; wrappers may instead return a Promise that resolves once the final `isDone` callback has fired.
|
|
93
|
+
*/
|
|
94
|
+
export interface FfiLlmStreamingAbi {
|
|
95
|
+
/**
|
|
96
|
+
* Open a streaming-LLM session against the model at `modelPath`.
|
|
97
|
+
*
|
|
98
|
+
* The model is memory-mapped into the process — this call may block
|
|
99
|
+
* briefly on a cold filesystem. Subsequent calls with the same path
|
|
100
|
+
* share the mmap region (the C library uses a ref-counted mmap cache).
|
|
101
|
+
*
|
|
102
|
+
* Returns an opaque handle on success, or `null` when:
|
|
103
|
+
* - the model file does not exist or cannot be read,
|
|
104
|
+
* - the device lacks the RAM required for `contextSizeTokens`,
|
|
105
|
+
* - `gpuLayers > 0` and the Metal / Vulkan device is unavailable.
|
|
106
|
+
*
|
|
107
|
+
* @param modelPath Absolute path to a GGUF model file.
|
|
108
|
+
* @param contextSizeTokens KV cache size in tokens (must be power-of-two
|
|
109
|
+
* aligned; the library rounds up if needed).
|
|
110
|
+
* @param numThreads CPU decode threads. 0 = auto-detect (uses
|
|
111
|
+
* `eliza_inference_default_thread_count()`).
|
|
112
|
+
* @param gpuLayers Number of transformer layers to offload to
|
|
113
|
+
* GPU. 0 = CPU only.
|
|
114
|
+
*/
|
|
115
|
+
eliza_inference_llm_stream_open(
|
|
116
|
+
modelPath: string,
|
|
117
|
+
contextSizeTokens: number,
|
|
118
|
+
numThreads: number,
|
|
119
|
+
gpuLayers: number,
|
|
120
|
+
): FfiLlmHandle | null;
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Prefill the KV cache with the supplied token ids.
|
|
124
|
+
*
|
|
125
|
+
* Blocks until all tokens are evaluated. On a large prompt this can
|
|
126
|
+
* take several hundred milliseconds on CPU — callers should not invoke
|
|
127
|
+
* on the main thread.
|
|
128
|
+
*
|
|
129
|
+
* @param handle Active session from `open`.
|
|
130
|
+
* @param promptTokens Pre-tokenized prompt as a flat array of int32 token ids.
|
|
131
|
+
* @param slotId KV slot index (0-based). Use -1 to allocate a
|
|
132
|
+
* fresh slot; use 0..N-1 to pin a conversational
|
|
133
|
+
* turn for KV reuse across multi-turn sessions.
|
|
134
|
+
* @returns Number of tokens prefilled, or -1 on error (invalid handle,
|
|
135
|
+
* OOM, or KV cache exhausted).
|
|
136
|
+
*/
|
|
137
|
+
eliza_inference_llm_stream_prefill(
|
|
138
|
+
handle: FfiLlmHandle,
|
|
139
|
+
promptTokens: Int32Array,
|
|
140
|
+
slotId: number,
|
|
141
|
+
): number;
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Start async token generation.
|
|
145
|
+
*
|
|
146
|
+
* The library spins up an internal worker thread (or reuses a pooled
|
|
147
|
+
* one) and begins decoding. Each decoded token fires `tokenCallback`
|
|
148
|
+
* from that thread. The final callback invocation has `isDone = true`.
|
|
149
|
+
*
|
|
150
|
+
* This call is non-blocking from the C caller's perspective: the C
|
|
151
|
+
* function returns 0 as soon as the worker is scheduled. From the JS
|
|
152
|
+
* perspective, callers should await the returned Promise — it resolves
|
|
153
|
+
* after the final `isDone = true` callback fires so that the JS async
|
|
154
|
+
* iterator can drain cleanly without a separate synchronisation
|
|
155
|
+
* mechanism. Mock implementations fulfil this contract by resolving
|
|
156
|
+
* the Promise after the last synthetic token; native FFI wrappers wrap
|
|
157
|
+
* a completion event or condition variable.
|
|
158
|
+
*
|
|
159
|
+
* Calling `generate` on a handle that is already generating is a hard
|
|
160
|
+
* error (returns -1 / rejects). Callers must wait for the Promise to
|
|
161
|
+
* resolve (or call `cancel` and await the resulting `isDone` callback)
|
|
162
|
+
* before re-using the handle.
|
|
163
|
+
*
|
|
164
|
+
* @param handle Active session from `open`.
|
|
165
|
+
* @param maxNewTokens Budget cap. Generation stops at `maxNewTokens`
|
|
166
|
+
* even if no EOS token was produced.
|
|
167
|
+
* @param temperature Softmax temperature. 0.0 = greedy.
|
|
168
|
+
* @param topP Nucleus sampling threshold (0.0–1.0).
|
|
169
|
+
* @param tokenCallback Callback fired per token from the decode thread.
|
|
170
|
+
* @returns Promise resolving to 0 on success, -1 on error.
|
|
171
|
+
*/
|
|
172
|
+
eliza_inference_llm_stream_generate(
|
|
173
|
+
handle: FfiLlmHandle,
|
|
174
|
+
maxNewTokens: number,
|
|
175
|
+
temperature: number,
|
|
176
|
+
topP: number,
|
|
177
|
+
tokenCallback: TokenCallback,
|
|
178
|
+
): number | Promise<number>;
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Signal the active generation to stop at the next safe cancellation
|
|
182
|
+
* point (after the current speculative batch is retired).
|
|
183
|
+
*
|
|
184
|
+
* This does NOT wait for the background thread to finish — the thread
|
|
185
|
+
* fires a final `tokenCallback` with `isDone = true` shortly after the
|
|
186
|
+
* cancel flag is observed. Callers that need to know the thread has
|
|
187
|
+
* stopped must wait for that final callback.
|
|
188
|
+
*
|
|
189
|
+
* Calling `cancel` on a handle that is not currently generating is a
|
|
190
|
+
* no-op.
|
|
191
|
+
*
|
|
192
|
+
* @param handle Active session from `open`.
|
|
193
|
+
*/
|
|
194
|
+
eliza_inference_llm_stream_cancel(handle: FfiLlmHandle): void;
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Release all resources associated with `handle`.
|
|
198
|
+
*
|
|
199
|
+
* Evicts the KV cache slots occupied by this session and releases the
|
|
200
|
+
* mmap reference. The model's mmap region stays mapped until the ref
|
|
201
|
+
* count reaches zero (i.e. all sessions against that path are closed).
|
|
202
|
+
*
|
|
203
|
+
* Calling `close` on a handle that is still generating is a hard error
|
|
204
|
+
* — cancel first and wait for `isDone` before closing.
|
|
205
|
+
*
|
|
206
|
+
* @param handle Active session from `open`.
|
|
207
|
+
*/
|
|
208
|
+
eliza_inference_llm_stream_close(handle: FfiLlmHandle): void;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// ---------------------------------------------------------------------------
|
|
212
|
+
// MTP (speculative decoding) streaming ABI — Phase 2
|
|
213
|
+
// ---------------------------------------------------------------------------
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* C ABI surface for paired drafter + verifier speculative decoding.
|
|
217
|
+
*
|
|
218
|
+
* Phase 2 only — the mobile runtime enables this path when
|
|
219
|
+
* `MobileInferenceCapabilities.mtpSupported` is `true`. Phase 1
|
|
220
|
+
* devices use `FfiLlmStreamingAbi` only (target model, no drafter).
|
|
221
|
+
*
|
|
222
|
+
* The MTP session holds two model contexts internally:
|
|
223
|
+
* 1. The *drafter* — a small, fast model that proposes `speculativeWindowSize`
|
|
224
|
+
* candidate tokens per step.
|
|
225
|
+
* 2. The *verifier* — the full target model that accepts or rejects the
|
|
226
|
+
* drafter's proposals in one parallel evaluation batch.
|
|
227
|
+
*
|
|
228
|
+
* The token callback fires once per *accepted* token (after the verifier's
|
|
229
|
+
* accept decision). Rejected tokens are silently discarded at the C layer;
|
|
230
|
+
* the JS consumer always sees a stream of accepted tokens identical to
|
|
231
|
+
* what a greedy target-only decode would have produced (assuming the
|
|
232
|
+
* drafter and verifier share vocabulary and a compatible chat template).
|
|
233
|
+
*
|
|
234
|
+
* Method signatures mirror `FfiLlmStreamingAbi` exactly — only the `open`
|
|
235
|
+
* argument list differs (adds drafter path + speculative window). This
|
|
236
|
+
* keeps the dispatcher (`runtime-dispatcher.ts`) agnostic to which ABI is
|
|
237
|
+
* in use.
|
|
238
|
+
*/
|
|
239
|
+
export interface FfiMtpStreamingAbi {
|
|
240
|
+
/**
|
|
241
|
+
* Open a paired drafter + verifier streaming session.
|
|
242
|
+
*
|
|
243
|
+
* Both models are mmap'd; the KV cache is sized to `contextSizeTokens`
|
|
244
|
+
* for the verifier and proportionally smaller for the drafter (the C
|
|
245
|
+
* library computes the drafter KV budget automatically from the
|
|
246
|
+
* `speculativeWindowSize`).
|
|
247
|
+
*
|
|
248
|
+
* @param drafterModelPath Absolute path to the drafter GGUF.
|
|
249
|
+
* @param verifierModelPath Absolute path to the verifier GGUF.
|
|
250
|
+
* @param contextSizeTokens Verifier KV size in tokens.
|
|
251
|
+
* @param numThreads CPU threads for verifier (drafter shares
|
|
252
|
+
* the same thread pool).
|
|
253
|
+
* @param gpuLayers Verifier GPU layer count. The drafter
|
|
254
|
+
* always runs on CPU in Phase 2 to avoid
|
|
255
|
+
* competing for Metal/Vulkan resources.
|
|
256
|
+
* @param speculativeWindowSize Number of drafter candidate tokens per
|
|
257
|
+
* speculative step (1–16; 4 is a safe
|
|
258
|
+
* starting point for mobile).
|
|
259
|
+
* @returns Opaque session handle, or `null` on failure.
|
|
260
|
+
*/
|
|
261
|
+
eliza_inference_mtp_stream_open(
|
|
262
|
+
drafterModelPath: string,
|
|
263
|
+
verifierModelPath: string,
|
|
264
|
+
contextSizeTokens: number,
|
|
265
|
+
numThreads: number,
|
|
266
|
+
gpuLayers: number,
|
|
267
|
+
speculativeWindowSize: number,
|
|
268
|
+
): FfiLlmHandle | null;
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* Prefill both the drafter and verifier KV caches in a single blocking
|
|
272
|
+
* call. The verifier is prefilled first (it owns the ground-truth KV
|
|
273
|
+
* state); the drafter is then fast-forwarded to match.
|
|
274
|
+
*
|
|
275
|
+
* Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_prefill`.
|
|
276
|
+
*/
|
|
277
|
+
eliza_inference_mtp_stream_prefill(
|
|
278
|
+
handle: FfiLlmHandle,
|
|
279
|
+
promptTokens: Int32Array,
|
|
280
|
+
slotId: number,
|
|
281
|
+
): number;
|
|
282
|
+
|
|
283
|
+
/**
|
|
284
|
+
* Start speculative-decoding generation. The token callback fires for
|
|
285
|
+
* each verifier-accepted token — not for each drafter proposal.
|
|
286
|
+
*
|
|
287
|
+
* Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_generate`.
|
|
288
|
+
*/
|
|
289
|
+
eliza_inference_mtp_stream_generate(
|
|
290
|
+
handle: FfiLlmHandle,
|
|
291
|
+
maxNewTokens: number,
|
|
292
|
+
temperature: number,
|
|
293
|
+
topP: number,
|
|
294
|
+
tokenCallback: TokenCallback,
|
|
295
|
+
): number | Promise<number>;
|
|
296
|
+
|
|
297
|
+
/**
|
|
298
|
+
* Cancel an active MTP generation at the next speculation boundary.
|
|
299
|
+
* Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_cancel`.
|
|
300
|
+
*/
|
|
301
|
+
eliza_inference_mtp_stream_cancel(handle: FfiLlmHandle): void;
|
|
302
|
+
|
|
303
|
+
/**
|
|
304
|
+
* Release both drafter and verifier sessions.
|
|
305
|
+
* Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_close`.
|
|
306
|
+
*/
|
|
307
|
+
eliza_inference_mtp_stream_close(handle: FfiLlmHandle): void;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// ---------------------------------------------------------------------------
|
|
311
|
+
// Mobile capability snapshot
|
|
312
|
+
// ---------------------------------------------------------------------------
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* Device-side inference capability snapshot used by the mobile bootstrap
|
|
316
|
+
* to decide which ABI path to activate at startup.
|
|
317
|
+
*
|
|
318
|
+
* Produced by `detectMobileCapabilities()`. The runtime re-probes on
|
|
319
|
+
* every foreground resume (thermal / memory state can change while the
|
|
320
|
+
* app is backgrounded).
|
|
321
|
+
*/
|
|
322
|
+
export type MobileInferenceCapabilities = {
|
|
323
|
+
/**
|
|
324
|
+
* True when the `eliza_inference_llm_stream_*` symbols are present in
|
|
325
|
+
* the loaded `libelizainference` and `llmStreamSupported()` returns 1.
|
|
326
|
+
* This is the gate for Phase 1 on-device inference.
|
|
327
|
+
*/
|
|
328
|
+
streamingLlm: boolean;
|
|
329
|
+
|
|
330
|
+
/**
|
|
331
|
+
* True when `streamingLlm` is true AND the drafter GGUF is bundled AND
|
|
332
|
+
* the device's thermal state is below `serious`. Gate for Phase 2
|
|
333
|
+
* speculative decoding.
|
|
334
|
+
*/
|
|
335
|
+
mtpSupported: boolean;
|
|
336
|
+
|
|
337
|
+
/**
|
|
338
|
+
* True when the `eliza_inference_tts_synthesize_stream` symbol is
|
|
339
|
+
* present and `ttsStreamSupported()` returns 1. Gate for the OmniVoice
|
|
340
|
+
* TTS streaming path.
|
|
341
|
+
*/
|
|
342
|
+
omnivoiceStreaming: boolean;
|
|
343
|
+
|
|
344
|
+
/**
|
|
345
|
+
* Device-reported maximum KV context in tokens. Derived from available
|
|
346
|
+
* device RAM minus the model weights footprint. The runtime clamps
|
|
347
|
+
* user-configured context sizes to this value.
|
|
348
|
+
*
|
|
349
|
+
* 0 when `streamingLlm` is false (no context available).
|
|
350
|
+
*/
|
|
351
|
+
maxContextTokens: number;
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Number of transformer layers the device can offload to GPU/NPU at
|
|
355
|
+
* the current thermal state without risking thermal throttling. 0 means
|
|
356
|
+
* CPU-only execution. The runtime uses this as the initial `gpuLayers`
|
|
357
|
+
* argument to `open`; it can be reduced dynamically when the thermal
|
|
358
|
+
* state worsens mid-session.
|
|
359
|
+
*/
|
|
360
|
+
recommendedGpuLayers: number;
|
|
361
|
+
};
|
|
362
|
+
|
|
363
|
+
// ---------------------------------------------------------------------------
|
|
364
|
+
// Capability detection
|
|
365
|
+
// ---------------------------------------------------------------------------
|
|
366
|
+
|
|
367
|
+
/**
 * Run an optional capability probe on a binding and normalise its result.
 *
 * Native probes report support as an integer (1 / 0) while mocks tend to
 * return real booleans; both are collapsed to a strict `boolean` here so
 * the capability snapshot never carries a numeric flag.
 *
 * @param binding     The FFI binding viewed as a plain property bag.
 * @param probeName   Name of the zero-argument probe method to look up.
 * @param whenMissing Value to report when the binding has no such method.
 * @returns `true` / `false` — never the raw probe result.
 */
function probeCapabilityFlag(
	binding: Record<string, unknown>,
	probeName: string,
	whenMissing: boolean,
): boolean {
	const probe = binding[probeName];
	if (typeof probe !== "function") {
		return whenMissing;
	}
	// Invoke as a method so probes that read `this` keep working.
	return Boolean(probe.call(binding));
}

/**
 * Derive a `MobileInferenceCapabilities` snapshot from an FFI binding.
 *
 * Without a binding (`null`; `undefined` from untyped callers is tolerated
 * as well) every boolean flag is `false` and every numeric field is 0, so
 * downstream code can always read the snapshot without branching on
 * "was an FFI loaded".
 *
 * With a binding:
 *   1. `streamingLlm` comes from the optional `llmStreamSupported()` probe.
 *      A binding that lacks the probe is assumed to support streaming —
 *      the caller is expected to have verified the stream symbols before
 *      handing the binding over.
 *   2. `omnivoiceStreaming` comes from the optional `ttsStreamSupported()`
 *      probe and is `false` when the probe is absent. The probe is not part
 *      of `FfiLlmStreamingAbi`, so the binding is inspected as a plain
 *      record to see whether it happens to be the fused omnivoice build.
 *   3. `mtpSupported` is always `false`: drafter detection needs a
 *      platform-specific bundle probe that is outside this function.
 *      Callers that ran that probe set the field on the returned snapshot.
 *   4. `maxContextTokens` is a conservative 2048 when streaming is
 *      available (0 otherwise) and `recommendedGpuLayers` is 0 (CPU only).
 *
 * Probe results are normalised to real booleans, so a native probe that
 * returns 1 / 0 still yields `true` / `false` in the snapshot.
 *
 * @param ffi A loaded FFI binding, or `null` for an all-false defaults
 *            snapshot.
 * @returns The capability snapshot for the given binding.
 */
export function detectMobileCapabilities(
	ffi: FfiLlmStreamingAbi | null,
): MobileInferenceCapabilities {
	// `== null` also catches `undefined` passed by untyped JS callers.
	if (ffi == null) {
		return {
			streamingLlm: false,
			mtpSupported: false,
			omnivoiceStreaming: false,
			maxContextTokens: 0,
			recommendedGpuLayers: 0,
		};
	}

	// The declared ABI type exposes neither probe; peek at the full binding
	// without failing when the probes are missing.
	const binding = ffi as unknown as Record<string, unknown>;

	const streamingLlm = probeCapabilityFlag(binding, "llmStreamSupported", true);
	const omnivoiceStreaming = probeCapabilityFlag(
		binding,
		"ttsStreamSupported",
		false,
	);

	return {
		streamingLlm,
		// Phase 1 never enables MTP here; see the function documentation.
		mtpSupported: false,
		omnivoiceStreaming,
		// Conservative Phase 1 defaults until a platform-specific capability
		// probe can supply device-derived values.
		maxContextTokens: streamingLlm ? 2048 : 0,
		recommendedGpuLayers: 0,
	};
}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process FFI streaming backend adapter.
|
|
3
|
+
*
|
|
4
|
+
* Implements `LocalInferenceBackend` as the optimized in-process
|
|
5
|
+
* llama.cpp path used by Eliza-1 on desktop and mobile.
|
|
6
|
+
*
|
|
7
|
+
* What this class deliberately does NOT do:
|
|
8
|
+
* - Own the FFI context. The runtime provider passed to this class owns
|
|
9
|
+
* native load/unload and hands back the binding, context, and tokenizer.
|
|
10
|
+
* - Decode image bytes or call mtmd directly. Vision requests are validated
|
|
11
|
+
* here, then forwarded to runtimes that expose `describeImage`.
|
|
12
|
+
*/
|
|
13
|
+
import type { BackendPlan, GenerateArgs, GenerateResult, LocalGenerateWithUsageResult, LocalInferenceBackend } from "./backend";
|
|
14
|
+
import type { FfiStreamingRunner } from "./ffi-streaming-runner";
|
|
15
|
+
import type { LlmCtxHandle, LlmStreamingBinding } from "./llm-streaming-binding";
|
|
16
|
+
/**
|
|
17
|
+
* Constructor-injected adapter that resolves the FFI binding, context, and
|
|
18
|
+
* tokenizer for a given load. Two responsibilities:
|
|
19
|
+
*
|
|
20
|
+
* 1. Decide whether the FFI path is viable on the current binding
|
|
21
|
+
* (`supported()`). Mirrors `LlmStreamingBinding.llmStreamSupported()`
|
|
22
|
+
* plus any higher-level constraints (e.g. dylib path exists, build
|
|
23
|
+
* target matches the bundle's required kernels).
|
|
24
|
+
* 2. Lifecycle: `acquire(plan)` returns the FFI runner ready for
|
|
25
|
+
* `generate()` against the requested model, plus a tokenizer that
|
|
26
|
+
* matches that model's vocab. `release()` tears everything down.
|
|
27
|
+
*
|
|
28
|
+
* Production runtime implementation: the fused libelizainference path
|
|
29
|
+
* (`desktop-fused-ffi-backend-runtime.ts`), which wraps `ElizaInferenceFfi`
|
|
30
|
+
* via `wrapElizaInferenceFfi()` from `services/llm-streaming-binding.ts`.
|
|
31
|
+
* libllama has been retired — there is no second runtime behind this slot.
|
|
32
|
+
*/
|
|
33
|
+
export interface FfiBackendRuntime {
|
|
34
|
+
supported(): boolean;
|
|
35
|
+
acquire(plan: BackendPlan): Promise<FfiBackendSession>;
|
|
36
|
+
release(): Promise<void>;
|
|
37
|
+
/**
|
|
38
|
+
* Optional parallel-slot pool surface. When the runtime exposes a
|
|
39
|
+
* ctx pool (the desktop libllama path does), `parallelSlots()`
|
|
40
|
+
* reports the live count and `resizeParallel(N)` grows/shrinks it.
|
|
41
|
+
* Runtimes without a pool report 1 and ignore resize requests.
|
|
42
|
+
*/
|
|
43
|
+
parallelSlots?(): number;
|
|
44
|
+
resizeParallel?(target: number): Promise<boolean>;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Result of `FfiBackendRuntime.acquire()` — a live FFI session bound to a
|
|
48
|
+
* specific loaded model.
|
|
49
|
+
*/
|
|
50
|
+
export interface FfiBackendSession {
|
|
51
|
+
readonly binding: LlmStreamingBinding;
|
|
52
|
+
readonly ctx: LlmCtxHandle;
|
|
53
|
+
readonly runner: FfiStreamingRunner;
|
|
54
|
+
/**
|
|
55
|
+
* Tokenize a prompt string into model token ids using the loaded model's
|
|
56
|
+
* tokenizer. The vocab MUST match the GGUF — mismatches produce gibberish
|
|
57
|
+
* silently. The runtime is responsible for asserting this at acquire
|
|
58
|
+
* time.
|
|
59
|
+
*/
|
|
60
|
+
readonly tokenize: (prompt: string) => Int32Array;
|
|
61
|
+
/**
|
|
62
|
+
* Native MTP speculative-decoding policy from the catalog. `null`
|
|
63
|
+
* disables speculative decoding for this session.
|
|
64
|
+
*/
|
|
65
|
+
readonly mtp: {
|
|
66
|
+
specType: "draft-mtp";
|
|
67
|
+
draftMin: number;
|
|
68
|
+
draftMax: number;
|
|
69
|
+
gpuLayers: number | "auto";
|
|
70
|
+
} | null;
|
|
71
|
+
/**
|
|
72
|
+
* Absolute path to a *separate* MTP drafter GGUF resolved during load.
|
|
73
|
+
* `null` means same-file MTP: the NextN head is embedded in the main
|
|
74
|
+
* text GGUF and the native runner activates `--spec-type draft-mtp`
|
|
75
|
+
* with no `-md`. Speculative decoding is governed by `mtp`, not by the
|
|
76
|
+
* presence of this path.
|
|
77
|
+
*/
|
|
78
|
+
readonly draftModelPath: string | null;
|
|
79
|
+
/**
|
|
80
|
+
* Multimodal projector (mmproj) GGUF path for vision describe. Resolved
|
|
81
|
+
* from `plan.overrides.mmprojPath` at acquire time. `null` disables
|
|
82
|
+
* vision — `describeImage` then throws an actionable error.
|
|
83
|
+
*/
|
|
84
|
+
readonly mmprojPath: string | null;
|
|
85
|
+
/**
|
|
86
|
+
* Per-load runtime config the fused libelizainference path applies at its
|
|
87
|
+
* first `llmStreamOpen` (gpuLayers + KV-cache quant types). The desktop
|
|
88
|
+
* libllama runtime applies these at `loadModel()` instead and leaves this
|
|
89
|
+
* `null` — the backend forwards them into the runner's per-call config only
|
|
90
|
+
* when present, so the fused path mirrors the libllama load decision without
|
|
91
|
+
* the libllama path double-applying them.
|
|
92
|
+
*/
|
|
93
|
+
readonly loadConfig?: {
|
|
94
|
+
gpuLayers?: number;
|
|
95
|
+
cacheTypeK?: string | null;
|
|
96
|
+
cacheTypeV?: string | null;
|
|
97
|
+
} | null;
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Adapter that satisfies `LocalInferenceBackend` by delegating to
|
|
101
|
+
* `FfiStreamingRunner`. The `id` is `"llama-cpp"` because this is the
|
|
102
|
+
* in-process variant of the optimized llama.cpp path.
|
|
103
|
+
*/
|
|
104
|
+
export declare class FfiStreamingBackend implements LocalInferenceBackend {
|
|
105
|
+
private readonly runtime;
|
|
106
|
+
readonly id: "llama-cpp";
|
|
107
|
+
private session;
|
|
108
|
+
private loadedPath;
|
|
109
|
+
constructor(runtime: FfiBackendRuntime);
|
|
110
|
+
available(): Promise<boolean>;
|
|
111
|
+
hasLoadedModel(): boolean;
|
|
112
|
+
currentModelPath(): string | null;
|
|
113
|
+
load(plan: BackendPlan): Promise<void>;
|
|
114
|
+
unload(): Promise<void>;
|
|
115
|
+
generate(args: GenerateArgs): Promise<GenerateResult>;
|
|
116
|
+
generateWithUsage(args: GenerateArgs & {
|
|
117
|
+
slotId?: number;
|
|
118
|
+
}): Promise<LocalGenerateWithUsageResult>;
|
|
119
|
+
/**
|
|
120
|
+
* Persist the active session's KV state to a per-conversation file.
|
|
121
|
+
* v1 uses `llama_state_seq_save_file` against seq_id=0. The on-disk file
|
|
122
|
+
* path mirrors the subprocess backend's conversation-keyed slot layout
|
|
123
|
+
* (`<cacheDir>/<conversationId>/<slotId>.kv`) so a switch between
|
|
124
|
+
* FFI and subprocess can resume each other's slots — once both
|
|
125
|
+
* paths agree on the file format.
|
|
126
|
+
*/
|
|
127
|
+
persistConversationKv(conversationId: string, slotId: number): Promise<void>;
|
|
128
|
+
/** Restore a previously persisted KV state. Mirror of `persistConversationKv`. */
|
|
129
|
+
restoreConversationKv(conversationId: string, slotId: number): Promise<boolean>;
|
|
130
|
+
/**
|
|
131
|
+
* Pre-decode `promptPrefix` so the next `generate` against the same
|
|
132
|
+
* `cacheKey` skips re-prefill. Returns `false` when the prefix is
|
|
133
|
+
* empty or no session is loaded. The FFI runner serializes by
|
|
134
|
+
* `cacheKey` internally via the `slotInFlight` map.
|
|
135
|
+
*/
|
|
136
|
+
prewarmConversation(promptPrefix: string, opts: {
|
|
137
|
+
slotId: number;
|
|
138
|
+
cacheKey: string;
|
|
139
|
+
}): Promise<boolean>;
|
|
140
|
+
/**
|
|
141
|
+
* True when Eliza-1 native MTP is active for the loaded target model.
|
|
142
|
+
* Covers both shapes: same-file MTP (NextN head embedded in the text
|
|
143
|
+
* GGUF, `draftModelPath` null) and separate-drafter MTP.
|
|
144
|
+
*/
|
|
145
|
+
mtpEnabled(): boolean;
|
|
146
|
+
/**
|
|
147
|
+
* Parallel-slot pool size. Routed to the runtime's ctx pool when one
|
|
148
|
+
* exists; defaults to 1 otherwise.
|
|
149
|
+
*/
|
|
150
|
+
parallelSlots(): number;
|
|
151
|
+
/**
|
|
152
|
+
* Grow or shrink the runtime's ctx pool to `target` slots. Returns
|
|
153
|
+
* false when the runtime has no pool surface (in which case parallel
|
|
154
|
+
* resize is ignored — the conversation registry tolerates
|
|
155
|
+
* fixed 1-slot operation).
|
|
156
|
+
*/
|
|
157
|
+
resizeParallel(target: number): Promise<boolean>;
|
|
158
|
+
/**
|
|
159
|
+
* Vision describe via mmproj. Requires:
|
|
160
|
+
* - The shim built with `-DELIZA_ENABLE_VISION=1` (ELIZA_ENABLE_VISION=1
|
|
161
|
+
* set in the build script's environment). Without it the runtime throws an
|
|
162
|
+
* actionable error.
|
|
163
|
+
* - `plan.overrides.mmprojPath` was passed at load time so the
|
|
164
|
+
* adapter knows which mmproj GGUF to feed clip.
|
|
165
|
+
*/
|
|
166
|
+
describeImage(args: {
|
|
167
|
+
bytes: Uint8Array;
|
|
168
|
+
mimeType?: string;
|
|
169
|
+
prompt?: string;
|
|
170
|
+
maxTokens?: number;
|
|
171
|
+
temperature?: number;
|
|
172
|
+
signal?: AbortSignal;
|
|
173
|
+
}): Promise<{
|
|
174
|
+
text: string;
|
|
175
|
+
projectorMs?: number;
|
|
176
|
+
decodeMs?: number;
|
|
177
|
+
}>;
|
|
178
|
+
currentMmprojPath(): string | null;
|
|
179
|
+
}
|
|
180
|
+
//# sourceMappingURL=ffi-streaming-backend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ffi-streaming-backend.d.ts","sourceRoot":"","sources":["ffi-streaming-backend.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EACX,WAAW,EACX,YAAY,EACZ,cAAc,EACd,4BAA4B,EAC5B,qBAAqB,EACrB,MAAM,WAAW,CAAC;AACnB,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,KAAK,EACX,YAAY,EACZ,mBAAmB,EACnB,MAAM,yBAAyB,CAAC;AAGjC;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,WAAW,iBAAiB;IACjC,SAAS,IAAI,OAAO,CAAC;IACrB,OAAO,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACvD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACzB;;;;;OAKG;IACH,aAAa,CAAC,IAAI,MAAM,CAAC;IACzB,cAAc,CAAC,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CAClD;AAED;;;GAGG;AACH,MAAM,WAAW,iBAAiB;IACjC,QAAQ,CAAC,OAAO,EAAE,mBAAmB,CAAC;IACtC,QAAQ,CAAC,GAAG,EAAE,YAAY,CAAC;IAC3B,QAAQ,CAAC,MAAM,EAAE,kBAAkB,CAAC;IACpC;;;;;OAKG;IACH,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,UAAU,CAAC;IAClD;;;OAGG;IACH,QAAQ,CAAC,GAAG,EAAE;QACb,QAAQ,EAAE,WAAW,CAAC;QACtB,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,GAAG,MAAM,CAAC;KAC3B,GAAG,IAAI,CAAC;IACT;;;;;;OAMG;IACH,QAAQ,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC;;;;OAIG;IACH,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC;;;;;;;OAOG;IACH,QAAQ,CAAC,UAAU,CAAC,EAAE;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC3B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KAC3B,GAAG,IAAI,CAAC;CACT;AAED;;;;GAIG;AACH,qBAAa,mBAAoB,YAAW,qBAAqB;IAMpD,OAAO,CAAC,QAAQ,CAAC,OAAO;IALpC,QAAQ,CAAC,EAAE,EAAG,WAAW,CAAU;IAEnC,OAAO,CAAC,OAAO,CAAkC;IACjD,OAAO,CAAC,UAAU,CAAuB;gBAEZ,OAAO,EAAE,iBAAiB;IAEjD,SAAS,IAAI,OAAO,CAAC,OAAO,CAAC;IAInC,cAAc,IAAI,OAAO;IAIzB,gBAAgB,IAAI,MAAM,GAAG,IAAI;IAI3B,IAAI,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtC,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAgBvB,QAAQ,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC;IAKrD,iBAAiB,CACtB,IAAI,EAAE,YAAY,GAAG;QAAE,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GACtC,OAAO,CAAC,4BAA4B,CAAC;IAqDxC;;;;;;;OAOG;IACG,qBAAqB,CAC1B,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC;IAYhB,kFAAkF;IAC5E,qBAAqB,CAC1B,
cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,MAAM,GACZ,OAAO,CAAC,OAAO,CAAC;IASnB;;;;;OAKG;IACG,mBAAmB,CACxB,YAAY,EAAE,MAAM,EACpB,IAAI,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GACxC,OAAO,CAAC,OAAO,CAAC;IAsBnB;;;;OAIG;IACH,UAAU,IAAI,OAAO;IAIrB;;;OAGG;IACH,aAAa,IAAI,MAAM;IAIvB;;;;;OAKG;IACG,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAKtD;;;;;;;OAOG;IACG,aAAa,CAAC,IAAI,EAAE;QACzB,KAAK,EAAE,UAAU,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,MAAM,CAAC,EAAE,WAAW,CAAC;KACrB,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAwCtE,iBAAiB,IAAI,MAAM,GAAG,IAAI;CAGlC"}
|