@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scraper for llama-server's `/metrics` (Prometheus exposition format)
|
|
3
|
+
* endpoint. Translates the running counters into the
|
|
4
|
+
* Anthropic-SDK-shaped `usage` block that callers already know how to
|
|
5
|
+
* consume from the cloud Anthropic plugin.
|
|
6
|
+
*
|
|
7
|
+
* llama-server publishes the following counters (per-process, monotonic):
|
|
8
|
+
*
|
|
9
|
+
* llamacpp:n_decode_total — context tokens decoded (prefill + gen)
|
|
10
|
+
* llamacpp:n_tokens_predicted_total — output tokens
|
|
11
|
+
* llamacpp:prompt_tokens_total — total input tokens accepted
|
|
12
|
+
* llamacpp:n_past_max — high-water mark of cached past-tokens
|
|
13
|
+
* llamacpp:n_prompt_tokens_processed_total — fresh tokens prefilled
|
|
14
|
+
* (i.e. cache MISS), excludes cache hits
|
|
15
|
+
* llamacpp:kv_cache_tokens — current size of KV cache (gauge)
|
|
16
|
+
* llamacpp:kv_cache_used_cells — slots with active KV (gauge)
|
|
17
|
+
*
|
|
18
|
+
* For MTP speculative decoding, the fork additionally publishes:
|
|
19
|
+
*
|
|
20
|
+
* llamacpp:n_drafted_total — drafter-emitted tokens
|
|
21
|
+
* llamacpp:n_drafted_accepted_total — accepted speculative tokens
|
|
22
|
+
*
|
|
23
|
+
* The mapping into Anthropic shape:
|
|
24
|
+
*
|
|
25
|
+
* prompt_tokens_total → input_tokens
|
|
26
|
+
* n_tokens_predicted_total → output_tokens
|
|
27
|
+
* n_prompt_tokens_processed_total → cache_creation_input_tokens
|
|
28
|
+
* prompt_tokens_total - n_prompt_tokens_processed_total → cache_read_input_tokens
|
|
29
|
+
* n_drafted_total / n_drafted_accepted_total → MTP extension fields
|
|
30
|
+
*
|
|
31
|
+
* Counters are taken as deltas across two snapshots: take one before
|
|
32
|
+
* `generate`, one after, and subtract. Losing a few samples to process
|
|
33
|
+
* restart is acceptable — the deltas are useful for the call's own
|
|
34
|
+
* usage accounting, not for global monitoring.
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
export interface LlamaServerMetricSnapshot {
	/** Wall-clock timestamp (ms) at which the snapshot was taken; diagnostic only. */
	takenAtMs: number;
	/** `true` when `/metrics` was fetched and parsed; `false` marks a failed scrape. */
	scrapeOk?: boolean;
	/** `true` when the scrape contained at least one generation/speculation counter. */
	hasGenerationCounters?: boolean;
	/** Running total of input tokens accepted (`llamacpp:prompt_tokens_total`). */
	promptTokensTotal: number;
	/** Running total of output tokens (`llamacpp:n_tokens_predicted_total`). */
	predictedTokensTotal: number;
	/** Running total of freshly prefilled tokens, i.e. prompt-cache MISSES. */
	promptTokensProcessedTotal: number;
	/** Running total of tokens emitted by the speculative drafter. */
	draftedTotal: number;
	/** Running total of drafted tokens that were accepted. */
	acceptedTotal: number;
	/** Gauge: current size of the KV cache. */
	kvCacheTokens: number;
	/** Gauge: number of slots currently holding active KV. */
	kvCacheUsedCells: number;
}
|
|
55
|
+
|
|
56
|
+
/** Snapshot keys that hold a numeric counter or gauge (every key except the boolean flags). */
type MetricNumericField = Exclude<
	keyof LlamaServerMetricSnapshot,
	"scrapeOk" | "hasGenerationCounters"
>;

/**
 * Prometheus metric name → snapshot field. Several spellings of the
 * drafted/accepted counters map onto the same canonical field.
 */
const METRIC_KEYS: Record<string, MetricNumericField> = {
	"llamacpp:prompt_tokens_total": "promptTokensTotal",
	"llamacpp:n_tokens_predicted_total": "predictedTokensTotal",
	"llamacpp:n_prompt_tokens_processed_total": "promptTokensProcessedTotal",
	"llamacpp:n_drafted_total": "draftedTotal",
	"llamacpp:n_drafted": "draftedTotal",
	"llamacpp:n_drafted_accepted_total": "acceptedTotal",
	"llamacpp:n_drafted_accepted": "acceptedTotal",
	"llamacpp:n_accepted_total": "acceptedTotal",
	"llamacpp:n_accepted": "acceptedTotal",
	"llamacpp:kv_cache_tokens": "kvCacheTokens",
	"llamacpp:kv_cache_used_cells": "kvCacheUsedCells",
};

/** Default upper bound, in milliseconds, for a single `/metrics` scrape. */
const DEFAULT_METRICS_SCRAPE_TIMEOUT_MS = 2_000;

/** Fields whose presence in a scrape sets `hasGenerationCounters` (the KV gauges do not). */
const GENERATION_COUNTER_FIELDS: ReadonlySet<MetricNumericField> =
	new Set<MetricNumericField>([
		"promptTokensTotal",
		"predictedTokensTotal",
		"promptTokensProcessedTotal",
		"draftedTotal",
		"acceptedTotal",
	]);

/**
 * One exposition-format sample: `name{labels?} value [timestamp]`.
 * The trailing lookahead requires the value to be followed by whitespace or
 * end-of-line, so a malformed value such as `12abc` is rejected instead of
 * being read as `12`.
 */
const METRIC_SAMPLE_PATTERN =
	/^([a-zA-Z_:][\w:]*)(\{[^}]*\})?\s+([+-]?\d+(?:\.\d+)?(?:e[+-]?\d+)?)(?=\s|$)/i;

/**
 * Parse a Prometheus exposition-format payload into a metric snapshot.
 * Unknown or malformed lines are silently skipped — counters we don't
 * recognise are not interesting and metric exporters add new ones over
 * time.
 *
 * llama-server usually exposes one sample per metric (no labels), e.g.
 *   `llamacpp:prompt_tokens_total 1234`
 * Some MTP forks expose per-slot labelled samples, e.g.
 *   `llamacpp:n_drafted_accepted_total{slot_id="0"} 12`
 * Labelled samples are summed unless an unlabelled total exists for the same
 * canonical field, in which case the unlabelled total wins.
 *
 * @param body - Raw text of the `/metrics` response.
 * @param takenAtMs - Timestamp recorded on the snapshot; defaults to `Date.now()`.
 * @returns A snapshot with `scrapeOk: true`; fields with no matching sample stay 0.
 */
export function parsePrometheusMetrics(
	body: string,
	takenAtMs: number = Date.now(),
): LlamaServerMetricSnapshot {
	const snapshot: LlamaServerMetricSnapshot = {
		takenAtMs,
		scrapeOk: true,
		hasGenerationCounters: false,
		promptTokensTotal: 0,
		predictedTokensTotal: 0,
		promptTokensProcessedTotal: 0,
		draftedTotal: 0,
		acceptedTotal: 0,
		kvCacheTokens: 0,
		kvCacheUsedCells: 0,
	};
	const buckets = new Map<
		MetricNumericField,
		{ unlabeled: number | null; labeledSum: number }
	>();

	for (const rawLine of body.split(/\r?\n/)) {
		const line = rawLine.trim();
		// Blank lines and `# HELP` / `# TYPE` comments carry no samples.
		if (!line || line.startsWith("#")) continue;
		const match = METRIC_SAMPLE_PATTERN.exec(line);
		if (!match) continue;
		const name = match[1];
		const labels = match[2];
		const value = Number(match[3]);
		if (name === undefined || !Number.isFinite(value)) continue;
		// Own-property check: a metric literally named `constructor` or
		// `toString` must not resolve to an inherited Object.prototype member.
		if (!Object.prototype.hasOwnProperty.call(METRIC_KEYS, name)) continue;
		const field = METRIC_KEYS[name];
		if (!field) continue;
		if (GENERATION_COUNTER_FIELDS.has(field)) {
			snapshot.hasGenerationCounters = true;
		}
		const bucket = buckets.get(field) ?? { unlabeled: null, labeledSum: 0 };
		if (labels) bucket.labeledSum += value;
		else bucket.unlabeled = value;
		buckets.set(field, bucket);
	}

	// An unlabelled total takes precedence over the sum of labelled samples.
	for (const [field, bucket] of buckets) {
		snapshot[field] = bucket.unlabeled ?? bucket.labeledSum;
	}

	return snapshot;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Usage block in the Anthropic SDK shape, optionally carrying extra
 * MTP speculative-decoding figures. The cloud plugin (plugin-anthropic)
 * emits the first three fields verbatim; local inference adds the
 * `mtp_*` fields when speculative decoding is active, so callers that
 * already consume the cloud `usage` shape need no change.
 */
export interface LocalUsageBlock {
	/** Open-ended: additional keys are permitted alongside the known ones. */
	[key: string]: unknown;
	/** Input tokens for the call. */
	input_tokens: number;
	/** Output tokens for the call. */
	output_tokens: number;
	/** Input tokens that had to be freshly prefilled. */
	cache_creation_input_tokens: number;
	/** Input tokens that were not freshly prefilled. */
	cache_read_input_tokens: number;
	/** Tokens emitted by the drafter during the call. */
	mtp_drafted_tokens?: number;
	/** Drafted tokens that were accepted during the call. */
	mtp_accepted_tokens?: number;
	/** 0..1 — share of drafted tokens that were accepted. */
	mtp_acceptance_rate?: number;
	/** 0..1 — share of input tokens that hit a warm slot (cache reuse). */
	cache_hit_rate?: number;
}
|
|
170
|
+
|
|
171
|
+
/**
 * Compute the Anthropic-shape usage block for a single generation by
 * differencing two snapshots. `before` is taken just before the request,
 * `after` just after the response was received. Negative deltas (caused
 * by a metric reset between snapshots, e.g. server restart) are clamped
 * to 0 — losing the sample is preferable to surfacing nonsense to the
 * caller.
 *
 * If either snapshot is marked `scrapeOk: false`, its zeros are
 * placeholders rather than real counter values, so every metric delta is
 * treated as 0. Without this, a failed `before` scrape followed by a
 * successful `after` scrape would report the server's lifetime totals as
 * this call's usage. Snapshots that omit `scrapeOk` are trusted.
 *
 * Pass `responseUsage` to override input/output counts when the response
 * payload itself reports per-call counters that are more accurate than
 * the metric delta — llama-server's chat completion response includes
 * `usage.{prompt,completion}_tokens` per request, which is exact while
 * the metric delta is "everything that happened during the wall-clock
 * window of the request." Reported counts that are not finite,
 * non-negative numbers are ignored in favour of the metric delta.
 *
 * NOTE(review): when the metric deltas are zero (including the failed-scrape
 * case) and `responseUsage` supplies the input count, all input tokens are
 * attributed to `cache_read_input_tokens`; callers that need a trustworthy
 * cache split should check `scrapeOk` on the snapshots themselves.
 *
 * @param before - Snapshot taken just before the request.
 * @param after - Snapshot taken just after the response.
 * @param responseUsage - Optional per-request counters from the response payload.
 * @returns The usage block; `cache_hit_rate` is set only when there was input,
 *   and the `mtp_*` fields only when at least one token was drafted.
 */
export function diffSnapshots(
	before: LlamaServerMetricSnapshot,
	after: LlamaServerMetricSnapshot,
	responseUsage?: { prompt_tokens?: number; completion_tokens?: number },
): LocalUsageBlock {
	const metricsUsable = before.scrapeOk !== false && after.scrapeOk !== false;
	const delta = (from: number, to: number): number =>
		metricsUsable ? clampNonNegative(to - from) : 0;

	const promptDelta = delta(before.promptTokensTotal, after.promptTokensTotal);
	const predictedDelta = delta(
		before.predictedTokensTotal,
		after.predictedTokensTotal,
	);
	const processedDelta = delta(
		before.promptTokensProcessedTotal,
		after.promptTokensProcessedTotal,
	);
	const draftedDelta = delta(before.draftedTotal, after.draftedTotal);
	const acceptedDelta = delta(before.acceptedTotal, after.acceptedTotal);

	const inputTokens = reportedCountOr(responseUsage?.prompt_tokens, promptDelta);
	const outputTokens = reportedCountOr(
		responseUsage?.completion_tokens,
		predictedDelta,
	);
	// Tokens that had to be freshly prefilled this call. Bounded above by
	// the per-call input count — a metric-delta wider than the call's own
	// input is a sampling artifact.
	const cacheCreation = Math.min(processedDelta, inputTokens);
	const cacheRead = Math.max(0, inputTokens - cacheCreation);

	const block: LocalUsageBlock = {
		input_tokens: inputTokens,
		output_tokens: outputTokens,
		cache_creation_input_tokens: cacheCreation,
		cache_read_input_tokens: cacheRead,
	};
	if (inputTokens > 0) {
		block.cache_hit_rate = cacheRead / inputTokens;
	}
	if (draftedDelta > 0) {
		block.mtp_drafted_tokens = draftedDelta;
		block.mtp_accepted_tokens = acceptedDelta;
		block.mtp_acceptance_rate = acceptedDelta / draftedDelta;
	}
	return block;
}

/** Return `reported` when it is a finite, non-negative number; otherwise `fallback`. */
function reportedCountOr(
	reported: number | null | undefined,
	fallback: number,
): number {
	return typeof reported === "number" &&
		Number.isFinite(reported) &&
		reported >= 0
		? reported
		: fallback;
}

/** Map negative and non-finite values to 0; pass everything else through. */
function clampNonNegative(value: number): number {
	if (!Number.isFinite(value)) return 0;
	return value < 0 ? 0 : value;
}
|
|
239
|
+
|
|
240
|
+
/**
 * GET `/metrics` from a running llama-server and parse it. Errors fall
 * back to a zero-valued snapshot rather than throwing — observability
 * MUST NOT break generation. `scrapeOk=false` tells callers that the
 * zeros are not evidence of absent MTP/KV activity.
 *
 * @param baseUrl - Server base URL; a single trailing `/` is stripped before
 *   `/metrics` is appended.
 * @param signal - Optional caller signal; aborting it aborts the scrape.
 * @param timeoutMs - Upper bound for the scrape in milliseconds. Fractions are
 *   floored, values below 1 become 1, `NaN` falls back to the default, and
 *   values above the platform timer maximum (including `Infinity`) are capped
 *   at it, because `setTimeout` would otherwise coerce them to 1 ms and abort
 *   the scrape almost immediately.
 * @returns The parsed snapshot, or a zero-valued one with `scrapeOk: false`
 *   on a non-2xx status, network error, abort, or timeout.
 */
export async function fetchMetricsSnapshot(
	baseUrl: string,
	signal?: AbortSignal,
	timeoutMs = DEFAULT_METRICS_SCRAPE_TIMEOUT_MS,
): Promise<LlamaServerMetricSnapshot> {
	const takenAtMs = Date.now();
	const empty: LlamaServerMetricSnapshot = {
		takenAtMs,
		scrapeOk: false,
		hasGenerationCounters: false,
		promptTokensTotal: 0,
		predictedTokensTotal: 0,
		promptTokensProcessedTotal: 0,
		draftedTotal: 0,
		acceptedTotal: 0,
		kvCacheTokens: 0,
		kvCacheUsedCells: 0,
	};

	// Largest delay setTimeout honours (signed 32-bit); anything larger, or
	// NaN, is silently replaced by 1 ms.
	const maxTimerDelayMs = 2_147_483_647;
	const effectiveTimeoutMs = Number.isNaN(timeoutMs)
		? DEFAULT_METRICS_SCRAPE_TIMEOUT_MS
		: Math.min(maxTimerDelayMs, Math.max(1, Math.floor(timeoutMs)));

	// A private controller lets both the caller's signal and the timeout
	// abort the same request.
	const controller = new AbortController();
	const abortFromCaller = () => controller.abort(signal?.reason);
	if (signal?.aborted) {
		abortFromCaller();
	} else {
		signal?.addEventListener("abort", abortFromCaller, { once: true });
	}
	const timer = setTimeout(
		() =>
			controller.abort(
				new DOMException(
					`llama-server metrics scrape timed out after ${effectiveTimeoutMs}ms`,
					"TimeoutError",
				),
			),
		effectiveTimeoutMs,
	);
	let res: Response | null = null;
	let bodySettled = false;
	try {
		res = await fetch(`${baseUrl.replace(/\/$/, "")}/metrics`, {
			method: "GET",
			signal: controller.signal,
		});
		if (!res.ok) return empty;
		const body = await res.text();
		bodySettled = true;
		return parsePrometheusMetrics(body, takenAtMs);
	} catch {
		// Best effort: a metrics scrape failure must not abort the response
		// path. The empty snapshot carries `scrapeOk: false`; the caller
		// still sees the response payload usage.
		return empty;
	} finally {
		clearTimeout(timer);
		signal?.removeEventListener("abort", abortFromCaller);
		// Release the response stream when it was not fully read (non-2xx,
		// error, abort); a failure to cancel is deliberately ignored.
		if (res?.body && (!bodySettled || controller.signal.aborted)) {
			await res.body.cancel(controller.signal.reason).catch(() => undefined);
		}
	}
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Narrow streaming-LLM binding.
|
|
3
|
+
*
|
|
4
|
+
* `FfiStreamingRunner` (`services/ffi-streaming-runner.ts`) used to require
|
|
5
|
+
* the full `ElizaInferenceFfi` surface (TTS + ASR + VAD + mmap regions +
|
|
6
|
+
* the entire fused libelizainference) just to run text generation. That
|
|
7
|
+
* surface implies a *bundle-anchored* runtime — libelizainference owns a
|
|
8
|
+
* context built from a bundle root, not a single GGUF — and ~25 methods
|
|
9
|
+
* that have nothing to do with LLM streaming.
|
|
10
|
+
*
|
|
11
|
+
* This file extracts the actual contract the runner depends on: the six
|
|
12
|
+
* `llmStream*` methods plus the (optional) two slot save/restore methods.
|
|
13
|
+
* Both libelizainference (via a tiny adapter) and the desktop
|
|
14
|
+
* libllama + eliza-llama-shim path (built by `build-llama-cpp-desktop-dylib.mjs`,
|
|
15
|
+
* mirroring the AOSP adapter pattern) can implement this narrow contract
|
|
16
|
+
* without dragging in TTS/ASR.
|
|
17
|
+
*
|
|
18
|
+
* See `plugins/plugin-local-inference/FFI_BACKEND_WIREUP_PLAN.md` Step B
|
|
19
|
+
* for the desktop adapter follow-up that implements this interface against
|
|
20
|
+
* the libllama symbols.
|
|
21
|
+
*/
|
|
22
|
+
import type { ElizaInferenceContextHandle, ElizaInferenceFfi, LlmStreamConfig, LlmStreamHandle, LlmStreamStep } from "./voice/ffi-bindings";
|
|
23
|
+
/**
 * Opaque handle identifying one inference context.
 *
 * - libelizainference: the `ElizaInferenceContextHandle`, a bigint pointer
 *   to the bundle context.
 * - desktop libllama path: a bigint pointer to a per-model llama_context.
 *
 * The runner never inspects it; it is only forwarded to `llmStreamOpen`.
 */
export type LlmCtxHandle = ElizaInferenceContextHandle;
|
|
31
|
+
/**
 * Streaming-LLM contract consumed by `FfiStreamingRunner`.
 *
 * The methods mirror the C ABI declared in
 * `tools/omnivoice/include/eliza-inference-ffi.h` (the
 * `eliza_inference_llm_stream_*` surface). The binding does not have to be
 * backed by libelizainference — any implementation satisfying this
 * interface works.
 *
 * The two slot save/restore methods are optional because the desktop
 * libllama path does not yet expose `llama_state_seq_save_file` /
 * `_load_file` through the shim. The runner already guards both via
 * `if (this.ffi.llmStreamSaveSlot === undefined) throw ...`.
 */
export interface LlmStreamingBinding {
    /** Capability probe; the runner can only use the binding when this returns `true`. */
    llmStreamSupported(): boolean;
    /**
     * Open a streaming-LLM session against `ctx`. On failure an
     * implementation-specific error is thrown (`VoiceLifecycleError` for
     * libelizainference). The returned handle must be closed exactly once
     * via `llmStreamClose`.
     */
    llmStreamOpen(args: {
        ctx: LlmCtxHandle;
        config: LlmStreamConfig;
    }): LlmStreamHandle;
    /** Feed a batch of pre-tokenized prompt tokens; call before the first `next`. */
    llmStreamPrefill(args: {
        stream: LlmStreamHandle;
        tokens: Int32Array;
    }): void;
    /**
     * Pull the next streaming step; `step.done === true` marks the final one.
     * Implementations may bound a step by `maxTokensPerStep` /
     * `maxTextBytes`; the defaults are chosen runner-side.
     */
    llmStreamNext(args: {
        stream: LlmStreamHandle;
        maxTokensPerStep?: number;
        maxTextBytes?: number;
    }): LlmStreamStep;
    /** Cancel in-flight generation; the following `_next` call returns CANCELLED. */
    llmStreamCancel(stream: LlmStreamHandle): void;
    /** Close and free a streaming-LLM session. Idempotent on already-closed handles. */
    llmStreamClose(stream: LlmStreamHandle): void;
    /** Optional — persist the session's slot KV state to disk. */
    llmStreamSaveSlot?(args: {
        stream: LlmStreamHandle;
        filename: string;
    }): void;
    /** Optional — restore a previously saved slot KV file. */
    llmStreamRestoreSlot?(args: {
        stream: LlmStreamHandle;
        filename: string;
    }): void;
}
|
|
85
|
+
/**
 * Adapt a full `ElizaInferenceFfi` to the narrow `LlmStreamingBinding`.
 *
 * On the libelizainference path the `llmStream*` methods exist as optional
 * properties; this adapter promotes them to required ones and throws when
 * the loaded library is too old to expose them.
 *
 * Usage:
 *   const binding = wrapElizaInferenceFfi(ffi);
 *   const runner = new FfiStreamingRunner(binding, ctxHandle);
 */
export declare function wrapElizaInferenceFfi(
    ffi: ElizaInferenceFfi,
): LlmStreamingBinding;
|
|
96
|
+
//# sourceMappingURL=llm-streaming-binding.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-streaming-binding.d.ts","sourceRoot":"","sources":["llm-streaming-binding.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EACX,2BAA2B,EAC3B,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,aAAa,EACb,MAAM,sBAAsB,CAAC;AAE9B;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GAAG,2BAA2B,CAAC;AAEvD;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,mBAAmB;IACnC,6EAA6E;IAC7E,kBAAkB,IAAI,OAAO,CAAC;IAC9B;;;;OAIG;IACH,aAAa,CAAC,IAAI,EAAE;QACnB,GAAG,EAAE,YAAY,CAAC;QAClB,MAAM,EAAE,eAAe,CAAC;KACxB,GAAG,eAAe,CAAC;IACpB,2EAA2E;IAC3E,gBAAgB,CAAC,IAAI,EAAE;QAAE,MAAM,EAAE,eAAe,CAAC;QAAC,MAAM,EAAE,UAAU,CAAA;KAAE,GAAG,IAAI,CAAC;IAC9E;;;;OAIG;IACH,aAAa,CAAC,IAAI,EAAE;QACnB,MAAM,EAAE,eAAe,CAAC;QACxB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,YAAY,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,aAAa,CAAC;IAClB,uEAAuE;IACvE,eAAe,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CAAC;IAC/C,kFAAkF;IAClF,cAAc,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CAAC;IAC9C,8DAA8D;IAC9D,iBAAiB,CAAC,CAAC,IAAI,EAAE;QAAE,MAAM,EAAE,eAAe,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC9E,0DAA0D;IAC1D,oBAAoB,CAAC,CAAC,IAAI,EAAE;QAC3B,MAAM,EAAE,eAAe,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KACjB,GAAG,IAAI,CAAC;CACT;AAED;;;;;;;;;GASG;AACH,wBAAgB,qBAAqB,CACpC,GAAG,EAAE,iBAAiB,GACpB,mBAAmB,CAmCrB"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Narrow streaming-LLM binding.
|
|
3
|
+
*
|
|
4
|
+
* `FfiStreamingRunner` (`services/ffi-streaming-runner.ts`) used to require
|
|
5
|
+
* the full `ElizaInferenceFfi` surface (TTS + ASR + VAD + mmap regions +
|
|
6
|
+
* the entire fused libelizainference) just to run text generation. That
|
|
7
|
+
* surface implies a *bundle-anchored* runtime — libelizainference owns a
|
|
8
|
+
* context built from a bundle root, not a single GGUF — and ~25 methods
|
|
9
|
+
* that have nothing to do with LLM streaming.
|
|
10
|
+
*
|
|
11
|
+
* This file extracts the actual contract the runner depends on: the six
|
|
12
|
+
* `llmStream*` methods plus the (optional) two slot save/restore methods.
|
|
13
|
+
* Both libelizainference (via a tiny adapter) and the desktop
|
|
14
|
+
* libllama + eliza-llama-shim path (built by `build-llama-cpp-desktop-dylib.mjs`,
|
|
15
|
+
* mirroring the AOSP adapter pattern) can implement this narrow contract
|
|
16
|
+
* without dragging in TTS/ASR.
|
|
17
|
+
*
|
|
18
|
+
* See `plugins/plugin-local-inference/FFI_BACKEND_WIREUP_PLAN.md` Step B
|
|
19
|
+
* for the desktop adapter follow-up that implements this interface against
|
|
20
|
+
* the libllama symbols.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import type {
|
|
24
|
+
ElizaInferenceContextHandle,
|
|
25
|
+
ElizaInferenceFfi,
|
|
26
|
+
LlmStreamConfig,
|
|
27
|
+
LlmStreamHandle,
|
|
28
|
+
LlmStreamStep,
|
|
29
|
+
} from "./voice/ffi-bindings";
|
|
30
|
+
|
|
31
|
+
/**
 * Opaque handle identifying one inference context.
 *
 * - libelizainference: the `ElizaInferenceContextHandle`, a bigint pointer
 *   to the bundle context.
 * - desktop libllama path: a bigint pointer to a per-model llama_context.
 *
 * The runner never inspects it; it is only forwarded to `llmStreamOpen`.
 */
export type LlmCtxHandle = ElizaInferenceContextHandle;
|
|
39
|
+
|
|
40
|
+
/**
 * Streaming-LLM contract consumed by `FfiStreamingRunner`.
 *
 * The methods mirror the C ABI declared in
 * `tools/omnivoice/include/eliza-inference-ffi.h` (the
 * `eliza_inference_llm_stream_*` surface). The binding does not have to be
 * backed by libelizainference — any implementation satisfying this
 * interface works.
 *
 * The two slot save/restore methods are optional because the desktop
 * libllama path does not yet expose `llama_state_seq_save_file` /
 * `_load_file` through the shim. The runner already guards both via
 * `if (this.ffi.llmStreamSaveSlot === undefined) throw ...`.
 */
export interface LlmStreamingBinding {
	/** Capability probe; the runner can only use the binding when this returns `true`. */
	llmStreamSupported(): boolean;
	/**
	 * Open a streaming-LLM session against `ctx`. On failure an
	 * implementation-specific error is thrown (`VoiceLifecycleError` for
	 * libelizainference). The returned handle must be closed exactly once
	 * via `llmStreamClose`.
	 */
	llmStreamOpen(args: {
		ctx: LlmCtxHandle;
		config: LlmStreamConfig;
	}): LlmStreamHandle;
	/** Feed a batch of pre-tokenized prompt tokens; call before the first `next`. */
	llmStreamPrefill(args: { stream: LlmStreamHandle; tokens: Int32Array }): void;
	/**
	 * Pull the next streaming step; `step.done === true` marks the final one.
	 * Implementations may bound a step by `maxTokensPerStep` /
	 * `maxTextBytes`; the defaults are chosen runner-side.
	 */
	llmStreamNext(args: {
		stream: LlmStreamHandle;
		maxTokensPerStep?: number;
		maxTextBytes?: number;
	}): LlmStreamStep;
	/** Cancel in-flight generation; the following `_next` call returns CANCELLED. */
	llmStreamCancel(stream: LlmStreamHandle): void;
	/** Close and free a streaming-LLM session. Idempotent on already-closed handles. */
	llmStreamClose(stream: LlmStreamHandle): void;
	/** Optional — persist the session's slot KV state to disk. */
	llmStreamSaveSlot?(args: { stream: LlmStreamHandle; filename: string }): void;
	/** Optional — restore a previously saved slot KV file. */
	llmStreamRestoreSlot?(args: {
		stream: LlmStreamHandle;
		filename: string;
	}): void;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Wrap a full `ElizaInferenceFfi` as a narrow `LlmStreamingBinding`.
 *
 * The libelizainference surface declares the `llmStream*` methods as
 * optional properties. This adapter verifies that the required ones exist
 * (and that `llmStreamSupported()` reports `true`), then exposes them as
 * required members.
 *
 * Every forwarded method is bound to `ffi`, so an implementation that reads
 * `this` (for example a class instance) keeps working when it is invoked
 * through the returned binding or through a destructured reference.
 *
 * Usage:
 *   const binding = wrapElizaInferenceFfi(ffi);
 *   const runner = new FfiStreamingRunner(binding, ctxHandle);
 *
 * @param ffi - The loaded inference FFI surface to adapt.
 * @returns A binding whose required `llmStream*` methods delegate to `ffi`.
 *   The optional slot save/restore methods are forwarded when `ffi` provides
 *   them and are `undefined` otherwise.
 * @throws Error when `ffi` lacks any of the required streaming-LLM methods,
 *   or when `ffi.llmStreamSupported()` returns a falsy value.
 */
export function wrapElizaInferenceFfi(
  ffi: ElizaInferenceFfi,
): LlmStreamingBinding {
  if (
    typeof ffi.llmStreamSupported !== "function" ||
    !ffi.llmStreamSupported() ||
    typeof ffi.llmStreamOpen !== "function" ||
    typeof ffi.llmStreamPrefill !== "function" ||
    typeof ffi.llmStreamNext !== "function" ||
    typeof ffi.llmStreamCancel !== "function" ||
    typeof ffi.llmStreamClose !== "function"
  ) {
    throw new Error(
      "[llm-streaming-binding] The loaded libelizainference does not expose " +
        "the streaming-LLM symbol set (llmStreamSupported/Open/Prefill/Next/" +
        "Cancel/Close). Rebuild the omnivoice fuse against the current " +
        "eliza-inference-ffi.h (verify-fused-symbols requires this set).",
    );
  }

  // The guard above narrows the optional members to functions. Bind each one
  // to `ffi` so the receiver is preserved: copying a method onto a different
  // object would otherwise make `this` point at the wrapper.
  return {
    // The probe already succeeded above, so the wrapper reports a constant.
    llmStreamSupported: () => true,
    llmStreamOpen: ffi.llmStreamOpen.bind(ffi),
    llmStreamPrefill: ffi.llmStreamPrefill.bind(ffi),
    llmStreamNext: ffi.llmStreamNext.bind(ffi),
    llmStreamCancel: ffi.llmStreamCancel.bind(ffi),
    llmStreamClose: ffi.llmStreamClose.bind(ffi),
    // Optional slot persistence: stays `undefined` when the library lacks it.
    llmStreamSaveSlot: ffi.llmStreamSaveSlot?.bind(ffi),
    llmStreamRestoreSlot: ffi.llmStreamRestoreSlot?.bind(ffi),
  };
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-load override types for the local inference engine.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from active-model.ts to break the active-model ↔ engine
|
|
5
|
+
* circular dependency. Both modules import from here; neither imports
|
|
6
|
+
* from the other for these definitions.
|
|
7
|
+
*
|
|
8
|
+
* @module services/load-args
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Where the KV cache should be placed.
 *
 * `capacitor-llama` currently has no KV-cache placement knob separate from
 * the model-level `gpuLayers` setting: the KV cache lives wherever the layer
 * that owns it lives. The type is still declared here so that the API/UI
 * surface and the upstream out-of-process `llama-server` backend can carry a
 * real choice end to end. The in-process binding maps any non-default value
 * to a `gpuLayers` override, or warns loudly when the value cannot be
 * honoured.
 */
export type KvOffloadMode =
  | "cpu"
  | "gpu"
  | "split"
  | { gpuLayers: number };
|
|
22
|
+
/**
 * Overrides that apply to a single model load. Accepted by
 * `localInferenceLoader.loadModel(...)` and by
 * `POST /api/local-inference/active`.
 *
 * `resolveLocalInferenceLoadArgs` merges in the catalog defaults. Values the
 * caller supplies per call take precedence over both catalog metadata and
 * env-var fallbacks.
 */
export interface LocalInferenceLoadArgs {
  modelPath: string;

  /**
   * Catalog id, used for direct bundle loads in which `modelPath` points at
   * a GGUF inside an Eliza-1 bundle that the installed-model registry does
   * not list yet.
   */
  modelId?: string;

  contextSize?: number;
  useGpu?: boolean;
  maxThreads?: number;

  draftModelPath?: string;
  draftContextSize?: number;
  draftMin?: number;
  draftMax?: number;
  speculativeSamples?: number;
  mobileSpeculative?: boolean;

  cacheTypeK?: string;
  cacheTypeV?: string;
  disableThinking?: boolean;

  /**
   * Count of model layers to offload to the GPU.
   *
   * Only a number is accepted here. `"auto"` and `"max"` are resolved by the
   * backend's own probing, so the API surface takes the most common
   * `gpuLayers: 32` shape without an extra string branch.
   */
  gpuLayers?: number;

  /**
   * KV cache placement; see `KvOffloadMode`.
   *
   * node-llama-cpp does not expose this separately from `gpuLayers`, so the
   * backend translates the request into a `gpuLayers` override, or throws
   * when the value cannot be honoured.
   *
   * NOTE(review): the `KvOffloadMode` doc describes the fallback as a loud
   * warning from the in-process binding rather than a throw. Confirm which
   * behaviour the backend actually implements.
   */
  kvOffload?: KvOffloadMode;

  flashAttention?: boolean;
  mmap?: boolean;
  mlock?: boolean;

  /**
   * Path to the multi-modal projector GGUF (mmproj-<tier>.gguf). Set when the
   * loaded tier supports vision, i.e. `catalog.sourceModel.components.vision`
   * is present AND the file exists on disk. WS2 (vision-describe) resolves
   * it from the installed bundle root in `resolveLocalInferenceLoadArgs`.
   *
   * Backends that support vision use the path verbatim:
   *   - llama-server: `--mmproj <path>` flag on spawn.
   *   - node-llama-cpp: `mtmd_init_from_file(<path>)` (planned in fork).
   *   - AOSP libllama shim: `eliza_llama_mtmd_init_from_file(<path>)`.
   *
   * Left undefined when the tier ships no vision component or the file is
   * not on disk yet (e.g. a downloaded text-only bundle). The text load is
   * NOT gated on mmproj presence: text+drafter still load, and vision is
   * marked unavailable for that session.
   */
  mmprojPath?: string;
}
|
|
82
|
+
//# sourceMappingURL=load-args.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"load-args.d.ts","sourceRoot":"","sources":["load-args.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;;;;;;;GAQG;AACH,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,KAAK,GAAG,OAAO,GAAG;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC;AAE5E;;;;;GAKG;AACH,MAAM,WAAW,sBAAsB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB;;;;;;;;;;;;;;OAcG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB"}
|