@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process FFI streaming backend adapter.
|
|
3
|
+
*
|
|
4
|
+
* Implements `LocalInferenceBackend` as the optimized in-process
|
|
5
|
+
* llama.cpp path used by Eliza-1 on desktop and mobile.
|
|
6
|
+
*
|
|
7
|
+
* What this class deliberately does NOT do:
|
|
8
|
+
* - Own the FFI context. The runtime provider passed to this class owns
|
|
9
|
+
* native load/unload and hands back the binding, context, and tokenizer.
|
|
10
|
+
* - Decode image bytes or call mtmd directly. Vision requests are validated
|
|
11
|
+
* here, then forwarded to runtimes that expose `describeImage`.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type {
|
|
15
|
+
BackendPlan,
|
|
16
|
+
GenerateArgs,
|
|
17
|
+
GenerateResult,
|
|
18
|
+
LocalGenerateWithUsageResult,
|
|
19
|
+
LocalInferenceBackend,
|
|
20
|
+
} from "./backend";
|
|
21
|
+
import type { FfiStreamingRunner } from "./ffi-streaming-runner";
|
|
22
|
+
import type {
|
|
23
|
+
LlmCtxHandle,
|
|
24
|
+
LlmStreamingBinding,
|
|
25
|
+
} from "./llm-streaming-binding";
|
|
26
|
+
import { resolveGuidedDecodeForParams } from "./structured-output";
|
|
27
|
+
|
|
28
|
+
/**
 * Runtime provider handed to `FfiStreamingBackend` through its constructor.
 * It resolves the FFI binding, context, and tokenizer for a given load and
 * carries two responsibilities:
 *
 *   1. Viability — `supported()` says whether the FFI path can be used on
 *      the current binding. It mirrors
 *      `LlmStreamingBinding.llmStreamSupported()` and may layer on
 *      higher-level constraints (e.g. the dylib path exists, the build
 *      target matches the bundle's required kernels).
 *   2. Lifecycle — `acquire(plan)` yields a session whose runner is ready
 *      for `generate()` against the requested model, together with a
 *      tokenizer matching that model's vocab; `release()` tears it all down.
 *
 * Production implementation: the fused libelizainference path
 * (`desktop-fused-ffi-backend-runtime.ts`), which wraps `ElizaInferenceFfi`
 * via `wrapElizaInferenceFfi()` from `services/llm-streaming-binding.ts`.
 * libllama has been retired — there is no second runtime behind this slot.
 */
export interface FfiBackendRuntime {
  /** Whether the FFI path is usable on the current binding. */
  supported(): boolean;

  /** Load the model described by `plan` and return the live session. */
  acquire(plan: BackendPlan): Promise<FfiBackendSession>;

  /** Tear down whatever `acquire()` set up. */
  release(): Promise<void>;

  /**
   * Optional parallel-slot pool surface: reports the live slot count when
   * the runtime exposes a ctx pool. A runtime without a pool either omits
   * this method or reports 1.
   */
  parallelSlots?(): number;

  /**
   * Optional companion to `parallelSlots()`: grows or shrinks the ctx pool
   * to `target` slots. Runtimes without a pool ignore resize requests.
   */
  resizeParallel?(target: number): Promise<boolean>;
}
|
|
58
|
+
|
|
59
|
+
/**
 * What `FfiBackendRuntime.acquire()` resolves to: a live FFI session tied
 * to one specific loaded model.
 */
export interface FfiBackendSession {
  /** Streaming binding the session was opened on. */
  readonly binding: LlmStreamingBinding;

  /** Opaque native context handle for the loaded model. */
  readonly ctx: LlmCtxHandle;

  /** Runner that drives generation for this session. */
  readonly runner: FfiStreamingRunner;

  /**
   * Turns a prompt string into model token ids with the loaded model's
   * tokenizer. The vocab MUST match the GGUF — a mismatch silently yields
   * gibberish. Asserting the match at acquire time is the runtime's job.
   */
  readonly tokenize: (prompt: string) => Int32Array;

  /**
   * Native MTP speculative-decoding policy taken from the catalog. `null`
   * turns speculative decoding off for this session.
   */
  readonly mtp: {
    specType: "draft-mtp";
    draftMin: number;
    draftMax: number;
    gpuLayers: number | "auto";
  } | null;

  /**
   * Absolute path to a *separate* MTP drafter GGUF resolved during load.
   * `null` means same-file MTP: the NextN head lives inside the main text
   * GGUF and the native runner activates `--spec-type draft-mtp` with no
   * `-md`. Whether speculative decoding runs is decided by `mtp`, not by
   * whether this path is set.
   */
  readonly draftModelPath: string | null;

  /**
   * Multimodal projector (mmproj) GGUF path used for vision describe,
   * resolved from `plan.overrides.mmprojPath` at acquire time. `null`
   * disables vision — `describeImage` then throws an actionable error.
   */
  readonly mmprojPath: string | null;

  /**
   * Per-load runtime config (gpuLayers + KV-cache quant types) that the
   * fused libelizainference path applies at its first `llmStreamOpen`.
   * A runtime that already applies these values at model-load time leaves
   * this `null`; the backend forwards them into the runner's per-call
   * config only when present, so they are never applied twice.
   */
  readonly loadConfig?: {
    gpuLayers?: number;
    cacheTypeK?: string | null;
    cacheTypeV?: string | null;
  } | null;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Adapter that satisfies `LocalInferenceBackend` by delegating to
 * `FfiStreamingRunner`. The `id` is `"llama-cpp"` because this is the
 * in-process variant of the optimized llama.cpp path.
 *
 * The class never owns native resources itself: the injected
 * `FfiBackendRuntime` acquires and releases the session, and this class
 * only tracks which session / model path is currently active.
 */
export class FfiStreamingBackend implements LocalInferenceBackend {
  readonly id = "llama-cpp" as const;

  private session: FfiBackendSession | null = null;
  private loadedPath: string | null = null;
  /**
   * Set when the last `runtime.release()` rejected. Our own refs are
   * cleared in that case, so this flag is the only record that the runtime
   * may still be holding a live session that must be released before the
   * next `acquire()`.
   */
  private releasePending = false;

  constructor(private readonly runtime: FfiBackendRuntime) {}

  /** Resolves to whatever the runtime reports from `supported()`. */
  async available(): Promise<boolean> {
    return this.runtime.supported();
  }

  /** True while a session acquired by `load()` is held. */
  hasLoadedModel(): boolean {
    return this.session !== null;
  }

  /** `plan.modelPath` of the active load, or `null` when nothing is loaded. */
  currentModelPath(): string | null {
    return this.loadedPath;
  }

  /**
   * Acquire a session for `plan`, releasing any previous one first.
   *
   * If an earlier `unload()` failed inside `runtime.release()`, the release
   * is retried here before acquiring. Without the retry the backend would
   * see no session, skip the release, and call `acquire()` against a
   * runtime that may still hold its old session. The retry is best-effort:
   * a second failure is not rethrown because `acquire()` reports the
   * authoritative state right after.
   *
   * Rejects with whatever `unload()` (when a session is held) or
   * `acquire()` rejects with; in both cases the backend is left unloaded.
   */
  async load(plan: BackendPlan): Promise<void> {
    if (this.session) {
      await this.unload();
    } else if (this.releasePending) {
      try {
        await this.unload();
      } catch {
        // Deliberately ignored: acquire() below surfaces the real state.
      }
    }
    this.session = await this.runtime.acquire(plan);
    // A successful acquire proves the runtime holds no stale session.
    this.releasePending = false;
    this.loadedPath = plan.modelPath;
  }

  /**
   * Release the runtime session and clear the local refs.
   *
   * The refs are cleared in `finally`, so a rejected release (a raw bun:ffi
   * free can reject) never leaves a stale "loaded" view. The failure is
   * recorded in `releasePending` so the next `load()` retries the release,
   * and the original error is rethrown to the caller.
   */
  async unload(): Promise<void> {
    try {
      await this.runtime.release();
      this.releasePending = false;
    } catch (err) {
      this.releasePending = true;
      throw err;
    } finally {
      this.session = null;
      this.loadedPath = null;
    }
  }

  /** Text-only convenience wrapper around `generateWithUsage()`. */
  async generate(args: GenerateArgs): Promise<GenerateResult> {
    const result = await this.generateWithUsage(args);
    return result.text;
  }

  /**
   * Run one generation through the session's runner.
   *
   * Defaults applied when the caller omits a value: `slotId` -1,
   * `maxTokens` 2048, `temperature` 0.7, `topP` 0.9. `topK` (40) and
   * `repeatPenalty` (1.1) are fixed here.
   *
   * @throws Error when called before `load()`.
   */
  async generateWithUsage(
    args: GenerateArgs & { slotId?: number },
  ): Promise<LocalGenerateWithUsageResult> {
    if (!this.session) {
      throw new Error(
        "[ffi-streaming-backend] generate() called before load() — " +
          "the FFI session has not been acquired.",
      );
    }
    const { runner, tokenize, mtp, draftModelPath, loadConfig } = this.session;
    // Force the structured-reply envelope: compile the GBNF from the
    // caller's `responseSkeleton` / explicit `grammar` (precedence handled
    // by `resolveGuidedDecodeForParams`, mirroring `engine.ts`'s
    // `resolveBindingGrammarSource`). The native session installs it FIRST
    // in the sampler chain so every sampled token is grammar-constrained.
    const gbnfGrammar =
      resolveGuidedDecodeForParams(args).grammar?.source ?? null;
    const result = await runner.generateWithUsage({
      promptTokens: tokenize(args.prompt),
      slotId: args.slotId ?? -1,
      cacheKey: args.cacheKey,
      maxTokens: args.maxTokens ?? 2048,
      temperature: args.temperature ?? 0.7,
      topP: args.topP ?? 0.9,
      topK: 40,
      repeatPenalty: 1.1,
      draftMin: mtp?.draftMin ?? 0,
      draftMax: mtp?.draftMax ?? 0,
      draftModelPath,
      gbnfGrammar,
      gpuLayers: loadConfig?.gpuLayers,
      cacheTypeK: loadConfig?.cacheTypeK,
      cacheTypeV: loadConfig?.cacheTypeV,
      signal: args.signal,
      onTextChunk: args.onTextChunk,
      onVerifierEvent: args.onVerifierEvent,
    });
    return {
      text: result.text,
      slotId: result.slotId,
      firstTokenMs: result.firstTokenMs,
      usage: {
        completion_tokens: result.accepted,
      },
      mtpStats: {
        drafted: result.drafted,
        accepted: result.accepted,
        // No drafted tokens means no meaningful rate; report null, not NaN.
        acceptanceRate:
          result.drafted > 0 ? result.accepted / result.drafted : null,
      },
    };
  }

  // === Optional `LocalInferenceBackend` methods routed through the runner.

  /**
   * Persist the active session's KV state to a per-conversation file.
   *
   * The file name comes from `slotFilename()`:
   * `<conversationId>__slot<slotId>.kv`. Silently does nothing when no
   * session is loaded or the binding has no `llmStreamSaveSlot`.
   */
  async persistConversationKv(
    conversationId: string,
    slotId: number,
  ): Promise<void> {
    if (!this.session) return; // no active session to persist
    const { binding } = this.session;
    if (!binding.llmStreamSaveSlot) return; // adapter doesn't support save
    const filename = slotFilename(conversationId, slotId);
    // llmStreamSaveSlot is per-stream in the binding API; the desktop
    // adapter currently saves the ctx-wide seq=0 state, so the stream
    // handle is informational. This method always passes the 0n sentinel.
    binding.llmStreamSaveSlot({ stream: 0n, filename });
  }

  /**
   * Restore a previously persisted KV state. Mirror of
   * `persistConversationKv`.
   *
   * @returns `false` when no session is loaded or the binding has no
   *   `llmStreamRestoreSlot`; `true` once the restore call has returned.
   */
  async restoreConversationKv(
    conversationId: string,
    slotId: number,
  ): Promise<boolean> {
    if (!this.session) return false;
    const { binding } = this.session;
    if (!binding.llmStreamRestoreSlot) return false;
    const filename = slotFilename(conversationId, slotId);
    binding.llmStreamRestoreSlot({ stream: 0n, filename });
    return true;
  }

  /**
   * Pre-decode `promptPrefix` so the next `generate` against the same
   * `cacheKey` skips re-prefill. Returns `false` when the prefix is
   * empty or no session is loaded. The FFI runner serializes by
   * `cacheKey` internally via the `slotInFlight` map.
   */
  async prewarmConversation(
    promptPrefix: string,
    opts: { slotId: number; cacheKey: string },
  ): Promise<boolean> {
    if (!this.session || promptPrefix.length === 0) return false;
    const { runner, tokenize, mtp, draftModelPath, loadConfig } = this.session;
    await runner.generateWithUsage({
      promptTokens: tokenize(promptPrefix),
      slotId: opts.slotId,
      cacheKey: opts.cacheKey,
      maxTokens: 0, // prefill-only: feed prompt, generate nothing
      temperature: 0,
      topP: 1,
      topK: 1,
      repeatPenalty: 1,
      draftMin: mtp?.draftMin ?? 0,
      draftMax: mtp?.draftMax ?? 0,
      draftModelPath,
      gpuLayers: loadConfig?.gpuLayers,
      cacheTypeK: loadConfig?.cacheTypeK,
      cacheTypeV: loadConfig?.cacheTypeV,
    });
    return true;
  }

  /**
   * True when Eliza-1 native MTP is active for the loaded target model.
   * Covers both shapes: same-file MTP (NextN head embedded in the text
   * GGUF, `draftModelPath` null) and separate-drafter MTP.
   */
  mtpEnabled(): boolean {
    return Boolean(this.session?.mtp);
  }

  /**
   * Parallel-slot pool size. Routed to the runtime's ctx pool when one
   * exists; defaults to 1 otherwise.
   */
  parallelSlots(): number {
    return this.runtime.parallelSlots?.() ?? 1;
  }

  /**
   * Grow or shrink the runtime's ctx pool to `target` slots. Returns
   * false when the runtime has no pool surface (in which case parallel
   * resize is ignored — the conversation registry tolerates
   * fixed 1-slot operation).
   */
  async resizeParallel(target: number): Promise<boolean> {
    if (!this.runtime.resizeParallel) return false;
    return this.runtime.resizeParallel(target);
  }

  /**
   * Vision describe via mmproj. Requires:
   *   - The shim built with `-DELIZA_ENABLE_VISION=1` (ELIZA_ENABLE_VISION=1
   *     at the build script env). When absent the runtime throws an
   *     actionable error.
   *   - `plan.overrides.mmprojPath` was passed at load time so the
   *     adapter knows which mmproj GGUF to feed clip.
   *
   * `args.mimeType` is accepted for interface compatibility but is not
   * forwarded to the runtime.
   *
   * @throws Error when no session is loaded, the session has no mmproj
   *   path, or the runtime does not expose `describeImage`.
   */
  async describeImage(args: {
    bytes: Uint8Array;
    mimeType?: string;
    prompt?: string;
    maxTokens?: number;
    temperature?: number;
    signal?: AbortSignal;
  }): Promise<{ text: string; projectorMs?: number; decodeMs?: number }> {
    if (!this.session) {
      throw new Error(
        "[ffi-streaming-backend] describeImage before load — no session acquired",
      );
    }
    const { mmprojPath } = this.session;
    if (!mmprojPath) {
      throw new Error(
        "[ffi-streaming-backend] describeImage: no mmproj GGUF loaded for this session. " +
          "Pass `overrides.mmprojPath` in the BackendPlan when activating a vision-capable bundle.",
      );
    }
    // `describeImage` is not part of `FfiBackendRuntime`; vision-capable
    // runtimes add it. Probe for it through an intersection type, then
    // re-shape `bytes` → `imageBytes` and merge in the resolved mmproj path.
    const runtime = this.runtime as FfiBackendRuntime & {
      describeImage?: (args: {
        imageBytes: Uint8Array;
        mmprojPath: string;
        prompt?: string;
        maxTokens?: number;
        temperature?: number;
        signal?: AbortSignal;
      }) => Promise<{ text: string; projectorMs?: number; decodeMs?: number }>;
    };
    if (!runtime.describeImage) {
      throw new Error(
        "[ffi-streaming-backend] runtime lacks describeImage support",
      );
    }
    // Invoked as a method so the runtime keeps its own `this`.
    return runtime.describeImage({
      imageBytes: args.bytes,
      mmprojPath,
      prompt: args.prompt,
      maxTokens: args.maxTokens,
      temperature: args.temperature,
      signal: args.signal,
    });
  }

  /** mmproj path of the active session, or `null` when none is loaded. */
  currentMmprojPath(): string | null {
    return this.session?.mmprojPath ?? null;
  }
}
|
|
374
|
+
|
|
375
|
+
/**
 * Builds the conversation-keyed slot file name
 * (`<conversationId>__slot<slotId>.kv`). The layout mirrors
 * `cache-bridge.ts`'s `slotSavePath` so that an
 * `ELIZA_INFERENCE_BACKEND=http` opt-out can pick up an FFI-saved
 * conversation (and the reverse) once the file formats align.
 */
function slotFilename(conversationId: string, slotId: number): string {
  const suffix = "__slot" + String(slotId) + ".kv";
  return conversationId + suffix;
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process streaming-LLM runner.
|
|
3
|
+
*
|
|
4
|
+
* FFI streaming-LLM ABI declared in `ffi-streaming-llm.h`. The
|
|
5
|
+
* token-by-token loop hands `onTextChunk` accepted chunks and surfaces
|
|
6
|
+
* verifier events from native MTP.
|
|
7
|
+
*
|
|
8
|
+
* This file deliberately does not own the FFI context or the binding
|
|
9
|
+
* itself. It takes a narrow `LlmStreamingBinding` (see
|
|
10
|
+
* `services/llm-streaming-binding.ts`) + an opaque `LlmCtxHandle` as
|
|
11
|
+
* constructor arguments — that way it can be driven by libelizainference
|
|
12
|
+
* (via `wrapElizaInferenceFfi`) or any desktop libllama shim adapter without
|
|
13
|
+
* dragging in TTS/ASR surfaces. A single context can host concurrent generation
|
|
14
|
+
* sessions (one per pinned slot); the runner serialises with
|
|
15
|
+
* `slotInFlight`.
|
|
16
|
+
*
|
|
17
|
+
* Single-flight: lock map keyed by slot id, slot id `-1` unlocked. Two concurrent generates
|
|
18
|
+
* against the same pinned slot would interleave KV cache state, so the
|
|
19
|
+
* runner serializes them at the JS layer.
|
|
20
|
+
*/
|
|
21
|
+
import type { LlmCtxHandle, LlmStreamingBinding } from "./llm-streaming-binding";
|
|
22
|
+
import type { LlmStreamHandle, LlmStreamStep } from "./voice/ffi-bindings";
|
|
23
|
+
import type { VerifierStreamEvent } from "./voice/types";
|
|
24
|
+
/** Per-call arguments accepted by `FfiStreamingRunner`. */
export interface FfiStreamingGenerateArgs {
  /** Prompt already converted to token ids by the caller. */
  promptTokens: Int32Array;
  /** Slot to pin the generation to; -1 turns pinning off (any free slot). */
  slotId: number;
  /** Optional prompt cache key from which a slot is derived when `slotId === -1`. */
  cacheKey?: string;
  maxTokens: number;
  temperature: number;
  topP: number;
  topK: number;
  repeatPenalty: number;
  draftMin: number;
  draftMax: number;
  /** Reserved for separate draft-model speculation; null for Eliza-1 MTP. */
  draftModelPath: string | null;
  /**
   * Per-load GPU offload (ABI v8), passed into the native session config on
   * `llmStreamOpen`. The fused libelizainference path loads the text model
   * once per ctx, so the FIRST session's value wins and later sessions
   * reuse the resident model. `undefined` picks the runtime default (all
   * layers). The desktop libllama path already applies gpuLayers at
   * `loadModel()` and ignores this field — it is load-time config, carried
   * here only so the fused runner can mirror the libllama load decision.
   */
  gpuLayers?: number;
  /**
   * KV-cache K/V quant type names (ABI v8), e.g. "qjl1_256" / "q4_polar".
   * Load-time semantics identical to `gpuLayers`: passed into the fused
   * session config so the first `llmStreamOpen` applies the quantized cache.
   */
  cacheTypeK?: string | null;
  cacheTypeV?: string | null;
  /**
   * GBNF grammar source that forces the structured-reply envelope. Handed
   * to the native session's `llmStreamOpen` config so sampling is
   * grammar-constrained. `null` removes the constraint (free generation).
   */
  gbnfGrammar?: string | null;
  /** Cancellation signal — triggers `llmStreamCancel` on the active session. */
  signal?: AbortSignal;
  /** Called with each text chunk. */
  onTextChunk?: (chunk: string) => void | Promise<void>;
  /** Called with speculative accept/reject events from MTP verification. */
  onVerifierEvent?: (event: VerifierStreamEvent) => void | Promise<void>;
}
|
|
70
|
+
/** Outcome of one `FfiStreamingRunner.generateWithUsage()` call. */
export interface FfiStreamingGenerateResult {
  /** Generated text. */
  text: string;
  /** Slot id reported for this generation. */
  slotId: number;
  /** First-token timing in milliseconds, or `null` when not available. */
  firstTokenMs: number | null;
  /** Drafted-token count reported for the run. */
  drafted: number;
  /** Accepted-token count reported for the run. */
  accepted: number;
}
|
|
77
|
+
/**
 * Runner behind the mobile and desktop FFI routes.
 */
export declare class FfiStreamingRunner {
  private readonly ffi;
  private readonly ctx;
  private readonly slotInFlight;
  /**
   * Accepts the narrow `LlmStreamingBinding` (see
   * `services/llm-streaming-binding.ts`), which both libelizainference
   * (through `wrapElizaInferenceFfi`) and desktop libllama adapters can
   * implement. TTS/ASR/mmap surfaces are never touched by the runner.
   */
  constructor(ffi: LlmStreamingBinding, ctx: LlmCtxHandle);
  /**
   * Runs a single generation. Follows `MtpLlamaServer.generateWithUsage()`:
   * the same single-flight rule, callback shape, and result block, except
   * for the metrics scrape (FFI has no `/metrics` endpoint).
   */
  generateWithUsage(args: FfiStreamingGenerateArgs): Promise<FfiStreamingGenerateResult>;
  /**
   * Async-iterable form. Each accepted-token batch is yielded as it lands,
   * so callers needing token-grained control (e.g. the voice scheduler
   * driving phrase-chunking off accept/reject events) can skip registering
   * a callback. It still goes through `generateWithUsage` via a pump, so
   * the single-flight rule applies here too.
   */
  generateStream(args: FfiStreamingGenerateArgs): AsyncIterable<LlmStreamStep>;
  /**
   * Writes the streaming slot's KV state to disk. Call it between turns:
   * a mid-stream call is racy and the FFI side may refuse it. Exposed so
   * the conversation registry can persist across mobile backgrounding the
   * way `MtpLlamaServer.persistSlot` does.
   */
  saveSlot(stream: LlmStreamHandle, filename: string): void;
  /** Loads a previously saved slot KV file into a fresh session. */
  restoreSlot(stream: LlmStreamHandle, filename: string): void;
  private runGenerate;
  /**
   * Shared inner loop: opens the session, runs the prefill + next pump,
   * passes every step to `onStep` and to the optional caller callbacks,
   * and wires abort + cancel.
   */
  private runGenerateInner;
}
|
|
122
|
+
//# sourceMappingURL=ffi-streaming-runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ffi-streaming-runner.d.ts","sourceRoot":"","sources":["ffi-streaming-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAIH,OAAO,KAAK,EACX,YAAY,EACZ,mBAAmB,EACnB,MAAM,yBAAyB,CAAC;AACjC,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC3E,OAAO,KAAK,EAAa,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAEpE,MAAM,WAAW,wBAAwB;IACxC,6DAA6D;IAC7D,YAAY,EAAE,UAAU,CAAC;IACzB,2DAA2D;IAC3D,MAAM,EAAE,MAAM,CAAC;IACf,4EAA4E;IAC5E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,2EAA2E;IAC3E,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,8DAA8D;IAC9D,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,mBAAmB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACvE;AAED,MAAM,WAAW,0BAA0B;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;CACjB;AAMD;;GAEG;AACH,qBAAa,kBAAkB;IAU7B,OAAO,CAAC,QAAQ,CAAC,GAAG;IACpB,OAAO,CAAC,QAAQ,CAAC,GAAG;IAVrB,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAoC;IAEjE;;;;;OAKG;gBAEe,GAAG,EAAE,mBAAmB,EACxB,GAAG,EAAE,YAAY;IAGnC;;;;OAIG;IACG,iBAAiB,CACtB,IAAI,EAAE,wBAAwB,GAC5B,OAAO,CAAC,0BAA0B,CAAC;IAsBtC;;;;;;OAMG;IACI,cAAc,CACpB,IAAI,EAAE,wBAAwB,GAC5B,aAAa,CAAC,aAAa,CAAC;IAmD/B;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IASzD,oEAAoE;IACpE,WAAW,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;YAW9C,WAAW;IA2BzB;;;;OAIG;YACW,gBAAgB;CAmG9B"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from "vitest";
|
|
2
|
+
import { FfiStreamingRunner } from "./ffi-streaming-runner";
|
|
3
|
+
import type {
|
|
4
|
+
LlmCtxHandle,
|
|
5
|
+
LlmStreamingBinding,
|
|
6
|
+
} from "./llm-streaming-binding";
|
|
7
|
+
import type { LlmStreamHandle } from "./voice/ffi-bindings";
|
|
8
|
+
|
|
9
|
+
/**
 * Prewarm behaviour of FfiStreamingRunner: a request with `maxTokens: 0`
 * is expected to open a stream, prefill the prompt, close the stream, and
 * never ask the binding for a next token.
 */
describe("FfiStreamingRunner prewarm", () => {
	it("treats maxTokens: 0 as prefill-only and never calls next-token generation", async () => {
		// Handles are cast bigint literals; the assertions below only compare
		// them by value.
		const streamHandle = 7n as LlmStreamHandle;
		const ctxHandle = 1n as LlmCtxHandle;
		const prompt = new Int32Array([11, 12, 13]);

		// One spy per binding entry point the assertions inspect.
		const openSpy = vi.fn().mockReturnValue(streamHandle);
		const prefillSpy = vi.fn();
		// Returns a finished one-token step if called; the test asserts it
		// is never called.
		const nextSpy = vi.fn().mockReturnValue({
			tokens: [1],
			text: "x",
			done: true,
			drafterDrafted: 0,
			drafterAccepted: 0,
		});
		const closeSpy = vi.fn();
		const chunkSpy = vi.fn();

		const fakeBinding: LlmStreamingBinding = {
			llmStreamSupported: () => true,
			llmStreamOpen: openSpy,
			llmStreamPrefill: prefillSpy,
			llmStreamNext: nextSpy,
			llmStreamCancel: vi.fn(),
			llmStreamClose: closeSpy,
		};

		const subject = new FfiStreamingRunner(fakeBinding, ctxHandle);

		// maxTokens: 0 is the setting under test.
		const request = {
			promptTokens: prompt,
			slotId: 0,
			maxTokens: 0,
			temperature: 0,
			topP: 1,
			topK: 0,
			repeatPenalty: 1,
			draftMin: 0,
			draftMax: 0,
			draftModelPath: null,
			onTextChunk: chunkSpy,
		};
		const outcome = await subject.generateWithUsage(request);

		// The stream is opened once and prefilled with the prompt.
		expect(openSpy).toHaveBeenCalledTimes(1);
		expect(prefillSpy).toHaveBeenCalledWith({
			stream: streamHandle,
			tokens: prompt,
		});

		// No decode step ran and no text chunk was delivered.
		expect(nextSpy).not.toHaveBeenCalled();
		expect(chunkSpy).not.toHaveBeenCalled();

		// The stream is closed.
		expect(closeSpy).toHaveBeenCalledWith(streamHandle);

		// The result reports empty text, no first-token time and zero
		// drafted/accepted counts.
		const expectedOutcome = {
			text: "",
			slotId: 0,
			firstTokenMs: null,
			drafted: 0,
			accepted: 0,
		};
		expect(outcome).toEqual(expectedOutcome);
	});
});
|